/*
 * Copyright (c) 2016 Hisilicon Limited.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/acpi.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_cache.h>

#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_hem.h"
#include "hns_roce_hw_v2.h"

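/*
 * Program a port's MAC address into hardware. On HIP09 and newer
 * revisions the MAC is managed elsewhere, so the helper returns early;
 * otherwise the address is cached in hr_dev->dev_addr and handed to the
 * engine-specific set_mac() hook.
 */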
static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u32 port,
			    const u8 *addr)
{
	u8 phy_port;
	u32 i;

	if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
		return 0;

	if (!memcmp(hr_dev->dev_addr[port], addr, ETH_ALEN))
		return 0;

	for (i = 0; i < ETH_ALEN; i++)
		hr_dev->dev_addr[port][i] = addr[i];

	phy_port = hr_dev->iboe.phy_port[port];
	return hr_dev->hw->set_mac(hr_dev, phy_port, addr);
}

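/*
 * GID table management: the IB core calls add_gid/del_gid when RoCE GID
 * entries change. Both validate the port index and forward the request
 * to the engine-specific set_gid() hook.
 */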
static int hns_roce_add_gid(const struct ib_gid_attr *attr, void **context)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(attr->device);
	u32 port = attr->port_num - 1;
	int ret;

	if (port >= hr_dev->caps.num_ports)
		return -EINVAL;

	ret = hr_dev->hw->set_gid(hr_dev, attr->index, &attr->gid, attr);

	return ret;
}

static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(attr->device);
	u32 port = attr->port_num - 1;
	int ret;

	if (port >= hr_dev->caps.num_ports)
		return -EINVAL;

	ret = hr_dev->hw->set_gid(hr_dev, attr->index, NULL, NULL);

	return ret;
}

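/*
 * Netdevice event handling: events that affect the port MAC (register,
 * address change, link up/change) refresh it via hns_roce_set_mac();
 * other events are only logged at debug level.
 */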
static int handle_en_event(struct hns_roce_dev *hr_dev, u32 port,
			   unsigned long event)
{
	struct device *dev = hr_dev->dev;
	struct net_device *netdev;
	int ret = 0;

	netdev = hr_dev->iboe.netdevs[port];
	if (!netdev) {
		dev_err(dev, "can't find netdev on port(%u)!\n", port);
		return -ENODEV;
	}

	switch (event) {
	case NETDEV_UP:
	case NETDEV_CHANGE:
	case NETDEV_REGISTER:
	case NETDEV_CHANGEADDR:
		ret = hns_roce_set_mac(hr_dev, port, netdev->dev_addr);
		break;
	case NETDEV_DOWN:
		/*
		 * In v1 engine, only support all ports closed together.
		 */
		break;
	default:
		dev_dbg(dev, "NETDEV event = 0x%x!\n", (u32)(event));
		break;
	}

	return ret;
}

static int hns_roce_netdev_event(struct notifier_block *self,
				 unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct hns_roce_ib_iboe *iboe = NULL;
	struct hns_roce_dev *hr_dev = NULL;
	int ret;
	u32 port;

	hr_dev = container_of(self, struct hns_roce_dev, iboe.nb);
	iboe = &hr_dev->iboe;

	for (port = 0; port < hr_dev->caps.num_ports; port++) {
		if (dev == iboe->netdevs[port]) {
			ret = handle_en_event(hr_dev, port, event);
			if (ret)
				return NOTIFY_DONE;
			break;
		}
	}

	return NOTIFY_DONE;
}

static int hns_roce_setup_mtu_mac(struct hns_roce_dev *hr_dev)
{
	int ret;
	u8 i;

	for (i = 0; i < hr_dev->caps.num_ports; i++) {
		ret = hns_roce_set_mac(hr_dev, i,
				       hr_dev->iboe.netdevs[i]->dev_addr);
		if (ret)
			return ret;
	}

	return 0;
}

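/*
 * Device and port query callbacks for the IB core. All capability values
 * reported here come from hr_dev->caps, filled in during device profiling.
 */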
static int hns_roce_query_device(struct ib_device *ib_dev,
				 struct ib_device_attr *props,
				 struct ib_udata *uhw)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);

	memset(props, 0, sizeof(*props));

	props->fw_ver = hr_dev->caps.fw_ver;
	props->sys_image_guid = cpu_to_be64(hr_dev->sys_image_guid);
	props->max_mr_size = (u64)(~(0ULL));
	props->page_size_cap = hr_dev->caps.page_size_cap;
	props->vendor_id = hr_dev->vendor_id;
	props->vendor_part_id = hr_dev->vendor_part_id;
	props->hw_ver = hr_dev->hw_rev;
	props->max_qp = hr_dev->caps.num_qps;
	props->max_qp_wr = hr_dev->caps.max_wqes;
	props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT |
				  IB_DEVICE_RC_RNR_NAK_GEN;
	props->max_send_sge = hr_dev->caps.max_sq_sg;
	props->max_recv_sge = hr_dev->caps.max_rq_sg;
	props->max_sge_rd = 1;
	props->max_cq = hr_dev->caps.num_cqs;
	props->max_cqe = hr_dev->caps.max_cqes;
	props->max_mr = hr_dev->caps.num_mtpts;
	props->max_pd = hr_dev->caps.num_pds;
	props->max_qp_rd_atom = hr_dev->caps.max_qp_dest_rdma;
	props->max_qp_init_rd_atom = hr_dev->caps.max_qp_init_rdma;
	props->atomic_cap = hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_ATOMIC ?
			    IB_ATOMIC_HCA : IB_ATOMIC_NONE;
	props->max_pkeys = 1;
	props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;
	props->max_ah = INT_MAX;
	props->cq_caps.max_cq_moderation_period = HNS_ROCE_MAX_CQ_PERIOD;
	props->cq_caps.max_cq_moderation_count = HNS_ROCE_MAX_CQ_COUNT;
	if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
		props->cq_caps.max_cq_moderation_period = HNS_ROCE_MAX_CQ_PERIOD_HIP08;

	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
		props->max_srq = hr_dev->caps.num_srqs;
		props->max_srq_wr = hr_dev->caps.max_srq_wrs;
		props->max_srq_sge = hr_dev->caps.max_srq_sges;
	}

	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR &&
	    hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
		props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
		props->max_fast_reg_page_list_len = HNS_ROCE_FRMR_MAX_PA;
	}

	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
		props->device_cap_flags |= IB_DEVICE_XRC;

	return 0;
}

static int hns_roce_query_port(struct ib_device *ib_dev, u32 port_num,
			       struct ib_port_attr *props)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
	struct device *dev = hr_dev->dev;
	struct net_device *net_dev;
	unsigned long flags;
	enum ib_mtu mtu;
	u32 port;
	int ret;

	port = port_num - 1;

	/* props being zeroed by the caller, avoid zeroing it here */

	props->max_mtu = hr_dev->caps.max_mtu;
	props->gid_tbl_len = hr_dev->caps.gid_table_len[port];
	props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
				IB_PORT_VENDOR_CLASS_SUP |
				IB_PORT_BOOT_MGMT_SUP;
	props->max_msg_sz = HNS_ROCE_MAX_MSG_LEN;
	props->pkey_tbl_len = 1;
	ret = ib_get_eth_speed(ib_dev, port_num, &props->active_speed,
			       &props->active_width);
	if (ret)
		ibdev_warn(ib_dev, "failed to get speed, ret = %d.\n", ret);

	spin_lock_irqsave(&hr_dev->iboe.lock, flags);

	net_dev = hr_dev->iboe.netdevs[port];
	if (!net_dev) {
		spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
		dev_err(dev, "find netdev %u failed!\n", port);
		return -EINVAL;
	}

	mtu = iboe_get_mtu(net_dev->mtu);
	props->active_mtu = mtu ? min(props->max_mtu, mtu) : IB_MTU_256;
	props->state = netif_running(net_dev) && netif_carrier_ok(net_dev) ?
		       IB_PORT_ACTIVE : IB_PORT_DOWN;
	props->phys_state = props->state == IB_PORT_ACTIVE ?
			    IB_PORT_PHYS_STATE_LINK_UP :
			    IB_PORT_PHYS_STATE_DISABLED;

	spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);

	return 0;
}

static enum rdma_link_layer hns_roce_get_link_layer(struct ib_device *device,
						    u32 port_num)
{
	return IB_LINK_LAYER_ETHERNET;
}

static int hns_roce_query_pkey(struct ib_device *ib_dev, u32 port, u16 index,
			       u16 *pkey)
{
	if (index > 0)
		return -EINVAL;

	*pkey = PKEY_ID;

	return 0;
}

static int hns_roce_modify_device(struct ib_device *ib_dev, int mask,
				  struct ib_device_modify *props)
{
	unsigned long flags;

	if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
		return -EOPNOTSUPP;

	if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
		spin_lock_irqsave(&to_hr_dev(ib_dev)->sm_lock, flags);
		memcpy(ib_dev->node_desc, props->node_desc, NODE_DESC_SIZE);
		spin_unlock_irqrestore(&to_hr_dev(ib_dev)->sm_lock, flags);
	}

	return 0;
}

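/*
 * User mmap support: doorbell (DB) and direct WQE (DWQE) regions are
 * exposed to userspace through rdma_user_mmap entries; page offset 0 is
 * reserved for the doorbell page for compatibility.
 */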
struct hns_user_mmap_entry *
hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address,
				size_t length,
				enum hns_roce_mmap_type mmap_type)
{
	struct hns_user_mmap_entry *entry;
	int ret;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return NULL;

	entry->address = address;
	entry->mmap_type = mmap_type;

	switch (mmap_type) {
	/* pgoff 0 must be used by DB for compatibility */
	case HNS_ROCE_MMAP_TYPE_DB:
		ret = rdma_user_mmap_entry_insert_exact(
				ucontext, &entry->rdma_entry, length, 0);
		break;
	case HNS_ROCE_MMAP_TYPE_DWQE:
		ret = rdma_user_mmap_entry_insert_range(
				ucontext, &entry->rdma_entry, length, 1,
				U32_MAX);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	if (ret) {
		kfree(entry);
		return NULL;
	}

	return entry;
}

static void hns_roce_dealloc_uar_entry(struct hns_roce_ucontext *context)
{
	if (context->db_mmap_entry)
		rdma_user_mmap_entry_remove(
			&context->db_mmap_entry->rdma_entry);
}

static int hns_roce_alloc_uar_entry(struct ib_ucontext *uctx)
{
	struct hns_roce_ucontext *context = to_hr_ucontext(uctx);
	u64 address;

	address = context->uar.pfn << PAGE_SHIFT;
	context->db_mmap_entry = hns_roce_user_mmap_entry_insert(
		uctx, address, PAGE_SIZE, HNS_ROCE_MMAP_TYPE_DB);
	if (!context->db_mmap_entry)
		return -ENOMEM;

	return 0;
}

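/*
 * Allocate a user context: negotiate optional features (extended SGE,
 * RQ/CQE inline, congestion type) with userspace via udata, allocate a
 * UAR and its doorbell mmap entry, and report the result in resp.
 */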
static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
				   struct ib_udata *udata)
{
	struct hns_roce_ucontext *context = to_hr_ucontext(uctx);
	struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
	struct hns_roce_ib_alloc_ucontext_resp resp = {};
	struct hns_roce_ib_alloc_ucontext ucmd = {};
	int ret = -EAGAIN;

	if (!hr_dev->active)
		goto error_out;

	resp.qp_tab_size = hr_dev->caps.num_qps;
	resp.srq_tab_size = hr_dev->caps.num_srqs;

	ret = ib_copy_from_udata(&ucmd, udata,
				 min(udata->inlen, sizeof(ucmd)));
	if (ret)
		goto error_out;

	if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
		context->config = ucmd.config & HNS_ROCE_EXSGE_FLAGS;

	if (context->config & HNS_ROCE_EXSGE_FLAGS) {
		resp.config |= HNS_ROCE_RSP_EXSGE_FLAGS;
		resp.max_inline_data = hr_dev->caps.max_sq_inline;
	}

	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
		context->config |= ucmd.config & HNS_ROCE_RQ_INLINE_FLAGS;
		if (context->config & HNS_ROCE_RQ_INLINE_FLAGS)
			resp.config |= HNS_ROCE_RSP_RQ_INLINE_FLAGS;
	}

	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQE_INLINE) {
		context->config |= ucmd.config & HNS_ROCE_CQE_INLINE_FLAGS;
		if (context->config & HNS_ROCE_CQE_INLINE_FLAGS)
			resp.config |= HNS_ROCE_RSP_CQE_INLINE_FLAGS;
	}

	if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
		resp.congest_type = hr_dev->caps.cong_cap;

	ret = hns_roce_uar_alloc(hr_dev, &context->uar);
	if (ret)
		goto error_out;

	ret = hns_roce_alloc_uar_entry(uctx);
	if (ret)
		goto error_fail_uar_entry;

	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
	    hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) {
		INIT_LIST_HEAD(&context->page_list);
		mutex_init(&context->page_mutex);
	}

	resp.cqe_size = hr_dev->caps.cqe_sz;

	ret = ib_copy_to_udata(udata, &resp,
			       min(udata->outlen, sizeof(resp)));
	if (ret)
		goto error_fail_copy_to_udata;

	return 0;

error_fail_copy_to_udata:
	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
	    hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB)
		mutex_destroy(&context->page_mutex);
	hns_roce_dealloc_uar_entry(context);

error_fail_uar_entry:
	ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx);

error_out:
	atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_UCTX_ALLOC_ERR_CNT]);

	return ret;
}

static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
	struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext);
	struct hns_roce_dev *hr_dev = to_hr_dev(ibcontext->device);

	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
	    hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB)
		mutex_destroy(&context->page_mutex);

	hns_roce_dealloc_uar_entry(context);

	ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx);
}

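/*
 * mmap entry points: hns_roce_mmap() resolves the rdma_user_mmap entry
 * for the requested page offset and maps the doorbell/DWQE region with
 * device attributes; hns_roce_free_mmap() releases the entry when the
 * core drops its last reference.
 */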
static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
	struct rdma_user_mmap_entry *rdma_entry;
	struct hns_user_mmap_entry *entry;
	phys_addr_t pfn;
	pgprot_t prot;
	int ret;

	if (hr_dev->dis_db) {
		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);
		return -EPERM;
	}

	rdma_entry = rdma_user_mmap_entry_get_pgoff(uctx, vma->vm_pgoff);
	if (!rdma_entry) {
		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);
		return -EINVAL;
	}

	entry = to_hns_mmap(rdma_entry);
	pfn = entry->address >> PAGE_SHIFT;

	switch (entry->mmap_type) {
	case HNS_ROCE_MMAP_TYPE_DB:
	case HNS_ROCE_MMAP_TYPE_DWQE:
		prot = pgprot_device(vma->vm_page_prot);
		break;
	default:
		ret = -EINVAL;
		goto out;
	}

	ret = rdma_user_mmap_io(uctx, vma, pfn, rdma_entry->npages * PAGE_SIZE,
				prot, rdma_entry);

out:
	rdma_user_mmap_entry_put(rdma_entry);
	if (ret)
		atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);

	return ret;
}

static void hns_roce_free_mmap(struct rdma_user_mmap_entry *rdma_entry)
{
	struct hns_user_mmap_entry *entry = to_hns_mmap(rdma_entry);

	kfree(entry);
}

static int hns_roce_port_immutable(struct ib_device *ib_dev, u32 port_num,
				   struct ib_port_immutable *immutable)
{
	struct ib_port_attr attr;
	int ret;

	ret = ib_query_port(ib_dev, port_num, &attr);
	if (ret)
		return ret;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;

	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
	if (to_hr_dev(ib_dev)->caps.flags & HNS_ROCE_CAP_FLAG_ROCE_V1_V2)
		immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;

	return 0;
}

static void hns_roce_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
}

static void hns_roce_get_fw_ver(struct ib_device *device, char *str)
{
	u64 fw_ver = to_hr_dev(device)->caps.fw_ver;
	unsigned int major, minor, sub_minor;

	major = upper_32_bits(fw_ver);
	minor = high_16_bits(lower_32_bits(fw_ver));
	sub_minor = low_16_bits(fw_ver);

	snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u.%04u", major, minor,
		 sub_minor);
}

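/* Per-port hardware counters exposed through the rdma_hw_stats interface. */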
#define HNS_ROCE_HW_CNT(ename, cname) \
	[HNS_ROCE_HW_##ename##_CNT].name = cname

static const struct rdma_stat_desc hns_roce_port_stats_descs[] = {
	HNS_ROCE_HW_CNT(RX_RC_PKT, "rx_rc_pkt"),
	HNS_ROCE_HW_CNT(RX_UC_PKT, "rx_uc_pkt"),
	HNS_ROCE_HW_CNT(RX_UD_PKT, "rx_ud_pkt"),
	HNS_ROCE_HW_CNT(RX_XRC_PKT, "rx_xrc_pkt"),
	HNS_ROCE_HW_CNT(RX_PKT, "rx_pkt"),
	HNS_ROCE_HW_CNT(RX_ERR_PKT, "rx_err_pkt"),
	HNS_ROCE_HW_CNT(RX_CNP_PKT, "rx_cnp_pkt"),
	HNS_ROCE_HW_CNT(TX_RC_PKT, "tx_rc_pkt"),
	HNS_ROCE_HW_CNT(TX_UC_PKT, "tx_uc_pkt"),
	HNS_ROCE_HW_CNT(TX_UD_PKT, "tx_ud_pkt"),
	HNS_ROCE_HW_CNT(TX_XRC_PKT, "tx_xrc_pkt"),
	HNS_ROCE_HW_CNT(TX_PKT, "tx_pkt"),
	HNS_ROCE_HW_CNT(TX_ERR_PKT, "tx_err_pkt"),
	HNS_ROCE_HW_CNT(TX_CNP_PKT, "tx_cnp_pkt"),
	HNS_ROCE_HW_CNT(TRP_GET_MPT_ERR_PKT, "trp_get_mpt_err_pkt"),
	HNS_ROCE_HW_CNT(TRP_GET_IRRL_ERR_PKT, "trp_get_irrl_err_pkt"),
	HNS_ROCE_HW_CNT(ECN_DB, "ecn_doorbell"),
	HNS_ROCE_HW_CNT(RX_BUF, "rx_buffer"),
	HNS_ROCE_HW_CNT(TRP_RX_SOF, "trp_rx_sof"),
	HNS_ROCE_HW_CNT(CQ_CQE, "cq_cqe"),
	HNS_ROCE_HW_CNT(CQ_POE, "cq_poe"),
	HNS_ROCE_HW_CNT(CQ_NOTIFY, "cq_notify"),
};

static struct rdma_hw_stats *hns_roce_alloc_hw_port_stats(
				struct ib_device *device, u32 port_num)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(device);

	if (port_num > hr_dev->caps.num_ports) {
		ibdev_err(device, "invalid port num.\n");
		return NULL;
	}

	return rdma_alloc_hw_stats_struct(hns_roce_port_stats_descs,
					  ARRAY_SIZE(hns_roce_port_stats_descs),
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}

static int hns_roce_get_hw_stats(struct ib_device *device,
				 struct rdma_hw_stats *stats,
				 u32 port, int index)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(device);
	int num_counters = HNS_ROCE_HW_CNT_TOTAL;
	int ret;

	if (port == 0)
		return 0;

	if (port > hr_dev->caps.num_ports)
		return -EINVAL;

	ret = hr_dev->hw->query_hw_counter(hr_dev, stats->value, port,
					   &num_counters);
	if (ret) {
		ibdev_err(device, "failed to query hw counter, ret = %d\n",
			  ret);
		return ret;
	}

	return num_counters;
}

static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
{
	struct hns_roce_ib_iboe *iboe = &hr_dev->iboe;

	hr_dev->active = false;
	unregister_netdevice_notifier(&iboe->nb);
	ib_unregister_device(&hr_dev->ib_dev);
}

static const struct ib_device_ops hns_roce_dev_ops = {
	.owner = THIS_MODULE,
	.driver_id = RDMA_DRIVER_HNS,
	.uverbs_abi_ver = 1,
	.uverbs_no_driver_id_binding = 1,

	.get_dev_fw_str = hns_roce_get_fw_ver,
	.add_gid = hns_roce_add_gid,
	.alloc_pd = hns_roce_alloc_pd,
	.alloc_ucontext = hns_roce_alloc_ucontext,
	.create_ah = hns_roce_create_ah,
	.create_user_ah = hns_roce_create_ah,
	.create_cq = hns_roce_create_cq,
	.create_qp = hns_roce_create_qp,
	.dealloc_pd = hns_roce_dealloc_pd,
	.dealloc_ucontext = hns_roce_dealloc_ucontext,
	.del_gid = hns_roce_del_gid,
	.dereg_mr = hns_roce_dereg_mr,
	.destroy_ah = hns_roce_destroy_ah,
	.destroy_cq = hns_roce_destroy_cq,
	.disassociate_ucontext = hns_roce_disassociate_ucontext,
	.get_dma_mr = hns_roce_get_dma_mr,
	.get_link_layer = hns_roce_get_link_layer,
	.get_port_immutable = hns_roce_port_immutable,
	.mmap = hns_roce_mmap,
	.mmap_free = hns_roce_free_mmap,
	.modify_device = hns_roce_modify_device,
	.modify_qp = hns_roce_modify_qp,
	.query_ah = hns_roce_query_ah,
	.query_device = hns_roce_query_device,
	.query_pkey = hns_roce_query_pkey,
	.query_port = hns_roce_query_port,
	.reg_user_mr = hns_roce_reg_user_mr,

	INIT_RDMA_OBJ_SIZE(ib_ah, hns_roce_ah, ibah),
	INIT_RDMA_OBJ_SIZE(ib_cq, hns_roce_cq, ib_cq),
	INIT_RDMA_OBJ_SIZE(ib_pd, hns_roce_pd, ibpd),
	INIT_RDMA_OBJ_SIZE(ib_qp, hns_roce_qp, ibqp),
	INIT_RDMA_OBJ_SIZE(ib_ucontext, hns_roce_ucontext, ibucontext),
};

static const struct ib_device_ops hns_roce_dev_hw_stats_ops = {
	.alloc_hw_port_stats = hns_roce_alloc_hw_port_stats,
	.get_hw_stats = hns_roce_get_hw_stats,
};

static const struct ib_device_ops hns_roce_dev_mr_ops = {
	.rereg_user_mr = hns_roce_rereg_user_mr,
};

static const struct ib_device_ops hns_roce_dev_mw_ops = {
	.alloc_mw = hns_roce_alloc_mw,
	.dealloc_mw = hns_roce_dealloc_mw,

	INIT_RDMA_OBJ_SIZE(ib_mw, hns_roce_mw, ibmw),
};

static const struct ib_device_ops hns_roce_dev_frmr_ops = {
	.alloc_mr = hns_roce_alloc_mr,
	.map_mr_sg = hns_roce_map_mr_sg,
};

static const struct ib_device_ops hns_roce_dev_srq_ops = {
	.create_srq = hns_roce_create_srq,
	.destroy_srq = hns_roce_destroy_srq,

	INIT_RDMA_OBJ_SIZE(ib_srq, hns_roce_srq, ibsrq),
};

static const struct ib_device_ops hns_roce_dev_xrcd_ops = {
	.alloc_xrcd = hns_roce_alloc_xrcd,
	.dealloc_xrcd = hns_roce_dealloc_xrcd,

	INIT_RDMA_OBJ_SIZE(ib_xrcd, hns_roce_xrcd, ibxrcd),
};

static const struct ib_device_ops hns_roce_dev_restrack_ops = {
	.fill_res_cq_entry = hns_roce_fill_res_cq_entry,
	.fill_res_cq_entry_raw = hns_roce_fill_res_cq_entry_raw,
	.fill_res_qp_entry = hns_roce_fill_res_qp_entry,
	.fill_res_qp_entry_raw = hns_roce_fill_res_qp_entry_raw,
	.fill_res_mr_entry = hns_roce_fill_res_mr_entry,
	.fill_res_mr_entry_raw = hns_roce_fill_res_mr_entry_raw,
	.fill_res_srq_entry = hns_roce_fill_res_srq_entry,
	.fill_res_srq_entry_raw = hns_roce_fill_res_srq_entry_raw,
};

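/*
 * Register the device with the IB core: install the generic and
 * engine-specific ib_device_ops (plus the optional MR/MW/FRMR/SRQ/XRCD
 * and HW-stats ops, depending on reported capabilities), bind the
 * netdevs, then set up MTU/MAC and the netdevice notifier.
 */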
static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
{
	int ret;
	struct hns_roce_ib_iboe *iboe = NULL;
	struct ib_device *ib_dev = NULL;
	struct device *dev = hr_dev->dev;
	unsigned int i;

	iboe = &hr_dev->iboe;
	spin_lock_init(&iboe->lock);

	ib_dev = &hr_dev->ib_dev;

	ib_dev->node_type = RDMA_NODE_IB_CA;
	ib_dev->dev.parent = dev;

	ib_dev->phys_port_cnt = hr_dev->caps.num_ports;
	ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey;
	ib_dev->num_comp_vectors = hr_dev->caps.num_comp_vectors;

	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR)
		ib_set_device_ops(ib_dev, &hns_roce_dev_mr_ops);

	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW)
		ib_set_device_ops(ib_dev, &hns_roce_dev_mw_ops);

	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR)
		ib_set_device_ops(ib_dev, &hns_roce_dev_frmr_ops);

	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
		ib_set_device_ops(ib_dev, &hns_roce_dev_srq_ops);
		ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_srq_ops);
	}

	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
		ib_set_device_ops(ib_dev, &hns_roce_dev_xrcd_ops);

	if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09 &&
	    !hr_dev->is_vf)
		ib_set_device_ops(ib_dev, &hns_roce_dev_hw_stats_ops);

	ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops);
	ib_set_device_ops(ib_dev, &hns_roce_dev_ops);
	ib_set_device_ops(ib_dev, &hns_roce_dev_restrack_ops);
	for (i = 0; i < hr_dev->caps.num_ports; i++) {
		if (!hr_dev->iboe.netdevs[i])
			continue;

		ret = ib_device_set_netdev(ib_dev, hr_dev->iboe.netdevs[i],
					   i + 1);
		if (ret)
			return ret;
	}
	dma_set_max_seg_size(dev, UINT_MAX);
	ret = ib_register_device(ib_dev, "hns_%d", dev);
	if (ret) {
		dev_err(dev, "ib_register_device failed!\n");
		return ret;
	}

	ret = hns_roce_setup_mtu_mac(hr_dev);
	if (ret) {
		dev_err(dev, "setup_mtu_mac failed!\n");
		goto error_failed_setup_mtu_mac;
	}

	iboe->nb.notifier_call = hns_roce_netdev_event;
	ret = register_netdevice_notifier(&iboe->nb);
	if (ret) {
		dev_err(dev, "register_netdevice_notifier failed!\n");
		goto error_failed_setup_mtu_mac;
	}

	hr_dev->active = true;

	return 0;

error_failed_setup_mtu_mac:
	ib_unregister_device(ib_dev);

	return ret;
}

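/*
 * HEM (Hardware Entry Memory) setup: allocate the context tables (MTPT,
 * QPC, IRRL/TRRL, CQC, and the optional SRQC/SCCC/timer/GMV tables) that
 * the hardware uses to look up object state. On failure the tables are
 * torn down in reverse order.
 */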
static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
{
	struct device *dev = hr_dev->dev;
	int ret;

	ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table,
				      HEM_TYPE_MTPT, hr_dev->caps.mtpt_entry_sz,
				      hr_dev->caps.num_mtpts);
	if (ret) {
		dev_err(dev, "failed to init MTPT context memory, aborting.\n");
		return ret;
	}

	ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.qp_table,
				      HEM_TYPE_QPC, hr_dev->caps.qpc_sz,
				      hr_dev->caps.num_qps);
	if (ret) {
		dev_err(dev, "failed to init QP context memory, aborting.\n");
		goto err_unmap_dmpt;
	}

	ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.irrl_table,
				      HEM_TYPE_IRRL,
				      hr_dev->caps.irrl_entry_sz *
				      hr_dev->caps.max_qp_init_rdma,
				      hr_dev->caps.num_qps);
	if (ret) {
		dev_err(dev, "failed to init irrl_table memory, aborting.\n");
		goto err_unmap_qp;
	}

	if (hr_dev->caps.trrl_entry_sz) {
		ret = hns_roce_init_hem_table(hr_dev,
					      &hr_dev->qp_table.trrl_table,
					      HEM_TYPE_TRRL,
					      hr_dev->caps.trrl_entry_sz *
					      hr_dev->caps.max_qp_dest_rdma,
					      hr_dev->caps.num_qps);
		if (ret) {
			dev_err(dev,
				"failed to init trrl_table memory, aborting.\n");
			goto err_unmap_irrl;
		}
	}

	ret = hns_roce_init_hem_table(hr_dev, &hr_dev->cq_table.table,
				      HEM_TYPE_CQC, hr_dev->caps.cqc_entry_sz,
				      hr_dev->caps.num_cqs);
	if (ret) {
		dev_err(dev, "failed to init CQ context memory, aborting.\n");
		goto err_unmap_trrl;
	}

	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
		ret = hns_roce_init_hem_table(hr_dev, &hr_dev->srq_table.table,
					      HEM_TYPE_SRQC,
					      hr_dev->caps.srqc_entry_sz,
					      hr_dev->caps.num_srqs);
		if (ret) {
			dev_err(dev,
				"failed to init SRQ context memory, aborting.\n");
			goto err_unmap_cq;
		}
	}

	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) {
		ret = hns_roce_init_hem_table(hr_dev,
					      &hr_dev->qp_table.sccc_table,
					      HEM_TYPE_SCCC,
					      hr_dev->caps.sccc_sz,
					      hr_dev->caps.num_qps);
		if (ret) {
			dev_err(dev,
				"failed to init SCC context memory, aborting.\n");
			goto err_unmap_srq;
		}
	}

	if (hr_dev->caps.qpc_timer_entry_sz) {
		ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qpc_timer_table,
					      HEM_TYPE_QPC_TIMER,
					      hr_dev->caps.qpc_timer_entry_sz,
					      hr_dev->caps.qpc_timer_bt_num);
		if (ret) {
			dev_err(dev,
				"failed to init QPC timer memory, aborting.\n");
			goto err_unmap_ctx;
		}
	}

	if (hr_dev->caps.cqc_timer_entry_sz) {
		ret = hns_roce_init_hem_table(hr_dev, &hr_dev->cqc_timer_table,
					      HEM_TYPE_CQC_TIMER,
					      hr_dev->caps.cqc_timer_entry_sz,
					      hr_dev->caps.cqc_timer_bt_num);
		if (ret) {
			dev_err(dev,
				"failed to init CQC timer memory, aborting.\n");
			goto err_unmap_qpc_timer;
		}
	}

	if (hr_dev->caps.gmv_entry_sz) {
		ret = hns_roce_init_hem_table(hr_dev, &hr_dev->gmv_table,
					      HEM_TYPE_GMV,
					      hr_dev->caps.gmv_entry_sz,
					      hr_dev->caps.gmv_entry_num);
		if (ret) {
			dev_err(dev,
				"failed to init gmv table memory, ret = %d\n",
				ret);
			goto err_unmap_cqc_timer;
		}
	}

	return 0;

err_unmap_cqc_timer:
	if (hr_dev->caps.cqc_timer_entry_sz)
		hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cqc_timer_table);

err_unmap_qpc_timer:
	if (hr_dev->caps.qpc_timer_entry_sz)
		hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qpc_timer_table);

err_unmap_ctx:
	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL)
		hns_roce_cleanup_hem_table(hr_dev,
					   &hr_dev->qp_table.sccc_table);

err_unmap_srq:
	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
		hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table);

err_unmap_cq:
	hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table);

err_unmap_trrl:
	if (hr_dev->caps.trrl_entry_sz)
		hns_roce_cleanup_hem_table(hr_dev,
					   &hr_dev->qp_table.trrl_table);

err_unmap_irrl:
	hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table);

err_unmap_qp:
	hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table);

err_unmap_dmpt:
	hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table);

	return ret;
}

static void hns_roce_teardown_hca(struct hns_roce_dev *hr_dev)
{
	hns_roce_cleanup_bitmap(hr_dev);

	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
	    hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB)
		mutex_destroy(&hr_dev->pgdir_mutex);
}

/**
 * hns_roce_setup_hca - setup host channel adapter
 * @hr_dev: pointer to hns roce device
 */
static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
{
	struct device *dev = hr_dev->dev;
	int ret;

	spin_lock_init(&hr_dev->sm_lock);

	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
	    hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) {
		INIT_LIST_HEAD(&hr_dev->pgdir_list);
		mutex_init(&hr_dev->pgdir_mutex);
	}

	hns_roce_init_uar_table(hr_dev);

	ret = hns_roce_uar_alloc(hr_dev, &hr_dev->priv_uar);
	if (ret) {
		dev_err(dev, "failed to allocate priv_uar.\n");
		goto err_uar_table_free;
	}

	ret = hns_roce_init_qp_table(hr_dev);
	if (ret) {
		dev_err(dev, "failed to init qp_table.\n");
		goto err_uar_table_free;
	}

	hns_roce_init_pd_table(hr_dev);

	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
		hns_roce_init_xrcd_table(hr_dev);

	hns_roce_init_mr_table(hr_dev);

	hns_roce_init_cq_table(hr_dev);

	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
		hns_roce_init_srq_table(hr_dev);

	return 0;

err_uar_table_free:
	ida_destroy(&hr_dev->uar_ida.ida);
	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
	    hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB)
		mutex_destroy(&hr_dev->pgdir_mutex);

	return ret;
}

static void check_and_get_armed_cq(struct list_head *cq_list, struct ib_cq *cq)
{
	struct hns_roce_cq *hr_cq = to_hr_cq(cq);
	unsigned long flags;

	spin_lock_irqsave(&hr_cq->lock, flags);
	if (cq->comp_handler) {
		if (!hr_cq->is_armed) {
			hr_cq->is_armed = 1;
			list_add_tail(&hr_cq->node, cq_list);
		}
	}
	spin_unlock_irqrestore(&hr_cq->lock, flags);
}

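/*
 * On a device error, walk the QP list and collect every completion-enabled
 * CQ attached to a QP that still has outstanding work, then report a
 * completion on each so consumers are woken up and can observe the failure.
 */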
void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev)
{
	struct hns_roce_qp *hr_qp;
	struct hns_roce_cq *hr_cq;
	struct list_head cq_list;
	unsigned long flags_qp;
	unsigned long flags;

	INIT_LIST_HEAD(&cq_list);

	spin_lock_irqsave(&hr_dev->qp_list_lock, flags);
	list_for_each_entry(hr_qp, &hr_dev->qp_list, node) {
		spin_lock_irqsave(&hr_qp->sq.lock, flags_qp);
		if (hr_qp->sq.tail != hr_qp->sq.head)
			check_and_get_armed_cq(&cq_list, hr_qp->ibqp.send_cq);
		spin_unlock_irqrestore(&hr_qp->sq.lock, flags_qp);

		spin_lock_irqsave(&hr_qp->rq.lock, flags_qp);
		if ((!hr_qp->ibqp.srq) && (hr_qp->rq.tail != hr_qp->rq.head))
			check_and_get_armed_cq(&cq_list, hr_qp->ibqp.recv_cq);
		spin_unlock_irqrestore(&hr_qp->rq.lock, flags_qp);
	}

	list_for_each_entry(hr_cq, &cq_list, node)
		hns_roce_cq_completion(hr_dev, hr_cq->cqn);

	spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags);
}

static int hns_roce_alloc_dfx_cnt(struct hns_roce_dev *hr_dev)
{
	hr_dev->dfx_cnt = kvcalloc(HNS_ROCE_DFX_CNT_TOTAL, sizeof(atomic64_t),
				   GFP_KERNEL);
	if (!hr_dev->dfx_cnt)
		return -ENOMEM;

	return 0;
}

static void hns_roce_dealloc_dfx_cnt(struct hns_roce_dev *hr_dev)
{
	kvfree(hr_dev->dfx_cnt);
}

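/*
 * Main init sequence: command queue, engine profile, mailbox commands,
 * EQs, HEM tables, HCA resources, engine hw_init and IB device
 * registration, unwound in reverse order on error and in hns_roce_exit().
 */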
int hns_roce_init(struct hns_roce_dev *hr_dev)
{
	struct device *dev = hr_dev->dev;
	int ret;

	hr_dev->is_reset = false;

	ret = hns_roce_alloc_dfx_cnt(hr_dev);
	if (ret)
		return ret;

	if (hr_dev->hw->cmq_init) {
		ret = hr_dev->hw->cmq_init(hr_dev);
		if (ret) {
			dev_err(dev, "init RoCE Command Queue failed!\n");
			goto error_failed_alloc_dfx_cnt;
		}
	}

	ret = hr_dev->hw->hw_profile(hr_dev);
	if (ret) {
		dev_err(dev, "get RoCE engine profile failed!\n");
		goto error_failed_cmd_init;
	}

	ret = hns_roce_cmd_init(hr_dev);
	if (ret) {
		dev_err(dev, "cmd init failed!\n");
		goto error_failed_cmd_init;
	}

	/* EQ depends on poll mode, event mode depends on EQ */
	ret = hr_dev->hw->init_eq(hr_dev);
	if (ret) {
		dev_err(dev, "eq init failed!\n");
		goto error_failed_eq_table;
	}

	if (hr_dev->cmd_mod) {
		ret = hns_roce_cmd_use_events(hr_dev);
		if (ret)
			dev_warn(dev,
				 "Cmd event mode failed, set back to poll!\n");
	}

	ret = hns_roce_init_hem(hr_dev);
	if (ret) {
		dev_err(dev, "init HEM(Hardware Entry Memory) failed!\n");
		goto error_failed_init_hem;
	}

	ret = hns_roce_setup_hca(hr_dev);
	if (ret) {
		dev_err(dev, "setup hca failed!\n");
		goto error_failed_setup_hca;
	}

	if (hr_dev->hw->hw_init) {
		ret = hr_dev->hw->hw_init(hr_dev);
		if (ret) {
			dev_err(dev, "hw_init failed!\n");
			goto error_failed_engine_init;
		}
	}

	INIT_LIST_HEAD(&hr_dev->qp_list);
	spin_lock_init(&hr_dev->qp_list_lock);

	ret = hns_roce_register_device(hr_dev);
	if (ret)
		goto error_failed_register_device;

	hns_roce_register_debugfs(hr_dev);

	return 0;

error_failed_register_device:
	if (hr_dev->hw->hw_exit)
		hr_dev->hw->hw_exit(hr_dev);

error_failed_engine_init:
	hns_roce_teardown_hca(hr_dev);

error_failed_setup_hca:
	hns_roce_cleanup_hem(hr_dev);

error_failed_init_hem:
	if (hr_dev->cmd_mod)
		hns_roce_cmd_use_polling(hr_dev);
	hr_dev->hw->cleanup_eq(hr_dev);

error_failed_eq_table:
	hns_roce_cmd_cleanup(hr_dev);

error_failed_cmd_init:
	if (hr_dev->hw->cmq_exit)
		hr_dev->hw->cmq_exit(hr_dev);

error_failed_alloc_dfx_cnt:
	hns_roce_dealloc_dfx_cnt(hr_dev);

	return ret;
}

void hns_roce_exit(struct hns_roce_dev *hr_dev)
{
	hns_roce_unregister_debugfs(hr_dev);
	hns_roce_unregister_device(hr_dev);

	if (hr_dev->hw->hw_exit)
		hr_dev->hw->hw_exit(hr_dev);
	hns_roce_teardown_hca(hr_dev);
	hns_roce_cleanup_hem(hr_dev);

	if (hr_dev->cmd_mod)
		hns_roce_cmd_use_polling(hr_dev);

	hr_dev->hw->cleanup_eq(hr_dev);
	hns_roce_cmd_cleanup(hr_dev);
	if (hr_dev->hw->cmq_exit)
		hr_dev->hw->cmq_exit(hr_dev);
	hns_roce_dealloc_dfx_cnt(hr_dev);
}

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Wei Hu <xavier.huwei@huawei.com>");
MODULE_AUTHOR("Nenglong Zhao <zhaonenglong@hisilicon.com>");
MODULE_AUTHOR("Lijun Ou <oulijun@huawei.com>");
MODULE_DESCRIPTION("HNS RoCE Driver");