// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
 * Copyright (c) 1999-2019, Mellanox Technologies, Inc. All rights reserved.
 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
 */

#include <linux/completion.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/random.h>
#include <linux/igmp.h>
#include <linux/xarray.h>
#include <linux/inetdevice.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <net/route.h>

#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/ip_fib.h>
#include <net/ip6_route.h>

#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
#include <rdma/rdma_netlink.h>
#include <rdma/ib.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include <rdma/ib_sa.h>
#include <rdma/iw_cm.h>

#include "core_priv.h"
#include "cma_priv.h"
#include "cma_trace.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
MODULE_LICENSE("Dual BSD/GPL");

#define CMA_CM_RESPONSE_TIMEOUT 20
#define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000
#define CMA_MAX_CM_RETRIES 15
#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
#define CMA_IBOE_PACKET_LIFETIME 18
#define CMA_PREFERRED_ROCE_GID_TYPE IB_GID_TYPE_ROCE_UDP_ENCAP

static const char * const cma_events[] = {
	[RDMA_CM_EVENT_ADDR_RESOLVED]	 = "address resolved",
	[RDMA_CM_EVENT_ADDR_ERROR]	 = "address error",
	[RDMA_CM_EVENT_ROUTE_RESOLVED]	 = "route resolved",
	[RDMA_CM_EVENT_ROUTE_ERROR]	 = "route error",
	[RDMA_CM_EVENT_CONNECT_REQUEST]	 = "connect request",
	[RDMA_CM_EVENT_CONNECT_RESPONSE] = "connect response",
	[RDMA_CM_EVENT_CONNECT_ERROR]	 = "connect error",
	[RDMA_CM_EVENT_UNREACHABLE]	 = "unreachable",
	[RDMA_CM_EVENT_REJECTED]	 = "rejected",
	[RDMA_CM_EVENT_ESTABLISHED]	 = "established",
	[RDMA_CM_EVENT_DISCONNECTED]	 = "disconnected",
	[RDMA_CM_EVENT_DEVICE_REMOVAL]	 = "device removal",
	[RDMA_CM_EVENT_MULTICAST_JOIN]	 = "multicast join",
	[RDMA_CM_EVENT_MULTICAST_ERROR]	 = "multicast error",
	[RDMA_CM_EVENT_ADDR_CHANGE]	 = "address change",
	[RDMA_CM_EVENT_TIMEWAIT_EXIT]	 = "timewait exit",
};

const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
{
	size_t index = event;

	return (index < ARRAY_SIZE(cma_events) && cma_events[index]) ?
			cma_events[index] : "unrecognized event";
}
EXPORT_SYMBOL(rdma_event_msg);
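
/*
 * Illustrative sketch (not part of the original file): how a consumer's
 * rdma_cm_event_handler can use rdma_event_msg() above for logging. The
 * handler name my_cm_handler is hypothetical.
 */
#if 0
static int my_cm_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	pr_info("cm_id %p: %s (status %d)\n",
		id, rdma_event_msg(event->event), event->status);
	/* Returning non-zero tells the CMA to destroy this CM ID. */
	return 0;
}
#endif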

const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
						int reason)
{
	if (rdma_ib_or_roce(id->device, id->port_num))
		return ibcm_reject_msg(reason);

	if (rdma_protocol_iwarp(id->device, id->port_num))
		return iwcm_reject_msg(reason);

	WARN_ON_ONCE(1);
	return "unrecognized transport";
}
EXPORT_SYMBOL(rdma_reject_msg);

bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
{
	if (rdma_ib_or_roce(id->device, id->port_num))
		return reason == IB_CM_REJ_CONSUMER_DEFINED;

	if (rdma_protocol_iwarp(id->device, id->port_num))
		return reason == -ECONNREFUSED;

	WARN_ON_ONCE(1);
	return false;
}
EXPORT_SYMBOL(rdma_is_consumer_reject);

const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
				      struct rdma_cm_event *ev, u8 *data_len)
{
	const void *p;

	if (rdma_is_consumer_reject(id, ev->status)) {
		*data_len = ev->param.conn.private_data_len;
		p = ev->param.conn.private_data;
	} else {
		*data_len = 0;
		p = NULL;
	}
	return p;
}
EXPORT_SYMBOL(rdma_consumer_reject_data);

/**
 * rdma_iw_cm_id() - return the iw_cm_id pointer for this cm_id.
 * @id: Communication Identifier
 */
struct iw_cm_id *rdma_iw_cm_id(struct rdma_cm_id *id)
{
	struct rdma_id_private *id_priv;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (id->device->node_type == RDMA_NODE_RNIC)
		return id_priv->cm_id.iw;
	return NULL;
}
EXPORT_SYMBOL(rdma_iw_cm_id);

/**
 * rdma_res_to_id() - return the rdma_cm_id pointer for this restrack.
 * @res: rdma resource tracking entry pointer
 */
struct rdma_cm_id *rdma_res_to_id(struct rdma_restrack_entry *res)
{
	struct rdma_id_private *id_priv =
		container_of(res, struct rdma_id_private, res);

	return &id_priv->id;
}
EXPORT_SYMBOL(rdma_res_to_id);

static void cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device, void *client_data);

static struct ib_client cma_client = {
	.name   = "cma",
	.add    = cma_add_one,
	.remove = cma_remove_one
};

static struct ib_sa_client sa_client;
static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
static struct workqueue_struct *cma_wq;
static unsigned int cma_pernet_id;

struct cma_pernet {
	struct xarray tcp_ps;
	struct xarray udp_ps;
	struct xarray ipoib_ps;
	struct xarray ib_ps;
};

static struct cma_pernet *cma_pernet(struct net *net)
{
	return net_generic(net, cma_pernet_id);
}

static
struct xarray *cma_pernet_xa(struct net *net, enum rdma_ucm_port_space ps)
{
	struct cma_pernet *pernet = cma_pernet(net);

	switch (ps) {
	case RDMA_PS_TCP:
		return &pernet->tcp_ps;
	case RDMA_PS_UDP:
		return &pernet->udp_ps;
	case RDMA_PS_IPOIB:
		return &pernet->ipoib_ps;
	case RDMA_PS_IB:
		return &pernet->ib_ps;
	default:
		return NULL;
	}
}

struct cma_device {
	struct list_head	list;
	struct ib_device	*device;
	struct completion	comp;
	atomic_t		refcount;
	struct list_head	id_list;
	enum ib_gid_type	*default_gid_type;
	u8			*default_roce_tos;
};

struct rdma_bind_list {
	enum rdma_ucm_port_space ps;
	struct hlist_head	owners;
	unsigned short		port;
};

struct class_port_info_context {
	struct ib_class_port_info	*class_port_info;
	struct ib_device		*device;
	struct completion		done;
	struct ib_sa_query		*sa_query;
	u8				port_num;
};

static int cma_ps_alloc(struct net *net, enum rdma_ucm_port_space ps,
			struct rdma_bind_list *bind_list, int snum)
{
	struct xarray *xa = cma_pernet_xa(net, ps);

	return xa_insert(xa, snum, bind_list, GFP_KERNEL);
}

static struct rdma_bind_list *cma_ps_find(struct net *net,
					  enum rdma_ucm_port_space ps, int snum)
{
	struct xarray *xa = cma_pernet_xa(net, ps);

	return xa_load(xa, snum);
}

static void cma_ps_remove(struct net *net, enum rdma_ucm_port_space ps,
			  int snum)
{
	struct xarray *xa = cma_pernet_xa(net, ps);

	xa_erase(xa, snum);
}

void cma_ref_dev(struct cma_device *cma_dev)
{
	atomic_inc(&cma_dev->refcount);
}

struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter	filter,
					     void		*cookie)
{
	struct cma_device *cma_dev;
	struct cma_device *found_cma_dev = NULL;

	mutex_lock(&lock);

	list_for_each_entry(cma_dev, &dev_list, list)
		if (filter(cma_dev->device, cookie)) {
			found_cma_dev = cma_dev;
			break;
		}

	if (found_cma_dev)
		cma_ref_dev(found_cma_dev);
	mutex_unlock(&lock);
	return found_cma_dev;
}

int cma_get_default_gid_type(struct cma_device *cma_dev,
			     unsigned int port)
{
	if (!rdma_is_port_valid(cma_dev->device, port))
		return -EINVAL;

	return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)];
}

int cma_set_default_gid_type(struct cma_device *cma_dev,
			     unsigned int port,
			     enum ib_gid_type default_gid_type)
{
	unsigned long supported_gids;

	if (!rdma_is_port_valid(cma_dev->device, port))
		return -EINVAL;

	supported_gids = roce_gid_type_mask_support(cma_dev->device, port);

	if (!(supported_gids & 1 << default_gid_type))
		return -EINVAL;

	cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] =
		default_gid_type;

	return 0;
}

int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port)
{
	if (!rdma_is_port_valid(cma_dev->device, port))
		return -EINVAL;

	return cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)];
}

int cma_set_default_roce_tos(struct cma_device *cma_dev, unsigned int port,
			     u8 default_roce_tos)
{
	if (!rdma_is_port_valid(cma_dev->device, port))
		return -EINVAL;

	cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)] =
		default_roce_tos;

	return 0;
}

struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
{
	return cma_dev->device;
}

/*
 * Device removal can occur at anytime, so we need extra handling to
 * serialize notifying the user of device removal with other callbacks.
 * We do this by disabling removal notification while a callback is in process,
 * and reporting it after the callback completes.
 */

struct cma_multicast {
	struct rdma_id_private *id_priv;
	union {
		struct ib_sa_multicast *ib;
	} multicast;
	struct list_head	list;
	void			*context;
	struct sockaddr_storage	addr;
	struct kref		mcref;
	u8			join_state;
};

struct cma_work {
	struct work_struct	work;
	struct rdma_id_private	*id;
	enum rdma_cm_state	old_state;
	enum rdma_cm_state	new_state;
	struct rdma_cm_event	event;
};

struct cma_ndev_work {
	struct work_struct	work;
	struct rdma_id_private	*id;
	struct rdma_cm_event	event;
};

struct iboe_mcast_work {
	struct work_struct	 work;
	struct rdma_id_private	*id;
	struct cma_multicast	*mc;
};

union cma_ip_addr {
	struct in6_addr ip6;
	struct {
		__be32 pad[3];
		__be32 addr;
	} ip4;
};

struct cma_hdr {
	u8 cma_version;
	u8 ip_version;	/* IP version: 7:4 */
	__be16 port;
	union cma_ip_addr src_addr;
	union cma_ip_addr dst_addr;
};

#define CMA_VERSION 0x00

struct cma_req_info {
	struct sockaddr_storage listen_addr_storage;
	struct sockaddr_storage src_addr_storage;
	struct ib_device *device;
	union ib_gid local_gid;
	__be64 service_id;
	int port;
	bool has_gid;
	u16 pkey;
};

static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&id_priv->lock, flags);
	ret = (id_priv->state == comp);
	spin_unlock_irqrestore(&id_priv->lock, flags);
	return ret;
}

static int cma_comp_exch(struct rdma_id_private *id_priv,
			 enum rdma_cm_state comp, enum rdma_cm_state exch)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&id_priv->lock, flags);
	if ((ret = (id_priv->state == comp)))
		id_priv->state = exch;
	spin_unlock_irqrestore(&id_priv->lock, flags);
	return ret;
}

static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
				   enum rdma_cm_state exch)
{
	unsigned long flags;
	enum rdma_cm_state old;

	spin_lock_irqsave(&id_priv->lock, flags);
	old = id_priv->state;
	id_priv->state = exch;
	spin_unlock_irqrestore(&id_priv->lock, flags);
	return old;
}

static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
{
	return hdr->ip_version >> 4;
}

static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
{
	hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}

static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
{
	struct in_device *in_dev = NULL;

	if (ndev) {
		rtnl_lock();
		in_dev = __in_dev_get_rtnl(ndev);
		if (in_dev) {
			if (join)
				ip_mc_inc_group(in_dev,
						*(__be32 *)(mgid->raw + 12));
			else
				ip_mc_dec_group(in_dev,
						*(__be32 *)(mgid->raw + 12));
		}
		rtnl_unlock();
	}
	return (in_dev) ? 0 : -ENODEV;
}

static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
			       struct cma_device *cma_dev)
{
	cma_ref_dev(cma_dev);
	id_priv->cma_dev = cma_dev;
	id_priv->id.device = cma_dev->device;
	id_priv->id.route.addr.dev_addr.transport =
		rdma_node_get_transport(cma_dev->device->node_type);
	list_add_tail(&id_priv->list, &cma_dev->id_list);
	if (id_priv->res.kern_name)
		rdma_restrack_kadd(&id_priv->res);
	else
		rdma_restrack_uadd(&id_priv->res);
}

static void cma_attach_to_dev(struct rdma_id_private *id_priv,
			      struct cma_device *cma_dev)
{
	_cma_attach_to_dev(id_priv, cma_dev);
	id_priv->gid_type =
		cma_dev->default_gid_type[id_priv->id.port_num -
					  rdma_start_port(cma_dev->device)];
}

void cma_deref_dev(struct cma_device *cma_dev)
{
	if (atomic_dec_and_test(&cma_dev->refcount))
		complete(&cma_dev->comp);
}

static inline void release_mc(struct kref *kref)
{
	struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);

	kfree(mc->multicast.ib);
	kfree(mc);
}

static void cma_release_dev(struct rdma_id_private *id_priv)
{
	mutex_lock(&lock);
	list_del(&id_priv->list);
	cma_deref_dev(id_priv->cma_dev);
	id_priv->cma_dev = NULL;
	mutex_unlock(&lock);
}

static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
{
	return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
}

static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
{
	return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
}

static inline unsigned short cma_family(struct rdma_id_private *id_priv)
{
	return id_priv->id.route.addr.src_addr.ss_family;
}

static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
{
	struct ib_sa_mcmember_rec rec;
	int ret = 0;

	if (id_priv->qkey) {
		if (qkey && id_priv->qkey != qkey)
			return -EINVAL;
		return 0;
	}

	if (qkey) {
		id_priv->qkey = qkey;
		return 0;
	}

	switch (id_priv->id.ps) {
	case RDMA_PS_UDP:
	case RDMA_PS_IB:
		id_priv->qkey = RDMA_UDP_QKEY;
		break;
	case RDMA_PS_IPOIB:
		ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid);
		ret = ib_sa_get_mcmember_rec(id_priv->id.device,
					     id_priv->id.port_num, &rec.mgid,
					     &rec);
		if (!ret)
			id_priv->qkey = be32_to_cpu(rec.qkey);
		break;
	default:
		break;
	}
	return ret;
}

static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
{
	dev_addr->dev_type = ARPHRD_INFINIBAND;
	rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr);
	ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey));
}

static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
{
	int ret = 0;

	if (addr->sa_family != AF_IB) {
		ret = rdma_translate_ip(addr, dev_addr);
	} else {
		cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
	}

	return ret;
}

static const struct ib_gid_attr *
cma_validate_port(struct ib_device *device, u8 port,
		  enum ib_gid_type gid_type,
		  union ib_gid *gid,
		  struct rdma_id_private *id_priv)
{
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	int bound_if_index = dev_addr->bound_dev_if;
	const struct ib_gid_attr *sgid_attr;
	int dev_type = dev_addr->dev_type;
	struct net_device *ndev = NULL;

	if (!rdma_dev_access_netns(device, id_priv->id.route.addr.dev_addr.net))
		return ERR_PTR(-ENODEV);

	if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
		return ERR_PTR(-ENODEV);

	if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
		return ERR_PTR(-ENODEV);

	if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
		ndev = dev_get_by_index(dev_addr->net, bound_if_index);
		if (!ndev)
			return ERR_PTR(-ENODEV);
	} else {
		gid_type = IB_GID_TYPE_IB;
	}

	sgid_attr = rdma_find_gid_by_port(device, gid, gid_type, port, ndev);
	if (ndev)
		dev_put(ndev);
	return sgid_attr;
}

static void cma_bind_sgid_attr(struct rdma_id_private *id_priv,
			       const struct ib_gid_attr *sgid_attr)
{
	WARN_ON(id_priv->id.route.addr.dev_addr.sgid_attr);
	id_priv->id.route.addr.dev_addr.sgid_attr = sgid_attr;
}

/**
 * cma_acquire_dev_by_src_ip - Acquire cma device, port, gid attribute
 * based on source ip address.
 * @id_priv:	cm_id which should be bound to cma device
 *
 * cma_acquire_dev_by_src_ip() binds cm id to cma device, port and GID attribute
 * based on source IP address. It returns 0 on success or error code otherwise.
 * It is applicable to active and passive side cm_id.
 */
static int cma_acquire_dev_by_src_ip(struct rdma_id_private *id_priv)
{
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	const struct ib_gid_attr *sgid_attr;
	union ib_gid gid, iboe_gid, *gidp;
	struct cma_device *cma_dev;
	enum ib_gid_type gid_type;
	int ret = -ENODEV;
	unsigned int port;

	if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
	    id_priv->id.ps == RDMA_PS_IPOIB)
		return -EINVAL;

	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
		    &iboe_gid);

	memcpy(&gid, dev_addr->src_dev_addr +
	       rdma_addr_gid_offset(dev_addr), sizeof(gid));

	mutex_lock(&lock);
	list_for_each_entry(cma_dev, &dev_list, list) {
		rdma_for_each_port (cma_dev->device, port) {
			gidp = rdma_protocol_roce(cma_dev->device, port) ?
			       &iboe_gid : &gid;
			gid_type = cma_dev->default_gid_type[port - 1];
			sgid_attr = cma_validate_port(cma_dev->device, port,
						      gid_type, gidp, id_priv);
			if (!IS_ERR(sgid_attr)) {
				id_priv->id.port_num = port;
				cma_bind_sgid_attr(id_priv, sgid_attr);
				cma_attach_to_dev(id_priv, cma_dev);
				ret = 0;
				goto out;
			}
		}
	}
out:
	mutex_unlock(&lock);
	return ret;
}

/**
 * cma_ib_acquire_dev - Acquire cma device, port and SGID attribute
 * @id_priv:		cm id to bind to cma device
 * @listen_id_priv:	listener cm id to match against
 * @req:		Pointer to req structure containing incoming
 *			request information
 * cma_ib_acquire_dev() acquires cma device, port and SGID attribute when
 * rdma device matches for listen_id and incoming request. It also verifies
 * that a GID table entry is present for the source address.
 * Returns 0 on success, or returns error code otherwise.
 */
static int cma_ib_acquire_dev(struct rdma_id_private *id_priv,
			      const struct rdma_id_private *listen_id_priv,
			      struct cma_req_info *req)
{
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	const struct ib_gid_attr *sgid_attr;
	enum ib_gid_type gid_type;
	union ib_gid gid;

	if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
	    id_priv->id.ps == RDMA_PS_IPOIB)
		return -EINVAL;

	if (rdma_protocol_roce(req->device, req->port))
		rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
			    &gid);
	else
		memcpy(&gid, dev_addr->src_dev_addr +
		       rdma_addr_gid_offset(dev_addr), sizeof(gid));

	gid_type = listen_id_priv->cma_dev->default_gid_type[req->port - 1];
	sgid_attr = cma_validate_port(req->device, req->port,
				      gid_type, &gid, id_priv);
	if (IS_ERR(sgid_attr))
		return PTR_ERR(sgid_attr);

	id_priv->id.port_num = req->port;
	cma_bind_sgid_attr(id_priv, sgid_attr);
	/* Need to acquire lock to protect against reader
	 * of cma_dev->id_list such as cma_netdev_callback() and
	 * cma_process_remove().
	 */
	mutex_lock(&lock);
	cma_attach_to_dev(id_priv, listen_id_priv->cma_dev);
	mutex_unlock(&lock);
	return 0;
}

static int cma_iw_acquire_dev(struct rdma_id_private *id_priv,
			      const struct rdma_id_private *listen_id_priv)
{
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	const struct ib_gid_attr *sgid_attr;
	struct cma_device *cma_dev;
	enum ib_gid_type gid_type;
	int ret = -ENODEV;
	unsigned int port;
	union ib_gid gid;

	if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
	    id_priv->id.ps == RDMA_PS_IPOIB)
		return -EINVAL;

	memcpy(&gid, dev_addr->src_dev_addr +
	       rdma_addr_gid_offset(dev_addr), sizeof(gid));

	mutex_lock(&lock);

	cma_dev = listen_id_priv->cma_dev;
	port = listen_id_priv->id.port_num;
	gid_type = listen_id_priv->gid_type;
	sgid_attr = cma_validate_port(cma_dev->device, port,
				      gid_type, &gid, id_priv);
	if (!IS_ERR(sgid_attr)) {
		id_priv->id.port_num = port;
		cma_bind_sgid_attr(id_priv, sgid_attr);
		ret = 0;
		goto out;
	}

	list_for_each_entry(cma_dev, &dev_list, list) {
		for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
			if (listen_id_priv->cma_dev == cma_dev &&
			    listen_id_priv->id.port_num == port)
				continue;

			gid_type = cma_dev->default_gid_type[port - 1];
			sgid_attr = cma_validate_port(cma_dev->device, port,
						      gid_type, &gid, id_priv);
			if (!IS_ERR(sgid_attr)) {
				id_priv->id.port_num = port;
				cma_bind_sgid_attr(id_priv, sgid_attr);
				ret = 0;
				goto out;
			}
		}
	}

out:
	if (!ret)
		cma_attach_to_dev(id_priv, cma_dev);

	mutex_unlock(&lock);
	return ret;
}

/*
 * Select the source IB device and address to reach the destination IB address.
 */
static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
{
	struct cma_device *cma_dev, *cur_dev;
	struct sockaddr_ib *addr;
	union ib_gid gid, sgid, *dgid;
	unsigned int p;
	u16 pkey, index;
	enum ib_port_state port_state;
	int i;

	cma_dev = NULL;
	addr = (struct sockaddr_ib *) cma_dst_addr(id_priv);
	dgid = (union ib_gid *) &addr->sib_addr;
	pkey = ntohs(addr->sib_pkey);

	mutex_lock(&lock);
	list_for_each_entry(cur_dev, &dev_list, list) {
		for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
			if (!rdma_cap_af_ib(cur_dev->device, p))
				continue;

			if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
				continue;

			if (ib_get_cached_port_state(cur_dev->device, p, &port_state))
				continue;

			for (i = 0; !rdma_query_gid(cur_dev->device,
						    p, i, &gid);
			     i++) {
				if (!memcmp(&gid, dgid, sizeof(gid))) {
					cma_dev = cur_dev;
					sgid = gid;
					id_priv->id.port_num = p;
					goto found;
				}

				if (!cma_dev && (gid.global.subnet_prefix ==
				    dgid->global.subnet_prefix) &&
				    port_state == IB_PORT_ACTIVE) {
					cma_dev = cur_dev;
					sgid = gid;
					id_priv->id.port_num = p;
				}
			}
		}
	}
	mutex_unlock(&lock);
	return -ENODEV;

found:
	cma_attach_to_dev(id_priv, cma_dev);
	mutex_unlock(&lock);
	addr = (struct sockaddr_ib *)cma_src_addr(id_priv);
	memcpy(&addr->sib_addr, &sgid, sizeof(sgid));
	cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr);
	return 0;
}

static void cma_deref_id(struct rdma_id_private *id_priv)
{
	if (atomic_dec_and_test(&id_priv->refcount))
		complete(&id_priv->comp);
}

struct rdma_cm_id *__rdma_create_id(struct net *net,
				    rdma_cm_event_handler event_handler,
				    void *context, enum rdma_ucm_port_space ps,
				    enum ib_qp_type qp_type, const char *caller)
{
	struct rdma_id_private *id_priv;

	id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
	if (!id_priv)
		return ERR_PTR(-ENOMEM);

	rdma_restrack_set_task(&id_priv->res, caller);
	id_priv->res.type = RDMA_RESTRACK_CM_ID;
	id_priv->state = RDMA_CM_IDLE;
	id_priv->id.context = context;
	id_priv->id.event_handler = event_handler;
	id_priv->id.ps = ps;
	id_priv->id.qp_type = qp_type;
	id_priv->tos_set = false;
	id_priv->timeout_set = false;
	id_priv->gid_type = IB_GID_TYPE_IB;
	spin_lock_init(&id_priv->lock);
	mutex_init(&id_priv->qp_mutex);
	init_completion(&id_priv->comp);
	atomic_set(&id_priv->refcount, 1);
	mutex_init(&id_priv->handler_mutex);
	INIT_LIST_HEAD(&id_priv->listen_list);
	INIT_LIST_HEAD(&id_priv->mc_list);
	get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
	id_priv->id.route.addr.dev_addr.net = get_net(net);
	id_priv->seq_num &= 0x00ffffff;

	trace_cm_id_create(id_priv);
	return &id_priv->id;
}
EXPORT_SYMBOL(__rdma_create_id);
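
/*
 * Illustrative sketch (not part of the original file): a kernel ULP is
 * expected to obtain a CM ID through the public rdma_create_id() wrapper,
 * which supplies KBUILD_MODNAME as the @caller argument of
 * __rdma_create_id(). The names my_cm_handler and my_open_cm_id are
 * hypothetical.
 */
#if 0
static struct rdma_cm_id *my_open_cm_id(void *ctx)
{
	struct rdma_cm_id *id;

	id = rdma_create_id(&init_net, my_cm_handler, ctx,
			    RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id))
		return id;	/* e.g. ERR_PTR(-ENOMEM) on allocation failure */

	/* ... rdma_resolve_addr()/rdma_resolve_route() would follow ... */
	return id;
}
#endif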

static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	qp_attr.qp_state = IB_QPS_INIT;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;

	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret)
		return ret;

	qp_attr.qp_state = IB_QPS_RTR;
	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
	if (ret)
		return ret;

	qp_attr.qp_state = IB_QPS_RTS;
	qp_attr.sq_psn = 0;
	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);

	return ret;
}

static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	qp_attr.qp_state = IB_QPS_INIT;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;

	return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
}

int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
		   struct ib_qp_init_attr *qp_init_attr)
{
	struct rdma_id_private *id_priv;
	struct ib_qp *qp;
	int ret;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (id->device != pd->device) {
		ret = -EINVAL;
		goto out_err;
	}

	qp_init_attr->port_num = id->port_num;
	qp = ib_create_qp(pd, qp_init_attr);
	if (IS_ERR(qp)) {
		ret = PTR_ERR(qp);
		goto out_err;
	}

	if (id->qp_type == IB_QPT_UD)
		ret = cma_init_ud_qp(id_priv, qp);
	else
		ret = cma_init_conn_qp(id_priv, qp);
	if (ret)
		goto out_destroy;

	id->qp = qp;
	id_priv->qp_num = qp->qp_num;
	id_priv->srq = (qp->srq != NULL);
	trace_cm_qp_create(id_priv, pd, qp_init_attr, 0);
	return 0;
out_destroy:
	ib_destroy_qp(qp);
out_err:
	trace_cm_qp_create(id_priv, pd, qp_init_attr, ret);
	return ret;
}
EXPORT_SYMBOL(rdma_create_qp);
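
/*
 * Illustrative sketch (not part of the original file): creating a connected
 * RC QP on a resolved CM ID. PD/CQ handling is reduced to the minimum, the
 * helper name my_setup_qp is hypothetical, and a real ULP would size the
 * capabilities to its workload.
 */
#if 0
static int my_setup_qp(struct rdma_cm_id *id, struct ib_cq *cq)
{
	struct ib_qp_init_attr init_attr = {};
	struct ib_pd *pd;
	int ret;

	pd = ib_alloc_pd(id->device, 0);
	if (IS_ERR(pd))
		return PTR_ERR(pd);

	init_attr.qp_type = IB_QPT_RC;
	init_attr.send_cq = cq;
	init_attr.recv_cq = cq;
	init_attr.cap.max_send_wr = 16;
	init_attr.cap.max_recv_wr = 16;
	init_attr.cap.max_send_sge = 1;
	init_attr.cap.max_recv_sge = 1;

	/* rdma_create_qp() moves a UD QP to RTS but leaves an RC QP in INIT. */
	ret = rdma_create_qp(id, pd, &init_attr);
	if (ret)
		ib_dealloc_pd(pd);
	return ret;
}
#endif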

void rdma_destroy_qp(struct rdma_cm_id *id)
{
	struct rdma_id_private *id_priv;

	id_priv = container_of(id, struct rdma_id_private, id);
	trace_cm_qp_destroy(id_priv);
	mutex_lock(&id_priv->qp_mutex);
	ib_destroy_qp(id_priv->id.qp);
	id_priv->id.qp = NULL;
	mutex_unlock(&id_priv->qp_mutex);
}
EXPORT_SYMBOL(rdma_destroy_qp);

static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
			     struct rdma_conn_param *conn_param)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	mutex_lock(&id_priv->qp_mutex);
	if (!id_priv->id.qp) {
		ret = 0;
		goto out;
	}

	/* Need to update QP attributes from default values. */
	qp_attr.qp_state = IB_QPS_INIT;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		goto out;

	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
	if (ret)
		goto out;

	qp_attr.qp_state = IB_QPS_RTR;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		goto out;

	BUG_ON(id_priv->cma_dev->device != id_priv->id.device);

	if (conn_param)
		qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
out:
	mutex_unlock(&id_priv->qp_mutex);
	return ret;
}

static int cma_modify_qp_rts(struct rdma_id_private *id_priv,
			     struct rdma_conn_param *conn_param)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	mutex_lock(&id_priv->qp_mutex);
	if (!id_priv->id.qp) {
		ret = 0;
		goto out;
	}

	qp_attr.qp_state = IB_QPS_RTS;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		goto out;

	if (conn_param)
		qp_attr.max_rd_atomic = conn_param->initiator_depth;
	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
out:
	mutex_unlock(&id_priv->qp_mutex);
	return ret;
}

static int cma_modify_qp_err(struct rdma_id_private *id_priv)
{
	struct ib_qp_attr qp_attr;
	int ret;

	mutex_lock(&id_priv->qp_mutex);
	if (!id_priv->id.qp) {
		ret = 0;
		goto out;
	}

	qp_attr.qp_state = IB_QPS_ERR;
	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
out:
	mutex_unlock(&id_priv->qp_mutex);
	return ret;
}

static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
			       struct ib_qp_attr *qp_attr, int *qp_attr_mask)
{
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	int ret;
	u16 pkey;

	if (rdma_cap_eth_ah(id_priv->id.device, id_priv->id.port_num))
		pkey = 0xffff;
	else
		pkey = ib_addr_get_pkey(dev_addr);

	ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
				  pkey, &qp_attr->pkey_index);
	if (ret)
		return ret;

	qp_attr->port_num = id_priv->id.port_num;
	*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;

	if (id_priv->id.qp_type == IB_QPT_UD) {
		ret = cma_set_qkey(id_priv, 0);
		if (ret)
			return ret;

		qp_attr->qkey = id_priv->qkey;
		*qp_attr_mask |= IB_QP_QKEY;
	} else {
		qp_attr->qp_access_flags = 0;
		*qp_attr_mask |= IB_QP_ACCESS_FLAGS;
	}
	return 0;
}

int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
		      int *qp_attr_mask)
{
	struct rdma_id_private *id_priv;
	int ret = 0;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (rdma_cap_ib_cm(id->device, id->port_num)) {
		if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD))
			ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
		else
			ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
						 qp_attr_mask);

		if (qp_attr->qp_state == IB_QPS_RTR)
			qp_attr->rq_psn = id_priv->seq_num;
	} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
		if (!id_priv->cm_id.iw) {
			qp_attr->qp_access_flags = 0;
			*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
		} else
			ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
						 qp_attr_mask);
		qp_attr->port_num = id_priv->id.port_num;
		*qp_attr_mask |= IB_QP_PORT;
	} else
		ret = -ENOSYS;

	if ((*qp_attr_mask & IB_QP_TIMEOUT) && id_priv->timeout_set)
		qp_attr->timeout = id_priv->timeout;

	return ret;
}
EXPORT_SYMBOL(rdma_init_qp_attr);
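
/*
 * Illustrative sketch (not part of the original file): a ULP that manages
 * its own QP instead of calling rdma_create_qp() can still let the CMA
 * compute the per-transition attributes via rdma_init_qp_attr(). The helper
 * name my_move_qp_to_init is hypothetical.
 */
#if 0
static int my_move_qp_to_init(struct rdma_cm_id *id, struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr = {};
	int qp_attr_mask;
	int ret;

	qp_attr.qp_state = IB_QPS_INIT;
	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;

	/* Apply exactly the mask the CMA filled in for this transition. */
	return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
}
#endif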

static inline bool cma_zero_addr(const struct sockaddr *addr)
{
	switch (addr->sa_family) {
	case AF_INET:
		return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr);
	case AF_INET6:
		return ipv6_addr_any(&((struct sockaddr_in6 *)addr)->sin6_addr);
	case AF_IB:
		return ib_addr_any(&((struct sockaddr_ib *)addr)->sib_addr);
	default:
		return false;
	}
}

static inline bool cma_loopback_addr(const struct sockaddr *addr)
{
	switch (addr->sa_family) {
	case AF_INET:
		return ipv4_is_loopback(
			((struct sockaddr_in *)addr)->sin_addr.s_addr);
	case AF_INET6:
		return ipv6_addr_loopback(
			&((struct sockaddr_in6 *)addr)->sin6_addr);
	case AF_IB:
		return ib_addr_loopback(
			&((struct sockaddr_ib *)addr)->sib_addr);
	default:
		return false;
	}
}

static inline bool cma_any_addr(const struct sockaddr *addr)
{
	return cma_zero_addr(addr) || cma_loopback_addr(addr);
}

static int cma_addr_cmp(const struct sockaddr *src, const struct sockaddr *dst)
{
	if (src->sa_family != dst->sa_family)
		return -1;

	switch (src->sa_family) {
	case AF_INET:
		return ((struct sockaddr_in *)src)->sin_addr.s_addr !=
		       ((struct sockaddr_in *)dst)->sin_addr.s_addr;
	case AF_INET6: {
		struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *)src;
		struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *)dst;
		bool link_local;

		if (ipv6_addr_cmp(&src_addr6->sin6_addr,
				  &dst_addr6->sin6_addr))
			return 1;
		link_local = ipv6_addr_type(&dst_addr6->sin6_addr) &
			     IPV6_ADDR_LINKLOCAL;
		/* Link local must match their scope_ids */
		return link_local ? (src_addr6->sin6_scope_id !=
				     dst_addr6->sin6_scope_id) : 0;
	}

	default:
		return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr,
				   &((struct sockaddr_ib *) dst)->sib_addr);
	}
}

static __be16 cma_port(const struct sockaddr *addr)
{
	struct sockaddr_ib *sib;

	switch (addr->sa_family) {
	case AF_INET:
		return ((struct sockaddr_in *) addr)->sin_port;
	case AF_INET6:
		return ((struct sockaddr_in6 *) addr)->sin6_port;
	case AF_IB:
		sib = (struct sockaddr_ib *) addr;
		return htons((u16) (be64_to_cpu(sib->sib_sid) &
				    be64_to_cpu(sib->sib_sid_mask)));
	default:
		return 0;
	}
}

static inline int cma_any_port(const struct sockaddr *addr)
{
	return !cma_port(addr);
}

static void cma_save_ib_info(struct sockaddr *src_addr,
			     struct sockaddr *dst_addr,
			     const struct rdma_cm_id *listen_id,
			     const struct sa_path_rec *path)
{
	struct sockaddr_ib *listen_ib, *ib;

	listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr;
	if (src_addr) {
		ib = (struct sockaddr_ib *)src_addr;
		ib->sib_family = AF_IB;
		if (path) {
			ib->sib_pkey = path->pkey;
			ib->sib_flowinfo = path->flow_label;
			memcpy(&ib->sib_addr, &path->sgid, 16);
			ib->sib_sid = path->service_id;
			ib->sib_scope_id = 0;
		} else {
			ib->sib_pkey = listen_ib->sib_pkey;
			ib->sib_flowinfo = listen_ib->sib_flowinfo;
			ib->sib_addr = listen_ib->sib_addr;
			ib->sib_sid = listen_ib->sib_sid;
			ib->sib_scope_id = listen_ib->sib_scope_id;
		}
		ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL);
	}
	if (dst_addr) {
		ib = (struct sockaddr_ib *)dst_addr;
		ib->sib_family = AF_IB;
		if (path) {
			ib->sib_pkey = path->pkey;
			ib->sib_flowinfo = path->flow_label;
			memcpy(&ib->sib_addr, &path->dgid, 16);
		}
	}
}

static void cma_save_ip4_info(struct sockaddr_in *src_addr,
			      struct sockaddr_in *dst_addr,
			      struct cma_hdr *hdr,
			      __be16 local_port)
{
	if (src_addr) {
		*src_addr = (struct sockaddr_in) {
			.sin_family = AF_INET,
			.sin_addr.s_addr = hdr->dst_addr.ip4.addr,
			.sin_port = local_port,
		};
	}

	if (dst_addr) {
		*dst_addr = (struct sockaddr_in) {
			.sin_family = AF_INET,
			.sin_addr.s_addr = hdr->src_addr.ip4.addr,
			.sin_port = hdr->port,
		};
	}
}

static void cma_save_ip6_info(struct sockaddr_in6 *src_addr,
			      struct sockaddr_in6 *dst_addr,
			      struct cma_hdr *hdr,
			      __be16 local_port)
{
	if (src_addr) {
		*src_addr = (struct sockaddr_in6) {
			.sin6_family = AF_INET6,
			.sin6_addr = hdr->dst_addr.ip6,
			.sin6_port = local_port,
		};
	}

	if (dst_addr) {
		*dst_addr = (struct sockaddr_in6) {
			.sin6_family = AF_INET6,
			.sin6_addr = hdr->src_addr.ip6,
			.sin6_port = hdr->port,
		};
	}
}

static u16 cma_port_from_service_id(__be64 service_id)
{
	return (u16)be64_to_cpu(service_id);
}

static int cma_save_ip_info(struct sockaddr *src_addr,
			    struct sockaddr *dst_addr,
			    const struct ib_cm_event *ib_event,
			    __be64 service_id)
{
	struct cma_hdr *hdr;
	__be16 port;

	hdr = ib_event->private_data;
	if (hdr->cma_version != CMA_VERSION)
		return -EINVAL;

	port = htons(cma_port_from_service_id(service_id));

	switch (cma_get_ip_ver(hdr)) {
	case 4:
		cma_save_ip4_info((struct sockaddr_in *)src_addr,
				  (struct sockaddr_in *)dst_addr, hdr, port);
		break;
	case 6:
		cma_save_ip6_info((struct sockaddr_in6 *)src_addr,
				  (struct sockaddr_in6 *)dst_addr, hdr, port);
		break;
	default:
		return -EAFNOSUPPORT;
	}

	return 0;
}

static int cma_save_net_info(struct sockaddr *src_addr,
			     struct sockaddr *dst_addr,
			     const struct rdma_cm_id *listen_id,
			     const struct ib_cm_event *ib_event,
			     sa_family_t sa_family, __be64 service_id)
{
	if (sa_family == AF_IB) {
		if (ib_event->event == IB_CM_REQ_RECEIVED)
			cma_save_ib_info(src_addr, dst_addr, listen_id,
					 ib_event->param.req_rcvd.primary_path);
		else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED)
			cma_save_ib_info(src_addr, dst_addr, listen_id, NULL);
		return 0;
	}

	return cma_save_ip_info(src_addr, dst_addr, ib_event, service_id);
}

static int cma_save_req_info(const struct ib_cm_event *ib_event,
			     struct cma_req_info *req)
{
	const struct ib_cm_req_event_param *req_param =
		&ib_event->param.req_rcvd;
	const struct ib_cm_sidr_req_event_param *sidr_param =
		&ib_event->param.sidr_req_rcvd;

	switch (ib_event->event) {
	case IB_CM_REQ_RECEIVED:
		req->device	= req_param->listen_id->device;
		req->port	= req_param->port;
		memcpy(&req->local_gid, &req_param->primary_path->sgid,
		       sizeof(req->local_gid));
		req->has_gid	= true;
		req->service_id = req_param->primary_path->service_id;
		req->pkey	= be16_to_cpu(req_param->primary_path->pkey);
		if (req->pkey != req_param->bth_pkey)
			pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n"
					    "RDMA CMA: in the future this may cause the request to be dropped\n",
					    req_param->bth_pkey, req->pkey);
		break;
	case IB_CM_SIDR_REQ_RECEIVED:
		req->device	= sidr_param->listen_id->device;
		req->port	= sidr_param->port;
		req->has_gid	= false;
		req->service_id	= sidr_param->service_id;
		req->pkey	= sidr_param->pkey;
		if (req->pkey != sidr_param->bth_pkey)
			pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and SIDR request payload P_Key (0x%x)\n"
					    "RDMA CMA: in the future this may cause the request to be dropped\n",
					    sidr_param->bth_pkey, req->pkey);
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static bool validate_ipv4_net_dev(struct net_device *net_dev,
				  const struct sockaddr_in *dst_addr,
				  const struct sockaddr_in *src_addr)
{
	__be32 daddr = dst_addr->sin_addr.s_addr,
	       saddr = src_addr->sin_addr.s_addr;
	struct fib_result res;
	struct flowi4 fl4;
	int err;
	bool ret;

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    ipv4_is_lbcast(daddr) || ipv4_is_zeronet(saddr) ||
	    ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr) ||
	    ipv4_is_loopback(saddr))
		return false;

	memset(&fl4, 0, sizeof(fl4));
	fl4.flowi4_iif = net_dev->ifindex;
	fl4.daddr = daddr;
	fl4.saddr = saddr;

	rcu_read_lock();
	err = fib_lookup(dev_net(net_dev), &fl4, &res, 0);
	ret = err == 0 && FIB_RES_DEV(res) == net_dev;
	rcu_read_unlock();

	return ret;
}

static bool validate_ipv6_net_dev(struct net_device *net_dev,
				  const struct sockaddr_in6 *dst_addr,
				  const struct sockaddr_in6 *src_addr)
{
#if IS_ENABLED(CONFIG_IPV6)
	const int strict = ipv6_addr_type(&dst_addr->sin6_addr) &
			   IPV6_ADDR_LINKLOCAL;
	struct rt6_info *rt = rt6_lookup(dev_net(net_dev), &dst_addr->sin6_addr,
					 &src_addr->sin6_addr, net_dev->ifindex,
					 NULL, strict);
	bool ret;

	if (!rt)
		return false;

	ret = rt->rt6i_idev->dev == net_dev;
	ip6_rt_put(rt);

	return ret;
#else
	return false;
#endif
}

static bool validate_net_dev(struct net_device *net_dev,
			     const struct sockaddr *daddr,
			     const struct sockaddr *saddr)
{
	const struct sockaddr_in *daddr4 = (const struct sockaddr_in *)daddr;
	const struct sockaddr_in *saddr4 = (const struct sockaddr_in *)saddr;
	const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr;
	const struct sockaddr_in6 *saddr6 = (const struct sockaddr_in6 *)saddr;

	switch (daddr->sa_family) {
	case AF_INET:
		return saddr->sa_family == AF_INET &&
		       validate_ipv4_net_dev(net_dev, daddr4, saddr4);

	case AF_INET6:
		return saddr->sa_family == AF_INET6 &&
		       validate_ipv6_net_dev(net_dev, daddr6, saddr6);

	default:
		return false;
	}
}

static struct net_device *
roce_get_net_dev_by_cm_event(const struct ib_cm_event *ib_event)
{
	const struct ib_gid_attr *sgid_attr = NULL;
	struct net_device *ndev;

	if (ib_event->event == IB_CM_REQ_RECEIVED)
		sgid_attr = ib_event->param.req_rcvd.ppath_sgid_attr;
	else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED)
		sgid_attr = ib_event->param.sidr_req_rcvd.sgid_attr;

	if (!sgid_attr)
		return NULL;

	rcu_read_lock();
	ndev = rdma_read_gid_attr_ndev_rcu(sgid_attr);
	if (IS_ERR(ndev))
		ndev = NULL;
	else
		dev_hold(ndev);
	rcu_read_unlock();
	return ndev;
}

static struct net_device *cma_get_net_dev(const struct ib_cm_event *ib_event,
					  struct cma_req_info *req)
{
	struct sockaddr *listen_addr =
			(struct sockaddr *)&req->listen_addr_storage;
	struct sockaddr *src_addr = (struct sockaddr *)&req->src_addr_storage;
	struct net_device *net_dev;
	const union ib_gid *gid = req->has_gid ? &req->local_gid : NULL;
	int err;

	err = cma_save_ip_info(listen_addr, src_addr, ib_event,
			       req->service_id);
	if (err)
		return ERR_PTR(err);

	if (rdma_protocol_roce(req->device, req->port))
		net_dev = roce_get_net_dev_by_cm_event(ib_event);
	else
		net_dev = ib_get_net_dev_by_params(req->device, req->port,
						   req->pkey,
						   gid, listen_addr);
	if (!net_dev)
		return ERR_PTR(-ENODEV);

	return net_dev;
}

static enum rdma_ucm_port_space rdma_ps_from_service_id(__be64 service_id)
{
	return (be64_to_cpu(service_id) >> 16) & 0xffff;
}

static bool cma_match_private_data(struct rdma_id_private *id_priv,
				   const struct cma_hdr *hdr)
{
	struct sockaddr *addr = cma_src_addr(id_priv);
	__be32 ip4_addr;
	struct in6_addr ip6_addr;

	if (cma_any_addr(addr) && !id_priv->afonly)
		return true;

	switch (addr->sa_family) {
	case AF_INET:
		ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr;
		if (cma_get_ip_ver(hdr) != 4)
			return false;
		if (!cma_any_addr(addr) &&
		    hdr->dst_addr.ip4.addr != ip4_addr)
			return false;
		break;
	case AF_INET6:
		ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr;
		if (cma_get_ip_ver(hdr) != 6)
			return false;
		if (!cma_any_addr(addr) &&
		    memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr)))
			return false;
		break;
	case AF_IB:
		return true;
	default:
		return false;
	}

	return true;
}

static bool cma_protocol_roce(const struct rdma_cm_id *id)
{
	struct ib_device *device = id->device;
	const int port_num = id->port_num ?: rdma_start_port(device);

	return rdma_protocol_roce(device, port_num);
}

static bool cma_is_req_ipv6_ll(const struct cma_req_info *req)
{
	const struct sockaddr *daddr =
			(const struct sockaddr *)&req->listen_addr_storage;
	const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr;

	/* Returns true if the req is for IPv6 link local */
	return (daddr->sa_family == AF_INET6 &&
		(ipv6_addr_type(&daddr6->sin6_addr) & IPV6_ADDR_LINKLOCAL));
}

static bool cma_match_net_dev(const struct rdma_cm_id *id,
			      const struct net_device *net_dev,
			      const struct cma_req_info *req)
{
	const struct rdma_addr *addr = &id->route.addr;

	if (!net_dev)
		/* This request is an AF_IB request */
		return (!id->port_num || id->port_num == req->port) &&
		       (addr->src_addr.ss_family == AF_IB);

	/*
	 * If the request is not for IPv6 link local, allow matching
	 * request to any netdevice of the one or multiport rdma device.
	 */
	if (!cma_is_req_ipv6_ll(req))
		return true;
	/*
	 * Net namespaces must match, and if the listener is listening
	 * on a specific netdevice then netdevice must match as well.
	 */
	if (net_eq(dev_net(net_dev), addr->dev_addr.net) &&
	    (!!addr->dev_addr.bound_dev_if ==
	     (addr->dev_addr.bound_dev_if == net_dev->ifindex)))
		return true;
	else
		return false;
}

static struct rdma_id_private *cma_find_listener(
		const struct rdma_bind_list *bind_list,
		const struct ib_cm_id	  *cm_id,
		const struct ib_cm_event  *ib_event,
		const struct cma_req_info *req,
		const struct net_device	  *net_dev)
{
	struct rdma_id_private *id_priv, *id_priv_dev;

	if (!bind_list)
		return ERR_PTR(-EINVAL);

	hlist_for_each_entry(id_priv, &bind_list->owners, node) {
		if (cma_match_private_data(id_priv, ib_event->private_data)) {
			if (id_priv->id.device == cm_id->device &&
			    cma_match_net_dev(&id_priv->id, net_dev, req))
				return id_priv;
			list_for_each_entry(id_priv_dev,
					    &id_priv->listen_list,
					    listen_list) {
				if (id_priv_dev->id.device == cm_id->device &&
				    cma_match_net_dev(&id_priv_dev->id,
						      net_dev, req))
					return id_priv_dev;
			}
		}
	}

	return ERR_PTR(-EINVAL);
}

static struct rdma_id_private *
cma_ib_id_from_event(struct ib_cm_id *cm_id,
		     const struct ib_cm_event *ib_event,
		     struct cma_req_info *req,
		     struct net_device **net_dev)
{
	struct rdma_bind_list *bind_list;
	struct rdma_id_private *id_priv;
	int err;

	err = cma_save_req_info(ib_event, req);
	if (err)
		return ERR_PTR(err);

	*net_dev = cma_get_net_dev(ib_event, req);
	if (IS_ERR(*net_dev)) {
		if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
			/* Assuming the protocol is AF_IB */
			*net_dev = NULL;
		} else {
			return ERR_CAST(*net_dev);
		}
	}

	/*
	 * Net namespace might be getting deleted while route lookup,
	 * cm_id lookup is in progress. Therefore, perform netdevice
	 * validation, cm_id lookup under rcu lock.
	 * RCU lock along with netdevice state check, synchronizes with
	 * netdevice migrating to different net namespace and also avoids
	 * case where net namespace doesn't get deleted while lookup is in
	 * progress.
	 * If the device state is not IFF_UP, its properties such as ifindex
	 * and nd_net cannot be trusted to remain valid without rcu lock.
	 * net/core/dev.c change_net_namespace() ensures to synchronize with
	 * ongoing operations on net device after device is closed using
	 * synchronize_net().
	 */
	rcu_read_lock();
	if (*net_dev) {
		/*
		 * If netdevice is down, it is likely that it is administratively
		 * down or it might be migrating to different namespace.
		 * In that case avoid further processing, as the net namespace
		 * or ifindex may change.
		 */
		if (((*net_dev)->flags & IFF_UP) == 0) {
			id_priv = ERR_PTR(-EHOSTUNREACH);
			goto err;
		}

		if (!validate_net_dev(*net_dev,
				 (struct sockaddr *)&req->listen_addr_storage,
				 (struct sockaddr *)&req->src_addr_storage)) {
			id_priv = ERR_PTR(-EHOSTUNREACH);
			goto err;
		}
	}

	bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net,
				rdma_ps_from_service_id(req->service_id),
				cma_port_from_service_id(req->service_id));
	id_priv = cma_find_listener(bind_list, cm_id, ib_event, req, *net_dev);
err:
	rcu_read_unlock();
	if (IS_ERR(id_priv) && *net_dev) {
		dev_put(*net_dev);
		*net_dev = NULL;
	}
	return id_priv;
}

static inline u8 cma_user_data_offset(struct rdma_id_private *id_priv)
{
	return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr);
}

static void cma_cancel_route(struct rdma_id_private *id_priv)
{
	if (rdma_cap_ib_sa(id_priv->id.device, id_priv->id.port_num)) {
		if (id_priv->query)
			ib_sa_cancel_query(id_priv->query_id, id_priv->query);
	}
}

static void cma_cancel_listens(struct rdma_id_private *id_priv)
{
	struct rdma_id_private *dev_id_priv;

	/*
	 * Remove from listen_any_list to prevent added devices from spawning
	 * additional listen requests.
	 */
	mutex_lock(&lock);
	list_del(&id_priv->list);

	while (!list_empty(&id_priv->listen_list)) {
		dev_id_priv = list_entry(id_priv->listen_list.next,
					 struct rdma_id_private, listen_list);
		/* sync with device removal to avoid duplicate destruction */
		list_del_init(&dev_id_priv->list);
		list_del(&dev_id_priv->listen_list);
		mutex_unlock(&lock);

		rdma_destroy_id(&dev_id_priv->id);
		mutex_lock(&lock);
	}
	mutex_unlock(&lock);
}

static void cma_cancel_operation(struct rdma_id_private *id_priv,
				 enum rdma_cm_state state)
{
	switch (state) {
	case RDMA_CM_ADDR_QUERY:
		rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
		break;
	case RDMA_CM_ROUTE_QUERY:
		cma_cancel_route(id_priv);
		break;
	case RDMA_CM_LISTEN:
		if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev)
			cma_cancel_listens(id_priv);
		break;
	default:
		break;
	}
}

static void cma_release_port(struct rdma_id_private *id_priv)
{
	struct rdma_bind_list *bind_list = id_priv->bind_list;
	struct net *net = id_priv->id.route.addr.dev_addr.net;

	if (!bind_list)
		return;

	mutex_lock(&lock);
	hlist_del(&id_priv->node);
	if (hlist_empty(&bind_list->owners)) {
		cma_ps_remove(net, bind_list->ps, bind_list->port);
		kfree(bind_list);
	}
	mutex_unlock(&lock);
}

static void cma_leave_roce_mc_group(struct rdma_id_private *id_priv,
				    struct cma_multicast *mc)
{
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	struct net_device *ndev = NULL;

	if (dev_addr->bound_dev_if)
		ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
	if (ndev) {
		cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false);
		dev_put(ndev);
	}
	kref_put(&mc->mcref, release_mc);
}

static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
{
	struct cma_multicast *mc;

	while (!list_empty(&id_priv->mc_list)) {
		mc = container_of(id_priv->mc_list.next,
				  struct cma_multicast, list);
		list_del(&mc->list);
		if (rdma_cap_ib_mcast(id_priv->cma_dev->device,
				      id_priv->id.port_num)) {
			ib_sa_free_multicast(mc->multicast.ib);
			kfree(mc);
		} else {
			cma_leave_roce_mc_group(id_priv, mc);
		}
	}
}

void rdma_destroy_id(struct rdma_cm_id *id)
{
	struct rdma_id_private *id_priv;
	enum rdma_cm_state state;

	id_priv = container_of(id, struct rdma_id_private, id);
	trace_cm_id_destroy(id_priv);
	state = cma_exch(id_priv, RDMA_CM_DESTROYING);
	cma_cancel_operation(id_priv, state);

	/*
	 * Wait for any active callback to finish.  New callbacks will find
	 * the id_priv state set to destroying and abort.
	 */
	mutex_lock(&id_priv->handler_mutex);
	mutex_unlock(&id_priv->handler_mutex);

	rdma_restrack_del(&id_priv->res);
	if (id_priv->cma_dev) {
		if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
			if (id_priv->cm_id.ib)
				ib_destroy_cm_id(id_priv->cm_id.ib);
		} else if (rdma_cap_iw_cm(id_priv->id.device, 1)) {
			if (id_priv->cm_id.iw)
				iw_destroy_cm_id(id_priv->cm_id.iw);
		}
		cma_leave_mc_groups(id_priv);
		cma_release_dev(id_priv);
	}

	cma_release_port(id_priv);
	cma_deref_id(id_priv);
	wait_for_completion(&id_priv->comp);

	if (id_priv->internal_id)
		cma_deref_id(id_priv->id.context);

	kfree(id_priv->id.route.path_rec);

	if (id_priv->id.route.addr.dev_addr.sgid_attr)
		rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);

	put_net(id_priv->id.route.addr.dev_addr.net);
	kfree(id_priv);
}
EXPORT_SYMBOL(rdma_destroy_id);
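
/*
 * Illustrative sketch (not part of the original file): the teardown order a
 * ULP typically follows, destroying its QP before the CM ID so that
 * rdma_destroy_id() only has CM state left to tear down. The helper name
 * my_teardown is hypothetical.
 */
#if 0
static void my_teardown(struct rdma_cm_id *id, struct ib_pd *pd)
{
	if (id->qp)
		rdma_destroy_qp(id);	/* releases id->qp under qp_mutex */
	rdma_destroy_id(id);		/* waits for outstanding handlers */
	ib_dealloc_pd(pd);
}
#endif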

static int cma_rep_recv(struct rdma_id_private *id_priv)
{
	int ret;

	ret = cma_modify_qp_rtr(id_priv, NULL);
	if (ret)
		goto reject;

	ret = cma_modify_qp_rts(id_priv, NULL);
	if (ret)
		goto reject;

	trace_cm_send_rtu(id_priv);
	ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
	if (ret)
		goto reject;

	return 0;
reject:
	pr_debug_ratelimited("RDMA CM: CONNECT_ERROR: failed to handle reply. status %d\n", ret);
	cma_modify_qp_err(id_priv);
	trace_cm_send_rej(id_priv);
	ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
		       NULL, 0, NULL, 0);
	return ret;
}

static void cma_set_rep_event_data(struct rdma_cm_event *event,
				   const struct ib_cm_rep_event_param *rep_data,
				   void *private_data)
{
	event->param.conn.private_data = private_data;
	event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
	event->param.conn.responder_resources = rep_data->responder_resources;
	event->param.conn.initiator_depth = rep_data->initiator_depth;
	event->param.conn.flow_control = rep_data->flow_control;
	event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
	event->param.conn.srq = rep_data->srq;
	event->param.conn.qp_num = rep_data->remote_qpn;
}

static int cma_cm_event_handler(struct rdma_id_private *id_priv,
				struct rdma_cm_event *event)
{
	int ret;

	trace_cm_event_handler(id_priv, event);
	ret = id_priv->id.event_handler(&id_priv->id, event);
	trace_cm_event_done(id_priv, event, ret);
	return ret;
}

static int cma_ib_handler(struct ib_cm_id *cm_id,
			  const struct ib_cm_event *ib_event)
{
	struct rdma_id_private *id_priv = cm_id->context;
	struct rdma_cm_event event = {};
	int ret = 0;

	mutex_lock(&id_priv->handler_mutex);
	if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
	     id_priv->state != RDMA_CM_CONNECT) ||
	    (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
	     id_priv->state != RDMA_CM_DISCONNECT))
		goto out;

	switch (ib_event->event) {
	case IB_CM_REQ_ERROR:
	case IB_CM_REP_ERROR:
		event.event = RDMA_CM_EVENT_UNREACHABLE;
		event.status = -ETIMEDOUT;
		break;
	case IB_CM_REP_RECEIVED:
		if (cma_comp(id_priv, RDMA_CM_CONNECT) &&
		    (id_priv->id.qp_type != IB_QPT_UD)) {
			trace_cm_send_mra(id_priv);
			ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
		}
		if (id_priv->id.qp) {
			event.status = cma_rep_recv(id_priv);
			event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
						     RDMA_CM_EVENT_ESTABLISHED;
		} else {
			event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
		}
		cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
				       ib_event->private_data);
		break;
	case IB_CM_RTU_RECEIVED:
	case IB_CM_USER_ESTABLISHED:
		event.event = RDMA_CM_EVENT_ESTABLISHED;
		break;
	case IB_CM_DREQ_ERROR:
		event.status = -ETIMEDOUT; /* fall through */
	case IB_CM_DREQ_RECEIVED:
	case IB_CM_DREP_RECEIVED:
		if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT,
				   RDMA_CM_DISCONNECT))
			goto out;
		event.event = RDMA_CM_EVENT_DISCONNECTED;
		break;
	case IB_CM_TIMEWAIT_EXIT:
		event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT;
		break;
	case IB_CM_MRA_RECEIVED:
		/* ignore event */
		goto out;
	case IB_CM_REJ_RECEIVED:
		pr_debug_ratelimited("RDMA CM: REJECTED: %s\n", rdma_reject_msg(&id_priv->id,
										ib_event->param.rej_rcvd.reason));
		cma_modify_qp_err(id_priv);
		event.status = ib_event->param.rej_rcvd.reason;
		event.event = RDMA_CM_EVENT_REJECTED;
		event.param.conn.private_data = ib_event->private_data;
		event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
		break;
	default:
		pr_err("RDMA CMA: unexpected IB CM event: %d\n",
		       ib_event->event);
		goto out;
	}

	ret = cma_cm_event_handler(id_priv, &event);
	if (ret) {
		/* Destroy the CM ID by returning a non-zero value. */
		id_priv->cm_id.ib = NULL;
		cma_exch(id_priv, RDMA_CM_DESTROYING);
		mutex_unlock(&id_priv->handler_mutex);
		rdma_destroy_id(&id_priv->id);
		return ret;
	}
out:
	mutex_unlock(&id_priv->handler_mutex);
	return ret;
}

static struct rdma_id_private *
cma_ib_new_conn_id(const struct rdma_cm_id *listen_id,
		   const struct ib_cm_event *ib_event,
		   struct net_device *net_dev)
{
	struct rdma_id_private *listen_id_priv;
	struct rdma_id_private *id_priv;
	struct rdma_cm_id *id;
	struct rdma_route *rt;
	const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
	struct sa_path_rec *path = ib_event->param.req_rcvd.primary_path;
	const __be64 service_id =
		ib_event->param.req_rcvd.primary_path->service_id;
	int ret;

	listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
	id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
			      listen_id->event_handler, listen_id->context,
			      listen_id->ps, ib_event->param.req_rcvd.qp_type,
			      listen_id_priv->res.kern_name);
	if (IS_ERR(id))
		return NULL;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
			      (struct sockaddr *)&id->route.addr.dst_addr,
			      listen_id, ib_event, ss_family, service_id))
		goto err;

	rt = &id->route;
	rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
	rt->path_rec = kmalloc_array(rt->num_paths, sizeof(*rt->path_rec),
				     GFP_KERNEL);
	if (!rt->path_rec)
		goto err;

	rt->path_rec[0] = *path;
	if (rt->num_paths == 2)
		rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;

	if (net_dev) {
		rdma_copy_src_l2_addr(&rt->addr.dev_addr, net_dev);
	} else {
		if (!cma_protocol_roce(listen_id) &&
		    cma_any_addr(cma_src_addr(id_priv))) {
			rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
			rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
			ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
		} else if (!cma_any_addr(cma_src_addr(id_priv))) {
			ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr);
			if (ret)
				goto err;
		}
	}
	rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);

	id_priv->state = RDMA_CM_CONNECT;
	return id_priv;

err:
	rdma_destroy_id(id);
	return NULL;
}

static struct rdma_id_private *
cma_ib_new_udp_id(const struct rdma_cm_id *listen_id,
		  const struct ib_cm_event *ib_event,
		  struct net_device *net_dev)
{
	const struct rdma_id_private *listen_id_priv;
	struct rdma_id_private *id_priv;
	struct rdma_cm_id *id;
	const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
	struct net *net = listen_id->route.addr.dev_addr.net;
	int ret;

	listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
	id = __rdma_create_id(net, listen_id->event_handler, listen_id->context,
			      listen_id->ps, IB_QPT_UD,
			      listen_id_priv->res.kern_name);
	if (IS_ERR(id))
		return NULL;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
			      (struct sockaddr *)&id->route.addr.dst_addr,
			      listen_id, ib_event, ss_family,
			      ib_event->param.sidr_req_rcvd.service_id))
		goto err;

	if (net_dev) {
		rdma_copy_src_l2_addr(&id->route.addr.dev_addr, net_dev);
	} else {
		if (!cma_any_addr(cma_src_addr(id_priv))) {
			ret = cma_translate_addr(cma_src_addr(id_priv),
						 &id->route.addr.dev_addr);
			if (ret)
				goto err;
		}
	}

	id_priv->state = RDMA_CM_CONNECT;
	return id_priv;
err:
	rdma_destroy_id(id);
	return NULL;
}

static void cma_set_req_event_data(struct rdma_cm_event *event,
				   const struct ib_cm_req_event_param *req_data,
				   void *private_data, int offset)
{
	event->param.conn.private_data = private_data + offset;
	event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset;
	event->param.conn.responder_resources = req_data->responder_resources;
	event->param.conn.initiator_depth = req_data->initiator_depth;
	event->param.conn.flow_control = req_data->flow_control;
	event->param.conn.retry_count = req_data->retry_count;
	event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
	event->param.conn.srq = req_data->srq;
	event->param.conn.qp_num = req_data->remote_qpn;
}

static int cma_ib_check_req_qp_type(const struct rdma_cm_id *id,
				    const struct ib_cm_event *ib_event)
{
	return (((ib_event->event == IB_CM_REQ_RECEIVED) &&
		 (ib_event->param.req_rcvd.qp_type == id->qp_type)) ||
		((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) &&
		 (id->qp_type == IB_QPT_UD)) ||
		(!id->qp_type));
}

static int cma_ib_req_handler(struct ib_cm_id *cm_id,
			      const struct ib_cm_event *ib_event)
{
	struct rdma_id_private *listen_id, *conn_id = NULL;
	struct rdma_cm_event event = {};
	struct cma_req_info req = {};
	struct net_device *net_dev;
	u8 offset;
	int ret;

	listen_id = cma_ib_id_from_event(cm_id, ib_event, &req, &net_dev);
	if (IS_ERR(listen_id))
		return PTR_ERR(listen_id);

	trace_cm_req_handler(listen_id, ib_event->event);
	if (!cma_ib_check_req_qp_type(&listen_id->id, ib_event)) {
		ret = -EINVAL;
		goto net_dev_put;
	}

	mutex_lock(&listen_id->handler_mutex);
	if (listen_id->state != RDMA_CM_LISTEN) {
		ret = -ECONNABORTED;
		goto err1;
	}

	offset = cma_user_data_offset(listen_id);
	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
	if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
		conn_id = cma_ib_new_udp_id(&listen_id->id, ib_event, net_dev);
		event.param.ud.private_data = ib_event->private_data + offset;
		event.param.ud.private_data_len =
				IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
	} else {
		conn_id = cma_ib_new_conn_id(&listen_id->id, ib_event, net_dev);
		cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
				       ib_event->private_data, offset);
	}
	if (!conn_id) {
		ret = -ENOMEM;
		goto err1;
	}

	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
	ret = cma_ib_acquire_dev(conn_id, listen_id, &req);
	if (ret)
		goto err2;

	conn_id->cm_id.ib = cm_id;
	cm_id->context = conn_id;
	cm_id->cm_handler = cma_ib_handler;

	/*
	 * Protect against the user destroying conn_id from another thread
	 * until we're done accessing it.
	 */
	atomic_inc(&conn_id->refcount);
	ret = cma_cm_event_handler(conn_id, &event);
	if (ret)
		goto err3;
	/*
	 * Acquire mutex to prevent user executing rdma_destroy_id()
	 * while we're accessing the cm_id.
	 */
	mutex_lock(&lock);
	if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
	    (conn_id->id.qp_type != IB_QPT_UD)) {
		trace_cm_send_mra(cm_id->context);
		ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
	}
	mutex_unlock(&lock);
	mutex_unlock(&conn_id->handler_mutex);
	mutex_unlock(&listen_id->handler_mutex);
	cma_deref_id(conn_id);
	if (net_dev)
		dev_put(net_dev);
	return 0;

err3:
	cma_deref_id(conn_id);
	/* Destroy the CM ID by returning a non-zero value. */
	conn_id->cm_id.ib = NULL;
err2:
	cma_exch(conn_id, RDMA_CM_DESTROYING);
	mutex_unlock(&conn_id->handler_mutex);
err1:
	mutex_unlock(&listen_id->handler_mutex);
	if (conn_id)
		rdma_destroy_id(&conn_id->id);

net_dev_put:
	if (net_dev)
		dev_put(net_dev);

	return ret;
}

__be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr)
{
	if (addr->sa_family == AF_IB)
		return ((struct sockaddr_ib *) addr)->sib_sid;

	return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr)));
}
EXPORT_SYMBOL(rdma_get_service_id);
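
/*
 * Worked example (not part of the original file), assuming the usual port
 * space values from <rdma/rdma_cm.h>: for a non-AF_IB address bound to TCP
 * port 5000 on an RDMA_PS_TCP (0x0106) id, the computation above yields
 *
 *	((u64)0x0106 << 16) + 5000 = 0x01061388
 *
 * i.e. a service ID of cpu_to_be64(0x0000000001061388). AF_IB addresses
 * instead carry the service ID directly in sib_sid.
 */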
void rdma_read_gids(struct rdma_cm_id *cm_id, union ib_gid *sgid,
		    union ib_gid *dgid)
{
	struct rdma_addr *addr = &cm_id->route.addr;

	if (!cm_id->device) {
		if (sgid)
			memset(sgid, 0, sizeof(*sgid));
		if (dgid)
			memset(dgid, 0, sizeof(*dgid));
		return;
	}

	if (rdma_protocol_roce(cm_id->device, cm_id->port_num)) {
		if (sgid)
			rdma_ip2gid((struct sockaddr *)&addr->src_addr, sgid);
		if (dgid)
			rdma_ip2gid((struct sockaddr *)&addr->dst_addr, dgid);
	} else {
		if (sgid)
			rdma_addr_get_sgid(&addr->dev_addr, sgid);
		if (dgid)
			rdma_addr_get_dgid(&addr->dev_addr, dgid);
	}
}
EXPORT_SYMBOL(rdma_read_gids);
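/*
 * Minimal usage sketch (hypothetical caller): a ULP that wants the GIDs
 * actually in use for a cm_id can read them back the same way whether the
 * underlying transport is IB or RoCE.
 */
static void cma_example_log_gids(struct rdma_cm_id *cm_id)
{
	union ib_gid sgid, dgid;

	rdma_read_gids(cm_id, &sgid, &dgid);
	pr_debug("sgid %pI6, dgid %pI6\n", sgid.raw, dgid.raw);
}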
2267 static int cma_iw_handler(struct iw_cm_id
*iw_id
, struct iw_cm_event
*iw_event
)
2269 struct rdma_id_private
*id_priv
= iw_id
->context
;
2270 struct rdma_cm_event event
= {};
2272 struct sockaddr
*laddr
= (struct sockaddr
*)&iw_event
->local_addr
;
2273 struct sockaddr
*raddr
= (struct sockaddr
*)&iw_event
->remote_addr
;
2275 mutex_lock(&id_priv
->handler_mutex
);
2276 if (id_priv
->state
!= RDMA_CM_CONNECT
)
2279 switch (iw_event
->event
) {
2280 case IW_CM_EVENT_CLOSE
:
2281 event
.event
= RDMA_CM_EVENT_DISCONNECTED
;
2283 case IW_CM_EVENT_CONNECT_REPLY
:
2284 memcpy(cma_src_addr(id_priv
), laddr
,
2285 rdma_addr_size(laddr
));
2286 memcpy(cma_dst_addr(id_priv
), raddr
,
2287 rdma_addr_size(raddr
));
2288 switch (iw_event
->status
) {
2290 event
.event
= RDMA_CM_EVENT_ESTABLISHED
;
2291 event
.param
.conn
.initiator_depth
= iw_event
->ird
;
2292 event
.param
.conn
.responder_resources
= iw_event
->ord
;
2296 event
.event
= RDMA_CM_EVENT_REJECTED
;
2299 event
.event
= RDMA_CM_EVENT_UNREACHABLE
;
2302 event
.event
= RDMA_CM_EVENT_CONNECT_ERROR
;
2306 case IW_CM_EVENT_ESTABLISHED
:
2307 event
.event
= RDMA_CM_EVENT_ESTABLISHED
;
2308 event
.param
.conn
.initiator_depth
= iw_event
->ird
;
2309 event
.param
.conn
.responder_resources
= iw_event
->ord
;
2315 event
.status
= iw_event
->status
;
2316 event
.param
.conn
.private_data
= iw_event
->private_data
;
2317 event
.param
.conn
.private_data_len
= iw_event
->private_data_len
;
2318 ret
= cma_cm_event_handler(id_priv
, &event
);
2320 /* Destroy the CM ID by returning a non-zero value. */
2321 id_priv
->cm_id
.iw
= NULL
;
2322 cma_exch(id_priv
, RDMA_CM_DESTROYING
);
2323 mutex_unlock(&id_priv
->handler_mutex
);
2324 rdma_destroy_id(&id_priv
->id
);
2329 mutex_unlock(&id_priv
->handler_mutex
);
2333 static int iw_conn_req_handler(struct iw_cm_id
*cm_id
,
2334 struct iw_cm_event
*iw_event
)
2336 struct rdma_cm_id
*new_cm_id
;
2337 struct rdma_id_private
*listen_id
, *conn_id
;
2338 struct rdma_cm_event event
= {};
2339 int ret
= -ECONNABORTED
;
2340 struct sockaddr
*laddr
= (struct sockaddr
*)&iw_event
->local_addr
;
2341 struct sockaddr
*raddr
= (struct sockaddr
*)&iw_event
->remote_addr
;
2343 event
.event
= RDMA_CM_EVENT_CONNECT_REQUEST
;
2344 event
.param
.conn
.private_data
= iw_event
->private_data
;
2345 event
.param
.conn
.private_data_len
= iw_event
->private_data_len
;
2346 event
.param
.conn
.initiator_depth
= iw_event
->ird
;
2347 event
.param
.conn
.responder_resources
= iw_event
->ord
;
2349 listen_id
= cm_id
->context
;
2351 mutex_lock(&listen_id
->handler_mutex
);
2352 if (listen_id
->state
!= RDMA_CM_LISTEN
)
2355 /* Create a new RDMA id for the new IW CM ID */
2356 new_cm_id
= __rdma_create_id(listen_id
->id
.route
.addr
.dev_addr
.net
,
2357 listen_id
->id
.event_handler
,
2358 listen_id
->id
.context
,
2359 RDMA_PS_TCP
, IB_QPT_RC
,
2360 listen_id
->res
.kern_name
);
2361 if (IS_ERR(new_cm_id
)) {
2365 conn_id
= container_of(new_cm_id
, struct rdma_id_private
, id
);
2366 mutex_lock_nested(&conn_id
->handler_mutex
, SINGLE_DEPTH_NESTING
);
2367 conn_id
->state
= RDMA_CM_CONNECT
;
2369 ret
= rdma_translate_ip(laddr
, &conn_id
->id
.route
.addr
.dev_addr
);
2371 mutex_unlock(&conn_id
->handler_mutex
);
2372 rdma_destroy_id(new_cm_id
);
2376 ret
= cma_iw_acquire_dev(conn_id
, listen_id
);
2378 mutex_unlock(&conn_id
->handler_mutex
);
2379 rdma_destroy_id(new_cm_id
);
2383 conn_id
->cm_id
.iw
= cm_id
;
2384 cm_id
->context
= conn_id
;
2385 cm_id
->cm_handler
= cma_iw_handler
;
2387 memcpy(cma_src_addr(conn_id
), laddr
, rdma_addr_size(laddr
));
2388 memcpy(cma_dst_addr(conn_id
), raddr
, rdma_addr_size(raddr
));
2391 * Protect against the user destroying conn_id from another thread
2392 * until we're done accessing it.
2394 atomic_inc(&conn_id
->refcount
);
2395 ret
= cma_cm_event_handler(conn_id
, &event
);
2397 /* User wants to destroy the CM ID */
2398 conn_id
->cm_id
.iw
= NULL
;
2399 cma_exch(conn_id
, RDMA_CM_DESTROYING
);
2400 mutex_unlock(&conn_id
->handler_mutex
);
2401 mutex_unlock(&listen_id
->handler_mutex
);
2402 cma_deref_id(conn_id
);
2403 rdma_destroy_id(&conn_id
->id
);
2407 mutex_unlock(&conn_id
->handler_mutex
);
2408 cma_deref_id(conn_id
);
2411 mutex_unlock(&listen_id
->handler_mutex
);
2415 static int cma_ib_listen(struct rdma_id_private
*id_priv
)
2417 struct sockaddr
*addr
;
2418 struct ib_cm_id
*id
;
2421 addr
= cma_src_addr(id_priv
);
2422 svc_id
= rdma_get_service_id(&id_priv
->id
, addr
);
2423 id
= ib_cm_insert_listen(id_priv
->id
.device
,
2424 cma_ib_req_handler
, svc_id
);
2427 id_priv
->cm_id
.ib
= id
;
2432 static int cma_iw_listen(struct rdma_id_private
*id_priv
, int backlog
)
2435 struct iw_cm_id
*id
;
2437 id
= iw_create_cm_id(id_priv
->id
.device
,
2438 iw_conn_req_handler
,
2443 id
->tos
= id_priv
->tos
;
2444 id
->tos_set
= id_priv
->tos_set
;
2445 id_priv
->cm_id
.iw
= id
;
2447 memcpy(&id_priv
->cm_id
.iw
->local_addr
, cma_src_addr(id_priv
),
2448 rdma_addr_size(cma_src_addr(id_priv
)));
2450 ret
= iw_cm_listen(id_priv
->cm_id
.iw
, backlog
);
2453 iw_destroy_cm_id(id_priv
->cm_id
.iw
);
2454 id_priv
->cm_id
.iw
= NULL
;
2460 static int cma_listen_handler(struct rdma_cm_id
*id
,
2461 struct rdma_cm_event
*event
)
2463 struct rdma_id_private
*id_priv
= id
->context
;
2465 id
->context
= id_priv
->id
.context
;
2466 id
->event_handler
= id_priv
->id
.event_handler
;
2467 trace_cm_event_handler(id_priv
, event
);
2468 return id_priv
->id
.event_handler(id
, event
);
2471 static void cma_listen_on_dev(struct rdma_id_private
*id_priv
,
2472 struct cma_device
*cma_dev
)
2474 struct rdma_id_private
*dev_id_priv
;
2475 struct rdma_cm_id
*id
;
2476 struct net
*net
= id_priv
->id
.route
.addr
.dev_addr
.net
;
2479 if (cma_family(id_priv
) == AF_IB
&& !rdma_cap_ib_cm(cma_dev
->device
, 1))
2482 id
= __rdma_create_id(net
, cma_listen_handler
, id_priv
, id_priv
->id
.ps
,
2483 id_priv
->id
.qp_type
, id_priv
->res
.kern_name
);
2487 dev_id_priv
= container_of(id
, struct rdma_id_private
, id
);
2489 dev_id_priv
->state
= RDMA_CM_ADDR_BOUND
;
2490 memcpy(cma_src_addr(dev_id_priv
), cma_src_addr(id_priv
),
2491 rdma_addr_size(cma_src_addr(id_priv
)));
2493 _cma_attach_to_dev(dev_id_priv
, cma_dev
);
2494 list_add_tail(&dev_id_priv
->listen_list
, &id_priv
->listen_list
);
2495 atomic_inc(&id_priv
->refcount
);
2496 dev_id_priv
->internal_id
= 1;
2497 dev_id_priv
->afonly
= id_priv
->afonly
;
2498 dev_id_priv
->tos_set
= id_priv
->tos_set
;
2499 dev_id_priv
->tos
= id_priv
->tos
;
2501 ret
= rdma_listen(id
, id_priv
->backlog
);
2503 dev_warn(&cma_dev
->device
->dev
,
2504 "RDMA CMA: cma_listen_on_dev, error %d\n", ret
);
2507 static void cma_listen_on_all(struct rdma_id_private
*id_priv
)
2509 struct cma_device
*cma_dev
;
2512 list_add_tail(&id_priv
->list
, &listen_any_list
);
2513 list_for_each_entry(cma_dev
, &dev_list
, list
)
2514 cma_listen_on_dev(id_priv
, cma_dev
);
2515 mutex_unlock(&lock
);
void rdma_set_service_type(struct rdma_cm_id *id, int tos)
{
	struct rdma_id_private *id_priv;

	id_priv = container_of(id, struct rdma_id_private, id);
	id_priv->tos = (u8) tos;
	id_priv->tos_set = true;
}
EXPORT_SYMBOL(rdma_set_service_type);
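/*
 * Usage sketch (illustrative value): the type of service is carried as the IP
 * ToS/DSCP byte for RoCE and mapped to an SL, so a ULP typically sets it once
 * before resolving the route.
 */
static void cma_example_set_tos(struct rdma_cm_id *id)
{
	/* DSCP 26 (AF31) shifted into the upper six bits of the ToS byte */
	rdma_set_service_type(id, 26 << 2);
}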
/**
 * rdma_set_ack_timeout() - Set the ack timeout of QP associated
 *                          with a connection identifier.
 * @id: Communication identifier whose QP ack timeout is being set.
 * @timeout: Ack timeout to set a QP, expressed as 4.096 * 2^(timeout) usec.
 *
 * This function should be called before rdma_connect() on the active side,
 * and before rdma_accept() on the passive side. It applies to the primary
 * path only. The timeout affects the local side of the QP; it is not
 * negotiated with the remote side, and zero disables the timer. If it is
 * set before rdma_resolve_route(), the value is also used to determine the
 * PacketLifeTime for RoCE.
 *
 * Return: 0 for success
 */
int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout)
{
	struct rdma_id_private *id_priv;

	if (id->qp_type != IB_QPT_RC)
		return -EINVAL;

	id_priv = container_of(id, struct rdma_id_private, id);
	id_priv->timeout = timeout;
	id_priv->timeout_set = true;

	return 0;
}
EXPORT_SYMBOL(rdma_set_ack_timeout);
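/*
 * Usage sketch (illustrative value): the timeout argument is an exponent, not
 * microseconds.  For example 4.096 usec * 2^18 is roughly 1.07 seconds, so a
 * ULP wanting a ~1 second local ACK timeout on an RC connection would do the
 * following before calling rdma_connect().
 */
static int cma_example_set_ack_timeout(struct rdma_cm_id *id)
{
	return rdma_set_ack_timeout(id, 18);	/* ~1.07 s */
}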
2558 static void cma_query_handler(int status
, struct sa_path_rec
*path_rec
,
2561 struct cma_work
*work
= context
;
2562 struct rdma_route
*route
;
2564 route
= &work
->id
->id
.route
;
2567 route
->num_paths
= 1;
2568 *route
->path_rec
= *path_rec
;
2570 work
->old_state
= RDMA_CM_ROUTE_QUERY
;
2571 work
->new_state
= RDMA_CM_ADDR_RESOLVED
;
2572 work
->event
.event
= RDMA_CM_EVENT_ROUTE_ERROR
;
2573 work
->event
.status
= status
;
2574 pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n",
2578 queue_work(cma_wq
, &work
->work
);
2581 static int cma_query_ib_route(struct rdma_id_private
*id_priv
,
2582 unsigned long timeout_ms
, struct cma_work
*work
)
2584 struct rdma_dev_addr
*dev_addr
= &id_priv
->id
.route
.addr
.dev_addr
;
2585 struct sa_path_rec path_rec
;
2586 ib_sa_comp_mask comp_mask
;
2587 struct sockaddr_in6
*sin6
;
2588 struct sockaddr_ib
*sib
;
2590 memset(&path_rec
, 0, sizeof path_rec
);
2592 if (rdma_cap_opa_ah(id_priv
->id
.device
, id_priv
->id
.port_num
))
2593 path_rec
.rec_type
= SA_PATH_REC_TYPE_OPA
;
2595 path_rec
.rec_type
= SA_PATH_REC_TYPE_IB
;
2596 rdma_addr_get_sgid(dev_addr
, &path_rec
.sgid
);
2597 rdma_addr_get_dgid(dev_addr
, &path_rec
.dgid
);
2598 path_rec
.pkey
= cpu_to_be16(ib_addr_get_pkey(dev_addr
));
2599 path_rec
.numb_path
= 1;
2600 path_rec
.reversible
= 1;
2601 path_rec
.service_id
= rdma_get_service_id(&id_priv
->id
,
2602 cma_dst_addr(id_priv
));
2604 comp_mask
= IB_SA_PATH_REC_DGID
| IB_SA_PATH_REC_SGID
|
2605 IB_SA_PATH_REC_PKEY
| IB_SA_PATH_REC_NUMB_PATH
|
2606 IB_SA_PATH_REC_REVERSIBLE
| IB_SA_PATH_REC_SERVICE_ID
;
2608 switch (cma_family(id_priv
)) {
2610 path_rec
.qos_class
= cpu_to_be16((u16
) id_priv
->tos
);
2611 comp_mask
|= IB_SA_PATH_REC_QOS_CLASS
;
2614 sin6
= (struct sockaddr_in6
*) cma_src_addr(id_priv
);
2615 path_rec
.traffic_class
= (u8
) (be32_to_cpu(sin6
->sin6_flowinfo
) >> 20);
2616 comp_mask
|= IB_SA_PATH_REC_TRAFFIC_CLASS
;
2619 sib
= (struct sockaddr_ib
*) cma_src_addr(id_priv
);
2620 path_rec
.traffic_class
= (u8
) (be32_to_cpu(sib
->sib_flowinfo
) >> 20);
2621 comp_mask
|= IB_SA_PATH_REC_TRAFFIC_CLASS
;
2625 id_priv
->query_id
= ib_sa_path_rec_get(&sa_client
, id_priv
->id
.device
,
2626 id_priv
->id
.port_num
, &path_rec
,
2627 comp_mask
, timeout_ms
,
2628 GFP_KERNEL
, cma_query_handler
,
2629 work
, &id_priv
->query
);
2631 return (id_priv
->query_id
< 0) ? id_priv
->query_id
: 0;
2634 static void cma_work_handler(struct work_struct
*_work
)
2636 struct cma_work
*work
= container_of(_work
, struct cma_work
, work
);
2637 struct rdma_id_private
*id_priv
= work
->id
;
2640 mutex_lock(&id_priv
->handler_mutex
);
2641 if (!cma_comp_exch(id_priv
, work
->old_state
, work
->new_state
))
2644 if (cma_cm_event_handler(id_priv
, &work
->event
)) {
2645 cma_exch(id_priv
, RDMA_CM_DESTROYING
);
2649 mutex_unlock(&id_priv
->handler_mutex
);
2650 cma_deref_id(id_priv
);
2652 rdma_destroy_id(&id_priv
->id
);
2656 static void cma_ndev_work_handler(struct work_struct
*_work
)
2658 struct cma_ndev_work
*work
= container_of(_work
, struct cma_ndev_work
, work
);
2659 struct rdma_id_private
*id_priv
= work
->id
;
2662 mutex_lock(&id_priv
->handler_mutex
);
2663 if (id_priv
->state
== RDMA_CM_DESTROYING
||
2664 id_priv
->state
== RDMA_CM_DEVICE_REMOVAL
)
2667 if (cma_cm_event_handler(id_priv
, &work
->event
)) {
2668 cma_exch(id_priv
, RDMA_CM_DESTROYING
);
2673 mutex_unlock(&id_priv
->handler_mutex
);
2674 cma_deref_id(id_priv
);
2676 rdma_destroy_id(&id_priv
->id
);
static void cma_init_resolve_route_work(struct cma_work *work,
					struct rdma_id_private *id_priv)
{
	work->id = id_priv;
	INIT_WORK(&work->work, cma_work_handler);
	work->old_state = RDMA_CM_ROUTE_QUERY;
	work->new_state = RDMA_CM_ROUTE_RESOLVED;
	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
}

static void cma_init_resolve_addr_work(struct cma_work *work,
				       struct rdma_id_private *id_priv)
{
	work->id = id_priv;
	INIT_WORK(&work->work, cma_work_handler);
	work->old_state = RDMA_CM_ADDR_QUERY;
	work->new_state = RDMA_CM_ADDR_RESOLVED;
	work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
}
2700 static int cma_resolve_ib_route(struct rdma_id_private
*id_priv
,
2701 unsigned long timeout_ms
)
2703 struct rdma_route
*route
= &id_priv
->id
.route
;
2704 struct cma_work
*work
;
2707 work
= kzalloc(sizeof *work
, GFP_KERNEL
);
2711 cma_init_resolve_route_work(work
, id_priv
);
2713 route
->path_rec
= kmalloc(sizeof *route
->path_rec
, GFP_KERNEL
);
2714 if (!route
->path_rec
) {
2719 ret
= cma_query_ib_route(id_priv
, timeout_ms
, work
);
2725 kfree(route
->path_rec
);
2726 route
->path_rec
= NULL
;
2732 static enum ib_gid_type
cma_route_gid_type(enum rdma_network_type network_type
,
2733 unsigned long supported_gids
,
2734 enum ib_gid_type default_gid
)
2736 if ((network_type
== RDMA_NETWORK_IPV4
||
2737 network_type
== RDMA_NETWORK_IPV6
) &&
2738 test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP
, &supported_gids
))
2739 return IB_GID_TYPE_ROCE_UDP_ENCAP
;
/*
 * cma_iboe_set_path_rec_l2_fields() is a helper that sets the path record
 * type based on the GID type. It also fills in the other L2 fields of the
 * path record, including the destination MAC address and the netdev ifindex.
 * It returns the netdev of the bound interface for this path record entry.
 */
2751 static struct net_device
*
2752 cma_iboe_set_path_rec_l2_fields(struct rdma_id_private
*id_priv
)
2754 struct rdma_route
*route
= &id_priv
->id
.route
;
2755 enum ib_gid_type gid_type
= IB_GID_TYPE_ROCE
;
2756 struct rdma_addr
*addr
= &route
->addr
;
2757 unsigned long supported_gids
;
2758 struct net_device
*ndev
;
2760 if (!addr
->dev_addr
.bound_dev_if
)
2763 ndev
= dev_get_by_index(addr
->dev_addr
.net
,
2764 addr
->dev_addr
.bound_dev_if
);
2768 supported_gids
= roce_gid_type_mask_support(id_priv
->id
.device
,
2769 id_priv
->id
.port_num
);
2770 gid_type
= cma_route_gid_type(addr
->dev_addr
.network
,
2773 /* Use the hint from IP Stack to select GID Type */
2774 if (gid_type
< ib_network_to_gid_type(addr
->dev_addr
.network
))
2775 gid_type
= ib_network_to_gid_type(addr
->dev_addr
.network
);
2776 route
->path_rec
->rec_type
= sa_conv_gid_to_pathrec_type(gid_type
);
2778 route
->path_rec
->roce
.route_resolved
= true;
2779 sa_path_set_dmac(route
->path_rec
, addr
->dev_addr
.dst_dev_addr
);
2783 int rdma_set_ib_path(struct rdma_cm_id
*id
,
2784 struct sa_path_rec
*path_rec
)
2786 struct rdma_id_private
*id_priv
;
2787 struct net_device
*ndev
;
2790 id_priv
= container_of(id
, struct rdma_id_private
, id
);
2791 if (!cma_comp_exch(id_priv
, RDMA_CM_ADDR_RESOLVED
,
2792 RDMA_CM_ROUTE_RESOLVED
))
2795 id
->route
.path_rec
= kmemdup(path_rec
, sizeof(*path_rec
),
2797 if (!id
->route
.path_rec
) {
2802 if (rdma_protocol_roce(id
->device
, id
->port_num
)) {
2803 ndev
= cma_iboe_set_path_rec_l2_fields(id_priv
);
2811 id
->route
.num_paths
= 1;
2815 kfree(id
->route
.path_rec
);
2816 id
->route
.path_rec
= NULL
;
2818 cma_comp_exch(id_priv
, RDMA_CM_ROUTE_RESOLVED
, RDMA_CM_ADDR_RESOLVED
);
2821 EXPORT_SYMBOL(rdma_set_ib_path
);
2823 static int cma_resolve_iw_route(struct rdma_id_private
*id_priv
)
2825 struct cma_work
*work
;
2827 work
= kzalloc(sizeof *work
, GFP_KERNEL
);
2831 cma_init_resolve_route_work(work
, id_priv
);
2832 queue_work(cma_wq
, &work
->work
);
static int get_vlan_ndev_tc(struct net_device *vlan_ndev, int prio)
{
	struct net_device *dev;

	dev = vlan_dev_real_dev(vlan_ndev);
	if (dev->num_tc)
		return netdev_get_prio_tc_map(dev, prio);

	return (vlan_dev_get_egress_qos_mask(vlan_ndev, prio) &
		VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
}

struct iboe_prio_tc_map {
	int input_prio;
	int output_tc;
	bool found;
};

static int get_lower_vlan_dev_tc(struct net_device *dev, void *data)
{
	struct iboe_prio_tc_map *map = data;

	if (is_vlan_dev(dev))
		map->output_tc = get_vlan_ndev_tc(dev, map->input_prio);
	else if (dev->num_tc)
		map->output_tc = netdev_get_prio_tc_map(dev, map->input_prio);
	else
		map->output_tc = 0;
	/* We are interested only in first level VLAN device, so always
	 * return 1 to stop iterating over next level devices.
	 */
	map->found = true;
	return 1;
}

static int iboe_tos_to_sl(struct net_device *ndev, int tos)
{
	struct iboe_prio_tc_map prio_tc_map = {};
	int prio = rt_tos2priority(tos);

	/* If VLAN device, get it directly from the VLAN netdev */
	if (is_vlan_dev(ndev))
		return get_vlan_ndev_tc(ndev, prio);

	prio_tc_map.input_prio = prio;
	rcu_read_lock();
	netdev_walk_all_lower_dev_rcu(ndev,
				      get_lower_vlan_dev_tc,
				      &prio_tc_map);
	rcu_read_unlock();
	/* If map is found from lower device, use it; Otherwise
	 * continue with the current netdevice to get priority to tc map.
	 */
	if (prio_tc_map.found)
		return prio_tc_map.output_tc;
	else if (ndev->num_tc)
		return netdev_get_prio_tc_map(ndev, prio);
	else
		return 0;
}
2897 static int cma_resolve_iboe_route(struct rdma_id_private
*id_priv
)
2899 struct rdma_route
*route
= &id_priv
->id
.route
;
2900 struct rdma_addr
*addr
= &route
->addr
;
2901 struct cma_work
*work
;
2903 struct net_device
*ndev
;
2905 u8 default_roce_tos
= id_priv
->cma_dev
->default_roce_tos
[id_priv
->id
.port_num
-
2906 rdma_start_port(id_priv
->cma_dev
->device
)];
2907 u8 tos
= id_priv
->tos_set
? id_priv
->tos
: default_roce_tos
;
2910 work
= kzalloc(sizeof *work
, GFP_KERNEL
);
2914 route
->path_rec
= kzalloc(sizeof *route
->path_rec
, GFP_KERNEL
);
2915 if (!route
->path_rec
) {
2920 route
->num_paths
= 1;
2922 ndev
= cma_iboe_set_path_rec_l2_fields(id_priv
);
2928 rdma_ip2gid((struct sockaddr
*)&id_priv
->id
.route
.addr
.src_addr
,
2929 &route
->path_rec
->sgid
);
2930 rdma_ip2gid((struct sockaddr
*)&id_priv
->id
.route
.addr
.dst_addr
,
2931 &route
->path_rec
->dgid
);
2933 if (((struct sockaddr
*)&id_priv
->id
.route
.addr
.dst_addr
)->sa_family
!= AF_IB
)
2934 /* TODO: get the hoplimit from the inet/inet6 device */
2935 route
->path_rec
->hop_limit
= addr
->dev_addr
.hoplimit
;
2937 route
->path_rec
->hop_limit
= 1;
2938 route
->path_rec
->reversible
= 1;
2939 route
->path_rec
->pkey
= cpu_to_be16(0xffff);
2940 route
->path_rec
->mtu_selector
= IB_SA_EQ
;
2941 route
->path_rec
->sl
= iboe_tos_to_sl(ndev
, tos
);
2942 route
->path_rec
->traffic_class
= tos
;
2943 route
->path_rec
->mtu
= iboe_get_mtu(ndev
->mtu
);
2944 route
->path_rec
->rate_selector
= IB_SA_EQ
;
2945 route
->path_rec
->rate
= iboe_get_rate(ndev
);
	route->path_rec->packet_life_time_selector = IB_SA_EQ;

	/* In case ACK timeout is set, use this value to calculate
	 * PacketLifeTime.  As per IBTA 12.7.34,
	 * local ACK timeout = (2 * PacketLifeTime + Local CA’s ACK delay).
	 * Assuming a negligible local ACK delay, we can use
	 * PacketLifeTime = local ACK timeout/2
	 * as a reasonable approximation for RoCE networks.
	 */
	route->path_rec->packet_life_time = id_priv->timeout_set ?
		id_priv->timeout - 1 : CMA_IBOE_PACKET_LIFETIME;
2958 if (!route
->path_rec
->mtu
) {
2963 cma_init_resolve_route_work(work
, id_priv
);
2964 queue_work(cma_wq
, &work
->work
);
2969 kfree(route
->path_rec
);
2970 route
->path_rec
= NULL
;
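/*
 * Illustrative helper only (cma_example_* is not part of this file): both the
 * ack timeout and the PacketLifeTime above are 5-bit exponents where the
 * encoded value is 4.096 usec * 2^exp.  Subtracting one from the exponent
 * therefore halves the time, which is how the approximation
 * "PacketLifeTime = local ACK timeout / 2" is implemented.
 */
static u64 cma_example_exp_to_ns(u8 exp)
{
	return 4096ULL << exp;	/* 4.096 usec expressed in nanoseconds */
}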
2976 int rdma_resolve_route(struct rdma_cm_id
*id
, unsigned long timeout_ms
)
2978 struct rdma_id_private
*id_priv
;
2981 id_priv
= container_of(id
, struct rdma_id_private
, id
);
2982 if (!cma_comp_exch(id_priv
, RDMA_CM_ADDR_RESOLVED
, RDMA_CM_ROUTE_QUERY
))
2985 atomic_inc(&id_priv
->refcount
);
2986 if (rdma_cap_ib_sa(id
->device
, id
->port_num
))
2987 ret
= cma_resolve_ib_route(id_priv
, timeout_ms
);
2988 else if (rdma_protocol_roce(id
->device
, id
->port_num
))
2989 ret
= cma_resolve_iboe_route(id_priv
);
2990 else if (rdma_protocol_iwarp(id
->device
, id
->port_num
))
2991 ret
= cma_resolve_iw_route(id_priv
);
3000 cma_comp_exch(id_priv
, RDMA_CM_ROUTE_QUERY
, RDMA_CM_ADDR_RESOLVED
);
3001 cma_deref_id(id_priv
);
3004 EXPORT_SYMBOL(rdma_resolve_route
);
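/*
 * Usage sketch (hypothetical ULP code, illustrative timeout): route
 * resolution is asynchronous; the call below only starts the query, and the
 * result is reported later as an RDMA_CM_EVENT_ROUTE_RESOLVED or
 * RDMA_CM_EVENT_ROUTE_ERROR event on the cm_id's event handler.
 */
static int cma_example_resolve_route(struct rdma_cm_id *id)
{
	return rdma_resolve_route(id, 2000 /* ms */);
}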
3006 static void cma_set_loopback(struct sockaddr
*addr
)
3008 switch (addr
->sa_family
) {
3010 ((struct sockaddr_in
*) addr
)->sin_addr
.s_addr
= htonl(INADDR_LOOPBACK
);
3013 ipv6_addr_set(&((struct sockaddr_in6
*) addr
)->sin6_addr
,
3017 ib_addr_set(&((struct sockaddr_ib
*) addr
)->sib_addr
,
3023 static int cma_bind_loopback(struct rdma_id_private
*id_priv
)
3025 struct cma_device
*cma_dev
, *cur_dev
;
3027 enum ib_port_state port_state
;
3034 list_for_each_entry(cur_dev
, &dev_list
, list
) {
3035 if (cma_family(id_priv
) == AF_IB
&&
3036 !rdma_cap_ib_cm(cur_dev
->device
, 1))
3042 for (p
= 1; p
<= cur_dev
->device
->phys_port_cnt
; ++p
) {
3043 if (!ib_get_cached_port_state(cur_dev
->device
, p
, &port_state
) &&
3044 port_state
== IB_PORT_ACTIVE
) {
3059 ret
= rdma_query_gid(cma_dev
->device
, p
, 0, &gid
);
3063 ret
= ib_get_cached_pkey(cma_dev
->device
, p
, 0, &pkey
);
3067 id_priv
->id
.route
.addr
.dev_addr
.dev_type
=
3068 (rdma_protocol_ib(cma_dev
->device
, p
)) ?
3069 ARPHRD_INFINIBAND
: ARPHRD_ETHER
;
3071 rdma_addr_set_sgid(&id_priv
->id
.route
.addr
.dev_addr
, &gid
);
3072 ib_addr_set_pkey(&id_priv
->id
.route
.addr
.dev_addr
, pkey
);
3073 id_priv
->id
.port_num
= p
;
3074 cma_attach_to_dev(id_priv
, cma_dev
);
3075 cma_set_loopback(cma_src_addr(id_priv
));
3077 mutex_unlock(&lock
);
3081 static void addr_handler(int status
, struct sockaddr
*src_addr
,
3082 struct rdma_dev_addr
*dev_addr
, void *context
)
3084 struct rdma_id_private
*id_priv
= context
;
3085 struct rdma_cm_event event
= {};
3086 struct sockaddr
*addr
;
3087 struct sockaddr_storage old_addr
;
3089 mutex_lock(&id_priv
->handler_mutex
);
3090 if (!cma_comp_exch(id_priv
, RDMA_CM_ADDR_QUERY
,
3091 RDMA_CM_ADDR_RESOLVED
))
	/*
	 * Store the previous src address, so that if we fail to acquire a
	 * matching rdma device, the old address can be restored, which helps
	 * to cancel the cma listen operation correctly.
	 */
3099 addr
= cma_src_addr(id_priv
);
3100 memcpy(&old_addr
, addr
, rdma_addr_size(addr
));
3101 memcpy(addr
, src_addr
, rdma_addr_size(src_addr
));
3102 if (!status
&& !id_priv
->cma_dev
) {
3103 status
= cma_acquire_dev_by_src_ip(id_priv
);
3105 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n",
3107 } else if (status
) {
3108 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status
);
3112 memcpy(addr
, &old_addr
,
3113 rdma_addr_size((struct sockaddr
*)&old_addr
));
3114 if (!cma_comp_exch(id_priv
, RDMA_CM_ADDR_RESOLVED
,
3115 RDMA_CM_ADDR_BOUND
))
3117 event
.event
= RDMA_CM_EVENT_ADDR_ERROR
;
3118 event
.status
= status
;
3120 event
.event
= RDMA_CM_EVENT_ADDR_RESOLVED
;
3122 if (cma_cm_event_handler(id_priv
, &event
)) {
3123 cma_exch(id_priv
, RDMA_CM_DESTROYING
);
3124 mutex_unlock(&id_priv
->handler_mutex
);
3125 rdma_destroy_id(&id_priv
->id
);
3129 mutex_unlock(&id_priv
->handler_mutex
);
3132 static int cma_resolve_loopback(struct rdma_id_private
*id_priv
)
3134 struct cma_work
*work
;
3138 work
= kzalloc(sizeof *work
, GFP_KERNEL
);
3142 if (!id_priv
->cma_dev
) {
3143 ret
= cma_bind_loopback(id_priv
);
3148 rdma_addr_get_sgid(&id_priv
->id
.route
.addr
.dev_addr
, &gid
);
3149 rdma_addr_set_dgid(&id_priv
->id
.route
.addr
.dev_addr
, &gid
);
3151 atomic_inc(&id_priv
->refcount
);
3152 cma_init_resolve_addr_work(work
, id_priv
);
3153 queue_work(cma_wq
, &work
->work
);
3160 static int cma_resolve_ib_addr(struct rdma_id_private
*id_priv
)
3162 struct cma_work
*work
;
3165 work
= kzalloc(sizeof *work
, GFP_KERNEL
);
3169 if (!id_priv
->cma_dev
) {
3170 ret
= cma_resolve_ib_dev(id_priv
);
3175 rdma_addr_set_dgid(&id_priv
->id
.route
.addr
.dev_addr
, (union ib_gid
*)
3176 &(((struct sockaddr_ib
*) &id_priv
->id
.route
.addr
.dst_addr
)->sib_addr
));
3178 atomic_inc(&id_priv
->refcount
);
3179 cma_init_resolve_addr_work(work
, id_priv
);
3180 queue_work(cma_wq
, &work
->work
);
3187 static int cma_bind_addr(struct rdma_cm_id
*id
, struct sockaddr
*src_addr
,
3188 const struct sockaddr
*dst_addr
)
3190 if (!src_addr
|| !src_addr
->sa_family
) {
3191 src_addr
= (struct sockaddr
*) &id
->route
.addr
.src_addr
;
3192 src_addr
->sa_family
= dst_addr
->sa_family
;
3193 if (IS_ENABLED(CONFIG_IPV6
) &&
3194 dst_addr
->sa_family
== AF_INET6
) {
3195 struct sockaddr_in6
*src_addr6
= (struct sockaddr_in6
*) src_addr
;
3196 struct sockaddr_in6
*dst_addr6
= (struct sockaddr_in6
*) dst_addr
;
3197 src_addr6
->sin6_scope_id
= dst_addr6
->sin6_scope_id
;
3198 if (ipv6_addr_type(&dst_addr6
->sin6_addr
) & IPV6_ADDR_LINKLOCAL
)
3199 id
->route
.addr
.dev_addr
.bound_dev_if
= dst_addr6
->sin6_scope_id
;
3200 } else if (dst_addr
->sa_family
== AF_IB
) {
3201 ((struct sockaddr_ib
*) src_addr
)->sib_pkey
=
3202 ((struct sockaddr_ib
*) dst_addr
)->sib_pkey
;
3205 return rdma_bind_addr(id
, src_addr
);
3208 int rdma_resolve_addr(struct rdma_cm_id
*id
, struct sockaddr
*src_addr
,
3209 const struct sockaddr
*dst_addr
, unsigned long timeout_ms
)
3211 struct rdma_id_private
*id_priv
;
3214 id_priv
= container_of(id
, struct rdma_id_private
, id
);
3215 if (id_priv
->state
== RDMA_CM_IDLE
) {
3216 ret
= cma_bind_addr(id
, src_addr
, dst_addr
);
3221 if (cma_family(id_priv
) != dst_addr
->sa_family
)
3224 if (!cma_comp_exch(id_priv
, RDMA_CM_ADDR_BOUND
, RDMA_CM_ADDR_QUERY
))
3227 memcpy(cma_dst_addr(id_priv
), dst_addr
, rdma_addr_size(dst_addr
));
3228 if (cma_any_addr(dst_addr
)) {
3229 ret
= cma_resolve_loopback(id_priv
);
3231 if (dst_addr
->sa_family
== AF_IB
) {
3232 ret
= cma_resolve_ib_addr(id_priv
);
3234 ret
= rdma_resolve_ip(cma_src_addr(id_priv
), dst_addr
,
3235 &id
->route
.addr
.dev_addr
,
3236 timeout_ms
, addr_handler
,
3245 cma_comp_exch(id_priv
, RDMA_CM_ADDR_QUERY
, RDMA_CM_ADDR_BOUND
);
3248 EXPORT_SYMBOL(rdma_resolve_addr
);
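/*
 * Usage sketch (illustrative destination, hypothetical helper): an
 * active-side ULP typically lets the CM pick the source address by passing a
 * NULL src_addr, then waits for RDMA_CM_EVENT_ADDR_RESOLVED before resolving
 * the route and connecting.
 */
static int cma_example_resolve_addr(struct rdma_cm_id *id)
{
	struct sockaddr_in dst = {
		.sin_family = AF_INET,
		.sin_port = htons(5000),
		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
	};

	return rdma_resolve_addr(id, NULL, (struct sockaddr *)&dst,
				 2000 /* ms */);
}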
3250 int rdma_set_reuseaddr(struct rdma_cm_id
*id
, int reuse
)
3252 struct rdma_id_private
*id_priv
;
3253 unsigned long flags
;
3256 id_priv
= container_of(id
, struct rdma_id_private
, id
);
3257 spin_lock_irqsave(&id_priv
->lock
, flags
);
3258 if (reuse
|| id_priv
->state
== RDMA_CM_IDLE
) {
3259 id_priv
->reuseaddr
= reuse
;
3264 spin_unlock_irqrestore(&id_priv
->lock
, flags
);
3267 EXPORT_SYMBOL(rdma_set_reuseaddr
);
3269 int rdma_set_afonly(struct rdma_cm_id
*id
, int afonly
)
3271 struct rdma_id_private
*id_priv
;
3272 unsigned long flags
;
3275 id_priv
= container_of(id
, struct rdma_id_private
, id
);
3276 spin_lock_irqsave(&id_priv
->lock
, flags
);
3277 if (id_priv
->state
== RDMA_CM_IDLE
|| id_priv
->state
== RDMA_CM_ADDR_BOUND
) {
3278 id_priv
->options
|= (1 << CMA_OPTION_AFONLY
);
3279 id_priv
->afonly
= afonly
;
3284 spin_unlock_irqrestore(&id_priv
->lock
, flags
);
3287 EXPORT_SYMBOL(rdma_set_afonly
);
3289 static void cma_bind_port(struct rdma_bind_list
*bind_list
,
3290 struct rdma_id_private
*id_priv
)
3292 struct sockaddr
*addr
;
3293 struct sockaddr_ib
*sib
;
3297 addr
= cma_src_addr(id_priv
);
3298 port
= htons(bind_list
->port
);
3300 switch (addr
->sa_family
) {
3302 ((struct sockaddr_in
*) addr
)->sin_port
= port
;
3305 ((struct sockaddr_in6
*) addr
)->sin6_port
= port
;
3308 sib
= (struct sockaddr_ib
*) addr
;
3309 sid
= be64_to_cpu(sib
->sib_sid
);
3310 mask
= be64_to_cpu(sib
->sib_sid_mask
);
3311 sib
->sib_sid
= cpu_to_be64((sid
& mask
) | (u64
) ntohs(port
));
3312 sib
->sib_sid_mask
= cpu_to_be64(~0ULL);
3315 id_priv
->bind_list
= bind_list
;
3316 hlist_add_head(&id_priv
->node
, &bind_list
->owners
);
3319 static int cma_alloc_port(enum rdma_ucm_port_space ps
,
3320 struct rdma_id_private
*id_priv
, unsigned short snum
)
3322 struct rdma_bind_list
*bind_list
;
3325 bind_list
= kzalloc(sizeof *bind_list
, GFP_KERNEL
);
3329 ret
= cma_ps_alloc(id_priv
->id
.route
.addr
.dev_addr
.net
, ps
, bind_list
,
3335 bind_list
->port
= snum
;
3336 cma_bind_port(bind_list
, id_priv
);
3340 return ret
== -ENOSPC
? -EADDRNOTAVAIL
: ret
;
static int cma_port_is_unique(struct rdma_bind_list *bind_list,
			      struct rdma_id_private *id_priv)
{
	struct rdma_id_private *cur_id;
	struct sockaddr *daddr = cma_dst_addr(id_priv);
	struct sockaddr *saddr = cma_src_addr(id_priv);
	__be16 dport = cma_port(daddr);

	hlist_for_each_entry(cur_id, &bind_list->owners, node) {
		struct sockaddr *cur_daddr = cma_dst_addr(cur_id);
		struct sockaddr *cur_saddr = cma_src_addr(cur_id);
		__be16 cur_dport = cma_port(cur_daddr);

		if (id_priv == cur_id)
			continue;

		/* different dest port -> unique */
		if (!cma_any_port(daddr) &&
		    !cma_any_port(cur_daddr) &&
		    (dport != cur_dport))
			continue;

		/* different src address -> unique */
		if (!cma_any_addr(saddr) &&
		    !cma_any_addr(cur_saddr) &&
		    cma_addr_cmp(saddr, cur_saddr))
			continue;

		/* different dst address -> unique */
		if (!cma_any_addr(daddr) &&
		    !cma_any_addr(cur_daddr) &&
		    cma_addr_cmp(daddr, cur_daddr))
			continue;

		return -EADDRNOTAVAIL;
	}
	return 0;
}
3382 static int cma_alloc_any_port(enum rdma_ucm_port_space ps
,
3383 struct rdma_id_private
*id_priv
)
3385 static unsigned int last_used_port
;
3386 int low
, high
, remaining
;
3388 struct net
*net
= id_priv
->id
.route
.addr
.dev_addr
.net
;
3390 inet_get_local_port_range(net
, &low
, &high
);
3391 remaining
= (high
- low
) + 1;
3392 rover
= prandom_u32() % remaining
+ low
;
3394 if (last_used_port
!= rover
) {
3395 struct rdma_bind_list
*bind_list
;
3398 bind_list
= cma_ps_find(net
, ps
, (unsigned short)rover
);
3401 ret
= cma_alloc_port(ps
, id_priv
, rover
);
3403 ret
= cma_port_is_unique(bind_list
, id_priv
);
3405 cma_bind_port(bind_list
, id_priv
);
		/*
		 * Remember previously used port number in order to avoid
		 * re-using same port immediately after it is closed.
		 */
		if (!ret)
			last_used_port = rover;
3413 if (ret
!= -EADDRNOTAVAIL
)
3418 if ((rover
< low
) || (rover
> high
))
3422 return -EADDRNOTAVAIL
;
/*
 * Check that the requested port is available. This is called when trying to
 * bind to a specific port, or when trying to listen on a bound port. In
 * the latter case, the provided id_priv may already be on the bind_list, but
 * we still need to check that it's okay to start listening.
 */
3431 static int cma_check_port(struct rdma_bind_list
*bind_list
,
3432 struct rdma_id_private
*id_priv
, uint8_t reuseaddr
)
3434 struct rdma_id_private
*cur_id
;
3435 struct sockaddr
*addr
, *cur_addr
;
3437 addr
= cma_src_addr(id_priv
);
3438 hlist_for_each_entry(cur_id
, &bind_list
->owners
, node
) {
3439 if (id_priv
== cur_id
)
3442 if ((cur_id
->state
!= RDMA_CM_LISTEN
) && reuseaddr
&&
3446 cur_addr
= cma_src_addr(cur_id
);
3447 if (id_priv
->afonly
&& cur_id
->afonly
&&
3448 (addr
->sa_family
!= cur_addr
->sa_family
))
3451 if (cma_any_addr(addr
) || cma_any_addr(cur_addr
))
3452 return -EADDRNOTAVAIL
;
3454 if (!cma_addr_cmp(addr
, cur_addr
))
3460 static int cma_use_port(enum rdma_ucm_port_space ps
,
3461 struct rdma_id_private
*id_priv
)
3463 struct rdma_bind_list
*bind_list
;
3464 unsigned short snum
;
3467 snum
= ntohs(cma_port(cma_src_addr(id_priv
)));
3468 if (snum
< PROT_SOCK
&& !capable(CAP_NET_BIND_SERVICE
))
3471 bind_list
= cma_ps_find(id_priv
->id
.route
.addr
.dev_addr
.net
, ps
, snum
);
3473 ret
= cma_alloc_port(ps
, id_priv
, snum
);
3475 ret
= cma_check_port(bind_list
, id_priv
, id_priv
->reuseaddr
);
3477 cma_bind_port(bind_list
, id_priv
);
3482 static int cma_bind_listen(struct rdma_id_private
*id_priv
)
3484 struct rdma_bind_list
*bind_list
= id_priv
->bind_list
;
3488 if (bind_list
->owners
.first
->next
)
3489 ret
= cma_check_port(bind_list
, id_priv
, 0);
3490 mutex_unlock(&lock
);
3494 static enum rdma_ucm_port_space
3495 cma_select_inet_ps(struct rdma_id_private
*id_priv
)
3497 switch (id_priv
->id
.ps
) {
3502 return id_priv
->id
.ps
;
3509 static enum rdma_ucm_port_space
3510 cma_select_ib_ps(struct rdma_id_private
*id_priv
)
3512 enum rdma_ucm_port_space ps
= 0;
3513 struct sockaddr_ib
*sib
;
3514 u64 sid_ps
, mask
, sid
;
3516 sib
= (struct sockaddr_ib
*) cma_src_addr(id_priv
);
3517 mask
= be64_to_cpu(sib
->sib_sid_mask
) & RDMA_IB_IP_PS_MASK
;
3518 sid
= be64_to_cpu(sib
->sib_sid
) & mask
;
3520 if ((id_priv
->id
.ps
== RDMA_PS_IB
) && (sid
== (RDMA_IB_IP_PS_IB
& mask
))) {
3521 sid_ps
= RDMA_IB_IP_PS_IB
;
3523 } else if (((id_priv
->id
.ps
== RDMA_PS_IB
) || (id_priv
->id
.ps
== RDMA_PS_TCP
)) &&
3524 (sid
== (RDMA_IB_IP_PS_TCP
& mask
))) {
3525 sid_ps
= RDMA_IB_IP_PS_TCP
;
3527 } else if (((id_priv
->id
.ps
== RDMA_PS_IB
) || (id_priv
->id
.ps
== RDMA_PS_UDP
)) &&
3528 (sid
== (RDMA_IB_IP_PS_UDP
& mask
))) {
3529 sid_ps
= RDMA_IB_IP_PS_UDP
;
3534 sib
->sib_sid
= cpu_to_be64(sid_ps
| ntohs(cma_port((struct sockaddr
*) sib
)));
3535 sib
->sib_sid_mask
= cpu_to_be64(RDMA_IB_IP_PS_MASK
|
3536 be64_to_cpu(sib
->sib_sid_mask
));
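/*
 * Illustrative sketch (hypothetical helper): for AF_IB addresses the port
 * space and port live inside the 64-bit service ID, which is what
 * cma_select_ib_ps() above decodes.  This is roughly how a caller would
 * request the TCP port space with an explicit port in a struct sockaddr_ib.
 */
static void cma_example_fill_ib_sid(struct sockaddr_ib *sib, u16 port)
{
	sib->sib_sid = cpu_to_be64(RDMA_IB_IP_PS_TCP | port);
	sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK | 0xffffULL);
}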
3541 static int cma_get_port(struct rdma_id_private
*id_priv
)
3543 enum rdma_ucm_port_space ps
;
3546 if (cma_family(id_priv
) != AF_IB
)
3547 ps
= cma_select_inet_ps(id_priv
);
3549 ps
= cma_select_ib_ps(id_priv
);
3551 return -EPROTONOSUPPORT
;
3554 if (cma_any_port(cma_src_addr(id_priv
)))
3555 ret
= cma_alloc_any_port(ps
, id_priv
);
3557 ret
= cma_use_port(ps
, id_priv
);
3558 mutex_unlock(&lock
);
3563 static int cma_check_linklocal(struct rdma_dev_addr
*dev_addr
,
3564 struct sockaddr
*addr
)
3566 #if IS_ENABLED(CONFIG_IPV6)
3567 struct sockaddr_in6
*sin6
;
3569 if (addr
->sa_family
!= AF_INET6
)
3572 sin6
= (struct sockaddr_in6
*) addr
;
3574 if (!(ipv6_addr_type(&sin6
->sin6_addr
) & IPV6_ADDR_LINKLOCAL
))
3577 if (!sin6
->sin6_scope_id
)
3580 dev_addr
->bound_dev_if
= sin6
->sin6_scope_id
;
3585 int rdma_listen(struct rdma_cm_id
*id
, int backlog
)
3587 struct rdma_id_private
*id_priv
;
3590 id_priv
= container_of(id
, struct rdma_id_private
, id
);
3591 if (id_priv
->state
== RDMA_CM_IDLE
) {
3592 id
->route
.addr
.src_addr
.ss_family
= AF_INET
;
3593 ret
= rdma_bind_addr(id
, cma_src_addr(id_priv
));
3598 if (!cma_comp_exch(id_priv
, RDMA_CM_ADDR_BOUND
, RDMA_CM_LISTEN
))
3601 if (id_priv
->reuseaddr
) {
3602 ret
= cma_bind_listen(id_priv
);
3607 id_priv
->backlog
= backlog
;
3609 if (rdma_cap_ib_cm(id
->device
, 1)) {
3610 ret
= cma_ib_listen(id_priv
);
3613 } else if (rdma_cap_iw_cm(id
->device
, 1)) {
3614 ret
= cma_iw_listen(id_priv
, backlog
);
3622 cma_listen_on_all(id_priv
);
3626 id_priv
->backlog
= 0;
3627 cma_comp_exch(id_priv
, RDMA_CM_LISTEN
, RDMA_CM_ADDR_BOUND
);
3630 EXPORT_SYMBOL(rdma_listen
);
3632 int rdma_bind_addr(struct rdma_cm_id
*id
, struct sockaddr
*addr
)
3634 struct rdma_id_private
*id_priv
;
3636 struct sockaddr
*daddr
;
3638 if (addr
->sa_family
!= AF_INET
&& addr
->sa_family
!= AF_INET6
&&
3639 addr
->sa_family
!= AF_IB
)
3640 return -EAFNOSUPPORT
;
3642 id_priv
= container_of(id
, struct rdma_id_private
, id
);
3643 if (!cma_comp_exch(id_priv
, RDMA_CM_IDLE
, RDMA_CM_ADDR_BOUND
))
3646 ret
= cma_check_linklocal(&id
->route
.addr
.dev_addr
, addr
);
3650 memcpy(cma_src_addr(id_priv
), addr
, rdma_addr_size(addr
));
3651 if (!cma_any_addr(addr
)) {
3652 ret
= cma_translate_addr(addr
, &id
->route
.addr
.dev_addr
);
3656 ret
= cma_acquire_dev_by_src_ip(id_priv
);
3661 if (!(id_priv
->options
& (1 << CMA_OPTION_AFONLY
))) {
3662 if (addr
->sa_family
== AF_INET
)
3663 id_priv
->afonly
= 1;
3664 #if IS_ENABLED(CONFIG_IPV6)
3665 else if (addr
->sa_family
== AF_INET6
) {
3666 struct net
*net
= id_priv
->id
.route
.addr
.dev_addr
.net
;
3668 id_priv
->afonly
= net
->ipv6
.sysctl
.bindv6only
;
3672 daddr
= cma_dst_addr(id_priv
);
3673 daddr
->sa_family
= addr
->sa_family
;
3675 ret
= cma_get_port(id_priv
);
3681 rdma_restrack_del(&id_priv
->res
);
3682 if (id_priv
->cma_dev
)
3683 cma_release_dev(id_priv
);
3685 cma_comp_exch(id_priv
, RDMA_CM_ADDR_BOUND
, RDMA_CM_IDLE
);
3688 EXPORT_SYMBOL(rdma_bind_addr
);
3690 static int cma_format_hdr(void *hdr
, struct rdma_id_private
*id_priv
)
3692 struct cma_hdr
*cma_hdr
;
3695 cma_hdr
->cma_version
= CMA_VERSION
;
3696 if (cma_family(id_priv
) == AF_INET
) {
3697 struct sockaddr_in
*src4
, *dst4
;
3699 src4
= (struct sockaddr_in
*) cma_src_addr(id_priv
);
3700 dst4
= (struct sockaddr_in
*) cma_dst_addr(id_priv
);
3702 cma_set_ip_ver(cma_hdr
, 4);
3703 cma_hdr
->src_addr
.ip4
.addr
= src4
->sin_addr
.s_addr
;
3704 cma_hdr
->dst_addr
.ip4
.addr
= dst4
->sin_addr
.s_addr
;
3705 cma_hdr
->port
= src4
->sin_port
;
3706 } else if (cma_family(id_priv
) == AF_INET6
) {
3707 struct sockaddr_in6
*src6
, *dst6
;
3709 src6
= (struct sockaddr_in6
*) cma_src_addr(id_priv
);
3710 dst6
= (struct sockaddr_in6
*) cma_dst_addr(id_priv
);
3712 cma_set_ip_ver(cma_hdr
, 6);
3713 cma_hdr
->src_addr
.ip6
= src6
->sin6_addr
;
3714 cma_hdr
->dst_addr
.ip6
= dst6
->sin6_addr
;
3715 cma_hdr
->port
= src6
->sin6_port
;
3720 static int cma_sidr_rep_handler(struct ib_cm_id
*cm_id
,
3721 const struct ib_cm_event
*ib_event
)
3723 struct rdma_id_private
*id_priv
= cm_id
->context
;
3724 struct rdma_cm_event event
= {};
3725 const struct ib_cm_sidr_rep_event_param
*rep
=
3726 &ib_event
->param
.sidr_rep_rcvd
;
3729 mutex_lock(&id_priv
->handler_mutex
);
3730 if (id_priv
->state
!= RDMA_CM_CONNECT
)
3733 switch (ib_event
->event
) {
3734 case IB_CM_SIDR_REQ_ERROR
:
3735 event
.event
= RDMA_CM_EVENT_UNREACHABLE
;
3736 event
.status
= -ETIMEDOUT
;
3738 case IB_CM_SIDR_REP_RECEIVED
:
3739 event
.param
.ud
.private_data
= ib_event
->private_data
;
3740 event
.param
.ud
.private_data_len
= IB_CM_SIDR_REP_PRIVATE_DATA_SIZE
;
3741 if (rep
->status
!= IB_SIDR_SUCCESS
) {
3742 event
.event
= RDMA_CM_EVENT_UNREACHABLE
;
3743 event
.status
= ib_event
->param
.sidr_rep_rcvd
.status
;
3744 pr_debug_ratelimited("RDMA CM: UNREACHABLE: bad SIDR reply. status %d\n",
3748 ret
= cma_set_qkey(id_priv
, rep
->qkey
);
3750 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to set qkey. status %d\n", ret
);
3751 event
.event
= RDMA_CM_EVENT_ADDR_ERROR
;
3755 ib_init_ah_attr_from_path(id_priv
->id
.device
,
3756 id_priv
->id
.port_num
,
3757 id_priv
->id
.route
.path_rec
,
3758 &event
.param
.ud
.ah_attr
,
3760 event
.param
.ud
.qp_num
= rep
->qpn
;
3761 event
.param
.ud
.qkey
= rep
->qkey
;
3762 event
.event
= RDMA_CM_EVENT_ESTABLISHED
;
3766 pr_err("RDMA CMA: unexpected IB CM event: %d\n",
3771 ret
= cma_cm_event_handler(id_priv
, &event
);
3773 rdma_destroy_ah_attr(&event
.param
.ud
.ah_attr
);
3775 /* Destroy the CM ID by returning a non-zero value. */
3776 id_priv
->cm_id
.ib
= NULL
;
3777 cma_exch(id_priv
, RDMA_CM_DESTROYING
);
3778 mutex_unlock(&id_priv
->handler_mutex
);
3779 rdma_destroy_id(&id_priv
->id
);
3783 mutex_unlock(&id_priv
->handler_mutex
);
3787 static int cma_resolve_ib_udp(struct rdma_id_private
*id_priv
,
3788 struct rdma_conn_param
*conn_param
)
3790 struct ib_cm_sidr_req_param req
;
3791 struct ib_cm_id
*id
;
3796 memset(&req
, 0, sizeof req
);
3797 offset
= cma_user_data_offset(id_priv
);
3798 req
.private_data_len
= offset
+ conn_param
->private_data_len
;
3799 if (req
.private_data_len
< conn_param
->private_data_len
)
3802 if (req
.private_data_len
) {
3803 private_data
= kzalloc(req
.private_data_len
, GFP_ATOMIC
);
3807 private_data
= NULL
;
3810 if (conn_param
->private_data
&& conn_param
->private_data_len
)
3811 memcpy(private_data
+ offset
, conn_param
->private_data
,
3812 conn_param
->private_data_len
);
3815 ret
= cma_format_hdr(private_data
, id_priv
);
3818 req
.private_data
= private_data
;
3821 id
= ib_create_cm_id(id_priv
->id
.device
, cma_sidr_rep_handler
,
3827 id_priv
->cm_id
.ib
= id
;
3829 req
.path
= id_priv
->id
.route
.path_rec
;
3830 req
.sgid_attr
= id_priv
->id
.route
.addr
.dev_addr
.sgid_attr
;
3831 req
.service_id
= rdma_get_service_id(&id_priv
->id
, cma_dst_addr(id_priv
));
3832 req
.timeout_ms
= 1 << (CMA_CM_RESPONSE_TIMEOUT
- 8);
3833 req
.max_cm_retries
= CMA_MAX_CM_RETRIES
;
3835 trace_cm_send_sidr_req(id_priv
);
3836 ret
= ib_send_cm_sidr_req(id_priv
->cm_id
.ib
, &req
);
3838 ib_destroy_cm_id(id_priv
->cm_id
.ib
);
3839 id_priv
->cm_id
.ib
= NULL
;
3842 kfree(private_data
);
3846 static int cma_connect_ib(struct rdma_id_private
*id_priv
,
3847 struct rdma_conn_param
*conn_param
)
3849 struct ib_cm_req_param req
;
3850 struct rdma_route
*route
;
3852 struct ib_cm_id
*id
;
3856 memset(&req
, 0, sizeof req
);
3857 offset
= cma_user_data_offset(id_priv
);
3858 req
.private_data_len
= offset
+ conn_param
->private_data_len
;
3859 if (req
.private_data_len
< conn_param
->private_data_len
)
3862 if (req
.private_data_len
) {
3863 private_data
= kzalloc(req
.private_data_len
, GFP_ATOMIC
);
3867 private_data
= NULL
;
3870 if (conn_param
->private_data
&& conn_param
->private_data_len
)
3871 memcpy(private_data
+ offset
, conn_param
->private_data
,
3872 conn_param
->private_data_len
);
3874 id
= ib_create_cm_id(id_priv
->id
.device
, cma_ib_handler
, id_priv
);
3879 id_priv
->cm_id
.ib
= id
;
3881 route
= &id_priv
->id
.route
;
3883 ret
= cma_format_hdr(private_data
, id_priv
);
3886 req
.private_data
= private_data
;
3889 req
.primary_path
= &route
->path_rec
[0];
3890 if (route
->num_paths
== 2)
3891 req
.alternate_path
= &route
->path_rec
[1];
3893 req
.ppath_sgid_attr
= id_priv
->id
.route
.addr
.dev_addr
.sgid_attr
;
3894 /* Alternate path SGID attribute currently unsupported */
3895 req
.service_id
= rdma_get_service_id(&id_priv
->id
, cma_dst_addr(id_priv
));
3896 req
.qp_num
= id_priv
->qp_num
;
3897 req
.qp_type
= id_priv
->id
.qp_type
;
3898 req
.starting_psn
= id_priv
->seq_num
;
3899 req
.responder_resources
= conn_param
->responder_resources
;
3900 req
.initiator_depth
= conn_param
->initiator_depth
;
3901 req
.flow_control
= conn_param
->flow_control
;
3902 req
.retry_count
= min_t(u8
, 7, conn_param
->retry_count
);
3903 req
.rnr_retry_count
= min_t(u8
, 7, conn_param
->rnr_retry_count
);
3904 req
.remote_cm_response_timeout
= CMA_CM_RESPONSE_TIMEOUT
;
3905 req
.local_cm_response_timeout
= CMA_CM_RESPONSE_TIMEOUT
;
3906 req
.max_cm_retries
= CMA_MAX_CM_RETRIES
;
3907 req
.srq
= id_priv
->srq
? 1 : 0;
3909 trace_cm_send_req(id_priv
);
3910 ret
= ib_send_cm_req(id_priv
->cm_id
.ib
, &req
);
3912 if (ret
&& !IS_ERR(id
)) {
3913 ib_destroy_cm_id(id
);
3914 id_priv
->cm_id
.ib
= NULL
;
3917 kfree(private_data
);
3921 static int cma_connect_iw(struct rdma_id_private
*id_priv
,
3922 struct rdma_conn_param
*conn_param
)
3924 struct iw_cm_id
*cm_id
;
3926 struct iw_cm_conn_param iw_param
;
3928 cm_id
= iw_create_cm_id(id_priv
->id
.device
, cma_iw_handler
, id_priv
);
3930 return PTR_ERR(cm_id
);
3932 cm_id
->tos
= id_priv
->tos
;
3933 cm_id
->tos_set
= id_priv
->tos_set
;
3934 id_priv
->cm_id
.iw
= cm_id
;
3936 memcpy(&cm_id
->local_addr
, cma_src_addr(id_priv
),
3937 rdma_addr_size(cma_src_addr(id_priv
)));
3938 memcpy(&cm_id
->remote_addr
, cma_dst_addr(id_priv
),
3939 rdma_addr_size(cma_dst_addr(id_priv
)));
3941 ret
= cma_modify_qp_rtr(id_priv
, conn_param
);
3946 iw_param
.ord
= conn_param
->initiator_depth
;
3947 iw_param
.ird
= conn_param
->responder_resources
;
3948 iw_param
.private_data
= conn_param
->private_data
;
3949 iw_param
.private_data_len
= conn_param
->private_data_len
;
3950 iw_param
.qpn
= id_priv
->id
.qp
? id_priv
->qp_num
: conn_param
->qp_num
;
3952 memset(&iw_param
, 0, sizeof iw_param
);
3953 iw_param
.qpn
= id_priv
->qp_num
;
3955 ret
= iw_cm_connect(cm_id
, &iw_param
);
3958 iw_destroy_cm_id(cm_id
);
3959 id_priv
->cm_id
.iw
= NULL
;
3964 int rdma_connect(struct rdma_cm_id
*id
, struct rdma_conn_param
*conn_param
)
3966 struct rdma_id_private
*id_priv
;
3969 id_priv
= container_of(id
, struct rdma_id_private
, id
);
3970 if (!cma_comp_exch(id_priv
, RDMA_CM_ROUTE_RESOLVED
, RDMA_CM_CONNECT
))
3974 id_priv
->qp_num
= conn_param
->qp_num
;
3975 id_priv
->srq
= conn_param
->srq
;
3978 if (rdma_cap_ib_cm(id
->device
, id
->port_num
)) {
3979 if (id
->qp_type
== IB_QPT_UD
)
3980 ret
= cma_resolve_ib_udp(id_priv
, conn_param
);
3982 ret
= cma_connect_ib(id_priv
, conn_param
);
3983 } else if (rdma_cap_iw_cm(id
->device
, id
->port_num
))
3984 ret
= cma_connect_iw(id_priv
, conn_param
);
3992 cma_comp_exch(id_priv
, RDMA_CM_CONNECT
, RDMA_CM_ROUTE_RESOLVED
);
3995 EXPORT_SYMBOL(rdma_connect
);
3997 static int cma_accept_ib(struct rdma_id_private
*id_priv
,
3998 struct rdma_conn_param
*conn_param
)
4000 struct ib_cm_rep_param rep
;
4003 ret
= cma_modify_qp_rtr(id_priv
, conn_param
);
4007 ret
= cma_modify_qp_rts(id_priv
, conn_param
);
4011 memset(&rep
, 0, sizeof rep
);
4012 rep
.qp_num
= id_priv
->qp_num
;
4013 rep
.starting_psn
= id_priv
->seq_num
;
4014 rep
.private_data
= conn_param
->private_data
;
4015 rep
.private_data_len
= conn_param
->private_data_len
;
4016 rep
.responder_resources
= conn_param
->responder_resources
;
4017 rep
.initiator_depth
= conn_param
->initiator_depth
;
4018 rep
.failover_accepted
= 0;
4019 rep
.flow_control
= conn_param
->flow_control
;
4020 rep
.rnr_retry_count
= min_t(u8
, 7, conn_param
->rnr_retry_count
);
4021 rep
.srq
= id_priv
->srq
? 1 : 0;
4023 trace_cm_send_rep(id_priv
);
4024 ret
= ib_send_cm_rep(id_priv
->cm_id
.ib
, &rep
);
4029 static int cma_accept_iw(struct rdma_id_private
*id_priv
,
4030 struct rdma_conn_param
*conn_param
)
4032 struct iw_cm_conn_param iw_param
;
4038 ret
= cma_modify_qp_rtr(id_priv
, conn_param
);
4042 iw_param
.ord
= conn_param
->initiator_depth
;
4043 iw_param
.ird
= conn_param
->responder_resources
;
4044 iw_param
.private_data
= conn_param
->private_data
;
4045 iw_param
.private_data_len
= conn_param
->private_data_len
;
4046 if (id_priv
->id
.qp
) {
4047 iw_param
.qpn
= id_priv
->qp_num
;
4049 iw_param
.qpn
= conn_param
->qp_num
;
4051 return iw_cm_accept(id_priv
->cm_id
.iw
, &iw_param
);
4054 static int cma_send_sidr_rep(struct rdma_id_private
*id_priv
,
4055 enum ib_cm_sidr_status status
, u32 qkey
,
4056 const void *private_data
, int private_data_len
)
4058 struct ib_cm_sidr_rep_param rep
;
4061 memset(&rep
, 0, sizeof rep
);
4062 rep
.status
= status
;
4063 if (status
== IB_SIDR_SUCCESS
) {
4064 ret
= cma_set_qkey(id_priv
, qkey
);
4067 rep
.qp_num
= id_priv
->qp_num
;
4068 rep
.qkey
= id_priv
->qkey
;
4070 rep
.private_data
= private_data
;
4071 rep
.private_data_len
= private_data_len
;
4073 trace_cm_send_sidr_rep(id_priv
);
4074 return ib_send_cm_sidr_rep(id_priv
->cm_id
.ib
, &rep
);
4077 int __rdma_accept(struct rdma_cm_id
*id
, struct rdma_conn_param
*conn_param
,
4080 struct rdma_id_private
*id_priv
;
4083 id_priv
= container_of(id
, struct rdma_id_private
, id
);
4085 rdma_restrack_set_task(&id_priv
->res
, caller
);
4087 if (!cma_comp(id_priv
, RDMA_CM_CONNECT
))
4090 if (!id
->qp
&& conn_param
) {
4091 id_priv
->qp_num
= conn_param
->qp_num
;
4092 id_priv
->srq
= conn_param
->srq
;
4095 if (rdma_cap_ib_cm(id
->device
, id
->port_num
)) {
4096 if (id
->qp_type
== IB_QPT_UD
) {
4098 ret
= cma_send_sidr_rep(id_priv
, IB_SIDR_SUCCESS
,
4100 conn_param
->private_data
,
4101 conn_param
->private_data_len
);
4103 ret
= cma_send_sidr_rep(id_priv
, IB_SIDR_SUCCESS
,
4107 ret
= cma_accept_ib(id_priv
, conn_param
);
4109 ret
= cma_rep_recv(id_priv
);
4111 } else if (rdma_cap_iw_cm(id
->device
, id
->port_num
))
4112 ret
= cma_accept_iw(id_priv
, conn_param
);
4121 cma_modify_qp_err(id_priv
);
4122 rdma_reject(id
, NULL
, 0);
4125 EXPORT_SYMBOL(__rdma_accept
);
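/*
 * Usage sketch (hypothetical event handler): on the passive side the consumer
 * accepts from within the RDMA_CM_EVENT_CONNECT_REQUEST callback, echoing the
 * initiator_depth/responder_resources it received.  rdma_accept() here is
 * assumed to be the rdma_cm.h wrapper around __rdma_accept() above.
 */
static int cma_example_on_connect_request(struct rdma_cm_id *new_id,
					   struct rdma_cm_event *event)
{
	struct rdma_conn_param conn_param = {
		.responder_resources = event->param.conn.responder_resources,
		.initiator_depth = event->param.conn.initiator_depth,
	};

	return rdma_accept(new_id, &conn_param);
}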
4127 int rdma_notify(struct rdma_cm_id
*id
, enum ib_event_type event
)
4129 struct rdma_id_private
*id_priv
;
4132 id_priv
= container_of(id
, struct rdma_id_private
, id
);
4133 if (!id_priv
->cm_id
.ib
)
4136 switch (id
->device
->node_type
) {
4137 case RDMA_NODE_IB_CA
:
4138 ret
= ib_cm_notify(id_priv
->cm_id
.ib
, event
);
4146 EXPORT_SYMBOL(rdma_notify
);
4148 int rdma_reject(struct rdma_cm_id
*id
, const void *private_data
,
4149 u8 private_data_len
)
4151 struct rdma_id_private
*id_priv
;
4154 id_priv
= container_of(id
, struct rdma_id_private
, id
);
4155 if (!id_priv
->cm_id
.ib
)
4158 if (rdma_cap_ib_cm(id
->device
, id
->port_num
)) {
4159 if (id
->qp_type
== IB_QPT_UD
) {
4160 ret
= cma_send_sidr_rep(id_priv
, IB_SIDR_REJECT
, 0,
4161 private_data
, private_data_len
);
4163 trace_cm_send_rej(id_priv
);
4164 ret
= ib_send_cm_rej(id_priv
->cm_id
.ib
,
4165 IB_CM_REJ_CONSUMER_DEFINED
, NULL
,
4166 0, private_data
, private_data_len
);
4168 } else if (rdma_cap_iw_cm(id
->device
, id
->port_num
)) {
4169 ret
= iw_cm_reject(id_priv
->cm_id
.iw
,
4170 private_data
, private_data_len
);
4176 EXPORT_SYMBOL(rdma_reject
);
4178 int rdma_disconnect(struct rdma_cm_id
*id
)
4180 struct rdma_id_private
*id_priv
;
4183 id_priv
= container_of(id
, struct rdma_id_private
, id
);
4184 if (!id_priv
->cm_id
.ib
)
4187 if (rdma_cap_ib_cm(id
->device
, id
->port_num
)) {
4188 ret
= cma_modify_qp_err(id_priv
);
4191 /* Initiate or respond to a disconnect. */
4192 trace_cm_disconnect(id_priv
);
4193 if (ib_send_cm_dreq(id_priv
->cm_id
.ib
, NULL
, 0)) {
4194 if (!ib_send_cm_drep(id_priv
->cm_id
.ib
, NULL
, 0))
4195 trace_cm_sent_drep(id_priv
);
4197 trace_cm_sent_dreq(id_priv
);
4199 } else if (rdma_cap_iw_cm(id
->device
, id
->port_num
)) {
4200 ret
= iw_cm_disconnect(id_priv
->cm_id
.iw
, 0);
4207 EXPORT_SYMBOL(rdma_disconnect
);
4209 static int cma_ib_mc_handler(int status
, struct ib_sa_multicast
*multicast
)
4211 struct rdma_id_private
*id_priv
;
4212 struct cma_multicast
*mc
= multicast
->context
;
4213 struct rdma_cm_event event
= {};
4216 id_priv
= mc
->id_priv
;
4217 mutex_lock(&id_priv
->handler_mutex
);
4218 if (id_priv
->state
!= RDMA_CM_ADDR_BOUND
&&
4219 id_priv
->state
!= RDMA_CM_ADDR_RESOLVED
)
4223 status
= cma_set_qkey(id_priv
, be32_to_cpu(multicast
->rec
.qkey
));
4225 pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n",
4227 mutex_lock(&id_priv
->qp_mutex
);
4228 if (!status
&& id_priv
->id
.qp
) {
4229 status
= ib_attach_mcast(id_priv
->id
.qp
, &multicast
->rec
.mgid
,
4230 be16_to_cpu(multicast
->rec
.mlid
));
4232 pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to attach QP. status %d\n",
4235 mutex_unlock(&id_priv
->qp_mutex
);
4237 event
.status
= status
;
4238 event
.param
.ud
.private_data
= mc
->context
;
4240 struct rdma_dev_addr
*dev_addr
=
4241 &id_priv
->id
.route
.addr
.dev_addr
;
4242 struct net_device
*ndev
=
4243 dev_get_by_index(dev_addr
->net
, dev_addr
->bound_dev_if
);
4244 enum ib_gid_type gid_type
=
4245 id_priv
->cma_dev
->default_gid_type
[id_priv
->id
.port_num
-
4246 rdma_start_port(id_priv
->cma_dev
->device
)];
4248 event
.event
= RDMA_CM_EVENT_MULTICAST_JOIN
;
4249 ret
= ib_init_ah_from_mcmember(id_priv
->id
.device
,
4250 id_priv
->id
.port_num
,
4253 &event
.param
.ud
.ah_attr
);
4255 event
.event
= RDMA_CM_EVENT_MULTICAST_ERROR
;
4257 event
.param
.ud
.qp_num
= 0xFFFFFF;
4258 event
.param
.ud
.qkey
= be32_to_cpu(multicast
->rec
.qkey
);
4262 event
.event
= RDMA_CM_EVENT_MULTICAST_ERROR
;
4264 ret
= cma_cm_event_handler(id_priv
, &event
);
4266 rdma_destroy_ah_attr(&event
.param
.ud
.ah_attr
);
4268 cma_exch(id_priv
, RDMA_CM_DESTROYING
);
4269 mutex_unlock(&id_priv
->handler_mutex
);
4270 rdma_destroy_id(&id_priv
->id
);
4275 mutex_unlock(&id_priv
->handler_mutex
);
4279 static void cma_set_mgid(struct rdma_id_private
*id_priv
,
4280 struct sockaddr
*addr
, union ib_gid
*mgid
)
4282 unsigned char mc_map
[MAX_ADDR_LEN
];
4283 struct rdma_dev_addr
*dev_addr
= &id_priv
->id
.route
.addr
.dev_addr
;
4284 struct sockaddr_in
*sin
= (struct sockaddr_in
*) addr
;
4285 struct sockaddr_in6
*sin6
= (struct sockaddr_in6
*) addr
;
4287 if (cma_any_addr(addr
)) {
4288 memset(mgid
, 0, sizeof *mgid
);
4289 } else if ((addr
->sa_family
== AF_INET6
) &&
4290 ((be32_to_cpu(sin6
->sin6_addr
.s6_addr32
[0]) & 0xFFF0FFFF) ==
4292 /* IPv6 address is an SA assigned MGID. */
4293 memcpy(mgid
, &sin6
->sin6_addr
, sizeof *mgid
);
4294 } else if (addr
->sa_family
== AF_IB
) {
4295 memcpy(mgid
, &((struct sockaddr_ib
*) addr
)->sib_addr
, sizeof *mgid
);
4296 } else if (addr
->sa_family
== AF_INET6
) {
4297 ipv6_ib_mc_map(&sin6
->sin6_addr
, dev_addr
->broadcast
, mc_map
);
4298 if (id_priv
->id
.ps
== RDMA_PS_UDP
)
4299 mc_map
[7] = 0x01; /* Use RDMA CM signature */
4300 *mgid
= *(union ib_gid
*) (mc_map
+ 4);
4302 ip_ib_mc_map(sin
->sin_addr
.s_addr
, dev_addr
->broadcast
, mc_map
);
4303 if (id_priv
->id
.ps
== RDMA_PS_UDP
)
4304 mc_map
[7] = 0x01; /* Use RDMA CM signature */
4305 *mgid
= *(union ib_gid
*) (mc_map
+ 4);
4309 static int cma_join_ib_multicast(struct rdma_id_private
*id_priv
,
4310 struct cma_multicast
*mc
)
4312 struct ib_sa_mcmember_rec rec
;
4313 struct rdma_dev_addr
*dev_addr
= &id_priv
->id
.route
.addr
.dev_addr
;
4314 ib_sa_comp_mask comp_mask
;
4317 ib_addr_get_mgid(dev_addr
, &rec
.mgid
);
4318 ret
= ib_sa_get_mcmember_rec(id_priv
->id
.device
, id_priv
->id
.port_num
,
4323 ret
= cma_set_qkey(id_priv
, 0);
4327 cma_set_mgid(id_priv
, (struct sockaddr
*) &mc
->addr
, &rec
.mgid
);
4328 rec
.qkey
= cpu_to_be32(id_priv
->qkey
);
4329 rdma_addr_get_sgid(dev_addr
, &rec
.port_gid
);
4330 rec
.pkey
= cpu_to_be16(ib_addr_get_pkey(dev_addr
));
4331 rec
.join_state
= mc
->join_state
;
4333 if ((rec
.join_state
== BIT(SENDONLY_FULLMEMBER_JOIN
)) &&
4334 (!ib_sa_sendonly_fullmem_support(&sa_client
,
4336 id_priv
->id
.port_num
))) {
4338 &id_priv
->id
.device
->dev
,
4339 "RDMA CM: port %u Unable to multicast join: SM doesn't support Send Only Full Member option\n",
4340 id_priv
->id
.port_num
);
4344 comp_mask
= IB_SA_MCMEMBER_REC_MGID
| IB_SA_MCMEMBER_REC_PORT_GID
|
4345 IB_SA_MCMEMBER_REC_PKEY
| IB_SA_MCMEMBER_REC_JOIN_STATE
|
4346 IB_SA_MCMEMBER_REC_QKEY
| IB_SA_MCMEMBER_REC_SL
|
4347 IB_SA_MCMEMBER_REC_FLOW_LABEL
|
4348 IB_SA_MCMEMBER_REC_TRAFFIC_CLASS
;
4350 if (id_priv
->id
.ps
== RDMA_PS_IPOIB
)
4351 comp_mask
|= IB_SA_MCMEMBER_REC_RATE
|
4352 IB_SA_MCMEMBER_REC_RATE_SELECTOR
|
4353 IB_SA_MCMEMBER_REC_MTU_SELECTOR
|
4354 IB_SA_MCMEMBER_REC_MTU
|
4355 IB_SA_MCMEMBER_REC_HOP_LIMIT
;
4357 mc
->multicast
.ib
= ib_sa_join_multicast(&sa_client
, id_priv
->id
.device
,
4358 id_priv
->id
.port_num
, &rec
,
4359 comp_mask
, GFP_KERNEL
,
4360 cma_ib_mc_handler
, mc
);
4361 return PTR_ERR_OR_ZERO(mc
->multicast
.ib
);
4364 static void iboe_mcast_work_handler(struct work_struct
*work
)
4366 struct iboe_mcast_work
*mw
= container_of(work
, struct iboe_mcast_work
, work
);
4367 struct cma_multicast
*mc
= mw
->mc
;
4368 struct ib_sa_multicast
*m
= mc
->multicast
.ib
;
4370 mc
->multicast
.ib
->context
= mc
;
4371 cma_ib_mc_handler(0, m
);
4372 kref_put(&mc
->mcref
, release_mc
);
4376 static void cma_iboe_set_mgid(struct sockaddr
*addr
, union ib_gid
*mgid
,
4377 enum ib_gid_type gid_type
)
4379 struct sockaddr_in
*sin
= (struct sockaddr_in
*)addr
;
4380 struct sockaddr_in6
*sin6
= (struct sockaddr_in6
*)addr
;
4382 if (cma_any_addr(addr
)) {
4383 memset(mgid
, 0, sizeof *mgid
);
4384 } else if (addr
->sa_family
== AF_INET6
) {
4385 memcpy(mgid
, &sin6
->sin6_addr
, sizeof *mgid
);
4388 (gid_type
== IB_GID_TYPE_ROCE_UDP_ENCAP
) ? 0 : 0xff;
4390 (gid_type
== IB_GID_TYPE_ROCE_UDP_ENCAP
) ? 0 : 0x0e;
4399 mgid
->raw
[10] = 0xff;
4400 mgid
->raw
[11] = 0xff;
4401 *(__be32
*)(&mgid
->raw
[12]) = sin
->sin_addr
.s_addr
;

static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
				   struct cma_multicast *mc)
{
	struct iboe_mcast_work *work;
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	int err = 0;
	struct sockaddr *addr = (struct sockaddr *)&mc->addr;
	struct net_device *ndev = NULL;
	enum ib_gid_type gid_type;
	bool send_only;

	send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);

	if (cma_zero_addr((struct sockaddr *)&mc->addr))
		return -EINVAL;

	work = kzalloc(sizeof *work, GFP_KERNEL);
	if (!work)
		return -ENOMEM;

	mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
	if (!mc->multicast.ib) {
		err = -ENOMEM;
		goto out1;
	}

	gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
		   rdma_start_port(id_priv->cma_dev->device)];
	cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type);

	mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
	if (id_priv->id.ps == RDMA_PS_UDP)
		mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);

	if (dev_addr->bound_dev_if)
		ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
	if (!ndev) {
		err = -ENODEV;
		goto out2;
	}
	mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
	mc->multicast.ib->rec.hop_limit = 1;
	mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);

	if (addr->sa_family == AF_INET) {
		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
			mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
			if (!send_only) {
				/* RoCE v2 multicast rides on IP; join the
				 * corresponding IGMP group on the netdev.
				 */
				err = cma_igmp_send(ndev,
						    &mc->multicast.ib->rec.mgid,
						    true);
			}
		}
	} else {
		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
			err = -ENOTSUPP;
	}
	dev_put(ndev);
	if (err || !mc->multicast.ib->rec.mtu) {
		if (!err)
			err = -EINVAL;
		goto out2;
	}
	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
		    &mc->multicast.ib->rec.port_gid);
	work->id = id_priv;
	work->mc = mc;
	INIT_WORK(&work->work, iboe_mcast_work_handler);
	kref_get(&mc->mcref);
	queue_work(cma_wq, &work->work);

	return 0;

out2:
	kfree(mc->multicast.ib);
out1:
	kfree(work);
	return err;
}
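
/**
 * rdma_join_multicast - Join a multicast group on behalf of a cm_id.
 * @id: Communication identifier; its address must already be bound or
 *	resolved so a device is associated with it.
 * @addr: Multicast group address to join.
 * @join_state: Join state bitmap, e.g. BIT(SENDONLY_FULLMEMBER_JOIN).
 * @context: User context returned with multicast events for this group.
 *
 * RoCE ports are handled locally via cma_iboe_join_multicast(); IB ports
 * join through the SA via cma_join_ib_multicast(). The result is reported
 * asynchronously to the cm_id's event handler.
 */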
int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
			u8 join_state, void *context)
{
	struct rdma_id_private *id_priv;
	struct cma_multicast *mc;
	int ret;

	if (!id->device)
		return -EINVAL;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
	    !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
		return -EINVAL;

	mc = kmalloc(sizeof *mc, GFP_KERNEL);
	if (!mc)
		return -ENOMEM;

	memcpy(&mc->addr, addr, rdma_addr_size(addr));
	mc->context = context;
	mc->id_priv = id_priv;
	mc->join_state = join_state;

	if (rdma_protocol_roce(id->device, id->port_num)) {
		kref_init(&mc->mcref);
		ret = cma_iboe_join_multicast(id_priv, mc);
		if (ret)
			goto out_err;
	} else if (rdma_cap_ib_mcast(id->device, id->port_num)) {
		ret = cma_join_ib_multicast(id_priv, mc);
		if (ret)
			goto out_err;
	} else {
		ret = -ENOSYS;
		goto out_err;
	}

	spin_lock(&id_priv->lock);
	list_add(&mc->list, &id_priv->mc_list);
	spin_unlock(&id_priv->lock);

	return 0;
out_err:
	kfree(mc);
	return ret;
}
EXPORT_SYMBOL(rdma_join_multicast);
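
/**
 * rdma_leave_multicast - Leave a previously joined multicast group.
 * @id: Communication identifier that was used for the join.
 * @addr: Multicast group address passed to rdma_join_multicast().
 *
 * Detaches the group from the cm_id's QP (if one is attached) and releases
 * the IB SA or RoCE multicast state for the matching entry, if any.
 */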
void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
{
	struct rdma_id_private *id_priv;
	struct cma_multicast *mc;

	id_priv = container_of(id, struct rdma_id_private, id);
	spin_lock_irq(&id_priv->lock);
	list_for_each_entry(mc, &id_priv->mc_list, list) {
		if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
			list_del(&mc->list);
			spin_unlock_irq(&id_priv->lock);

			if (id->qp)
				ib_detach_mcast(id->qp,
						&mc->multicast.ib->rec.mgid,
						be16_to_cpu(mc->multicast.ib->rec.mlid));

			BUG_ON(id_priv->cma_dev->device != id->device);

			if (rdma_cap_ib_mcast(id->device, id->port_num)) {
				ib_sa_free_multicast(mc->multicast.ib);
				kfree(mc);
			} else if (rdma_protocol_roce(id->device, id->port_num)) {
				cma_leave_roce_mc_group(id_priv, mc);
			}
			return;
		}
	}
	spin_unlock_irq(&id_priv->lock);
}
EXPORT_SYMBOL(rdma_leave_multicast);
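
/*
 * Usage sketch (illustrative only; 'id', 'mcast_addr' and 'ctx' are
 * placeholders, not symbols in this file): a ULP that has bound or resolved
 * an address on a cm_id might join and later leave a group like this, with
 * the join result delivered asynchronously to the cm_id's event handler:
 *
 *	ret = rdma_join_multicast(id, mcast_addr,
 *				  BIT(SENDONLY_FULLMEMBER_JOIN), ctx);
 *	...
 *	// The handler sees RDMA_CM_EVENT_MULTICAST_JOIN on success (for UD
 *	// joins event->param.ud typically carries the AH attributes and
 *	// qkey to use), or RDMA_CM_EVENT_MULTICAST_ERROR on failure.
 *	...
 *	rdma_leave_multicast(id, mcast_addr);
 */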

static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
{
	struct rdma_dev_addr *dev_addr;
	struct cma_ndev_work *work;

	dev_addr = &id_priv->id.route.addr.dev_addr;

	if ((dev_addr->bound_dev_if == ndev->ifindex) &&
	    (net_eq(dev_net(ndev), dev_addr->net)) &&
	    memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
		pr_info("RDMA CM addr change for ndev %s used by id %p\n",
			ndev->name, &id_priv->id);
		work = kzalloc(sizeof *work, GFP_KERNEL);
		if (!work)
			return -ENOMEM;

		INIT_WORK(&work->work, cma_ndev_work_handler);
		work->id = id_priv;
		work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
		atomic_inc(&id_priv->refcount);
		queue_work(cma_wq, &work->work);
	}

	return 0;
}

static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
			       void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct cma_device *cma_dev;
	struct rdma_id_private *id_priv;
	int ret = NOTIFY_DONE;

	if (event != NETDEV_BONDING_FAILOVER)
		return NOTIFY_DONE;

	if (!netif_is_bond_master(ndev))
		return NOTIFY_DONE;

	mutex_lock(&lock);
	list_for_each_entry(cma_dev, &dev_list, list)
		list_for_each_entry(id_priv, &cma_dev->id_list, list) {
			ret = cma_netdev_change(ndev, id_priv);
			if (ret)
				goto out;
		}

out:
	mutex_unlock(&lock);
	return ret;
}

static struct notifier_block cma_nb = {
	.notifier_call = cma_netdev_callback
};

static void cma_add_one(struct ib_device *device)
{
	struct cma_device *cma_dev;
	struct rdma_id_private *id_priv;
	unsigned int i;
	unsigned long supported_gids = 0;

	cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
	if (!cma_dev)
		return;

	cma_dev->device = device;
	cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
					    sizeof(*cma_dev->default_gid_type),
					    GFP_KERNEL);
	if (!cma_dev->default_gid_type)
		goto free_cma_dev;

	cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
					    sizeof(*cma_dev->default_roce_tos),
					    GFP_KERNEL);
	if (!cma_dev->default_roce_tos)
		goto free_gid_type;

	/* Pick a default RoCE GID type per port, preferring RoCE v2 when
	 * the port supports it.
	 */
	rdma_for_each_port (device, i) {
		supported_gids = roce_gid_type_mask_support(device, i);
		WARN_ON(!supported_gids);
		if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE))
			cma_dev->default_gid_type[i - rdma_start_port(device)] =
				CMA_PREFERRED_ROCE_GID_TYPE;
		else
			cma_dev->default_gid_type[i - rdma_start_port(device)] =
				find_first_bit(&supported_gids, BITS_PER_LONG);
		cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0;
	}

	init_completion(&cma_dev->comp);
	atomic_set(&cma_dev->refcount, 1);
	INIT_LIST_HEAD(&cma_dev->id_list);
	ib_set_client_data(device, &cma_client, cma_dev);

	mutex_lock(&lock);
	list_add_tail(&cma_dev->list, &dev_list);
	list_for_each_entry(id_priv, &listen_any_list, list)
		cma_listen_on_dev(id_priv, cma_dev);
	mutex_unlock(&lock);

	trace_cm_add_one(device);
	return;

free_gid_type:
	kfree(cma_dev->default_gid_type);

free_cma_dev:
	kfree(cma_dev);
}

static int cma_remove_id_dev(struct rdma_id_private *id_priv)
{
	struct rdma_cm_event event = {};
	enum rdma_cm_state state;
	int ret = 0;

	/* Record that we want to remove the device */
	state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
	if (state == RDMA_CM_DESTROYING)
		return 0;

	cma_cancel_operation(id_priv, state);
	mutex_lock(&id_priv->handler_mutex);

	/* Check for destruction from another callback. */
	if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
		goto out;

	event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
	ret = cma_cm_event_handler(id_priv, &event);
out:
	mutex_unlock(&id_priv->handler_mutex);
	return ret;
}

static void cma_process_remove(struct cma_device *cma_dev)
{
	struct rdma_id_private *id_priv;
	int ret;

	mutex_lock(&lock);
	while (!list_empty(&cma_dev->id_list)) {
		id_priv = list_entry(cma_dev->id_list.next,
				     struct rdma_id_private, list);

		list_del(&id_priv->listen_list);
		list_del_init(&id_priv->list);
		atomic_inc(&id_priv->refcount);
		mutex_unlock(&lock);

		/* Internal ids are destroyed here; user ids only when their
		 * event handler asks for it (nonzero return).
		 */
		ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
		cma_deref_id(id_priv);
		if (ret)
			rdma_destroy_id(&id_priv->id);

		mutex_lock(&lock);
	}
	mutex_unlock(&lock);

	cma_deref_dev(cma_dev);
	wait_for_completion(&cma_dev->comp);
}

static void cma_remove_one(struct ib_device *device, void *client_data)
{
	struct cma_device *cma_dev = client_data;

	trace_cm_remove_one(device);

	if (!cma_dev)
		return;

	mutex_lock(&lock);
	list_del(&cma_dev->list);
	mutex_unlock(&lock);

	cma_process_remove(cma_dev);
	kfree(cma_dev->default_roce_tos);
	kfree(cma_dev->default_gid_type);
	kfree(cma_dev);
}

static int cma_init_net(struct net *net)
{
	struct cma_pernet *pernet = cma_pernet(net);

	xa_init(&pernet->tcp_ps);
	xa_init(&pernet->udp_ps);
	xa_init(&pernet->ipoib_ps);
	xa_init(&pernet->ib_ps);

	return 0;
}

static void cma_exit_net(struct net *net)
{
	struct cma_pernet *pernet = cma_pernet(net);

	WARN_ON(!xa_empty(&pernet->tcp_ps));
	WARN_ON(!xa_empty(&pernet->udp_ps));
	WARN_ON(!xa_empty(&pernet->ipoib_ps));
	WARN_ON(!xa_empty(&pernet->ib_ps));
}

static struct pernet_operations cma_pernet_operations = {
	.init = cma_init_net,
	.exit = cma_exit_net,
	.id = &cma_pernet_id,
	.size = sizeof(struct cma_pernet),
};
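
/*
 * Module bring-up below registers, in order: the rdma_cm workqueue, the
 * per-net port-space tables, the IB SA client, the netdevice notifier, the
 * IB client, and finally configfs; cma_cleanup() undoes them in reverse.
 */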

static int __init cma_init(void)
{
	int ret;

	cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM);
	if (!cma_wq)
		return -ENOMEM;

	ret = register_pernet_subsys(&cma_pernet_operations);
	if (ret)
		goto err_wq;

	ib_sa_register_client(&sa_client);
	register_netdevice_notifier(&cma_nb);

	ret = ib_register_client(&cma_client);
	if (ret)
		goto err;

	ret = cma_configfs_init();
	if (ret)
		goto err_ib;

	return 0;

err_ib:
	ib_unregister_client(&cma_client);
err:
	unregister_netdevice_notifier(&cma_nb);
	ib_sa_unregister_client(&sa_client);
	unregister_pernet_subsys(&cma_pernet_operations);
err_wq:
	destroy_workqueue(cma_wq);
	return ret;
}

static void __exit cma_cleanup(void)
{
	cma_configfs_exit();
	ib_unregister_client(&cma_client);
	unregister_netdevice_notifier(&cma_nb);
	ib_sa_unregister_client(&sa_client);
	unregister_pernet_subsys(&cma_pernet_operations);
	destroy_workqueue(cma_wq);
}

module_init(cma_init);
module_exit(cma_cleanup);