/*
 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/completion.h>
#include <linux/file.h>
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/idr.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/nsproxy.h>
#include <linux/nospec.h>

#include <rdma/rdma_user_cm.h>
#include <rdma/ib_marshall.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
#include <rdma/ib_addr.h>
#include <rdma/ib.h>
#include <rdma/ib_cm.h>
#include <rdma/rdma_netlink.h>
#include "core_priv.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
MODULE_LICENSE("Dual BSD/GPL");
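
/*
 * The largest listen backlog a user may request.  It is exported as a
 * sysctl (registered below under "net/rdma_ucm", i.e.
 * /proc/sys/net/rdma_ucm/max_backlog) and is applied in ucma_listen().
 */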
static unsigned int max_backlog = 1024;

static struct ctl_table_header *ucma_ctl_table_hdr;
static struct ctl_table ucma_ctl_table[] = {
	{
		.procname	= "max_backlog",
		.data		= &max_backlog,
		.maxlen		= sizeof max_backlog,
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
};
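
/*
 * Object model: a ucma_file represents one open instance of the rdma_cm
 * character device and owns a list of contexts plus a queue of pending
 * events.  A ucma_context wraps a single rdma_cm_id, a ucma_multicast
 * tracks one multicast join made on a context, and a ucma_event carries an
 * rdma_cm event until userspace collects it with RDMA_USER_CM_CMD_GET_EVENT.
 */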
struct ucma_file {
	struct mutex		mut;
	struct file		*filp;
	struct list_head	ctx_list;
	struct list_head	event_list;
	wait_queue_head_t	poll_wait;
};

struct ucma_context {
	u32			id;
	struct completion	comp;
	refcount_t		ref;
	int			events_reported;
	atomic_t		backlog;

	struct ucma_file	*file;
	struct rdma_cm_id	*cm_id;
	struct mutex		mutex;
	u64			uid;

	struct list_head	list;
	struct list_head	mc_list;
	struct work_struct	close_work;
};

struct ucma_multicast {
	struct ucma_context	*ctx;
	u32			id;
	int			events_reported;

	u64			uid;
	u8			join_state;
	struct list_head	list;
	struct sockaddr_storage	addr;
};

struct ucma_event {
	struct ucma_context	*ctx;
	struct ucma_context	*conn_req_ctx;
	struct ucma_multicast	*mc;
	struct list_head	list;
	struct rdma_ucm_event_resp resp;
};
static DEFINE_XARRAY_ALLOC(ctx_table);
static DEFINE_XARRAY_ALLOC(multicast_table);

static const struct file_operations ucma_fops;
static int ucma_destroy_private_ctx(struct ucma_context *ctx);
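
/*
 * ctx_table and multicast_table map the u32 IDs handed to userspace back to
 * kernel objects.  A context is allocated an ID with a NULL entry first and
 * only becomes reachable once ucma_finish_ctx() stores the real pointer;
 * teardown swaps the pointer back for XA_ZERO_ENTRY so that new lookups can
 * no longer find it while the slot stays reserved.
 */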
static inline struct ucma_context *_ucma_find_context(int id,
						       struct ucma_file *file)
{
	struct ucma_context *ctx;

	ctx = xa_load(&ctx_table, id);
	if (!ctx)
		ctx = ERR_PTR(-ENOENT);
	else if (ctx->file != file)
		ctx = ERR_PTR(-EINVAL);
	return ctx;
}
static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
{
	struct ucma_context *ctx;

	xa_lock(&ctx_table);
	ctx = _ucma_find_context(id, file);
	if (!IS_ERR(ctx))
		if (!refcount_inc_not_zero(&ctx->ref))
			ctx = ERR_PTR(-ENXIO);
	xa_unlock(&ctx_table);
	return ctx;
}
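
/*
 * ucma_get_ctx()/ucma_put_ctx() pair a refcount with a completion: the final
 * put fires ctx->comp, which ucma_close_id() waits on before destroying the
 * underlying cm_id, so no in-flight command can still be using the ID at
 * that point.
 */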
static void ucma_put_ctx(struct ucma_context *ctx)
{
	if (refcount_dec_and_test(&ctx->ref))
		complete(&ctx->comp);
}
/*
 * Same as ucma_get_ctx() but additionally requires that ->cm_id->device is
 * valid, i.e. that the CM_ID is bound to a device.
 */
static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id)
{
	struct ucma_context *ctx = ucma_get_ctx(file, id);

	if (IS_ERR(ctx))
		return ctx;
	if (!ctx->cm_id->device) {
		ucma_put_ctx(ctx);
		return ERR_PTR(-EINVAL);
	}
	return ctx;
}
static void ucma_close_id(struct work_struct *work)
{
	struct ucma_context *ctx = container_of(work, struct ucma_context, close_work);

	/* Once all inflight tasks are finished, we close all underlying
	 * resources.  The context stays alive until its creator explicitly
	 * destroys it.  This puts back the xarray's reference.
	 */
	ucma_put_ctx(ctx);
	wait_for_completion(&ctx->comp);
	/* No new events will be generated after destroying the id. */
	rdma_destroy_id(ctx->cm_id);

	/* Reading the cm_id without holding a positive ref is not allowed */
	ctx->cm_id = NULL;
}
static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
{
	struct ucma_context *ctx;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return NULL;

	INIT_WORK(&ctx->close_work, ucma_close_id);
	init_completion(&ctx->comp);
	INIT_LIST_HEAD(&ctx->mc_list);
	/* So list_del() will work if we don't do ucma_finish_ctx() */
	INIT_LIST_HEAD(&ctx->list);
	ctx->file = file;
	mutex_init(&ctx->mutex);

	if (xa_alloc(&ctx_table, &ctx->id, NULL, xa_limit_32b, GFP_KERNEL)) {
		kfree(ctx);
		return NULL;
	}
	return ctx;
}
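
/*
 * Context creation is split in two: ucma_alloc_ctx() above reserves an ID
 * with a NULL entry so nothing can look it up yet, and ucma_finish_ctx()
 * below (called under file->mut) links the context into the file and stores
 * the real pointer, making the ID usable from userspace.
 */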
static void ucma_set_ctx_cm_id(struct ucma_context *ctx,
			       struct rdma_cm_id *cm_id)
{
	refcount_set(&ctx->ref, 1);
	ctx->cm_id = cm_id;
}
static void ucma_finish_ctx(struct ucma_context *ctx)
{
	lockdep_assert_held(&ctx->file->mut);
	list_add_tail(&ctx->list, &ctx->file->ctx_list);
	xa_store(&ctx_table, ctx->id, ctx, GFP_KERNEL);
}
static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
				 struct rdma_conn_param *src)
{
	if (src->private_data_len)
		memcpy(dst->private_data, src->private_data,
		       src->private_data_len);
	dst->private_data_len = src->private_data_len;
	dst->responder_resources = src->responder_resources;
	dst->initiator_depth = src->initiator_depth;
	dst->flow_control = src->flow_control;
	dst->retry_count = src->retry_count;
	dst->rnr_retry_count = src->rnr_retry_count;
	dst->srq = src->srq;
	dst->qp_num = src->qp_num;
}
static void ucma_copy_ud_event(struct ib_device *device,
			       struct rdma_ucm_ud_param *dst,
			       struct rdma_ud_param *src)
{
	if (src->private_data_len)
		memcpy(dst->private_data, src->private_data,
		       src->private_data_len);
	dst->private_data_len = src->private_data_len;
	ib_copy_ah_attr_to_user(device, &dst->ah_attr, &src->ah_attr);
	dst->qp_num = src->qp_num;
	dst->qkey = src->qkey;
}
static struct ucma_event *ucma_create_uevent(struct ucma_context *ctx,
					     struct rdma_cm_event *event)
{
	struct ucma_event *uevent;

	uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
	if (!uevent)
		return NULL;

	uevent->ctx = ctx;
	switch (event->event) {
	case RDMA_CM_EVENT_MULTICAST_JOIN:
	case RDMA_CM_EVENT_MULTICAST_ERROR:
		uevent->mc = (struct ucma_multicast *)
			     event->param.ud.private_data;
		uevent->resp.uid = uevent->mc->uid;
		uevent->resp.id = uevent->mc->id;
		break;
	default:
		uevent->resp.uid = ctx->uid;
		uevent->resp.id = ctx->id;
		break;
	}
	uevent->resp.event = event->event;
	uevent->resp.status = event->status;
	if (ctx->cm_id->qp_type == IB_QPT_UD)
		ucma_copy_ud_event(ctx->cm_id->device, &uevent->resp.param.ud,
				   &event->param.ud);
	else
		ucma_copy_conn_event(&uevent->resp.param.conn,
				     &event->param.conn);

	uevent->resp.ece.vendor_id = event->ece.vendor_id;
	uevent->resp.ece.attr_mod = event->ece.attr_mod;
	return uevent;
}
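
/*
 * Handler for RDMA_CM_EVENT_CONNECT_REQUEST on a listening ID: consume one
 * slot of the listener's backlog, create a private child context for the new
 * cm_id and queue the event for userspace.  The backlog slot is returned
 * either on the error path here or when userspace finally reads the event in
 * ucma_get_event().
 */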
static int ucma_connect_event_handler(struct rdma_cm_id *cm_id,
				      struct rdma_cm_event *event)
{
	struct ucma_context *listen_ctx = cm_id->context;
	struct ucma_context *ctx;
	struct ucma_event *uevent;

	if (!atomic_add_unless(&listen_ctx->backlog, -1, 0))
		return -ENOMEM;
	ctx = ucma_alloc_ctx(listen_ctx->file);
	if (!ctx)
		goto err_backlog;
	ucma_set_ctx_cm_id(ctx, cm_id);

	uevent = ucma_create_uevent(listen_ctx, event);
	if (!uevent)
		goto err_alloc;
	uevent->conn_req_ctx = ctx;
	uevent->resp.id = ctx->id;

	ctx->cm_id->context = ctx;

	mutex_lock(&ctx->file->mut);
	ucma_finish_ctx(ctx);
	list_add_tail(&uevent->list, &ctx->file->event_list);
	mutex_unlock(&ctx->file->mut);
	wake_up_interruptible(&ctx->file->poll_wait);
	return 0;

err_alloc:
	ucma_destroy_private_ctx(ctx);
err_backlog:
	atomic_inc(&listen_ctx->backlog);
	/* Returning error causes the new ID to be destroyed */
	return -ENOMEM;
}
static int ucma_event_handler(struct rdma_cm_id *cm_id,
			      struct rdma_cm_event *event)
{
	struct ucma_event *uevent;
	struct ucma_context *ctx = cm_id->context;

	if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
		return ucma_connect_event_handler(cm_id, event);

	/*
	 * We ignore events for new connections until userspace has set their
	 * context. This can only happen if an error occurs on a new connection
	 * before the user accepts it. This is okay, since the accept will just
	 * fail later. However, we do need to release the underlying HW
	 * resources in case of a device removal event.
	 */
	if (ctx->uid) {
		uevent = ucma_create_uevent(ctx, event);
		if (!uevent)
			return 0;

		mutex_lock(&ctx->file->mut);
		list_add_tail(&uevent->list, &ctx->file->event_list);
		mutex_unlock(&ctx->file->mut);
		wake_up_interruptible(&ctx->file->poll_wait);
	}

	if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
		xa_lock(&ctx_table);
		if (xa_load(&ctx_table, ctx->id) == ctx)
			queue_work(system_unbound_wq, &ctx->close_work);
		xa_unlock(&ctx_table);
	}
	return 0;
}
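
/*
 * RDMA_USER_CM_CMD_GET_EVENT: hand the oldest queued event to userspace.
 * The call blocks unless the fd is O_NONBLOCK; older userspace with a
 * smaller struct rdma_ucm_event_resp is tolerated by truncating the copy.
 */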
static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
			      int in_len, int out_len)
{
	struct rdma_ucm_get_event cmd;
	struct ucma_event *uevent;

	/*
	 * Old 32 bit user space does not send the 4 byte padding in the
	 * reserved field. We don't care, allow it to keep working.
	 */
	if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved) -
			sizeof(uevent->resp.ece))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	mutex_lock(&file->mut);
	while (list_empty(&file->event_list)) {
		mutex_unlock(&file->mut);

		if (file->filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		if (wait_event_interruptible(file->poll_wait,
					     !list_empty(&file->event_list)))
			return -ERESTARTSYS;

		mutex_lock(&file->mut);
	}

	uevent = list_first_entry(&file->event_list, struct ucma_event, list);

	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &uevent->resp,
			 min_t(size_t, out_len, sizeof(uevent->resp)))) {
		mutex_unlock(&file->mut);
		return -EFAULT;
	}

	list_del(&uevent->list);
	uevent->ctx->events_reported++;
	if (uevent->mc)
		uevent->mc->events_reported++;
	if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
		atomic_inc(&uevent->ctx->backlog);
	mutex_unlock(&file->mut);

	kfree(uevent);
	return 0;
}
static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type)
{
	switch (cmd->ps) {
	case RDMA_PS_TCP:
		*qp_type = IB_QPT_RC;
		return 0;
	case RDMA_PS_UDP:
	case RDMA_PS_IPOIB:
		*qp_type = IB_QPT_UD;
		return 0;
	case RDMA_PS_IB:
		*qp_type = cmd->qp_type;
		return 0;
	default:
		return -EINVAL;
	}
}
static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
			      int in_len, int out_len)
{
	struct rdma_ucm_create_id cmd;
	struct rdma_ucm_create_id_resp resp;
	struct ucma_context *ctx;
	struct rdma_cm_id *cm_id;
	enum ib_qp_type qp_type;
	int ret;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ret = ucma_get_qp_type(&cmd, &qp_type);
	if (ret)
		return ret;

	ctx = ucma_alloc_ctx(file);
	if (!ctx)
		return -ENOMEM;

	ctx->uid = cmd.uid;
	cm_id = rdma_create_user_id(ucma_event_handler, ctx, cmd.ps, qp_type);
	if (IS_ERR(cm_id)) {
		ret = PTR_ERR(cm_id);
		goto err1;
	}
	ucma_set_ctx_cm_id(ctx, cm_id);

	resp.id = ctx->id;
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp))) {
		ret = -EFAULT;
		goto err1;
	}

	mutex_lock(&file->mut);
	ucma_finish_ctx(ctx);
	mutex_unlock(&file->mut);
	return 0;

err1:
	ucma_destroy_private_ctx(ctx);
	return ret;
}
static void ucma_cleanup_multicast(struct ucma_context *ctx)
{
	struct ucma_multicast *mc, *tmp;

	xa_lock(&multicast_table);
	list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
		list_del(&mc->list);
		/*
		 * At this point mc->ctx->ref is 0, so no reader can take a new
		 * reference to the mc; holding the xarray lock is enough
		 * serialization.
		 */
		__xa_erase(&multicast_table, mc->id);
		kfree(mc);
	}
	xa_unlock(&multicast_table);
}
static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
{
	struct ucma_event *uevent, *tmp;

	rdma_lock_handler(mc->ctx->cm_id);
	mutex_lock(&mc->ctx->file->mut);
	list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
		if (uevent->mc != mc)
			continue;

		list_del(&uevent->list);
		kfree(uevent);
	}
	mutex_unlock(&mc->ctx->file->mut);
	rdma_unlock_handler(mc->ctx->cm_id);
}
static int ucma_cleanup_ctx_events(struct ucma_context *ctx)
{
	int events_reported;
	struct ucma_event *uevent, *tmp;
	LIST_HEAD(list);

	/* Clean up events not yet reported to the user. */
	mutex_lock(&ctx->file->mut);
	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
		if (uevent->ctx != ctx)
			continue;

		if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST &&
		    xa_cmpxchg(&ctx_table, uevent->conn_req_ctx->id,
			       uevent->conn_req_ctx, XA_ZERO_ENTRY,
			       GFP_KERNEL) == uevent->conn_req_ctx) {
			list_move_tail(&uevent->list, &list);
			continue;
		}
		list_del(&uevent->list);
		kfree(uevent);
	}
	list_del(&ctx->list);
	events_reported = ctx->events_reported;
	mutex_unlock(&ctx->file->mut);

	/*
	 * If this was a listening ID then any connections spawned from it that
	 * have not been delivered to userspace are cleaned up too. Must be done
	 * outside any locks.
	 */
	list_for_each_entry_safe(uevent, tmp, &list, list) {
		ucma_destroy_private_ctx(uevent->conn_req_ctx);
		kfree(uevent);
	}
	return events_reported;
}
/*
 * When this is called the xarray must hold XA_ZERO_ENTRY at ctx->id (i.e.
 * the ctx is not visible to the user).  This is either because:
 *  - ucma_finish_ctx() hasn't been called, or
 *  - xa_cmpxchg() succeeded in removing the entry (only one thread can
 *    succeed).
 */
static int ucma_destroy_private_ctx(struct ucma_context *ctx)
{
	int events_reported;

	/*
	 * Destroy the underlying cm_id. New work queuing is prevented now by
	 * the removal from the xarray. Once the work is cancelled ref will
	 * either be 0 because the work ran to completion and consumed the ref
	 * from the xarray, or it will be positive because we still have the
	 * ref from the xarray. It can also be 0 in cases where cm_id was never
	 * set.
	 */
	cancel_work_sync(&ctx->close_work);
	if (refcount_read(&ctx->ref))
		ucma_close_id(&ctx->close_work);

	events_reported = ucma_cleanup_ctx_events(ctx);
	ucma_cleanup_multicast(ctx);

	WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, XA_ZERO_ENTRY, NULL,
			   GFP_KERNEL) != NULL);
	mutex_destroy(&ctx->mutex);
	kfree(ctx);
	return events_reported;
}
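
/*
 * RDMA_USER_CM_CMD_DESTROY_ID: atomically replace the context's slot with
 * XA_ZERO_ENTRY so only one destroyer can win, then do the actual teardown
 * through ucma_destroy_private_ctx() and report how many events were
 * delivered on the ID.
 */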
static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_destroy_id cmd;
	struct rdma_ucm_destroy_id_resp resp;
	struct ucma_context *ctx;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	xa_lock(&ctx_table);
	ctx = _ucma_find_context(cmd.id, file);
	if (!IS_ERR(ctx)) {
		if (__xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY,
				 GFP_KERNEL) != ctx)
			ctx = ERR_PTR(-ENOENT);
	}
	xa_unlock(&ctx_table);

	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	resp.events_reported = ucma_destroy_private_ctx(ctx);
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

	return ret;
}
static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf,
			    int in_len, int out_len)
{
	struct rdma_ucm_bind_ip cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (!rdma_addr_size_in6(&cmd.addr))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}
static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf,
			 int in_len, int out_len)
{
	struct rdma_ucm_bind cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (cmd.reserved || !cmd.addr_size ||
	    cmd.addr_size != rdma_addr_size_kss(&cmd.addr))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}
static ssize_t ucma_resolve_ip(struct ucma_file *file,
			       const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_resolve_ip cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if ((cmd.src_addr.sin6_family && !rdma_addr_size_in6(&cmd.src_addr)) ||
	    !rdma_addr_size_in6(&cmd.dst_addr))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
				(struct sockaddr *) &cmd.dst_addr,
				cmd.timeout_ms);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}
static ssize_t ucma_resolve_addr(struct ucma_file *file,
				 const char __user *inbuf,
				 int in_len, int out_len)
{
	struct rdma_ucm_resolve_addr cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (cmd.reserved ||
	    (cmd.src_size && (cmd.src_size != rdma_addr_size_kss(&cmd.src_addr))) ||
	    !cmd.dst_size || (cmd.dst_size != rdma_addr_size_kss(&cmd.dst_addr)))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
				(struct sockaddr *) &cmd.dst_addr,
				cmd.timeout_ms);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}
static ssize_t ucma_resolve_route(struct ucma_file *file,
				  const char __user *inbuf,
				  int in_len, int out_len)
{
	struct rdma_ucm_resolve_route cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}
static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
			       struct rdma_route *route)
{
	struct rdma_dev_addr *dev_addr;

	resp->num_paths = route->num_pri_alt_paths;
	switch (route->num_pri_alt_paths) {
	case 0:
		dev_addr = &route->addr.dev_addr;
		rdma_addr_get_dgid(dev_addr,
				   (union ib_gid *) &resp->ib_route[0].dgid);
		rdma_addr_get_sgid(dev_addr,
				   (union ib_gid *) &resp->ib_route[0].sgid);
		resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
		break;
	case 2:
		ib_copy_path_rec_to_user(&resp->ib_route[1],
					 &route->path_rec[1]);
		fallthrough;
	case 1:
		ib_copy_path_rec_to_user(&resp->ib_route[0],
					 &route->path_rec[0]);
		break;
	default:
		break;
	}
}
static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
				 struct rdma_route *route)
{
	resp->num_paths = route->num_pri_alt_paths;
	switch (route->num_pri_alt_paths) {
	case 0:
		rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr,
			    (union ib_gid *)&resp->ib_route[0].dgid);
		rdma_ip2gid((struct sockaddr *)&route->addr.src_addr,
			    (union ib_gid *)&resp->ib_route[0].sgid);
		resp->ib_route[0].pkey = cpu_to_be16(0xffff);
		break;
	case 2:
		ib_copy_path_rec_to_user(&resp->ib_route[1],
					 &route->path_rec[1]);
		fallthrough;
	case 1:
		ib_copy_path_rec_to_user(&resp->ib_route[0],
					 &route->path_rec[0]);
		break;
	default:
		break;
	}
}
static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp,
			       struct rdma_route *route)
{
	struct rdma_dev_addr *dev_addr;

	dev_addr = &route->addr.dev_addr;
	rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid);
	rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid);
}
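
/*
 * ucma_query_route() fills the transport-specific part of the response with
 * one of the helpers above, chosen by rdma_cap_ib_sa(), rdma_protocol_roce()
 * or rdma_protocol_iwarp() for the bound device and port.
 */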
static ssize_t ucma_query_route(struct ucma_file *file,
				const char __user *inbuf,
				int in_len, int out_len)
{
	struct rdma_ucm_query cmd;
	struct rdma_ucm_query_route_resp resp;
	struct ucma_context *ctx;
	struct sockaddr *addr;
	int ret = 0;

	if (out_len < offsetof(struct rdma_ucm_query_route_resp, ibdev_index))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	memset(&resp, 0, sizeof resp);
	addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
	memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ?
				     sizeof(struct sockaddr_in) :
				     sizeof(struct sockaddr_in6));
	addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
	memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ?
				     sizeof(struct sockaddr_in) :
				     sizeof(struct sockaddr_in6));
	if (!ctx->cm_id->device)
		goto out;

	resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
	resp.ibdev_index = ctx->cm_id->device->index;
	resp.port_num = ctx->cm_id->port_num;

	if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num))
		ucma_copy_ib_route(&resp, &ctx->cm_id->route);
	else if (rdma_protocol_roce(ctx->cm_id->device, ctx->cm_id->port_num))
		ucma_copy_iboe_route(&resp, &ctx->cm_id->route);
	else if (rdma_protocol_iwarp(ctx->cm_id->device, ctx->cm_id->port_num))
		ucma_copy_iw_route(&resp, &ctx->cm_id->route);

out:
	mutex_unlock(&ctx->mutex);
	if (copy_to_user(u64_to_user_ptr(cmd.response), &resp,
			 min_t(size_t, out_len, sizeof(resp))))
		ret = -EFAULT;

	ucma_put_ctx(ctx);
	return ret;
}
static void ucma_query_device_addr(struct rdma_cm_id *cm_id,
				   struct rdma_ucm_query_addr_resp *resp)
{
	if (!cm_id->device)
		return;

	resp->node_guid = (__force __u64) cm_id->device->node_guid;
	resp->ibdev_index = cm_id->device->index;
	resp->port_num = cm_id->port_num;
	resp->pkey = (__force __u16) cpu_to_be16(
		     ib_addr_get_pkey(&cm_id->route.addr.dev_addr));
}
static ssize_t ucma_query_addr(struct ucma_context *ctx,
			       void __user *response, int out_len)
{
	struct rdma_ucm_query_addr_resp resp;
	struct sockaddr *addr;
	int ret = 0;

	if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index))
		return -ENOSPC;

	memset(&resp, 0, sizeof resp);

	addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
	resp.src_size = rdma_addr_size(addr);
	memcpy(&resp.src_addr, addr, resp.src_size);

	addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
	resp.dst_size = rdma_addr_size(addr);
	memcpy(&resp.dst_addr, addr, resp.dst_size);

	ucma_query_device_addr(ctx->cm_id, &resp);

	if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp))))
		ret = -EFAULT;

	return ret;
}
static ssize_t ucma_query_path(struct ucma_context *ctx,
			       void __user *response, int out_len)
{
	struct rdma_ucm_query_path_resp *resp;
	int i, ret = 0;

	if (out_len < sizeof(*resp))
		return -ENOSPC;

	resp = kzalloc(out_len, GFP_KERNEL);
	if (!resp)
		return -ENOMEM;

	resp->num_paths = ctx->cm_id->route.num_pri_alt_paths;
	for (i = 0, out_len -= sizeof(*resp);
	     i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data);
	     i++, out_len -= sizeof(struct ib_path_rec_data)) {
		struct sa_path_rec *rec = &ctx->cm_id->route.path_rec[i];

		resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY |
					   IB_PATH_BIDIRECTIONAL;
		if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
			struct sa_path_rec ib;

			sa_convert_path_opa_to_ib(&ib, rec);
			ib_sa_pack_path(&ib, &resp->path_data[i].path_rec);
		} else {
			ib_sa_pack_path(rec, &resp->path_data[i].path_rec);
		}
	}

	if (copy_to_user(response, resp, struct_size(resp, path_data, i)))
		ret = -EFAULT;

	kfree(resp);
	return ret;
}
static ssize_t ucma_query_gid(struct ucma_context *ctx,
			      void __user *response, int out_len)
{
	struct rdma_ucm_query_addr_resp resp;
	struct sockaddr_ib *addr;
	int ret = 0;

	if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index))
		return -ENOSPC;

	memset(&resp, 0, sizeof resp);

	ucma_query_device_addr(ctx->cm_id, &resp);

	addr = (struct sockaddr_ib *) &resp.src_addr;
	resp.src_size = sizeof(*addr);
	if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) {
		memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size);
	} else {
		addr->sib_family = AF_IB;
		addr->sib_pkey = (__force __be16) resp.pkey;
		rdma_read_gids(ctx->cm_id, (union ib_gid *)&addr->sib_addr,
			       NULL);
		addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
						    &ctx->cm_id->route.addr.src_addr);
	}

	addr = (struct sockaddr_ib *) &resp.dst_addr;
	resp.dst_size = sizeof(*addr);
	if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) {
		memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size);
	} else {
		addr->sib_family = AF_IB;
		addr->sib_pkey = (__force __be16) resp.pkey;
		rdma_read_gids(ctx->cm_id, NULL,
			       (union ib_gid *)&addr->sib_addr);
		addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
						    &ctx->cm_id->route.addr.dst_addr);
	}

	if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp))))
		ret = -EFAULT;

	return ret;
}
static ssize_t ucma_query(struct ucma_file *file,
			  const char __user *inbuf,
			  int in_len, int out_len)
{
	struct rdma_ucm_query cmd;
	struct ucma_context *ctx;
	void __user *response;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	response = u64_to_user_ptr(cmd.response);
	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	switch (cmd.option) {
	case RDMA_USER_CM_QUERY_ADDR:
		ret = ucma_query_addr(ctx, response, out_len);
		break;
	case RDMA_USER_CM_QUERY_PATH:
		ret = ucma_query_path(ctx, response, out_len);
		break;
	case RDMA_USER_CM_QUERY_GID:
		ret = ucma_query_gid(ctx, response, out_len);
		break;
	default:
		ret = -ENOSYS;
		break;
	}
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}
static void ucma_copy_conn_param(struct rdma_cm_id *id,
				 struct rdma_conn_param *dst,
				 struct rdma_ucm_conn_param *src)
{
	dst->private_data = src->private_data;
	dst->private_data_len = src->private_data_len;
	dst->responder_resources = src->responder_resources;
	dst->initiator_depth = src->initiator_depth;
	dst->flow_control = src->flow_control;
	dst->retry_count = src->retry_count;
	dst->rnr_retry_count = src->rnr_retry_count;
	dst->srq = src->srq;
	dst->qp_num = src->qp_num & 0xFFFFFF;
	dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0;
}
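
/*
 * connect/accept take a variable sized command: everything up to "reserved"
 * is mandatory, and the trailing ECE fields are used only when a newer
 * userspace actually passed them, which is detected by comparing in_len
 * against offsetofend(typeof(cmd), ece).
 */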
static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
			    int in_len, int out_len)
{
	struct rdma_conn_param conn_param;
	struct rdma_ucm_ece ece = {};
	struct rdma_ucm_connect cmd;
	struct ucma_context *ctx;
	size_t in_size;
	int ret;

	if (in_len < offsetofend(typeof(cmd), reserved))
		return -EINVAL;
	in_size = min_t(size_t, in_len, sizeof(cmd));
	if (copy_from_user(&cmd, inbuf, in_size))
		return -EFAULT;

	if (!cmd.conn_param.valid)
		return -EINVAL;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
	if (offsetofend(typeof(cmd), ece) <= in_size) {
		ece.vendor_id = cmd.ece.vendor_id;
		ece.attr_mod = cmd.ece.attr_mod;
	}

	mutex_lock(&ctx->mutex);
	ret = rdma_connect_ece(ctx->cm_id, &conn_param, &ece);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}
static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_listen cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	if (cmd.backlog <= 0 || cmd.backlog > max_backlog)
		cmd.backlog = max_backlog;
	atomic_set(&ctx->backlog, cmd.backlog);

	mutex_lock(&ctx->mutex);
	ret = rdma_listen(ctx->cm_id, cmd.backlog);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}
static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_accept cmd;
	struct rdma_conn_param conn_param;
	struct rdma_ucm_ece ece = {};
	struct ucma_context *ctx;
	size_t in_size;
	int ret;

	if (in_len < offsetofend(typeof(cmd), reserved))
		return -EINVAL;
	in_size = min_t(size_t, in_len, sizeof(cmd));
	if (copy_from_user(&cmd, inbuf, in_size))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	if (offsetofend(typeof(cmd), ece) <= in_size) {
		ece.vendor_id = cmd.ece.vendor_id;
		ece.attr_mod = cmd.ece.attr_mod;
	}

	if (cmd.conn_param.valid) {
		ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
		mutex_lock(&ctx->mutex);
		rdma_lock_handler(ctx->cm_id);
		ret = rdma_accept_ece(ctx->cm_id, &conn_param, &ece);
		if (!ret) {
			/* The uid must be set atomically with the handler */
			ctx->uid = cmd.uid;
		}
		rdma_unlock_handler(ctx->cm_id);
		mutex_unlock(&ctx->mutex);
	} else {
		mutex_lock(&ctx->mutex);
		rdma_lock_handler(ctx->cm_id);
		ret = rdma_accept_ece(ctx->cm_id, NULL, &ece);
		rdma_unlock_handler(ctx->cm_id);
		mutex_unlock(&ctx->mutex);
	}
	ucma_put_ctx(ctx);
	return ret;
}
static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_reject cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (!cmd.reason)
		cmd.reason = IB_CM_REJ_CONSUMER_DEFINED;

	switch (cmd.reason) {
	case IB_CM_REJ_CONSUMER_DEFINED:
	case IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED:
		break;
	default:
		return -EINVAL;
	}

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len,
			  cmd.reason);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}
static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_disconnect cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_disconnect(ctx->cm_id);
	mutex_unlock(&ctx->mutex);
	ucma_put_ctx(ctx);
	return ret;
}
static ssize_t ucma_init_qp_attr(struct ucma_file *file,
				 const char __user *inbuf,
				 int in_len, int out_len)
{
	struct rdma_ucm_init_qp_attr cmd;
	struct ib_uverbs_qp_attr resp;
	struct ucma_context *ctx;
	struct ib_qp_attr qp_attr;
	int ret;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (cmd.qp_state > IB_QPS_ERR)
		return -EINVAL;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	resp.qp_attr_mask = 0;
	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.qp_state = cmd.qp_state;
	mutex_lock(&ctx->mutex);
	ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask);
	mutex_unlock(&ctx->mutex);
	if (ret)
		goto out;

	ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

out:
	ucma_put_ctx(ctx);
	return ret;
}
static int ucma_set_option_id(struct ucma_context *ctx, int optname,
			      void *optval, size_t optlen)
{
	int ret = 0;

	switch (optname) {
	case RDMA_OPTION_ID_TOS:
		if (optlen != sizeof(u8)) {
			ret = -EINVAL;
			break;
		}
		rdma_set_service_type(ctx->cm_id, *((u8 *) optval));
		break;
	case RDMA_OPTION_ID_REUSEADDR:
		if (optlen != sizeof(int)) {
			ret = -EINVAL;
			break;
		}
		ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0);
		break;
	case RDMA_OPTION_ID_AFONLY:
		if (optlen != sizeof(int)) {
			ret = -EINVAL;
			break;
		}
		ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0);
		break;
	case RDMA_OPTION_ID_ACK_TIMEOUT:
		if (optlen != sizeof(u8)) {
			ret = -EINVAL;
			break;
		}
		ret = rdma_set_ack_timeout(ctx->cm_id, *((u8 *)optval));
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}
static int ucma_set_ib_path(struct ucma_context *ctx,
			    struct ib_path_rec_data *path_data, size_t optlen)
{
	struct sa_path_rec sa_path;
	struct rdma_cm_event event;
	int ret;

	if (optlen % sizeof(*path_data))
		return -EINVAL;

	for (; optlen; optlen -= sizeof(*path_data), path_data++) {
		if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY |
					 IB_PATH_BIDIRECTIONAL))
			break;
	}

	if (!optlen)
		return -EINVAL;

	if (!ctx->cm_id->device)
		return -EINVAL;

	memset(&sa_path, 0, sizeof(sa_path));

	sa_path.rec_type = SA_PATH_REC_TYPE_IB;
	ib_sa_unpack_path(path_data->path_rec, &sa_path);

	if (rdma_cap_opa_ah(ctx->cm_id->device, ctx->cm_id->port_num)) {
		struct sa_path_rec opa;

		sa_convert_path_ib_to_opa(&opa, &sa_path);
		mutex_lock(&ctx->mutex);
		ret = rdma_set_ib_path(ctx->cm_id, &opa);
		mutex_unlock(&ctx->mutex);
	} else {
		mutex_lock(&ctx->mutex);
		ret = rdma_set_ib_path(ctx->cm_id, &sa_path);
		mutex_unlock(&ctx->mutex);
	}
	if (ret)
		return ret;

	memset(&event, 0, sizeof event);
	event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
	return ucma_event_handler(ctx->cm_id, &event);
}
static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
			      void *optval, size_t optlen)
{
	int ret;

	switch (optname) {
	case RDMA_OPTION_IB_PATH:
		ret = ucma_set_ib_path(ctx, optval, optlen);
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}
static int ucma_set_option_level(struct ucma_context *ctx, int level,
				 int optname, void *optval, size_t optlen)
{
	int ret;

	switch (level) {
	case RDMA_OPTION_ID:
		mutex_lock(&ctx->mutex);
		ret = ucma_set_option_id(ctx, optname, optval, optlen);
		mutex_unlock(&ctx->mutex);
		break;
	case RDMA_OPTION_IB:
		ret = ucma_set_option_ib(ctx, optname, optval, optlen);
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}
static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_set_option cmd;
	struct ucma_context *ctx;
	void *optval;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	optval = memdup_user(u64_to_user_ptr(cmd.optval),
			     cmd.optlen);
	if (IS_ERR(optval)) {
		ret = PTR_ERR(optval);
		goto out;
	}

	ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval,
				    cmd.optlen);
	kfree(optval);

out:
	ucma_put_ctx(ctx);
	return ret;
}
static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_notify cmd;
	struct ucma_context *ctx;
	int ret = -EINVAL;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	if (ctx->cm_id->device)
		ret = rdma_notify(ctx->cm_id, (enum ib_event_type)cmd.event);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}
static ssize_t ucma_process_join(struct ucma_file *file,
				 struct rdma_ucm_join_mcast *cmd, int out_len)
{
	struct rdma_ucm_create_id_resp resp;
	struct ucma_context *ctx;
	struct ucma_multicast *mc;
	struct sockaddr *addr;
	int ret;
	u8 join_state;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	addr = (struct sockaddr *) &cmd->addr;
	if (cmd->addr_size != rdma_addr_size(addr))
		return -EINVAL;

	if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER)
		join_state = BIT(FULLMEMBER_JOIN);
	else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER)
		join_state = BIT(SENDONLY_FULLMEMBER_JOIN);
	else
		return -EINVAL;

	ctx = ucma_get_ctx_dev(file, cmd->id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mc = kzalloc(sizeof(*mc), GFP_KERNEL);
	if (!mc) {
		ret = -ENOMEM;
		goto err_put_ctx;
	}

	mc->ctx = ctx;
	mc->join_state = join_state;
	mc->uid = cmd->uid;
	memcpy(&mc->addr, addr, cmd->addr_size);

	xa_lock(&multicast_table);
	if (__xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b,
		       GFP_KERNEL)) {
		ret = -ENOMEM;
		goto err_free_mc;
	}

	list_add_tail(&mc->list, &ctx->mc_list);
	xa_unlock(&multicast_table);

	mutex_lock(&ctx->mutex);
	ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
				  join_state, mc);
	mutex_unlock(&ctx->mutex);
	if (ret)
		goto err_xa_erase;

	resp.id = mc->id;
	if (copy_to_user(u64_to_user_ptr(cmd->response),
			 &resp, sizeof(resp))) {
		ret = -EFAULT;
		goto err_leave_multicast;
	}

	xa_store(&multicast_table, mc->id, mc, 0);

	ucma_put_ctx(ctx);
	return 0;

err_leave_multicast:
	mutex_lock(&ctx->mutex);
	rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr);
	mutex_unlock(&ctx->mutex);
	ucma_cleanup_mc_events(mc);
err_xa_erase:
	xa_lock(&multicast_table);
	list_del(&mc->list);
	__xa_erase(&multicast_table, mc->id);
err_free_mc:
	xa_unlock(&multicast_table);
	kfree(mc);
err_put_ctx:
	ucma_put_ctx(ctx);
	return ret;
}
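
/*
 * Two userspace entry points funnel into ucma_process_join(): the legacy
 * RDMA_USER_CM_CMD_JOIN_IP_MCAST, which only carries a sockaddr_in(6) and is
 * translated into the newer layout here, and RDMA_USER_CM_CMD_JOIN_MCAST,
 * which already carries addr_size and join_flags.
 */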
static ssize_t ucma_join_ip_multicast(struct ucma_file *file,
				      const char __user *inbuf,
				      int in_len, int out_len)
{
	struct rdma_ucm_join_ip_mcast cmd;
	struct rdma_ucm_join_mcast join_cmd;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	join_cmd.response = cmd.response;
	join_cmd.uid = cmd.uid;
	join_cmd.id = cmd.id;
	join_cmd.addr_size = rdma_addr_size_in6(&cmd.addr);
	if (!join_cmd.addr_size)
		return -EINVAL;

	join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER;
	memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size);

	return ucma_process_join(file, &join_cmd, out_len);
}
static ssize_t ucma_join_multicast(struct ucma_file *file,
				   const char __user *inbuf,
				   int in_len, int out_len)
{
	struct rdma_ucm_join_mcast cmd;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (!rdma_addr_size_kss(&cmd.addr))
		return -EINVAL;

	return ucma_process_join(file, &cmd, out_len);
}
static ssize_t ucma_leave_multicast(struct ucma_file *file,
				    const char __user *inbuf,
				    int in_len, int out_len)
{
	struct rdma_ucm_destroy_id cmd;
	struct rdma_ucm_destroy_id_resp resp;
	struct ucma_multicast *mc;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	xa_lock(&multicast_table);
	mc = xa_load(&multicast_table, cmd.id);
	if (!mc)
		mc = ERR_PTR(-ENOENT);
	else if (READ_ONCE(mc->ctx->file) != file)
		mc = ERR_PTR(-EINVAL);
	else if (!refcount_inc_not_zero(&mc->ctx->ref))
		mc = ERR_PTR(-ENXIO);

	if (IS_ERR(mc)) {
		xa_unlock(&multicast_table);
		ret = PTR_ERR(mc);
		goto out;
	}

	list_del(&mc->list);
	__xa_erase(&multicast_table, mc->id);
	xa_unlock(&multicast_table);

	mutex_lock(&mc->ctx->mutex);
	rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
	mutex_unlock(&mc->ctx->mutex);

	ucma_cleanup_mc_events(mc);

	ucma_put_ctx(mc->ctx);
	resp.events_reported = mc->events_reported;
	kfree(mc);

	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;
out:
	return ret;
}
static ssize_t ucma_migrate_id(struct ucma_file *new_file,
			       const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_migrate_id cmd;
	struct rdma_ucm_migrate_resp resp;
	struct ucma_event *uevent, *tmp;
	struct ucma_context *ctx;
	LIST_HEAD(event_list);
	struct ucma_file *cur_file;
	int ret = 0;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	/* Get current fd to protect against it being closed */
	CLASS(fd, f)(cmd.fd);
	if (fd_empty(f))
		return -ENOENT;
	if (fd_file(f)->f_op != &ucma_fops)
		return -EINVAL;
	cur_file = fd_file(f)->private_data;

	/* Validate current fd and prevent destruction of id. */
	ctx = ucma_get_ctx(cur_file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	rdma_lock_handler(ctx->cm_id);
	/*
	 * ctx->file can only be changed under the handler & xa_lock. xa_load()
	 * must be checked again to ensure the ctx hasn't begun destruction
	 * since the ucma_get_ctx().
	 */
	xa_lock(&ctx_table);
	if (_ucma_find_context(cmd.id, cur_file) != ctx) {
		xa_unlock(&ctx_table);
		ret = -ENOENT;
		goto err_unlock;
	}
	ctx->file = new_file;
	xa_unlock(&ctx_table);

	mutex_lock(&cur_file->mut);
	list_del(&ctx->list);
	/*
	 * At this point lock_handler() prevents addition of new uevents for
	 * this ctx.
	 */
	list_for_each_entry_safe(uevent, tmp, &cur_file->event_list, list)
		if (uevent->ctx == ctx)
			list_move_tail(&uevent->list, &event_list);
	resp.events_reported = ctx->events_reported;
	mutex_unlock(&cur_file->mut);

	mutex_lock(&new_file->mut);
	list_add_tail(&ctx->list, &new_file->ctx_list);
	list_splice_tail(&event_list, &new_file->event_list);
	mutex_unlock(&new_file->mut);

	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

err_unlock:
	rdma_unlock_handler(ctx->cm_id);
	ucma_put_ctx(ctx);
	return ret;
}
static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
				   const char __user *inbuf,
				   int in_len, int out_len) = {
	[RDMA_USER_CM_CMD_CREATE_ID] 	 = ucma_create_id,
	[RDMA_USER_CM_CMD_DESTROY_ID]	 = ucma_destroy_id,
	[RDMA_USER_CM_CMD_BIND_IP]	 = ucma_bind_ip,
	[RDMA_USER_CM_CMD_RESOLVE_IP]	 = ucma_resolve_ip,
	[RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route,
	[RDMA_USER_CM_CMD_QUERY_ROUTE]	 = ucma_query_route,
	[RDMA_USER_CM_CMD_CONNECT]	 = ucma_connect,
	[RDMA_USER_CM_CMD_LISTEN]	 = ucma_listen,
	[RDMA_USER_CM_CMD_ACCEPT]	 = ucma_accept,
	[RDMA_USER_CM_CMD_REJECT]	 = ucma_reject,
	[RDMA_USER_CM_CMD_DISCONNECT]	 = ucma_disconnect,
	[RDMA_USER_CM_CMD_INIT_QP_ATTR]	 = ucma_init_qp_attr,
	[RDMA_USER_CM_CMD_GET_EVENT]	 = ucma_get_event,
	[RDMA_USER_CM_CMD_GET_OPTION]	 = NULL,
	[RDMA_USER_CM_CMD_SET_OPTION]	 = ucma_set_option,
	[RDMA_USER_CM_CMD_NOTIFY]	 = ucma_notify,
	[RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast,
	[RDMA_USER_CM_CMD_LEAVE_MCAST]	 = ucma_leave_multicast,
	[RDMA_USER_CM_CMD_MIGRATE_ID]	 = ucma_migrate_id,
	[RDMA_USER_CM_CMD_QUERY]	 = ucma_query,
	[RDMA_USER_CM_CMD_BIND]		 = ucma_bind,
	[RDMA_USER_CM_CMD_RESOLVE_ADDR]	 = ucma_resolve_addr,
	[RDMA_USER_CM_CMD_JOIN_MCAST]	 = ucma_join_multicast
};
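
/*
 * Every command is submitted as a single write(): a struct rdma_ucm_cmd_hdr
 * (command index plus "in"/"out" payload sizes) immediately followed by "in"
 * bytes of command-specific payload.  As a rough, illustrative (untested)
 * userspace sketch, with "fd", "resp" and "my_cookie" as placeholders:
 *
 *	struct {
 *		struct rdma_ucm_cmd_hdr hdr;
 *		struct rdma_ucm_create_id cmd;
 *	} msg = {
 *		.hdr = { .cmd = RDMA_USER_CM_CMD_CREATE_ID,
 *			 .in  = sizeof(msg.cmd), .out = sizeof(resp) },
 *		.cmd = { .uid = my_cookie, .ps = RDMA_PS_TCP,
 *			 .response = (uintptr_t)&resp },
 *	};
 *	write(fd, &msg, sizeof(msg));
 *
 * ucma_write() below validates the header and dispatches through
 * ucma_cmd_table[].
 */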
static ssize_t ucma_write(struct file *filp, const char __user *buf,
			  size_t len, loff_t *pos)
{
	struct ucma_file *file = filp->private_data;
	struct rdma_ucm_cmd_hdr hdr;
	ssize_t ret;

	if (!ib_safe_file_access(filp)) {
		pr_err_once("%s: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
			    __func__, task_tgid_vnr(current), current->comm);
		return -EACCES;
	}

	if (len < sizeof(hdr))
		return -EINVAL;

	if (copy_from_user(&hdr, buf, sizeof(hdr)))
		return -EFAULT;

	if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
		return -EINVAL;
	hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucma_cmd_table));

	if (hdr.in + sizeof(hdr) > len)
		return -EINVAL;

	if (!ucma_cmd_table[hdr.cmd])
		return -ENOSYS;

	ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out);
	if (!ret)
		ret = len;

	return ret;
}
static __poll_t ucma_poll(struct file *filp, struct poll_table_struct *wait)
{
	struct ucma_file *file = filp->private_data;
	__poll_t mask = 0;

	poll_wait(filp, &file->poll_wait, wait);

	if (!list_empty(&file->event_list))
		mask = EPOLLIN | EPOLLRDNORM;

	return mask;
}
/*
 * ucma_open() does not need the BKL:
 *
 *  - no global state is referred to;
 *  - there is no ioctl method to race against;
 *  - no further module initialization is required for open to work
 *    after the device is registered.
 */
static int ucma_open(struct inode *inode, struct file *filp)
{
	struct ucma_file *file;

	file = kmalloc(sizeof *file, GFP_KERNEL);
	if (!file)
		return -ENOMEM;

	INIT_LIST_HEAD(&file->event_list);
	INIT_LIST_HEAD(&file->ctx_list);
	init_waitqueue_head(&file->poll_wait);
	mutex_init(&file->mut);

	filp->private_data = file;
	file->filp = filp;

	return stream_open(inode, filp);
}
static int ucma_close(struct inode *inode, struct file *filp)
{
	struct ucma_file *file = filp->private_data;

	/*
	 * All paths that touch ctx_list from write() are prevented by this
	 * being a FD release function. The list_add_tail() in
	 * ucma_connect_event_handler() can run concurrently, however it only
	 * adds to the list *after* a listening ID. By only reading the first of
	 * the list, and relying on ucma_destroy_private_ctx() to block
	 * ucma_connect_event_handler(), no additional locking is needed.
	 */
	while (!list_empty(&file->ctx_list)) {
		struct ucma_context *ctx = list_first_entry(
			&file->ctx_list, struct ucma_context, list);

		WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY,
				   GFP_KERNEL) != ctx);
		ucma_destroy_private_ctx(ctx);
	}
	kfree(file);
	return 0;
}
static const struct file_operations ucma_fops = {
	.owner 	 = THIS_MODULE,
	.open 	 = ucma_open,
	.release = ucma_close,
	.write	 = ucma_write,
	.poll    = ucma_poll,
};

static struct miscdevice ucma_misc = {
	.minor		= MISC_DYNAMIC_MINOR,
	.name		= "rdma_cm",
	.nodename	= "infiniband/rdma_cm",
	.mode		= 0666,
};
static int ucma_get_global_nl_info(struct ib_client_nl_info *res)
{
	res->abi = RDMA_USER_CM_ABI_VERSION;
	res->cdev = ucma_misc.this_device;
	return 0;
}

static struct ib_client rdma_cma_client = {
	.name = "rdma_cm",
	.get_global_nl_info = ucma_get_global_nl_info,
};
MODULE_ALIAS_RDMA_CLIENT("rdma_cm");
static ssize_t abi_version_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
}
static DEVICE_ATTR_RO(abi_version);
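
/*
 * Module bring-up: register the misc device (the infiniband/rdma_cm node),
 * add the abi_version sysfs attribute, register the net/rdma_ucm sysctl
 * table and finally register as an ib_client; the error path below and
 * ucma_cleanup() unwind in the reverse order.
 */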
static int __init ucma_init(void)
{
	int ret;

	ret = misc_register(&ucma_misc);
	if (ret)
		return ret;

	ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version);
	if (ret) {
		pr_err("rdma_ucm: couldn't create abi_version attr\n");
		goto err1;
	}

	ucma_ctl_table_hdr = register_net_sysctl(&init_net, "net/rdma_ucm", ucma_ctl_table);
	if (!ucma_ctl_table_hdr) {
		pr_err("rdma_ucm: couldn't register sysctl paths\n");
		ret = -ENOMEM;
		goto err2;
	}

	ret = ib_register_client(&rdma_cma_client);
	if (ret)
		goto err3;

	return 0;
err3:
	unregister_net_sysctl_table(ucma_ctl_table_hdr);
err2:
	device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
err1:
	misc_deregister(&ucma_misc);
	return ret;
}
static void __exit ucma_cleanup(void)
{
	ib_unregister_client(&rdma_cma_client);
	unregister_net_sysctl_table(ucma_ctl_table_hdr);
	device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
	misc_deregister(&ucma_misc);
}

module_init(ucma_init);
module_exit(ucma_cleanup);