/*
 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/file.h>
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/idr.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/nsproxy.h>
#include <linux/nospec.h>

#include <rdma/rdma_user_cm.h>
#include <rdma/ib_marshall.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cm.h>
#include <rdma/rdma_netlink.h>
#include "core_priv.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
MODULE_LICENSE("Dual BSD/GPL");

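/*
 * Note: max_backlog bounds the listen backlog a userspace caller may request
 * in ucma_listen() below; it is exposed as a writable sysctl at
 * /proc/sys/net/rdma_ucm/max_backlog (registered in ucma_init()).
 */
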
static unsigned int max_backlog = 1024;

static struct ctl_table_header *ucma_ctl_table_hdr;
static struct ctl_table ucma_ctl_table[] = {
	{
		.procname	= "max_backlog",
		.data		= &max_backlog,
		.maxlen		= sizeof max_backlog,
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};

struct ucma_file {
	struct mutex		mut;
	struct file		*filp;
	struct list_head	ctx_list;
	struct list_head	event_list;
	wait_queue_head_t	poll_wait;
};

struct ucma_context {
	u32			id;
	struct completion	comp;
	refcount_t		ref;
	int			events_reported;
	atomic_t		backlog;

	struct ucma_file	*file;
	struct rdma_cm_id	*cm_id;
	struct mutex		mutex;
	u64			uid;

	struct list_head	list;
	/* sync between removal event and id destroy, protected by file mut */
	int			destroying;
	struct work_struct	close_work;
};

struct ucma_multicast {
	struct ucma_context	*ctx;
	u32			id;
	int			events_reported;

	u64			uid;
	u8			join_state;
	struct sockaddr_storage	addr;
};

struct ucma_event {
	struct ucma_context	*ctx;
	struct ucma_context	*conn_req_ctx;
	struct ucma_multicast	*mc;
	struct list_head	list;
	struct rdma_ucm_event_resp resp;
};

static DEFINE_XARRAY_ALLOC(ctx_table);
static DEFINE_XARRAY_ALLOC(multicast_table);

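/*
 * Note: ctx_table and multicast_table map the u32 ids handed to userspace to
 * their kernel objects. A slot is first reserved with a NULL entry by
 * xa_alloc() and only filled in with xa_store() once the object is fully
 * initialized, so a freshly allocated id is not yet visible to other commands.
 */
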
static const struct file_operations ucma_fops;
static int __destroy_id(struct ucma_context *ctx);

static inline struct ucma_context *_ucma_find_context(int id,
						       struct ucma_file *file)
{
	struct ucma_context *ctx;

	ctx = xa_load(&ctx_table, id);
	if (!ctx)
		ctx = ERR_PTR(-ENOENT);
	else if (ctx->file != file)
		ctx = ERR_PTR(-EINVAL);
	return ctx;
}

static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
{
	struct ucma_context *ctx;

	xa_lock(&ctx_table);
	ctx = _ucma_find_context(id, file);
	if (!IS_ERR(ctx))
		if (!refcount_inc_not_zero(&ctx->ref))
			ctx = ERR_PTR(-ENXIO);
	xa_unlock(&ctx_table);
	return ctx;
}

static void ucma_put_ctx(struct ucma_context *ctx)
{
	if (refcount_dec_and_test(&ctx->ref))
		complete(&ctx->comp);
}

/*
 * Same as ucma_get_ctx but requires that ->cm_id->device is valid, e.g. that
 * the CM_ID is bound.
 */
static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id)
{
	struct ucma_context *ctx = ucma_get_ctx(file, id);

	if (IS_ERR(ctx))
		return ctx;
	if (!ctx->cm_id->device) {
		ucma_put_ctx(ctx);
		return ERR_PTR(-EINVAL);
	}
	return ctx;
}

static void ucma_close_id(struct work_struct *work)
{
	struct ucma_context *ctx = container_of(work, struct ucma_context, close_work);

	/* once all inflight tasks are finished, we close all underlying
	 * resources. The context is still alive till its explicit destroying
	 * by its creator. This puts back the xarray's reference.
	 */
	ucma_put_ctx(ctx);
	wait_for_completion(&ctx->comp);
	/* No new events will be generated after destroying the id. */
	rdma_destroy_id(ctx->cm_id);

	/*
	 * At this point ctx->ref is zero so the only place the ctx can be is in
	 * a uevent or in __destroy_id(). Since the former doesn't touch
	 * ctx->cm_id and the latter sync cancels this, there are no races with
	 * this store.
	 */
	ctx->cm_id = NULL;
}

static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
{
	struct ucma_context *ctx;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return NULL;

	INIT_WORK(&ctx->close_work, ucma_close_id);
	refcount_set(&ctx->ref, 1);
	init_completion(&ctx->comp);
	/* So list_del() will work if we don't do ucma_finish_ctx() */
	INIT_LIST_HEAD(&ctx->list);
	ctx->file = file;
	mutex_init(&ctx->mutex);

	if (xa_alloc(&ctx_table, &ctx->id, NULL, xa_limit_32b, GFP_KERNEL)) {
		kfree(ctx);
		return NULL;
	}
	return ctx;
}

static void ucma_finish_ctx(struct ucma_context *ctx)
{
	lockdep_assert_held(&ctx->file->mut);
	list_add_tail(&ctx->list, &ctx->file->ctx_list);
	xa_store(&ctx_table, ctx->id, ctx, GFP_KERNEL);
}

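/*
 * The two helpers below translate the kernel rdma_cm event parameters into
 * the uapi structures (rdma_ucm_*) that ucma_get_event() later copies out to
 * userspace.
 */
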
static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
				 struct rdma_conn_param *src)
{
	if (src->private_data_len)
		memcpy(dst->private_data, src->private_data,
		       src->private_data_len);
	dst->private_data_len = src->private_data_len;
	dst->responder_resources = src->responder_resources;
	dst->initiator_depth = src->initiator_depth;
	dst->flow_control = src->flow_control;
	dst->retry_count = src->retry_count;
	dst->rnr_retry_count = src->rnr_retry_count;
	dst->srq = src->srq;
	dst->qp_num = src->qp_num;
}

static void ucma_copy_ud_event(struct ib_device *device,
			       struct rdma_ucm_ud_param *dst,
			       struct rdma_ud_param *src)
{
	if (src->private_data_len)
		memcpy(dst->private_data, src->private_data,
		       src->private_data_len);
	dst->private_data_len = src->private_data_len;
	ib_copy_ah_attr_to_user(device, &dst->ah_attr, &src->ah_attr);
	dst->qp_num = src->qp_num;
	dst->qkey = src->qkey;
}

static struct ucma_event *ucma_create_uevent(struct ucma_context *ctx,
					     struct rdma_cm_event *event)
{
	struct ucma_event *uevent;

	uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
	if (!uevent)
		return NULL;

	uevent->ctx = ctx;
	switch (event->event) {
	case RDMA_CM_EVENT_MULTICAST_JOIN:
	case RDMA_CM_EVENT_MULTICAST_ERROR:
		uevent->mc = (struct ucma_multicast *)
			     event->param.ud.private_data;
		uevent->resp.uid = uevent->mc->uid;
		uevent->resp.id = uevent->mc->id;
		break;
	default:
		uevent->resp.uid = ctx->uid;
		uevent->resp.id = ctx->id;
		break;
	}
	uevent->resp.event = event->event;
	uevent->resp.status = event->status;
	if (ctx->cm_id->qp_type == IB_QPT_UD)
		ucma_copy_ud_event(ctx->cm_id->device, &uevent->resp.param.ud,
				   &event->param.ud);
	else
		ucma_copy_conn_event(&uevent->resp.param.conn,
				     &event->param.conn);

	uevent->resp.ece.vendor_id = event->ece.vendor_id;
	uevent->resp.ece.attr_mod = event->ece.attr_mod;
	return uevent;
}

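/*
 * Called from the rdma_cm handler for RDMA_CM_EVENT_CONNECT_REQUEST on a
 * listening id: a new ucma_context is created for the child cm_id, charged
 * against the listener's backlog, and queued to userspace as an event.
 */
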
static int ucma_connect_event_handler(struct rdma_cm_id *cm_id,
				      struct rdma_cm_event *event)
{
	struct ucma_context *listen_ctx = cm_id->context;
	struct ucma_context *ctx;
	struct ucma_event *uevent;

	if (!atomic_add_unless(&listen_ctx->backlog, -1, 0))
		return -ENOMEM;
	ctx = ucma_alloc_ctx(listen_ctx->file);
	if (!ctx)
		goto err_backlog;
	ctx->cm_id = cm_id;

	uevent = ucma_create_uevent(listen_ctx, event);
	if (!uevent)
		goto err_alloc;
	uevent->conn_req_ctx = ctx;
	uevent->resp.id = ctx->id;

	ctx->cm_id->context = ctx;

	mutex_lock(&ctx->file->mut);
	ucma_finish_ctx(ctx);
	list_add_tail(&uevent->list, &ctx->file->event_list);
	mutex_unlock(&ctx->file->mut);
	wake_up_interruptible(&ctx->file->poll_wait);
	return 0;

err_alloc:
	xa_erase(&ctx_table, ctx->id);
	kfree(ctx);
err_backlog:
	atomic_inc(&listen_ctx->backlog);
	/* Returning error causes the new ID to be destroyed */
	return -ENOMEM;
}

static int ucma_event_handler(struct rdma_cm_id *cm_id,
			      struct rdma_cm_event *event)
{
	struct ucma_event *uevent;
	struct ucma_context *ctx = cm_id->context;

	if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
		return ucma_connect_event_handler(cm_id, event);

	/*
	 * We ignore events for new connections until userspace has set their
	 * context. This can only happen if an error occurs on a new connection
	 * before the user accepts it. This is okay, since the accept will just
	 * fail later. However, we do need to release the underlying HW
	 * resources in case of a device removal event.
	 */
	if (ctx->uid) {
		uevent = ucma_create_uevent(ctx, event);
		if (!uevent)
			return 0;

		mutex_lock(&ctx->file->mut);
		list_add_tail(&uevent->list, &ctx->file->event_list);
		mutex_unlock(&ctx->file->mut);
		wake_up_interruptible(&ctx->file->poll_wait);
	}

	if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL && !ctx->destroying)
		queue_work(system_unbound_wq, &ctx->close_work);
	return 0;
}

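/*
 * Implements RDMA_USER_CM_CMD_GET_EVENT: blocks (unless the fd is O_NONBLOCK)
 * until an event is queued on this file, copies one rdma_ucm_event_resp to
 * the caller, and returns the backlog credit for delivered connect requests.
 */
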
static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
			      int in_len, int out_len)
{
	struct rdma_ucm_get_event cmd;
	struct ucma_event *uevent;

	/*
	 * Old 32 bit user space does not send the 4 byte padding in the
	 * reserved field. We don't care, allow it to keep working.
	 */
	if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved) -
			sizeof(uevent->resp.ece))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	mutex_lock(&file->mut);
	while (list_empty(&file->event_list)) {
		mutex_unlock(&file->mut);

		if (file->filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		if (wait_event_interruptible(file->poll_wait,
					     !list_empty(&file->event_list)))
			return -ERESTARTSYS;

		mutex_lock(&file->mut);
	}

	uevent = list_first_entry(&file->event_list, struct ucma_event, list);

	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &uevent->resp,
			 min_t(size_t, out_len, sizeof(uevent->resp)))) {
		mutex_unlock(&file->mut);
		return -EFAULT;
	}

	list_del(&uevent->list);
	uevent->ctx->events_reported++;
	if (uevent->mc)
		uevent->mc->events_reported++;
	if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
		atomic_inc(&uevent->ctx->backlog);
	mutex_unlock(&file->mut);

	kfree(uevent);
	return 0;
}

static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type)
{
	switch (cmd->ps) {
	case RDMA_PS_TCP:
		*qp_type = IB_QPT_RC;
		return 0;
	case RDMA_PS_UDP:
	case RDMA_PS_IPOIB:
		*qp_type = IB_QPT_UD;
		return 0;
	case RDMA_PS_IB:
		*qp_type = cmd->qp_type;
		return 0;
	default:
		return -EINVAL;
	}
}

static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
			      int in_len, int out_len)
{
	struct rdma_ucm_create_id cmd;
	struct rdma_ucm_create_id_resp resp;
	struct ucma_context *ctx;
	struct rdma_cm_id *cm_id;
	enum ib_qp_type qp_type;
	int ret;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ret = ucma_get_qp_type(&cmd, &qp_type);
	if (ret)
		return ret;

	ctx = ucma_alloc_ctx(file);
	if (!ctx)
		return -ENOMEM;

	ctx->uid = cmd.uid;
	cm_id = rdma_create_user_id(ucma_event_handler, ctx, cmd.ps, qp_type);
	if (IS_ERR(cm_id)) {
		ret = PTR_ERR(cm_id);
		goto err1;
	}
	ctx->cm_id = cm_id;

	resp.id = ctx->id;
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp))) {
		xa_erase(&ctx_table, ctx->id);
		__destroy_id(ctx);
		return -EFAULT;
	}

	mutex_lock(&file->mut);
	ucma_finish_ctx(ctx);
	mutex_unlock(&file->mut);
	return 0;

err1:
	xa_erase(&ctx_table, ctx->id);
	kfree(ctx);
	return ret;
}

static void ucma_cleanup_multicast(struct ucma_context *ctx)
{
	struct ucma_multicast *mc;
	unsigned long index;

	xa_for_each(&multicast_table, index, mc) {
		if (mc->ctx != ctx)
			continue;
		/*
		 * At this point mc->ctx->ref is 0 so the mc cannot leave the
		 * lock on the reader and this is enough serialization
		 */
		xa_erase(&multicast_table, index);
		kfree(mc);
	}
}

static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
{
	struct ucma_event *uevent, *tmp;

	rdma_lock_handler(mc->ctx->cm_id);
	mutex_lock(&mc->ctx->file->mut);
	list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
		if (uevent->mc != mc)
			continue;

		list_del(&uevent->list);
		kfree(uevent);
	}
	mutex_unlock(&mc->ctx->file->mut);
	rdma_unlock_handler(mc->ctx->cm_id);
}

/*
 * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At
 * this point, no new events will be reported from the hardware. However, we
 * still need to cleanup the UCMA context for this ID. Specifically, there
 * might be events that have not yet been consumed by the user space software.
 * Those are moved off the file's event list under the file mutex and then
 * released as needed.
 */
static int ucma_free_ctx(struct ucma_context *ctx)
{
	int events_reported;
	struct ucma_event *uevent, *tmp;
	LIST_HEAD(list);

	ucma_cleanup_multicast(ctx);

	/* Cleanup events not yet reported to the user. */
	mutex_lock(&ctx->file->mut);
	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
		if (uevent->ctx == ctx || uevent->conn_req_ctx == ctx)
			list_move_tail(&uevent->list, &list);
	}
	list_del(&ctx->list);
	events_reported = ctx->events_reported;
	mutex_unlock(&ctx->file->mut);

	/*
	 * If this was a listening ID then any connections spawned from it
	 * that have not been delivered to userspace are cleaned up too.
	 * Must be done outside any locks.
	 */
	list_for_each_entry_safe(uevent, tmp, &list, list) {
		list_del(&uevent->list);
		if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST &&
		    uevent->conn_req_ctx != ctx)
			__destroy_id(uevent->conn_req_ctx);
		kfree(uevent);
	}

	mutex_destroy(&ctx->mutex);
	kfree(ctx);
	return events_reported;
}

static int __destroy_id(struct ucma_context *ctx)
{
	/*
	 * If the refcount is already 0 then ucma_close_id() has already
	 * destroyed the cm_id, otherwise holding the refcount keeps cm_id
	 * valid. Prevent queue_work() from being called.
	 */
	if (refcount_inc_not_zero(&ctx->ref)) {
		rdma_lock_handler(ctx->cm_id);
		ctx->destroying = 1;
		rdma_unlock_handler(ctx->cm_id);
		ucma_put_ctx(ctx);
	}

	cancel_work_sync(&ctx->close_work);
	/* At this point it's guaranteed that there is no inflight closing task */
	if (ctx->cm_id)
		ucma_close_id(&ctx->close_work);
	return ucma_free_ctx(ctx);
}

static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_destroy_id cmd;
	struct rdma_ucm_destroy_id_resp resp;
	struct ucma_context *ctx;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	xa_lock(&ctx_table);
	ctx = _ucma_find_context(cmd.id, file);
	if (!IS_ERR(ctx))
		__xa_erase(&ctx_table, ctx->id);
	xa_unlock(&ctx_table);

	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	resp.events_reported = __destroy_id(ctx);
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

	return ret;
}

static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf,
			    int in_len, int out_len)
{
	struct rdma_ucm_bind_ip cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (!rdma_addr_size_in6(&cmd.addr))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf,
			 int in_len, int out_len)
{
	struct rdma_ucm_bind cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (cmd.reserved || !cmd.addr_size ||
	    cmd.addr_size != rdma_addr_size_kss(&cmd.addr))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_resolve_ip(struct ucma_file *file,
			       const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_resolve_ip cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if ((cmd.src_addr.sin6_family && !rdma_addr_size_in6(&cmd.src_addr)) ||
	    !rdma_addr_size_in6(&cmd.dst_addr))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
				(struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_resolve_addr(struct ucma_file *file,
				 const char __user *inbuf,
				 int in_len, int out_len)
{
	struct rdma_ucm_resolve_addr cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (cmd.reserved ||
	    (cmd.src_size && (cmd.src_size != rdma_addr_size_kss(&cmd.src_addr))) ||
	    !cmd.dst_size || (cmd.dst_size != rdma_addr_size_kss(&cmd.dst_addr)))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
				(struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_resolve_route(struct ucma_file *file,
				  const char __user *inbuf,
				  int in_len, int out_len)
{
	struct rdma_ucm_resolve_route cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}

static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
			       struct rdma_route *route)
{
	struct rdma_dev_addr *dev_addr;

	resp->num_paths = route->num_paths;
	switch (route->num_paths) {
	case 0:
		dev_addr = &route->addr.dev_addr;
		rdma_addr_get_dgid(dev_addr,
				   (union ib_gid *) &resp->ib_route[0].dgid);
		rdma_addr_get_sgid(dev_addr,
				   (union ib_gid *) &resp->ib_route[0].sgid);
		resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
		break;
	case 2:
		ib_copy_path_rec_to_user(&resp->ib_route[1],
					 &route->path_rec[1]);
		fallthrough;
	case 1:
		ib_copy_path_rec_to_user(&resp->ib_route[0],
					 &route->path_rec[0]);
		break;
	default:
		break;
	}
}

static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
				 struct rdma_route *route)
{

	resp->num_paths = route->num_paths;
	switch (route->num_paths) {
	case 0:
		rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr,
			    (union ib_gid *)&resp->ib_route[0].dgid);
		rdma_ip2gid((struct sockaddr *)&route->addr.src_addr,
			    (union ib_gid *)&resp->ib_route[0].sgid);
		resp->ib_route[0].pkey = cpu_to_be16(0xffff);
		break;
	case 2:
		ib_copy_path_rec_to_user(&resp->ib_route[1],
					 &route->path_rec[1]);
		fallthrough;
	case 1:
		ib_copy_path_rec_to_user(&resp->ib_route[0],
					 &route->path_rec[0]);
		break;
	default:
		break;
	}
}

static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp,
			       struct rdma_route *route)
{
	struct rdma_dev_addr *dev_addr;

	dev_addr = &route->addr.dev_addr;
	rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid);
	rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid);
}

static ssize_t ucma_query_route(struct ucma_file *file,
				const char __user *inbuf,
				int in_len, int out_len)
{
	struct rdma_ucm_query cmd;
	struct rdma_ucm_query_route_resp resp;
	struct ucma_context *ctx;
	struct sockaddr *addr;
	int ret = 0;

	if (out_len < offsetof(struct rdma_ucm_query_route_resp, ibdev_index))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	memset(&resp, 0, sizeof resp);
	addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
	memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ?
				     sizeof(struct sockaddr_in) :
				     sizeof(struct sockaddr_in6));
	addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
	memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ?
				     sizeof(struct sockaddr_in) :
				     sizeof(struct sockaddr_in6));
	if (!ctx->cm_id->device)
		goto out;

	resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
	resp.ibdev_index = ctx->cm_id->device->index;
	resp.port_num = ctx->cm_id->port_num;

	if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num))
		ucma_copy_ib_route(&resp, &ctx->cm_id->route);
	else if (rdma_protocol_roce(ctx->cm_id->device, ctx->cm_id->port_num))
		ucma_copy_iboe_route(&resp, &ctx->cm_id->route);
	else if (rdma_protocol_iwarp(ctx->cm_id->device, ctx->cm_id->port_num))
		ucma_copy_iw_route(&resp, &ctx->cm_id->route);

out:
	mutex_unlock(&ctx->mutex);
	if (copy_to_user(u64_to_user_ptr(cmd.response), &resp,
			 min_t(size_t, out_len, sizeof(resp))))
		ret = -EFAULT;

	ucma_put_ctx(ctx);
	return ret;
}

static void ucma_query_device_addr(struct rdma_cm_id *cm_id,
				   struct rdma_ucm_query_addr_resp *resp)
{
	if (!cm_id->device)
		return;

	resp->node_guid = (__force __u64) cm_id->device->node_guid;
	resp->ibdev_index = cm_id->device->index;
	resp->port_num = cm_id->port_num;
	resp->pkey = (__force __u16) cpu_to_be16(
		     ib_addr_get_pkey(&cm_id->route.addr.dev_addr));
}

static ssize_t ucma_query_addr(struct ucma_context *ctx,
			       void __user *response, int out_len)
{
	struct rdma_ucm_query_addr_resp resp;
	struct sockaddr *addr;
	int ret = 0;

	if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index))
		return -ENOSPC;

	memset(&resp, 0, sizeof resp);

	addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
	resp.src_size = rdma_addr_size(addr);
	memcpy(&resp.src_addr, addr, resp.src_size);

	addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
	resp.dst_size = rdma_addr_size(addr);
	memcpy(&resp.dst_addr, addr, resp.dst_size);

	ucma_query_device_addr(ctx->cm_id, &resp);

	if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp))))
		ret = -EFAULT;

	return ret;
}

static ssize_t ucma_query_path(struct ucma_context *ctx,
			       void __user *response, int out_len)
{
	struct rdma_ucm_query_path_resp *resp;
	int i, ret = 0;

	if (out_len < sizeof(*resp))
		return -ENOSPC;

	resp = kzalloc(out_len, GFP_KERNEL);
	if (!resp)
		return -ENOMEM;

	resp->num_paths = ctx->cm_id->route.num_paths;
	for (i = 0, out_len -= sizeof(*resp);
	     i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data);
	     i++, out_len -= sizeof(struct ib_path_rec_data)) {
		struct sa_path_rec *rec = &ctx->cm_id->route.path_rec[i];

		resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY |
					   IB_PATH_BIDIRECTIONAL;
		if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
			struct sa_path_rec ib;

			sa_convert_path_opa_to_ib(&ib, rec);
			ib_sa_pack_path(&ib, &resp->path_data[i].path_rec);

		} else {
			ib_sa_pack_path(rec, &resp->path_data[i].path_rec);
		}
	}

	if (copy_to_user(response, resp, struct_size(resp, path_data, i)))
		ret = -EFAULT;

	kfree(resp);
	return ret;
}

static ssize_t ucma_query_gid(struct ucma_context *ctx,
			      void __user *response, int out_len)
{
	struct rdma_ucm_query_addr_resp resp;
	struct sockaddr_ib *addr;
	int ret = 0;

	if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index))
		return -ENOSPC;

	memset(&resp, 0, sizeof resp);

	ucma_query_device_addr(ctx->cm_id, &resp);

	addr = (struct sockaddr_ib *) &resp.src_addr;
	resp.src_size = sizeof(*addr);
	if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) {
		memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size);
	} else {
		addr->sib_family = AF_IB;
		addr->sib_pkey = (__force __be16) resp.pkey;
		rdma_read_gids(ctx->cm_id, (union ib_gid *)&addr->sib_addr,
			       NULL);
		addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
						    &ctx->cm_id->route.addr.src_addr);
	}

	addr = (struct sockaddr_ib *) &resp.dst_addr;
	resp.dst_size = sizeof(*addr);
	if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) {
		memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size);
	} else {
		addr->sib_family = AF_IB;
		addr->sib_pkey = (__force __be16) resp.pkey;
		rdma_read_gids(ctx->cm_id, NULL,
			       (union ib_gid *)&addr->sib_addr);
		addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
						    &ctx->cm_id->route.addr.dst_addr);
	}

	if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp))))
		ret = -EFAULT;

	return ret;
}

static ssize_t ucma_query(struct ucma_file *file,
			  const char __user *inbuf,
			  int in_len, int out_len)
{
	struct rdma_ucm_query cmd;
	struct ucma_context *ctx;
	void __user *response;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	response = u64_to_user_ptr(cmd.response);
	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	switch (cmd.option) {
	case RDMA_USER_CM_QUERY_ADDR:
		ret = ucma_query_addr(ctx, response, out_len);
		break;
	case RDMA_USER_CM_QUERY_PATH:
		ret = ucma_query_path(ctx, response, out_len);
		break;
	case RDMA_USER_CM_QUERY_GID:
		ret = ucma_query_gid(ctx, response, out_len);
		break;
	default:
		ret = -ENOSYS;
		break;
	}
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}

static void ucma_copy_conn_param(struct rdma_cm_id *id,
				 struct rdma_conn_param *dst,
				 struct rdma_ucm_conn_param *src)
{
	dst->private_data = src->private_data;
	dst->private_data_len = src->private_data_len;
	dst->responder_resources = src->responder_resources;
	dst->initiator_depth = src->initiator_depth;
	dst->flow_control = src->flow_control;
	dst->retry_count = src->retry_count;
	dst->rnr_retry_count = src->rnr_retry_count;
	dst->srq = src->srq;
	dst->qp_num = src->qp_num & 0xFFFFFF;
	dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0;
}

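/*
 * Note: the connect/accept commands grew ECE fields at the end of their uapi
 * structs. in_len is therefore range-checked and only the bytes actually
 * provided are copied, so older userspace using the shorter layout keeps
 * working.
 */
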
static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
			    int in_len, int out_len)
{
	struct rdma_conn_param conn_param;
	struct rdma_ucm_ece ece = {};
	struct rdma_ucm_connect cmd;
	struct ucma_context *ctx;
	size_t in_size;
	int ret;

	if (in_len < offsetofend(typeof(cmd), reserved))
		return -EINVAL;
	in_size = min_t(size_t, in_len, sizeof(cmd));
	if (copy_from_user(&cmd, inbuf, in_size))
		return -EFAULT;

	if (!cmd.conn_param.valid)
		return -EINVAL;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
	if (offsetofend(typeof(cmd), ece) <= in_size) {
		ece.vendor_id = cmd.ece.vendor_id;
		ece.attr_mod = cmd.ece.attr_mod;
	}

	mutex_lock(&ctx->mutex);
	ret = rdma_connect_ece(ctx->cm_id, &conn_param, &ece);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_listen cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	if (cmd.backlog <= 0 || cmd.backlog > max_backlog)
		cmd.backlog = max_backlog;
	atomic_set(&ctx->backlog, cmd.backlog);

	mutex_lock(&ctx->mutex);
	ret = rdma_listen(ctx->cm_id, cmd.backlog);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_accept cmd;
	struct rdma_conn_param conn_param;
	struct rdma_ucm_ece ece = {};
	struct ucma_context *ctx;
	size_t in_size;
	int ret;

	if (in_len < offsetofend(typeof(cmd), reserved))
		return -EINVAL;
	in_size = min_t(size_t, in_len, sizeof(cmd));
	if (copy_from_user(&cmd, inbuf, in_size))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	if (offsetofend(typeof(cmd), ece) <= in_size) {
		ece.vendor_id = cmd.ece.vendor_id;
		ece.attr_mod = cmd.ece.attr_mod;
	}

	if (cmd.conn_param.valid) {
		ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
		mutex_lock(&ctx->mutex);
		rdma_lock_handler(ctx->cm_id);
		ret = rdma_accept_ece(ctx->cm_id, &conn_param, &ece);
		if (!ret) {
			/* The uid must be set atomically with the handler */
			ctx->uid = cmd.uid;
		}
		rdma_unlock_handler(ctx->cm_id);
		mutex_unlock(&ctx->mutex);
	} else {
		mutex_lock(&ctx->mutex);
		rdma_lock_handler(ctx->cm_id);
		ret = rdma_accept_ece(ctx->cm_id, NULL, &ece);
		rdma_unlock_handler(ctx->cm_id);
		mutex_unlock(&ctx->mutex);
	}
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_reject cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (!cmd.reason)
		cmd.reason = IB_CM_REJ_CONSUMER_DEFINED;

	switch (cmd.reason) {
	case IB_CM_REJ_CONSUMER_DEFINED:
	case IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED:
		break;
	default:
		return -EINVAL;
	}

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len,
			  cmd.reason);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_disconnect cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_disconnect(ctx->cm_id);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_init_qp_attr(struct ucma_file *file,
				 const char __user *inbuf,
				 int in_len, int out_len)
{
	struct rdma_ucm_init_qp_attr cmd;
	struct ib_uverbs_qp_attr resp;
	struct ucma_context *ctx;
	struct ib_qp_attr qp_attr;
	int ret;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (cmd.qp_state > IB_QPS_ERR)
		return -EINVAL;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	resp.qp_attr_mask = 0;
	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.qp_state = cmd.qp_state;
	mutex_lock(&ctx->mutex);
	ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask);
	mutex_unlock(&ctx->mutex);
	if (ret)
		goto out;

	ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

out:
	ucma_put_ctx(ctx);
	return ret;
}

static int ucma_set_option_id(struct ucma_context *ctx, int optname,
			      void *optval, size_t optlen)
{
	int ret = 0;

	switch (optname) {
	case RDMA_OPTION_ID_TOS:
		if (optlen != sizeof(u8)) {
			ret = -EINVAL;
			break;
		}
		rdma_set_service_type(ctx->cm_id, *((u8 *) optval));
		break;
	case RDMA_OPTION_ID_REUSEADDR:
		if (optlen != sizeof(int)) {
			ret = -EINVAL;
			break;
		}
		ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0);
		break;
	case RDMA_OPTION_ID_AFONLY:
		if (optlen != sizeof(int)) {
			ret = -EINVAL;
			break;
		}
		ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0);
		break;
	case RDMA_OPTION_ID_ACK_TIMEOUT:
		if (optlen != sizeof(u8)) {
			ret = -EINVAL;
			break;
		}
		ret = rdma_set_ack_timeout(ctx->cm_id, *((u8 *)optval));
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}

static int ucma_set_ib_path(struct ucma_context *ctx,
			    struct ib_path_rec_data *path_data, size_t optlen)
{
	struct sa_path_rec sa_path;
	struct rdma_cm_event event;
	int ret;

	if (optlen % sizeof(*path_data))
		return -EINVAL;

	for (; optlen; optlen -= sizeof(*path_data), path_data++) {
		if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY |
					 IB_PATH_BIDIRECTIONAL))
			break;
	}

	if (!optlen)
		return -EINVAL;

	if (!ctx->cm_id->device)
		return -EINVAL;

	memset(&sa_path, 0, sizeof(sa_path));

	sa_path.rec_type = SA_PATH_REC_TYPE_IB;
	ib_sa_unpack_path(path_data->path_rec, &sa_path);

	if (rdma_cap_opa_ah(ctx->cm_id->device, ctx->cm_id->port_num)) {
		struct sa_path_rec opa;

		sa_convert_path_ib_to_opa(&opa, &sa_path);
		mutex_lock(&ctx->mutex);
		ret = rdma_set_ib_path(ctx->cm_id, &opa);
		mutex_unlock(&ctx->mutex);
	} else {
		mutex_lock(&ctx->mutex);
		ret = rdma_set_ib_path(ctx->cm_id, &sa_path);
		mutex_unlock(&ctx->mutex);
	}
	if (ret)
		return ret;

	memset(&event, 0, sizeof event);
	event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
	return ucma_event_handler(ctx->cm_id, &event);
}

static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
			      void *optval, size_t optlen)
{
	int ret;

	switch (optname) {
	case RDMA_OPTION_IB_PATH:
		ret = ucma_set_ib_path(ctx, optval, optlen);
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}

static int ucma_set_option_level(struct ucma_context *ctx, int level,
				 int optname, void *optval, size_t optlen)
{
	int ret;

	switch (level) {
	case RDMA_OPTION_ID:
		mutex_lock(&ctx->mutex);
		ret = ucma_set_option_id(ctx, optname, optval, optlen);
		mutex_unlock(&ctx->mutex);
		break;
	case RDMA_OPTION_IB:
		ret = ucma_set_option_ib(ctx, optname, optval, optlen);
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}

static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_set_option cmd;
	struct ucma_context *ctx;
	void *optval;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	optval = memdup_user(u64_to_user_ptr(cmd.optval),
			     cmd.optlen);
	if (IS_ERR(optval)) {
		ret = PTR_ERR(optval);
		goto out;
	}

	ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval,
				    cmd.optlen);
	kfree(optval);

out:
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_notify cmd;
	struct ucma_context *ctx;
	int ret = -EINVAL;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	if (ctx->cm_id->device)
		ret = rdma_notify(ctx->cm_id, (enum ib_event_type)cmd.event);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_process_join(struct ucma_file *file,
				 struct rdma_ucm_join_mcast *cmd, int out_len)
{
	struct rdma_ucm_create_id_resp resp;
	struct ucma_context *ctx;
	struct ucma_multicast *mc;
	struct sockaddr *addr;
	int ret;
	u8 join_state;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	addr = (struct sockaddr *) &cmd->addr;
	if (cmd->addr_size != rdma_addr_size(addr))
		return -EINVAL;

	if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER)
		join_state = BIT(FULLMEMBER_JOIN);
	else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER)
		join_state = BIT(SENDONLY_FULLMEMBER_JOIN);
	else
		return -EINVAL;

	ctx = ucma_get_ctx_dev(file, cmd->id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mc = kzalloc(sizeof(*mc), GFP_KERNEL);
	if (!mc) {
		ret = -ENOMEM;
		goto err_put_ctx;
	}

	mc->ctx = ctx;
	mc->join_state = join_state;
	mc->uid = cmd->uid;
	memcpy(&mc->addr, addr, cmd->addr_size);

	if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b,
		     GFP_KERNEL)) {
		ret = -ENOMEM;
		goto err_free_mc;
	}

	mutex_lock(&ctx->mutex);
	ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
				  join_state, mc);
	mutex_unlock(&ctx->mutex);
	if (ret)
		goto err_xa_erase;

	resp.id = mc->id;
	if (copy_to_user(u64_to_user_ptr(cmd->response),
			 &resp, sizeof(resp))) {
		ret = -EFAULT;
		goto err_leave_multicast;
	}

	xa_store(&multicast_table, mc->id, mc, 0);

	ucma_put_ctx(ctx);
	return 0;

err_leave_multicast:
	mutex_lock(&ctx->mutex);
	rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr);
	mutex_unlock(&ctx->mutex);
	ucma_cleanup_mc_events(mc);
err_xa_erase:
	xa_erase(&multicast_table, mc->id);
err_free_mc:
	kfree(mc);
err_put_ctx:
	ucma_put_ctx(ctx);
	return ret;
}

static ssize_t ucma_join_ip_multicast(struct ucma_file *file,
				      const char __user *inbuf,
				      int in_len, int out_len)
{
	struct rdma_ucm_join_ip_mcast cmd;
	struct rdma_ucm_join_mcast join_cmd;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	join_cmd.response = cmd.response;
	join_cmd.uid = cmd.uid;
	join_cmd.id = cmd.id;
	join_cmd.addr_size = rdma_addr_size_in6(&cmd.addr);
	if (!join_cmd.addr_size)
		return -EINVAL;

	join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER;
	memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size);

	return ucma_process_join(file, &join_cmd, out_len);
}

static ssize_t ucma_join_multicast(struct ucma_file *file,
				   const char __user *inbuf,
				   int in_len, int out_len)
{
	struct rdma_ucm_join_mcast cmd;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (!rdma_addr_size_kss(&cmd.addr))
		return -EINVAL;

	return ucma_process_join(file, &cmd, out_len);
}

static ssize_t ucma_leave_multicast(struct ucma_file *file,
				    const char __user *inbuf,
				    int in_len, int out_len)
{
	struct rdma_ucm_destroy_id cmd;
	struct rdma_ucm_destroy_id_resp resp;
	struct ucma_multicast *mc;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	xa_lock(&multicast_table);
	mc = xa_load(&multicast_table, cmd.id);
	if (!mc)
		mc = ERR_PTR(-ENOENT);
	else if (READ_ONCE(mc->ctx->file) != file)
		mc = ERR_PTR(-EINVAL);
	else if (!refcount_inc_not_zero(&mc->ctx->ref))
		mc = ERR_PTR(-ENXIO);
	else
		__xa_erase(&multicast_table, mc->id);
	xa_unlock(&multicast_table);

	if (IS_ERR(mc)) {
		ret = PTR_ERR(mc);
		goto out;
	}

	mutex_lock(&mc->ctx->mutex);
	rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
	mutex_unlock(&mc->ctx->mutex);

	ucma_cleanup_mc_events(mc);

	ucma_put_ctx(mc->ctx);
	resp.events_reported = mc->events_reported;
	kfree(mc);

	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;
out:
	return ret;
}

static ssize_t ucma_migrate_id(struct ucma_file *new_file,
			       const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_migrate_id cmd;
	struct rdma_ucm_migrate_resp resp;
	struct ucma_event *uevent, *tmp;
	struct ucma_context *ctx;
	LIST_HEAD(event_list);
	struct fd f;
	struct ucma_file *cur_file;
	int ret = 0;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	/* Get current fd to protect against it being closed */
	f = fdget(cmd.fd);
	if (!f.file)
		return -ENOENT;
	if (f.file->f_op != &ucma_fops) {
		ret = -EINVAL;
		goto file_put;
	}
	cur_file = f.file->private_data;

	/* Validate current fd and prevent destruction of id. */
	ctx = ucma_get_ctx(cur_file, cmd.id);
	if (IS_ERR(ctx)) {
		ret = PTR_ERR(ctx);
		goto file_put;
	}

	rdma_lock_handler(ctx->cm_id);
	/*
	 * ctx->file can only be changed under the handler & xa_lock. xa_load()
	 * must be checked again to ensure the ctx hasn't begun destruction
	 * since the ucma_get_ctx().
	 */
	xa_lock(&ctx_table);
	if (_ucma_find_context(cmd.id, cur_file) != ctx) {
		xa_unlock(&ctx_table);
		ret = -ENOENT;
		goto err_unlock;
	}
	ctx->file = new_file;
	xa_unlock(&ctx_table);

	mutex_lock(&cur_file->mut);
	list_del(&ctx->list);
	/*
	 * At this point lock_handler() prevents addition of new uevents for
	 * this ctx.
	 */
	list_for_each_entry_safe(uevent, tmp, &cur_file->event_list, list)
		if (uevent->ctx == ctx)
			list_move_tail(&uevent->list, &event_list);
	resp.events_reported = ctx->events_reported;
	mutex_unlock(&cur_file->mut);

	mutex_lock(&new_file->mut);
	list_add_tail(&ctx->list, &new_file->ctx_list);
	list_splice_tail(&event_list, &new_file->event_list);
	mutex_unlock(&new_file->mut);

	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

err_unlock:
	rdma_unlock_handler(ctx->cm_id);
	ucma_put_ctx(ctx);
file_put:
	fdput(f);
	return ret;
}

static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
				   const char __user *inbuf,
				   int in_len, int out_len) = {
	[RDMA_USER_CM_CMD_CREATE_ID]	 = ucma_create_id,
	[RDMA_USER_CM_CMD_DESTROY_ID]	 = ucma_destroy_id,
	[RDMA_USER_CM_CMD_BIND_IP]	 = ucma_bind_ip,
	[RDMA_USER_CM_CMD_RESOLVE_IP]	 = ucma_resolve_ip,
	[RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route,
	[RDMA_USER_CM_CMD_QUERY_ROUTE]	 = ucma_query_route,
	[RDMA_USER_CM_CMD_CONNECT]	 = ucma_connect,
	[RDMA_USER_CM_CMD_LISTEN]	 = ucma_listen,
	[RDMA_USER_CM_CMD_ACCEPT]	 = ucma_accept,
	[RDMA_USER_CM_CMD_REJECT]	 = ucma_reject,
	[RDMA_USER_CM_CMD_DISCONNECT]	 = ucma_disconnect,
	[RDMA_USER_CM_CMD_INIT_QP_ATTR]	 = ucma_init_qp_attr,
	[RDMA_USER_CM_CMD_GET_EVENT]	 = ucma_get_event,
	[RDMA_USER_CM_CMD_GET_OPTION]	 = NULL,
	[RDMA_USER_CM_CMD_SET_OPTION]	 = ucma_set_option,
	[RDMA_USER_CM_CMD_NOTIFY]	 = ucma_notify,
	[RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast,
	[RDMA_USER_CM_CMD_LEAVE_MCAST]	 = ucma_leave_multicast,
	[RDMA_USER_CM_CMD_MIGRATE_ID]	 = ucma_migrate_id,
	[RDMA_USER_CM_CMD_QUERY]	 = ucma_query,
	[RDMA_USER_CM_CMD_BIND]		 = ucma_bind,
	[RDMA_USER_CM_CMD_RESOLVE_ADDR]	 = ucma_resolve_addr,
	[RDMA_USER_CM_CMD_JOIN_MCAST]	 = ucma_join_multicast,
};

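/*
 * write() is the command entry point: a struct rdma_ucm_cmd_hdr selects the
 * handler in ucma_cmd_table above. hdr.cmd is bounds-checked and then
 * sanitized with array_index_nospec() before the table lookup.
 */
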
static ssize_t ucma_write(struct file *filp, const char __user *buf,
			  size_t len, loff_t *pos)
{
	struct ucma_file *file = filp->private_data;
	struct rdma_ucm_cmd_hdr hdr;
	ssize_t ret;

	if (!ib_safe_file_access(filp)) {
		pr_err_once("ucma_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
			    task_tgid_vnr(current), current->comm);
		return -EACCES;
	}

	if (len < sizeof(hdr))
		return -EINVAL;

	if (copy_from_user(&hdr, buf, sizeof(hdr)))
		return -EFAULT;

	if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
		return -EINVAL;
	hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucma_cmd_table));

	if (hdr.in + sizeof(hdr) > len)
		return -EINVAL;

	if (!ucma_cmd_table[hdr.cmd])
		return -ENOSYS;

	ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out);
	if (!ret)
		ret = len;

	return ret;
}

static __poll_t ucma_poll(struct file *filp, struct poll_table_struct *wait)
{
	struct ucma_file *file = filp->private_data;
	__poll_t mask = 0;

	poll_wait(filp, &file->poll_wait, wait);

	if (!list_empty(&file->event_list))
		mask = EPOLLIN | EPOLLRDNORM;

	return mask;
}

/*
 * ucma_open() does not need the BKL:
 *
 *  - no global state is referred to;
 *  - there is no ioctl method to race against;
 *  - no further module initialization is required for open to work
 *    after the device is registered.
 */
static int ucma_open(struct inode *inode, struct file *filp)
{
	struct ucma_file *file;

	file = kmalloc(sizeof *file, GFP_KERNEL);
	if (!file)
		return -ENOMEM;

	INIT_LIST_HEAD(&file->event_list);
	INIT_LIST_HEAD(&file->ctx_list);
	init_waitqueue_head(&file->poll_wait);
	mutex_init(&file->mut);

	filp->private_data = file;
	file->filp = filp;

	return stream_open(inode, filp);
}

static int ucma_close(struct inode *inode, struct file *filp)
{
	struct ucma_file *file = filp->private_data;

	/*
	 * All paths that touch ctx_list or ctx->list starting from write() are
	 * prevented by this being a FD release function. The list_add_tail() in
	 * ucma_connect_event_handler() can run concurrently, however it only
	 * adds to the list *after* a listening ID. By only reading the first of
	 * the list, and relying on __destroy_id() to block
	 * ucma_connect_event_handler(), no additional locking is needed.
	 */
	while (!list_empty(&file->ctx_list)) {
		struct ucma_context *ctx = list_first_entry(
			&file->ctx_list, struct ucma_context, list);

		xa_erase(&ctx_table, ctx->id);
		__destroy_id(ctx);
	}
	kfree(file);
	return 0;
}

static const struct file_operations ucma_fops = {
	.owner 	 = THIS_MODULE,
	.open 	 = ucma_open,
	.release = ucma_close,
	.write	 = ucma_write,
	.poll    = ucma_poll,
	.llseek	 = no_llseek,
};

static struct miscdevice ucma_misc = {
	.minor		= MISC_DYNAMIC_MINOR,
	.name		= "rdma_cm",
	.nodename	= "infiniband/rdma_cm",
	.mode		= 0666,
	.fops		= &ucma_fops,
};

static int ucma_get_global_nl_info(struct ib_client_nl_info *res)
{
	res->abi = RDMA_USER_CM_ABI_VERSION;
	res->cdev = ucma_misc.this_device;
	return 0;
}

static struct ib_client rdma_cma_client = {
	.name = "rdma_cm",
	.get_global_nl_info = ucma_get_global_nl_info,
};
MODULE_ALIAS_RDMA_CLIENT("rdma_cm");

static ssize_t show_abi_version(struct device *dev,
				struct device_attribute *attr,
				char *buf)
{
	return sysfs_emit(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
}
static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);

static int __init ucma_init(void)
{
	int ret;

	ret = misc_register(&ucma_misc);
	if (ret)
		return ret;

	ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version);
	if (ret) {
		pr_err("rdma_ucm: couldn't create abi_version attr\n");
		goto err1;
	}

	ucma_ctl_table_hdr = register_net_sysctl(&init_net, "net/rdma_ucm", ucma_ctl_table);
	if (!ucma_ctl_table_hdr) {
		pr_err("rdma_ucm: couldn't register sysctl paths\n");
		ret = -ENOMEM;
		goto err2;
	}

	ret = ib_register_client(&rdma_cma_client);
	if (ret)
		goto err3;

	return 0;
err3:
	unregister_net_sysctl_table(ucma_ctl_table_hdr);
err2:
	device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
err1:
	misc_deregister(&ucma_misc);
	return ret;
}

static void __exit ucma_cleanup(void)
{
	ib_unregister_client(&rdma_cma_client);
	unregister_net_sysctl_table(ucma_ctl_table_hdr);
	device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
	misc_deregister(&ucma_misc);
}

module_init(ucma_init);
module_exit(ucma_cleanup);