/*
 * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/completion.h>
#include <linux/file.h>
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/idr.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/nsproxy.h>
#include <linux/nospec.h>

#include <rdma/rdma_user_cm.h>
#include <rdma/ib_marshall.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_sa.h>
#include <rdma/rdma_netlink.h>
#include "core_priv.h"
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
MODULE_LICENSE("Dual BSD/GPL");
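/*
 * Writable limit on the listen backlog a user may request; ucma_listen()
 * clamps cmd.backlog to this value. It is exposed through the sysctl table
 * registered below via register_net_sysctl() under "net/rdma_ucm", so it is
 * typically visible as /proc/sys/net/rdma_ucm/max_backlog.
 */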
static unsigned int max_backlog = 1024;

static struct ctl_table_header *ucma_ctl_table_hdr;
static struct ctl_table ucma_ctl_table[] = {
	{
		.procname	= "max_backlog",
		.data		= &max_backlog,
		.maxlen		= sizeof max_backlog,
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};
struct ucma_file {
	struct mutex		mut;
	struct file		*filp;
	struct list_head	ctx_list;
	struct list_head	event_list;
	wait_queue_head_t	poll_wait;
	struct workqueue_struct	*close_wq;
};

struct ucma_context {
	u32			id;
	struct completion	comp;
	refcount_t		ref;
	int			events_reported;
	int			backlog;

	struct ucma_file	*file;
	struct rdma_cm_id	*cm_id;
	struct mutex		mutex;
	u64			uid;

	struct list_head	list;
	struct list_head	mc_list;
	/* mark that device is in process of destroying the internal HW
	 * resources, protected by the ctx_table lock
	 */
	int			closing;
	/* sync between removal event and id destroy, protected by file mut */
	int			destroying;
	struct work_struct	close_work;
};
struct ucma_multicast {
	struct ucma_context	*ctx;
	u32			id;
	int			events_reported;

	u64			uid;
	u8			join_state;
	struct list_head	list;
	struct sockaddr_storage	addr;
};
struct ucma_event {
	struct ucma_context	*ctx;
	struct ucma_multicast	*mc;
	struct list_head	list;
	struct rdma_cm_id	*cm_id;
	struct rdma_ucm_event_resp resp;
	struct work_struct	close_work;
};
static DEFINE_XARRAY_ALLOC(ctx_table);
static DEFINE_XARRAY_ALLOC(multicast_table);

static const struct file_operations ucma_fops;
static inline struct ucma_context *_ucma_find_context(int id,
						       struct ucma_file *file)
{
	struct ucma_context *ctx;

	ctx = xa_load(&ctx_table, id);
	if (!ctx)
		ctx = ERR_PTR(-ENOENT);
	else if (ctx->file != file || !ctx->cm_id)
		ctx = ERR_PTR(-EINVAL);
	return ctx;
}
static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
{
	struct ucma_context *ctx;

	xa_lock(&ctx_table);
	ctx = _ucma_find_context(id, file);
	if (!IS_ERR(ctx)) {
		if (ctx->closing)
			ctx = ERR_PTR(-EIO);
		else
			refcount_inc(&ctx->ref);
	}
	xa_unlock(&ctx_table);
	return ctx;
}
static void ucma_put_ctx(struct ucma_context *ctx)
{
	if (refcount_dec_and_test(&ctx->ref))
		complete(&ctx->comp);
}
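/*
 * Lifetime note: ucma_get_ctx() takes a reference under the ctx_table lock
 * and ucma_put_ctx() completes ctx->comp when the last reference is dropped.
 * The destroy paths (ucma_destroy_id(), ucma_close(), ucma_close_id()) wait
 * on that completion before calling rdma_destroy_id(), so no system call is
 * still holding the cm_id when it is destroyed.
 */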
/*
 * Same as ucma_get_ctx but requires that ->cm_id->device is valid, e.g. that
 * the CM_ID is bound to a device.
 */
static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id)
{
	struct ucma_context *ctx = ucma_get_ctx(file, id);

	if (IS_ERR(ctx))
		return ctx;
	if (!ctx->cm_id->device) {
		ucma_put_ctx(ctx);
		return ERR_PTR(-EINVAL);
	}
	return ctx;
}
static void ucma_close_event_id(struct work_struct *work)
{
	struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work);

	rdma_destroy_id(uevent_close->cm_id);
	kfree(uevent_close);
}
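/*
 * ctx->close_work is queued by ucma_removal_event_handler() when the device
 * that owns the cm_id is being removed; it releases the underlying HW
 * resources without waiting for userspace to destroy the id.
 */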
static void ucma_close_id(struct work_struct *work)
{
	struct ucma_context *ctx = container_of(work, struct ucma_context, close_work);

	/* once all inflight tasks are finished, we close all underlying
	 * resources. The context is still alive till its explicit destruction
	 * by its creator.
	 */
	ucma_put_ctx(ctx);
	wait_for_completion(&ctx->comp);
	/* No new events will be generated after destroying the id. */
	rdma_destroy_id(ctx->cm_id);
}
static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
{
	struct ucma_context *ctx;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return NULL;

	INIT_WORK(&ctx->close_work, ucma_close_id);
	refcount_set(&ctx->ref, 1);
	init_completion(&ctx->comp);
	INIT_LIST_HEAD(&ctx->mc_list);
	ctx->file = file;
	mutex_init(&ctx->mutex);

	if (xa_alloc(&ctx_table, &ctx->id, ctx, xa_limit_32b, GFP_KERNEL))
		goto error;

	list_add_tail(&ctx->list, &file->ctx_list);
	return ctx;

error:
	kfree(ctx);
	return NULL;
}
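/*
 * Note: ctx->mutex (initialized above) is taken by the command handlers
 * around every rdma_* call made on ctx->cm_id, so calls issued through
 * different system calls on the same id are serialized against each other.
 */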
static struct ucma_multicast *ucma_alloc_multicast(struct ucma_context *ctx)
{
	struct ucma_multicast *mc;

	mc = kzalloc(sizeof(*mc), GFP_KERNEL);
	if (!mc)
		return NULL;

	mc->ctx = ctx;
	if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, GFP_KERNEL))
		goto error;

	list_add_tail(&mc->list, &ctx->mc_list);
	return mc;

error:
	kfree(mc);
	return NULL;
}
static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
				 struct rdma_conn_param *src)
{
	if (src->private_data_len)
		memcpy(dst->private_data, src->private_data,
		       src->private_data_len);
	dst->private_data_len = src->private_data_len;
	dst->responder_resources = src->responder_resources;
	dst->initiator_depth = src->initiator_depth;
	dst->flow_control = src->flow_control;
	dst->retry_count = src->retry_count;
	dst->rnr_retry_count = src->rnr_retry_count;
	dst->srq = src->srq;
	dst->qp_num = src->qp_num;
}
static void ucma_copy_ud_event(struct ib_device *device,
			       struct rdma_ucm_ud_param *dst,
			       struct rdma_ud_param *src)
{
	if (src->private_data_len)
		memcpy(dst->private_data, src->private_data,
		       src->private_data_len);
	dst->private_data_len = src->private_data_len;
	ib_copy_ah_attr_to_user(device, &dst->ah_attr, &src->ah_attr);
	dst->qp_num = src->qp_num;
	dst->qkey = src->qkey;
}
static void ucma_set_event_context(struct ucma_context *ctx,
				   struct rdma_cm_event *event,
				   struct ucma_event *uevent)
{
	uevent->ctx = ctx;
	switch (event->event) {
	case RDMA_CM_EVENT_MULTICAST_JOIN:
	case RDMA_CM_EVENT_MULTICAST_ERROR:
		uevent->mc = (struct ucma_multicast *)
			     event->param.ud.private_data;
		uevent->resp.uid = uevent->mc->uid;
		uevent->resp.id = uevent->mc->id;
		break;
	default:
		uevent->resp.uid = ctx->uid;
		uevent->resp.id = ctx->id;
		break;
	}
}
/* Called with file->mut locked for the relevant context. */
static void ucma_removal_event_handler(struct rdma_cm_id *cm_id)
{
	struct ucma_context *ctx = cm_id->context;
	struct ucma_event *con_req_eve;
	int event_found = 0;

	if (ctx->destroying)
		return;

	/* Only if the context still points at this cm_id does it own it and
	 * can it be queued to be closed; otherwise the cm_id is an inflight
	 * one that is part of that context's event list, pending to be
	 * detached and reattached to its new context as part of
	 * ucma_get_event, handled separately below.
	 */
	if (ctx->cm_id == cm_id) {
		xa_lock(&ctx_table);
		ctx->closing = 1;
		xa_unlock(&ctx_table);
		queue_work(ctx->file->close_wq, &ctx->close_work);
		return;
	}

	list_for_each_entry(con_req_eve, &ctx->file->event_list, list) {
		if (con_req_eve->cm_id == cm_id &&
		    con_req_eve->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
			list_del(&con_req_eve->list);
			INIT_WORK(&con_req_eve->close_work, ucma_close_event_id);
			queue_work(ctx->file->close_wq, &con_req_eve->close_work);
			event_found = 1;
			break;
		}
	}
	if (!event_found)
		pr_err("ucma_removal_event_handler: warning: connect request event wasn't found\n");
}
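/*
 * rdma_cm event callback: queue a ucma_event on the owning file's event_list
 * and wake pollers. For a connect request that cannot be queued the handler
 * returns nonzero, which tells the rdma_cm core to destroy the newly created
 * child id instead of leaking it.
 */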
static int ucma_event_handler(struct rdma_cm_id *cm_id,
			      struct rdma_cm_event *event)
{
	struct ucma_event *uevent;
	struct ucma_context *ctx = cm_id->context;
	int ret = 0;

	uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
	if (!uevent)
		return event->event == RDMA_CM_EVENT_CONNECT_REQUEST;

	mutex_lock(&ctx->file->mut);
	uevent->cm_id = cm_id;
	ucma_set_event_context(ctx, event, uevent);
	uevent->resp.event = event->event;
	uevent->resp.status = event->status;
	if (cm_id->qp_type == IB_QPT_UD)
		ucma_copy_ud_event(cm_id->device, &uevent->resp.param.ud,
				   &event->param.ud);
	else
		ucma_copy_conn_event(&uevent->resp.param.conn,
				     &event->param.conn);

	if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
		if (!ctx->backlog) {
			ret = -ENOMEM;
			kfree(uevent);
			goto out;
		}
		ctx->backlog--;
	} else if (!ctx->uid || ctx->cm_id != cm_id) {
		/*
		 * We ignore events for new connections until userspace has set
		 * their context. This can only happen if an error occurs on a
		 * new connection before the user accepts it. This is okay,
		 * since the accept will just fail later. However, we do need
		 * to release the underlying HW resources in case of a device
		 * removal event.
		 */
		if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
			ucma_removal_event_handler(cm_id);

		kfree(uevent);
		goto out;
	}

	list_add_tail(&uevent->list, &ctx->file->event_list);
	wake_up_interruptible(&ctx->file->poll_wait);
	if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
		ucma_removal_event_handler(cm_id);
out:
	mutex_unlock(&ctx->file->mut);
	return ret;
}
static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
			      int in_len, int out_len)
{
	struct ucma_context *ctx;
	struct rdma_ucm_get_event cmd;
	struct ucma_event *uevent;
	int ret = 0;

	/*
	 * Old 32 bit user space does not send the 4 byte padding in the
	 * reserved field. We don't care, allow it to keep working.
	 */
	if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	mutex_lock(&file->mut);
	while (list_empty(&file->event_list)) {
		mutex_unlock(&file->mut);

		if (file->filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		if (wait_event_interruptible(file->poll_wait,
					     !list_empty(&file->event_list)))
			return -ERESTARTSYS;

		mutex_lock(&file->mut);
	}

	uevent = list_entry(file->event_list.next, struct ucma_event, list);

	if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
		ctx = ucma_alloc_ctx(file);
		if (!ctx) {
			ret = -ENOMEM;
			goto done;
		}
		uevent->ctx->backlog++;
		ctx->cm_id = uevent->cm_id;
		ctx->cm_id->context = ctx;
		uevent->resp.id = ctx->id;
	}

	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &uevent->resp,
			 min_t(size_t, out_len, sizeof(uevent->resp)))) {
		ret = -EFAULT;
		goto done;
	}

	list_del(&uevent->list);
	uevent->ctx->events_reported++;
	if (uevent->mc)
		uevent->mc->events_reported++;
	kfree(uevent);
done:
	mutex_unlock(&file->mut);
	return ret;
}
static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type)
{
	switch (cmd->ps) {
	case RDMA_PS_TCP:
		*qp_type = IB_QPT_RC;
		return 0;
	case RDMA_PS_UDP:
	case RDMA_PS_IPOIB:
		*qp_type = IB_QPT_UD;
		return 0;
	case RDMA_PS_IB:
		*qp_type = cmd->qp_type;
		return 0;
	default:
		return -EINVAL;
	}
}
static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
			      int in_len, int out_len)
{
	struct rdma_ucm_create_id cmd;
	struct rdma_ucm_create_id_resp resp;
	struct ucma_context *ctx;
	struct rdma_cm_id *cm_id;
	enum ib_qp_type qp_type;
	int ret;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ret = ucma_get_qp_type(&cmd, &qp_type);
	if (ret)
		return ret;

	mutex_lock(&file->mut);
	ctx = ucma_alloc_ctx(file);
	mutex_unlock(&file->mut);
	if (!ctx)
		return -ENOMEM;

	ctx->uid = cmd.uid;
	cm_id = __rdma_create_id(current->nsproxy->net_ns,
				 ucma_event_handler, ctx, cmd.ps, qp_type, NULL);
	if (IS_ERR(cm_id)) {
		ret = PTR_ERR(cm_id);
		goto err1;
	}

	ctx->cm_id = cm_id;
	resp.id = ctx->id;
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp))) {
		ret = -EFAULT;
		goto err2;
	}
	return 0;

err2:
	rdma_destroy_id(cm_id);
err1:
	xa_erase(&ctx_table, ctx->id);
	mutex_lock(&file->mut);
	list_del(&ctx->list);
	mutex_unlock(&file->mut);
	kfree(ctx);
	return ret;
}
static void ucma_cleanup_multicast(struct ucma_context *ctx)
{
	struct ucma_multicast *mc, *tmp;

	mutex_lock(&ctx->file->mut);
	list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
		list_del(&mc->list);
		xa_erase(&multicast_table, mc->id);
		kfree(mc);
	}
	mutex_unlock(&ctx->file->mut);
}
static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
{
	struct ucma_event *uevent, *tmp;

	list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
		if (uevent->mc != mc)
			continue;

		list_del(&uevent->list);
		kfree(uevent);
	}
}
/*
 * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At
 * this point, no new events will be reported from the hardware. However, we
 * still need to cleanup the UCMA context for this ID. Specifically, there
 * might be events that have not yet been consumed by the user space software.
 * These might include pending connect requests which we have not completed
 * processing.  We cannot call rdma_destroy_id while holding the lock of the
 * context (file->mut), as it might cause a deadlock. We therefore extract all
 * relevant events from the context pending events list while holding the
 * mutex. After that we release them as needed.
 */
static int ucma_free_ctx(struct ucma_context *ctx)
{
	int events_reported;
	struct ucma_event *uevent, *tmp;
	LIST_HEAD(list);

	ucma_cleanup_multicast(ctx);

	/* Cleanup events not yet reported to the user. */
	mutex_lock(&ctx->file->mut);
	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
		if (uevent->ctx == ctx)
			list_move_tail(&uevent->list, &list);
	}
	list_del(&ctx->list);
	mutex_unlock(&ctx->file->mut);

	list_for_each_entry_safe(uevent, tmp, &list, list) {
		list_del(&uevent->list);
		if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
			rdma_destroy_id(uevent->cm_id);
		kfree(uevent);
	}

	events_reported = ctx->events_reported;
	mutex_destroy(&ctx->mutex);
	kfree(ctx);
	return events_reported;
}
static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_destroy_id cmd;
	struct rdma_ucm_destroy_id_resp resp;
	struct ucma_context *ctx;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	xa_lock(&ctx_table);
	ctx = _ucma_find_context(cmd.id, file);
	if (!IS_ERR(ctx))
		__xa_erase(&ctx_table, ctx->id);
	xa_unlock(&ctx_table);

	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->file->mut);
	ctx->destroying = 1;
	mutex_unlock(&ctx->file->mut);

	flush_workqueue(ctx->file->close_wq);
	/* At this point it's guaranteed that there is no inflight
	 * closing task.
	 */
	xa_lock(&ctx_table);
	if (!ctx->closing) {
		xa_unlock(&ctx_table);
		ucma_put_ctx(ctx);
		wait_for_completion(&ctx->comp);
		rdma_destroy_id(ctx->cm_id);
	} else {
		xa_unlock(&ctx_table);
	}

	resp.events_reported = ucma_free_ctx(ctx);
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

	return ret;
}
static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf,
			    int in_len, int out_len)
{
	struct rdma_ucm_bind_ip cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (!rdma_addr_size_in6(&cmd.addr))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}
static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf,
			 int in_len, int out_len)
{
	struct rdma_ucm_bind cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (cmd.reserved || !cmd.addr_size ||
	    cmd.addr_size != rdma_addr_size_kss(&cmd.addr))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}
static ssize_t ucma_resolve_ip(struct ucma_file *file,
			       const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_resolve_ip cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if ((cmd.src_addr.sin6_family && !rdma_addr_size_in6(&cmd.src_addr)) ||
	    !rdma_addr_size_in6(&cmd.dst_addr))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
				(struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}
static ssize_t ucma_resolve_addr(struct ucma_file *file,
				 const char __user *inbuf,
				 int in_len, int out_len)
{
	struct rdma_ucm_resolve_addr cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (cmd.reserved ||
	    (cmd.src_size && (cmd.src_size != rdma_addr_size_kss(&cmd.src_addr))) ||
	    !cmd.dst_size || (cmd.dst_size != rdma_addr_size_kss(&cmd.dst_addr)))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
				(struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}
static ssize_t ucma_resolve_route(struct ucma_file *file,
				  const char __user *inbuf,
				  int in_len, int out_len)
{
	struct rdma_ucm_resolve_route cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}
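/*
 * The three helpers below flatten ctx->cm_id->route into the
 * rdma_ucm_query_route_resp ABI: the IB variant reports SA path records, the
 * RoCE variant synthesizes GIDs from the IP addresses, and the iWarp variant
 * only fills the source/destination GIDs from the device address.
 */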
static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
			       struct rdma_route *route)
{
	struct rdma_dev_addr *dev_addr;

	resp->num_paths = route->num_paths;
	switch (route->num_paths) {
	case 0:
		dev_addr = &route->addr.dev_addr;
		rdma_addr_get_dgid(dev_addr,
				   (union ib_gid *) &resp->ib_route[0].dgid);
		rdma_addr_get_sgid(dev_addr,
				   (union ib_gid *) &resp->ib_route[0].sgid);
		resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
		break;
	case 2:
		ib_copy_path_rec_to_user(&resp->ib_route[1],
					 &route->path_rec[1]);
		/* fall through */
	case 1:
		ib_copy_path_rec_to_user(&resp->ib_route[0],
					 &route->path_rec[0]);
		break;
	default:
		break;
	}
}
static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
				 struct rdma_route *route)
{
	resp->num_paths = route->num_paths;
	switch (route->num_paths) {
	case 0:
		rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr,
			    (union ib_gid *)&resp->ib_route[0].dgid);
		rdma_ip2gid((struct sockaddr *)&route->addr.src_addr,
			    (union ib_gid *)&resp->ib_route[0].sgid);
		resp->ib_route[0].pkey = cpu_to_be16(0xffff);
		break;
	case 2:
		ib_copy_path_rec_to_user(&resp->ib_route[1],
					 &route->path_rec[1]);
		/* fall through */
	case 1:
		ib_copy_path_rec_to_user(&resp->ib_route[0],
					 &route->path_rec[0]);
		break;
	default:
		break;
	}
}
static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp,
			       struct rdma_route *route)
{
	struct rdma_dev_addr *dev_addr;

	dev_addr = &route->addr.dev_addr;
	rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid);
	rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid);
}
static ssize_t ucma_query_route(struct ucma_file *file,
				const char __user *inbuf,
				int in_len, int out_len)
{
	struct rdma_ucm_query cmd;
	struct rdma_ucm_query_route_resp resp;
	struct ucma_context *ctx;
	struct sockaddr *addr;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	memset(&resp, 0, sizeof resp);
	addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
	memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ?
				     sizeof(struct sockaddr_in) :
				     sizeof(struct sockaddr_in6));
	addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
	memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ?
				     sizeof(struct sockaddr_in) :
				     sizeof(struct sockaddr_in6));
	if (!ctx->cm_id->device)
		goto out;

	resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
	resp.port_num = ctx->cm_id->port_num;

	if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num))
		ucma_copy_ib_route(&resp, &ctx->cm_id->route);
	else if (rdma_protocol_roce(ctx->cm_id->device, ctx->cm_id->port_num))
		ucma_copy_iboe_route(&resp, &ctx->cm_id->route);
	else if (rdma_protocol_iwarp(ctx->cm_id->device, ctx->cm_id->port_num))
		ucma_copy_iw_route(&resp, &ctx->cm_id->route);

out:
	mutex_unlock(&ctx->mutex);
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

	ucma_put_ctx(ctx);
	return ret;
}
static void ucma_query_device_addr(struct rdma_cm_id *cm_id,
				   struct rdma_ucm_query_addr_resp *resp)
{
	if (!cm_id->device)
		return;

	resp->node_guid = (__force __u64) cm_id->device->node_guid;
	resp->port_num = cm_id->port_num;
	resp->pkey = (__force __u16) cpu_to_be16(
		     ib_addr_get_pkey(&cm_id->route.addr.dev_addr));
}
static ssize_t ucma_query_addr(struct ucma_context *ctx,
			       void __user *response, int out_len)
{
	struct rdma_ucm_query_addr_resp resp;
	struct sockaddr *addr;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	memset(&resp, 0, sizeof resp);

	addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
	resp.src_size = rdma_addr_size(addr);
	memcpy(&resp.src_addr, addr, resp.src_size);

	addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
	resp.dst_size = rdma_addr_size(addr);
	memcpy(&resp.dst_addr, addr, resp.dst_size);

	ucma_query_device_addr(ctx->cm_id, &resp);

	if (copy_to_user(response, &resp, sizeof(resp)))
		ret = -EFAULT;

	return ret;
}
static ssize_t ucma_query_path(struct ucma_context *ctx,
			       void __user *response, int out_len)
{
	struct rdma_ucm_query_path_resp *resp;
	int i, ret = 0;

	if (out_len < sizeof(*resp))
		return -ENOSPC;

	resp = kzalloc(out_len, GFP_KERNEL);
	if (!resp)
		return -ENOMEM;

	resp->num_paths = ctx->cm_id->route.num_paths;
	for (i = 0, out_len -= sizeof(*resp);
	     i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data);
	     i++, out_len -= sizeof(struct ib_path_rec_data)) {
		struct sa_path_rec *rec = &ctx->cm_id->route.path_rec[i];

		resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY |
					   IB_PATH_BIDIRECTIONAL;
		if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
			struct sa_path_rec ib;

			sa_convert_path_opa_to_ib(&ib, rec);
			ib_sa_pack_path(&ib, &resp->path_data[i].path_rec);
		} else {
			ib_sa_pack_path(rec, &resp->path_data[i].path_rec);
		}
	}

	if (copy_to_user(response, resp, struct_size(resp, path_data, i)))
		ret = -EFAULT;

	kfree(resp);
	return ret;
}
static ssize_t ucma_query_gid(struct ucma_context *ctx,
			      void __user *response, int out_len)
{
	struct rdma_ucm_query_addr_resp resp;
	struct sockaddr_ib *addr;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	memset(&resp, 0, sizeof resp);

	ucma_query_device_addr(ctx->cm_id, &resp);

	addr = (struct sockaddr_ib *) &resp.src_addr;
	resp.src_size = sizeof(*addr);
	if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) {
		memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size);
	} else {
		addr->sib_family = AF_IB;
		addr->sib_pkey = (__force __be16) resp.pkey;
		rdma_read_gids(ctx->cm_id, (union ib_gid *)&addr->sib_addr,
			       NULL);
		addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
						    &ctx->cm_id->route.addr.src_addr);
	}

	addr = (struct sockaddr_ib *) &resp.dst_addr;
	resp.dst_size = sizeof(*addr);
	if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) {
		memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size);
	} else {
		addr->sib_family = AF_IB;
		addr->sib_pkey = (__force __be16) resp.pkey;
		rdma_read_gids(ctx->cm_id, NULL,
			       (union ib_gid *)&addr->sib_addr);
		addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
						    &ctx->cm_id->route.addr.dst_addr);
	}

	if (copy_to_user(response, &resp, sizeof(resp)))
		ret = -EFAULT;

	return ret;
}
static ssize_t ucma_query(struct ucma_file *file,
			  const char __user *inbuf,
			  int in_len, int out_len)
{
	struct rdma_ucm_query cmd;
	struct ucma_context *ctx;
	void __user *response;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	response = u64_to_user_ptr(cmd.response);
	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	switch (cmd.option) {
	case RDMA_USER_CM_QUERY_ADDR:
		ret = ucma_query_addr(ctx, response, out_len);
		break;
	case RDMA_USER_CM_QUERY_PATH:
		ret = ucma_query_path(ctx, response, out_len);
		break;
	case RDMA_USER_CM_QUERY_GID:
		ret = ucma_query_gid(ctx, response, out_len);
		break;
	default:
		ret = -ENOSYS;
		break;
	}
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}
static void ucma_copy_conn_param(struct rdma_cm_id *id,
				 struct rdma_conn_param *dst,
				 struct rdma_ucm_conn_param *src)
{
	dst->private_data = src->private_data;
	dst->private_data_len = src->private_data_len;
	dst->responder_resources = src->responder_resources;
	dst->initiator_depth = src->initiator_depth;
	dst->flow_control = src->flow_control;
	dst->retry_count = src->retry_count;
	dst->rnr_retry_count = src->rnr_retry_count;
	dst->srq = src->srq;
	dst->qp_num = src->qp_num & 0xFFFFFF;
	dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0;
}
static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
			    int in_len, int out_len)
{
	struct rdma_ucm_connect cmd;
	struct rdma_conn_param conn_param;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (!cmd.conn_param.valid)
		return -EINVAL;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
	mutex_lock(&ctx->mutex);
	ret = rdma_connect(ctx->cm_id, &conn_param);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}
static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_listen cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ?
		       cmd.backlog : max_backlog;
	mutex_lock(&ctx->mutex);
	ret = rdma_listen(ctx->cm_id, ctx->backlog);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}
static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_accept cmd;
	struct rdma_conn_param conn_param;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	if (cmd.conn_param.valid) {
		ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
		mutex_lock(&file->mut);
		mutex_lock(&ctx->mutex);
		ret = __rdma_accept(ctx->cm_id, &conn_param, NULL);
		mutex_unlock(&ctx->mutex);
		if (!ret)
			ctx->uid = cmd.uid;
		mutex_unlock(&file->mut);
	} else {
		mutex_lock(&ctx->mutex);
		ret = __rdma_accept(ctx->cm_id, NULL, NULL);
		mutex_unlock(&ctx->mutex);
	}

	ucma_put_ctx(ctx);
	return ret;
}
static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_reject cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}
static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_disconnect cmd;
	struct ucma_context *ctx;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	ret = rdma_disconnect(ctx->cm_id);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}
static ssize_t ucma_init_qp_attr(struct ucma_file *file,
				 const char __user *inbuf,
				 int in_len, int out_len)
{
	struct rdma_ucm_init_qp_attr cmd;
	struct ib_uverbs_qp_attr resp;
	struct ucma_context *ctx;
	struct ib_qp_attr qp_attr;
	int ret;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (cmd.qp_state > IB_QPS_ERR)
		return -EINVAL;

	ctx = ucma_get_ctx_dev(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	resp.qp_attr_mask = 0;
	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.qp_state = cmd.qp_state;
	mutex_lock(&ctx->mutex);
	ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask);
	mutex_unlock(&ctx->mutex);
	if (ret)
		goto out;

	ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

out:
	ucma_put_ctx(ctx);
	return ret;
}
static int ucma_set_option_id(struct ucma_context *ctx, int optname,
			      void *optval, size_t optlen)
{
	int ret = 0;

	switch (optname) {
	case RDMA_OPTION_ID_TOS:
		if (optlen != sizeof(u8)) {
			ret = -EINVAL;
			break;
		}
		rdma_set_service_type(ctx->cm_id, *((u8 *) optval));
		break;
	case RDMA_OPTION_ID_REUSEADDR:
		if (optlen != sizeof(int)) {
			ret = -EINVAL;
			break;
		}
		ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0);
		break;
	case RDMA_OPTION_ID_AFONLY:
		if (optlen != sizeof(int)) {
			ret = -EINVAL;
			break;
		}
		ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0);
		break;
	case RDMA_OPTION_ID_ACK_TIMEOUT:
		if (optlen != sizeof(u8)) {
			ret = -EINVAL;
			break;
		}
		ret = rdma_set_ack_timeout(ctx->cm_id, *((u8 *)optval));
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}
static int ucma_set_ib_path(struct ucma_context *ctx,
			    struct ib_path_rec_data *path_data, size_t optlen)
{
	struct sa_path_rec sa_path;
	struct rdma_cm_event event;
	int ret;

	if (optlen % sizeof(*path_data))
		return -EINVAL;

	for (; optlen; optlen -= sizeof(*path_data), path_data++) {
		if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY |
					 IB_PATH_BIDIRECTIONAL))
			break;
	}

	if (!optlen)
		return -EINVAL;

	if (!ctx->cm_id->device)
		return -EINVAL;

	memset(&sa_path, 0, sizeof(sa_path));

	sa_path.rec_type = SA_PATH_REC_TYPE_IB;
	ib_sa_unpack_path(path_data->path_rec, &sa_path);

	if (rdma_cap_opa_ah(ctx->cm_id->device, ctx->cm_id->port_num)) {
		struct sa_path_rec opa;

		sa_convert_path_ib_to_opa(&opa, &sa_path);
		mutex_lock(&ctx->mutex);
		ret = rdma_set_ib_path(ctx->cm_id, &opa);
		mutex_unlock(&ctx->mutex);
	} else {
		mutex_lock(&ctx->mutex);
		ret = rdma_set_ib_path(ctx->cm_id, &sa_path);
		mutex_unlock(&ctx->mutex);
	}
	if (ret)
		return ret;

	memset(&event, 0, sizeof event);
	event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
	return ucma_event_handler(ctx->cm_id, &event);
}
static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
			      void *optval, size_t optlen)
{
	int ret;

	switch (optname) {
	case RDMA_OPTION_IB_PATH:
		ret = ucma_set_ib_path(ctx, optval, optlen);
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}
static int ucma_set_option_level(struct ucma_context *ctx, int level,
				 int optname, void *optval, size_t optlen)
{
	int ret;

	switch (level) {
	case RDMA_OPTION_ID:
		mutex_lock(&ctx->mutex);
		ret = ucma_set_option_id(ctx, optname, optval, optlen);
		mutex_unlock(&ctx->mutex);
		break;
	case RDMA_OPTION_IB:
		ret = ucma_set_option_ib(ctx, optname, optval, optlen);
		break;
	default:
		ret = -ENOSYS;
	}

	return ret;
}
static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_set_option cmd;
	struct ucma_context *ctx;
	void *optval;
	int ret;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE))
		return -EINVAL;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	optval = memdup_user(u64_to_user_ptr(cmd.optval),
			     cmd.optlen);
	if (IS_ERR(optval)) {
		ret = PTR_ERR(optval);
		goto out;
	}

	ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval,
				    cmd.optlen);
	kfree(optval);

out:
	ucma_put_ctx(ctx);
	return ret;
}
static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
			   int in_len, int out_len)
{
	struct rdma_ucm_notify cmd;
	struct ucma_context *ctx;
	int ret = -EINVAL;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	ctx = ucma_get_ctx(file, cmd.id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&ctx->mutex);
	if (ctx->cm_id->device)
		ret = rdma_notify(ctx->cm_id, (enum ib_event_type)cmd.event);
	mutex_unlock(&ctx->mutex);

	ucma_put_ctx(ctx);
	return ret;
}
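/*
 * Common multicast join path for the IP and struct sockaddr based commands:
 * allocate a ucma_multicast, call rdma_join_multicast(), and only publish the
 * id in multicast_table once the response has been copied to userspace, so a
 * failed copy can still unwind the join completely.
 */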
static ssize_t ucma_process_join(struct ucma_file *file,
				 struct rdma_ucm_join_mcast *cmd, int out_len)
{
	struct rdma_ucm_create_id_resp resp;
	struct ucma_context *ctx;
	struct ucma_multicast *mc;
	struct sockaddr *addr;
	int ret;
	u8 join_state;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	addr = (struct sockaddr *) &cmd->addr;
	if (cmd->addr_size != rdma_addr_size(addr))
		return -EINVAL;

	if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER)
		join_state = BIT(FULLMEMBER_JOIN);
	else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER)
		join_state = BIT(SENDONLY_FULLMEMBER_JOIN);
	else
		return -EINVAL;

	ctx = ucma_get_ctx_dev(file, cmd->id);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	mutex_lock(&file->mut);
	mc = ucma_alloc_multicast(ctx);
	if (!mc) {
		ret = -ENOMEM;
		goto err1;
	}
	mc->join_state = join_state;
	mc->uid = cmd->uid;
	memcpy(&mc->addr, addr, cmd->addr_size);
	mutex_lock(&ctx->mutex);
	ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
				  join_state, mc);
	mutex_unlock(&ctx->mutex);
	if (ret)
		goto err2;

	resp.id = mc->id;
	if (copy_to_user(u64_to_user_ptr(cmd->response),
			 &resp, sizeof(resp))) {
		ret = -EFAULT;
		goto err3;
	}

	xa_store(&multicast_table, mc->id, mc, 0);

	mutex_unlock(&file->mut);
	ucma_put_ctx(ctx);
	return 0;

err3:
	rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr);
	ucma_cleanup_mc_events(mc);
err2:
	xa_erase(&multicast_table, mc->id);
	list_del(&mc->list);
	kfree(mc);
err1:
	mutex_unlock(&file->mut);
	ucma_put_ctx(ctx);
	return ret;
}
static ssize_t ucma_join_ip_multicast(struct ucma_file *file,
				      const char __user *inbuf,
				      int in_len, int out_len)
{
	struct rdma_ucm_join_ip_mcast cmd;
	struct rdma_ucm_join_mcast join_cmd;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	join_cmd.response = cmd.response;
	join_cmd.uid = cmd.uid;
	join_cmd.id = cmd.id;
	join_cmd.addr_size = rdma_addr_size_in6(&cmd.addr);
	if (!join_cmd.addr_size)
		return -EINVAL;

	join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER;
	memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size);

	return ucma_process_join(file, &join_cmd, out_len);
}
static ssize_t ucma_join_multicast(struct ucma_file *file,
				   const char __user *inbuf,
				   int in_len, int out_len)
{
	struct rdma_ucm_join_mcast cmd;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	if (!rdma_addr_size_kss(&cmd.addr))
		return -EINVAL;

	return ucma_process_join(file, &cmd, out_len);
}
static ssize_t ucma_leave_multicast(struct ucma_file *file,
				    const char __user *inbuf,
				    int in_len, int out_len)
{
	struct rdma_ucm_destroy_id cmd;
	struct rdma_ucm_destroy_id_resp resp;
	struct ucma_multicast *mc;
	int ret = 0;

	if (out_len < sizeof(resp))
		return -ENOSPC;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	xa_lock(&multicast_table);
	mc = xa_load(&multicast_table, cmd.id);
	if (!mc)
		mc = ERR_PTR(-ENOENT);
	else if (mc->ctx->file != file)
		mc = ERR_PTR(-EINVAL);
	else if (!refcount_inc_not_zero(&mc->ctx->ref))
		mc = ERR_PTR(-ENXIO);
	else
		__xa_erase(&multicast_table, mc->id);
	xa_unlock(&multicast_table);

	if (IS_ERR(mc)) {
		ret = PTR_ERR(mc);
		goto out;
	}

	mutex_lock(&mc->ctx->mutex);
	rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
	mutex_unlock(&mc->ctx->mutex);

	mutex_lock(&mc->ctx->file->mut);
	ucma_cleanup_mc_events(mc);
	list_del(&mc->list);
	mutex_unlock(&mc->ctx->file->mut);

	ucma_put_ctx(mc->ctx);
	resp.events_reported = mc->events_reported;
	kfree(mc);

	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;
out:
	return ret;
}
static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2)
{
	/* Acquire mutex's based on pointer comparison to prevent deadlock. */
	if (file1 < file2) {
		mutex_lock(&file1->mut);
		mutex_lock_nested(&file2->mut, SINGLE_DEPTH_NESTING);
	} else {
		mutex_lock(&file2->mut);
		mutex_lock_nested(&file1->mut, SINGLE_DEPTH_NESTING);
	}
}
static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2)
{
	if (file1 < file2) {
		mutex_unlock(&file2->mut);
		mutex_unlock(&file1->mut);
	} else {
		mutex_unlock(&file1->mut);
		mutex_unlock(&file2->mut);
	}
}
static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file)
{
	struct ucma_event *uevent, *tmp;

	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list)
		if (uevent->ctx == ctx)
			list_move_tail(&uevent->list, &file->event_list);
}
static ssize_t ucma_migrate_id(struct ucma_file *new_file,
			       const char __user *inbuf,
			       int in_len, int out_len)
{
	struct rdma_ucm_migrate_id cmd;
	struct rdma_ucm_migrate_resp resp;
	struct ucma_context *ctx;
	struct fd f;
	struct ucma_file *cur_file;
	int ret = 0;

	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
		return -EFAULT;

	/* Get current fd to protect against it being closed */
	f = fdget(cmd.fd);
	if (!f.file)
		return -ENOENT;
	if (f.file->f_op != &ucma_fops) {
		ret = -EINVAL;
		goto file_put;
	}

	/* Validate current fd and prevent destruction of id. */
	ctx = ucma_get_ctx(f.file->private_data, cmd.id);
	if (IS_ERR(ctx)) {
		ret = PTR_ERR(ctx);
		goto file_put;
	}

	cur_file = ctx->file;
	if (cur_file == new_file) {
		resp.events_reported = ctx->events_reported;
		goto response;
	}

	/*
	 * Migrate events between fd's, maintaining order, and avoiding new
	 * events being added before existing events.
	 */
	ucma_lock_files(cur_file, new_file);
	xa_lock(&ctx_table);

	list_move_tail(&ctx->list, &new_file->ctx_list);
	ucma_move_events(ctx, new_file);
	ctx->file = new_file;
	resp.events_reported = ctx->events_reported;

	xa_unlock(&ctx_table);
	ucma_unlock_files(cur_file, new_file);

response:
	if (copy_to_user(u64_to_user_ptr(cmd.response),
			 &resp, sizeof(resp)))
		ret = -EFAULT;

	ucma_put_ctx(ctx);
file_put:
	fdput(f);
	return ret;
}
static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
				   const char __user *inbuf,
				   int in_len, int out_len) = {
	[RDMA_USER_CM_CMD_CREATE_ID] 	 = ucma_create_id,
	[RDMA_USER_CM_CMD_DESTROY_ID]	 = ucma_destroy_id,
	[RDMA_USER_CM_CMD_BIND_IP]	 = ucma_bind_ip,
	[RDMA_USER_CM_CMD_RESOLVE_IP]	 = ucma_resolve_ip,
	[RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route,
	[RDMA_USER_CM_CMD_QUERY_ROUTE]	 = ucma_query_route,
	[RDMA_USER_CM_CMD_CONNECT]	 = ucma_connect,
	[RDMA_USER_CM_CMD_LISTEN]	 = ucma_listen,
	[RDMA_USER_CM_CMD_ACCEPT]	 = ucma_accept,
	[RDMA_USER_CM_CMD_REJECT]	 = ucma_reject,
	[RDMA_USER_CM_CMD_DISCONNECT]	 = ucma_disconnect,
	[RDMA_USER_CM_CMD_INIT_QP_ATTR]	 = ucma_init_qp_attr,
	[RDMA_USER_CM_CMD_GET_EVENT]	 = ucma_get_event,
	[RDMA_USER_CM_CMD_GET_OPTION]	 = NULL,
	[RDMA_USER_CM_CMD_SET_OPTION]	 = ucma_set_option,
	[RDMA_USER_CM_CMD_NOTIFY]	 = ucma_notify,
	[RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast,
	[RDMA_USER_CM_CMD_LEAVE_MCAST]	 = ucma_leave_multicast,
	[RDMA_USER_CM_CMD_MIGRATE_ID]	 = ucma_migrate_id,
	[RDMA_USER_CM_CMD_QUERY]	 = ucma_query,
	[RDMA_USER_CM_CMD_BIND]		 = ucma_bind,
	[RDMA_USER_CM_CMD_RESOLVE_ADDR]	 = ucma_resolve_addr,
	[RDMA_USER_CM_CMD_JOIN_MCAST]	 = ucma_join_multicast
};
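/*
 * The write() path below is the whole userspace ABI: each write carries a
 * struct rdma_ucm_cmd_hdr (cmd index plus in/out lengths) immediately
 * followed by the command payload, and hdr.cmd indexes ucma_cmd_table above.
 * A caller (librdmacm does roughly this) fills the header and the command
 * struct in one buffer and issues a single write() per command.
 */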
static ssize_t ucma_write(struct file *filp, const char __user *buf,
			  size_t len, loff_t *pos)
{
	struct ucma_file *file = filp->private_data;
	struct rdma_ucm_cmd_hdr hdr;
	ssize_t ret;

	if (!ib_safe_file_access(filp)) {
		pr_err_once("ucma_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
			    task_tgid_vnr(current), current->comm);
		return -EACCES;
	}

	if (len < sizeof(hdr))
		return -EINVAL;

	if (copy_from_user(&hdr, buf, sizeof(hdr)))
		return -EFAULT;

	if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
		return -EINVAL;
	hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucma_cmd_table));

	if (hdr.in + sizeof(hdr) > len)
		return -EINVAL;

	if (!ucma_cmd_table[hdr.cmd])
		return -ENOSYS;

	ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out);
	if (!ret)
		ret = len;

	return ret;
}
static __poll_t ucma_poll(struct file *filp, struct poll_table_struct *wait)
{
	struct ucma_file *file = filp->private_data;
	__poll_t mask = 0;

	poll_wait(filp, &file->poll_wait, wait);

	if (!list_empty(&file->event_list))
		mask = EPOLLIN | EPOLLRDNORM;

	return mask;
}
/*
 * ucma_open() does not need the BKL:
 *
 *  - no global state is referred to;
 *  - there is no ioctl method to race against;
 *  - no further module initialization is required for open to work
 *    after the device is registered.
 */
static int ucma_open(struct inode *inode, struct file *filp)
{
	struct ucma_file *file;

	file = kmalloc(sizeof *file, GFP_KERNEL);
	if (!file)
		return -ENOMEM;

	file->close_wq = alloc_ordered_workqueue("ucma_close_id",
						 WQ_MEM_RECLAIM);
	if (!file->close_wq) {
		kfree(file);
		return -ENOMEM;
	}

	INIT_LIST_HEAD(&file->event_list);
	INIT_LIST_HEAD(&file->ctx_list);
	init_waitqueue_head(&file->poll_wait);
	mutex_init(&file->mut);

	filp->private_data = file;
	file->filp = filp;

	return stream_open(inode, filp);
}
static int ucma_close(struct inode *inode, struct file *filp)
{
	struct ucma_file *file = filp->private_data;
	struct ucma_context *ctx, *tmp;

	mutex_lock(&file->mut);
	list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) {
		ctx->destroying = 1;
		mutex_unlock(&file->mut);

		xa_erase(&ctx_table, ctx->id);
		flush_workqueue(file->close_wq);
		/* Once ctx is marked as destroying and the workqueue has been
		 * flushed we are safe from any inflight handlers that might
		 * queue another closing task.
		 */
		xa_lock(&ctx_table);
		if (!ctx->closing) {
			xa_unlock(&ctx_table);
			ucma_put_ctx(ctx);
			wait_for_completion(&ctx->comp);
			/* rdma_destroy_id ensures that no event handlers are
			 * inflight for that id before releasing it.
			 */
			rdma_destroy_id(ctx->cm_id);
		} else {
			xa_unlock(&ctx_table);
		}

		ucma_free_ctx(ctx);
		mutex_lock(&file->mut);
	}
	mutex_unlock(&file->mut);
	destroy_workqueue(file->close_wq);
	kfree(file);
	return 0;
}
static const struct file_operations ucma_fops = {
	.owner 	 = THIS_MODULE,
	.open 	 = ucma_open,
	.release = ucma_close,
	.write	 = ucma_write,
	.poll    = ucma_poll,
	.llseek	 = no_llseek,
};
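/*
 * Registered as a dynamic misc character device; with the nodename below the
 * device node is created as /dev/infiniband/rdma_cm, which is the file
 * userspace opens to reach the write()/poll() interface above.
 */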
static struct miscdevice ucma_misc = {
	.minor		= MISC_DYNAMIC_MINOR,
	.name		= "rdma_cm",
	.nodename	= "infiniband/rdma_cm",
	.mode		= 0666,
	.fops		= &ucma_fops,
};
static int ucma_get_global_nl_info(struct ib_client_nl_info *res)
{
	res->abi = RDMA_USER_CM_ABI_VERSION;
	res->cdev = ucma_misc.this_device;
	return 0;
}

static struct ib_client rdma_cma_client = {
	.name = "rdma_cm",
	.get_global_nl_info = ucma_get_global_nl_info,
};
MODULE_ALIAS_RDMA_CLIENT("rdma_cm");
static ssize_t show_abi_version(struct device *dev,
				struct device_attribute *attr,
				char *buf)
{
	return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
}
static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
static int __init ucma_init(void)
{
	int ret;

	ret = misc_register(&ucma_misc);
	if (ret)
		return ret;

	ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version);
	if (ret) {
		pr_err("rdma_ucm: couldn't create abi_version attr\n");
		goto err1;
	}

	ucma_ctl_table_hdr = register_net_sysctl(&init_net, "net/rdma_ucm", ucma_ctl_table);
	if (!ucma_ctl_table_hdr) {
		pr_err("rdma_ucm: couldn't register sysctl paths\n");
		ret = -ENOMEM;
		goto err2;
	}

	ret = ib_register_client(&rdma_cma_client);
	if (ret)
		goto err3;

	return 0;
err3:
	unregister_net_sysctl_table(ucma_ctl_table_hdr);
err2:
	device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
err1:
	misc_deregister(&ucma_misc);
	return ret;
}
static void __exit ucma_cleanup(void)
{
	ib_unregister_client(&rdma_cma_client);
	unregister_net_sysctl_table(ucma_ctl_table_hdr);
	device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
	misc_deregister(&ucma_misc);
}

module_init(ucma_init);
module_exit(ucma_cleanup);