Linux 4.2.1
[linux/fpc-iii.git] / drivers / infiniband / core / uverbs_main.c
blobf6eef2da7097980b7066c62f1746d7722064f8f3
1 /*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
5 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
6 * Copyright (c) 2005 PathScale, Inc. All rights reserved.
8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
16 * conditions are met:
18 * - Redistributions of source code must retain the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer.
22 * - Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials
25 * provided with the distribution.
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
37 #include <linux/module.h>
38 #include <linux/init.h>
39 #include <linux/device.h>
40 #include <linux/err.h>
41 #include <linux/fs.h>
42 #include <linux/poll.h>
43 #include <linux/sched.h>
44 #include <linux/file.h>
45 #include <linux/cdev.h>
46 #include <linux/anon_inodes.h>
47 #include <linux/slab.h>
49 #include <asm/uaccess.h>
51 #include "uverbs.h"
53 MODULE_AUTHOR("Roland Dreier");
54 MODULE_DESCRIPTION("InfiniBand userspace verbs access");
55 MODULE_LICENSE("Dual BSD/GPL");
/*
 * Statically reserved character-device numbers for the uverbs nodes:
 * IB_UVERBS_MAX_DEVICES minors starting at IB_UVERBS_BASE_MINOR on
 * major IB_UVERBS_MAJOR.
 */
enum {
	IB_UVERBS_MAJOR		= 231,
	IB_UVERBS_BASE_MINOR	= 192,
	IB_UVERBS_MAX_DEVICES	= 32
};

/* First dev_t of the statically reserved range. */
#define IB_UVERBS_BASE_DEV	MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
65 static struct class *uverbs_class;
67 DEFINE_SPINLOCK(ib_uverbs_idr_lock);
68 DEFINE_IDR(ib_uverbs_pd_idr);
69 DEFINE_IDR(ib_uverbs_mr_idr);
70 DEFINE_IDR(ib_uverbs_mw_idr);
71 DEFINE_IDR(ib_uverbs_ah_idr);
72 DEFINE_IDR(ib_uverbs_cq_idr);
73 DEFINE_IDR(ib_uverbs_qp_idr);
74 DEFINE_IDR(ib_uverbs_srq_idr);
75 DEFINE_IDR(ib_uverbs_xrcd_idr);
76 DEFINE_IDR(ib_uverbs_rule_idr);
78 static DEFINE_SPINLOCK(map_lock);
79 static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
81 static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
82 const char __user *buf, int in_len,
83 int out_len) = {
84 [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
85 [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
86 [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
87 [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
88 [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
89 [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
90 [IB_USER_VERBS_CMD_REREG_MR] = ib_uverbs_rereg_mr,
91 [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
92 [IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw,
93 [IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw,
94 [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
95 [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
96 [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq,
97 [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq,
98 [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq,
99 [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
100 [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
101 [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp,
102 [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
103 [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
104 [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send,
105 [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv,
106 [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv,
107 [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah,
108 [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah,
109 [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
110 [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
111 [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq,
112 [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
113 [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
114 [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
115 [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd,
116 [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd,
117 [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq,
118 [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp,
121 static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
122 struct ib_udata *ucore,
123 struct ib_udata *uhw) = {
124 [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow,
125 [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow,
126 [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device,
127 [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq,
/* ib_client callbacks, defined further down; uverbs_client refers to them. */
static void ib_uverbs_remove_one(struct ib_device *device);
static void ib_uverbs_add_one(struct ib_device *device);
133 static void ib_uverbs_release_dev(struct kref *ref)
135 struct ib_uverbs_device *dev =
136 container_of(ref, struct ib_uverbs_device, ref);
138 complete(&dev->comp);
141 static void ib_uverbs_release_event_file(struct kref *ref)
143 struct ib_uverbs_event_file *file =
144 container_of(ref, struct ib_uverbs_event_file, ref);
146 kfree(file);
149 void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
150 struct ib_uverbs_event_file *ev_file,
151 struct ib_ucq_object *uobj)
153 struct ib_uverbs_event *evt, *tmp;
155 if (ev_file) {
156 spin_lock_irq(&ev_file->lock);
157 list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
158 list_del(&evt->list);
159 kfree(evt);
161 spin_unlock_irq(&ev_file->lock);
163 kref_put(&ev_file->ref, ib_uverbs_release_event_file);
166 spin_lock_irq(&file->async_file->lock);
167 list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
168 list_del(&evt->list);
169 kfree(evt);
171 spin_unlock_irq(&file->async_file->lock);
174 void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
175 struct ib_uevent_object *uobj)
177 struct ib_uverbs_event *evt, *tmp;
179 spin_lock_irq(&file->async_file->lock);
180 list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
181 list_del(&evt->list);
182 kfree(evt);
184 spin_unlock_irq(&file->async_file->lock);
187 static void ib_uverbs_detach_umcast(struct ib_qp *qp,
188 struct ib_uqp_object *uobj)
190 struct ib_uverbs_mcast_entry *mcast, *tmp;
192 list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) {
193 ib_detach_mcast(qp, &mcast->gid, mcast->lid);
194 list_del(&mcast->list);
195 kfree(mcast);
199 static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
200 struct ib_ucontext *context)
202 struct ib_uobject *uobj, *tmp;
204 if (!context)
205 return 0;
207 context->closing = 1;
209 list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
210 struct ib_ah *ah = uobj->object;
212 idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
213 ib_destroy_ah(ah);
214 kfree(uobj);
217 /* Remove MWs before QPs, in order to support type 2A MWs. */
218 list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) {
219 struct ib_mw *mw = uobj->object;
221 idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
222 ib_dealloc_mw(mw);
223 kfree(uobj);
226 list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) {
227 struct ib_flow *flow_id = uobj->object;
229 idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
230 ib_destroy_flow(flow_id);
231 kfree(uobj);
234 list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
235 struct ib_qp *qp = uobj->object;
236 struct ib_uqp_object *uqp =
237 container_of(uobj, struct ib_uqp_object, uevent.uobject);
239 idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
240 if (qp != qp->real_qp) {
241 ib_close_qp(qp);
242 } else {
243 ib_uverbs_detach_umcast(qp, uqp);
244 ib_destroy_qp(qp);
246 ib_uverbs_release_uevent(file, &uqp->uevent);
247 kfree(uqp);
250 list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
251 struct ib_srq *srq = uobj->object;
252 struct ib_uevent_object *uevent =
253 container_of(uobj, struct ib_uevent_object, uobject);
255 idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
256 ib_destroy_srq(srq);
257 ib_uverbs_release_uevent(file, uevent);
258 kfree(uevent);
261 list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
262 struct ib_cq *cq = uobj->object;
263 struct ib_uverbs_event_file *ev_file = cq->cq_context;
264 struct ib_ucq_object *ucq =
265 container_of(uobj, struct ib_ucq_object, uobject);
267 idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
268 ib_destroy_cq(cq);
269 ib_uverbs_release_ucq(file, ev_file, ucq);
270 kfree(ucq);
273 list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
274 struct ib_mr *mr = uobj->object;
276 idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
277 ib_dereg_mr(mr);
278 kfree(uobj);
281 mutex_lock(&file->device->xrcd_tree_mutex);
282 list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
283 struct ib_xrcd *xrcd = uobj->object;
284 struct ib_uxrcd_object *uxrcd =
285 container_of(uobj, struct ib_uxrcd_object, uobject);
287 idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
288 ib_uverbs_dealloc_xrcd(file->device, xrcd);
289 kfree(uxrcd);
291 mutex_unlock(&file->device->xrcd_tree_mutex);
293 list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
294 struct ib_pd *pd = uobj->object;
296 idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
297 ib_dealloc_pd(pd);
298 kfree(uobj);
301 put_pid(context->tgid);
303 return context->device->dealloc_ucontext(context);
306 static void ib_uverbs_release_file(struct kref *ref)
308 struct ib_uverbs_file *file =
309 container_of(ref, struct ib_uverbs_file, ref);
311 module_put(file->device->ib_dev->owner);
312 kref_put(&file->device->ref, ib_uverbs_release_dev);
314 kfree(file);
317 static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
318 size_t count, loff_t *pos)
320 struct ib_uverbs_event_file *file = filp->private_data;
321 struct ib_uverbs_event *event;
322 int eventsz;
323 int ret = 0;
325 spin_lock_irq(&file->lock);
327 while (list_empty(&file->event_list)) {
328 spin_unlock_irq(&file->lock);
330 if (filp->f_flags & O_NONBLOCK)
331 return -EAGAIN;
333 if (wait_event_interruptible(file->poll_wait,
334 !list_empty(&file->event_list)))
335 return -ERESTARTSYS;
337 spin_lock_irq(&file->lock);
340 event = list_entry(file->event_list.next, struct ib_uverbs_event, list);
342 if (file->is_async)
343 eventsz = sizeof (struct ib_uverbs_async_event_desc);
344 else
345 eventsz = sizeof (struct ib_uverbs_comp_event_desc);
347 if (eventsz > count) {
348 ret = -EINVAL;
349 event = NULL;
350 } else {
351 list_del(file->event_list.next);
352 if (event->counter) {
353 ++(*event->counter);
354 list_del(&event->obj_list);
358 spin_unlock_irq(&file->lock);
360 if (event) {
361 if (copy_to_user(buf, event, eventsz))
362 ret = -EFAULT;
363 else
364 ret = eventsz;
367 kfree(event);
369 return ret;
372 static unsigned int ib_uverbs_event_poll(struct file *filp,
373 struct poll_table_struct *wait)
375 unsigned int pollflags = 0;
376 struct ib_uverbs_event_file *file = filp->private_data;
378 poll_wait(filp, &file->poll_wait, wait);
380 spin_lock_irq(&file->lock);
381 if (!list_empty(&file->event_list))
382 pollflags = POLLIN | POLLRDNORM;
383 spin_unlock_irq(&file->lock);
385 return pollflags;
388 static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
390 struct ib_uverbs_event_file *file = filp->private_data;
392 return fasync_helper(fd, filp, on, &file->async_queue);
395 static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
397 struct ib_uverbs_event_file *file = filp->private_data;
398 struct ib_uverbs_event *entry, *tmp;
400 spin_lock_irq(&file->lock);
401 file->is_closed = 1;
402 list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
403 if (entry->counter)
404 list_del(&entry->obj_list);
405 kfree(entry);
407 spin_unlock_irq(&file->lock);
409 if (file->is_async) {
410 ib_unregister_event_handler(&file->uverbs_file->event_handler);
411 kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
413 kref_put(&file->ref, ib_uverbs_release_event_file);
415 return 0;
418 static const struct file_operations uverbs_event_fops = {
419 .owner = THIS_MODULE,
420 .read = ib_uverbs_event_read,
421 .poll = ib_uverbs_event_poll,
422 .release = ib_uverbs_event_close,
423 .fasync = ib_uverbs_event_fasync,
424 .llseek = no_llseek,
427 void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
429 struct ib_uverbs_event_file *file = cq_context;
430 struct ib_ucq_object *uobj;
431 struct ib_uverbs_event *entry;
432 unsigned long flags;
434 if (!file)
435 return;
437 spin_lock_irqsave(&file->lock, flags);
438 if (file->is_closed) {
439 spin_unlock_irqrestore(&file->lock, flags);
440 return;
443 entry = kmalloc(sizeof *entry, GFP_ATOMIC);
444 if (!entry) {
445 spin_unlock_irqrestore(&file->lock, flags);
446 return;
449 uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
451 entry->desc.comp.cq_handle = cq->uobject->user_handle;
452 entry->counter = &uobj->comp_events_reported;
454 list_add_tail(&entry->list, &file->event_list);
455 list_add_tail(&entry->obj_list, &uobj->comp_list);
456 spin_unlock_irqrestore(&file->lock, flags);
458 wake_up_interruptible(&file->poll_wait);
459 kill_fasync(&file->async_queue, SIGIO, POLL_IN);
462 static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
463 __u64 element, __u64 event,
464 struct list_head *obj_list,
465 u32 *counter)
467 struct ib_uverbs_event *entry;
468 unsigned long flags;
470 spin_lock_irqsave(&file->async_file->lock, flags);
471 if (file->async_file->is_closed) {
472 spin_unlock_irqrestore(&file->async_file->lock, flags);
473 return;
476 entry = kmalloc(sizeof *entry, GFP_ATOMIC);
477 if (!entry) {
478 spin_unlock_irqrestore(&file->async_file->lock, flags);
479 return;
482 entry->desc.async.element = element;
483 entry->desc.async.event_type = event;
484 entry->desc.async.reserved = 0;
485 entry->counter = counter;
487 list_add_tail(&entry->list, &file->async_file->event_list);
488 if (obj_list)
489 list_add_tail(&entry->obj_list, obj_list);
490 spin_unlock_irqrestore(&file->async_file->lock, flags);
492 wake_up_interruptible(&file->async_file->poll_wait);
493 kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
496 void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
498 struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
499 struct ib_ucq_object, uobject);
501 ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle,
502 event->event, &uobj->async_list,
503 &uobj->async_events_reported);
506 void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
508 struct ib_uevent_object *uobj;
510 /* for XRC target qp's, check that qp is live */
511 if (!event->element.qp->uobject || !event->element.qp->uobject->live)
512 return;
514 uobj = container_of(event->element.qp->uobject,
515 struct ib_uevent_object, uobject);
517 ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
518 event->event, &uobj->event_list,
519 &uobj->events_reported);
522 void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
524 struct ib_uevent_object *uobj;
526 uobj = container_of(event->element.srq->uobject,
527 struct ib_uevent_object, uobject);
529 ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
530 event->event, &uobj->event_list,
531 &uobj->events_reported);
534 void ib_uverbs_event_handler(struct ib_event_handler *handler,
535 struct ib_event *event)
537 struct ib_uverbs_file *file =
538 container_of(handler, struct ib_uverbs_file, event_handler);
540 ib_uverbs_async_handler(file, event->element.port_num, event->event,
541 NULL, NULL);
544 struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
545 int is_async)
547 struct ib_uverbs_event_file *ev_file;
548 struct file *filp;
550 ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL);
551 if (!ev_file)
552 return ERR_PTR(-ENOMEM);
554 kref_init(&ev_file->ref);
555 spin_lock_init(&ev_file->lock);
556 INIT_LIST_HEAD(&ev_file->event_list);
557 init_waitqueue_head(&ev_file->poll_wait);
558 ev_file->uverbs_file = uverbs_file;
559 ev_file->async_queue = NULL;
560 ev_file->is_async = is_async;
561 ev_file->is_closed = 0;
563 filp = anon_inode_getfile("[infinibandevent]", &uverbs_event_fops,
564 ev_file, O_RDONLY);
565 if (IS_ERR(filp))
566 kfree(ev_file);
568 return filp;
572 * Look up a completion event file by FD. If lookup is successful,
573 * takes a ref to the event file struct that it returns; if
574 * unsuccessful, returns NULL.
576 struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
578 struct ib_uverbs_event_file *ev_file = NULL;
579 struct fd f = fdget(fd);
581 if (!f.file)
582 return NULL;
584 if (f.file->f_op != &uverbs_event_fops)
585 goto out;
587 ev_file = f.file->private_data;
588 if (ev_file->is_async) {
589 ev_file = NULL;
590 goto out;
593 kref_get(&ev_file->ref);
595 out:
596 fdput(f);
597 return ev_file;
600 static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
601 size_t count, loff_t *pos)
603 struct ib_uverbs_file *file = filp->private_data;
604 struct ib_uverbs_cmd_hdr hdr;
605 __u32 flags;
607 if (count < sizeof hdr)
608 return -EINVAL;
610 if (copy_from_user(&hdr, buf, sizeof hdr))
611 return -EFAULT;
613 flags = (hdr.command &
614 IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
616 if (!flags) {
617 __u32 command;
619 if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
620 IB_USER_VERBS_CMD_COMMAND_MASK))
621 return -EINVAL;
623 command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
625 if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
626 !uverbs_cmd_table[command])
627 return -EINVAL;
629 if (!file->ucontext &&
630 command != IB_USER_VERBS_CMD_GET_CONTEXT)
631 return -EINVAL;
633 if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << command)))
634 return -ENOSYS;
636 if (hdr.in_words * 4 != count)
637 return -EINVAL;
639 return uverbs_cmd_table[command](file,
640 buf + sizeof(hdr),
641 hdr.in_words * 4,
642 hdr.out_words * 4);
644 } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
645 __u32 command;
647 struct ib_uverbs_ex_cmd_hdr ex_hdr;
648 struct ib_udata ucore;
649 struct ib_udata uhw;
650 int err;
651 size_t written_count = count;
653 if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
654 IB_USER_VERBS_CMD_COMMAND_MASK))
655 return -EINVAL;
657 command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
659 if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
660 !uverbs_ex_cmd_table[command])
661 return -ENOSYS;
663 if (!file->ucontext)
664 return -EINVAL;
666 if (!(file->device->ib_dev->uverbs_ex_cmd_mask & (1ull << command)))
667 return -ENOSYS;
669 if (count < (sizeof(hdr) + sizeof(ex_hdr)))
670 return -EINVAL;
672 if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
673 return -EFAULT;
675 count -= sizeof(hdr) + sizeof(ex_hdr);
676 buf += sizeof(hdr) + sizeof(ex_hdr);
678 if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count)
679 return -EINVAL;
681 if (ex_hdr.cmd_hdr_reserved)
682 return -EINVAL;
684 if (ex_hdr.response) {
685 if (!hdr.out_words && !ex_hdr.provider_out_words)
686 return -EINVAL;
688 if (!access_ok(VERIFY_WRITE,
689 (void __user *) (unsigned long) ex_hdr.response,
690 (hdr.out_words + ex_hdr.provider_out_words) * 8))
691 return -EFAULT;
692 } else {
693 if (hdr.out_words || ex_hdr.provider_out_words)
694 return -EINVAL;
697 INIT_UDATA_BUF_OR_NULL(&ucore, buf, (unsigned long) ex_hdr.response,
698 hdr.in_words * 8, hdr.out_words * 8);
700 INIT_UDATA_BUF_OR_NULL(&uhw,
701 buf + ucore.inlen,
702 (unsigned long) ex_hdr.response + ucore.outlen,
703 ex_hdr.provider_in_words * 8,
704 ex_hdr.provider_out_words * 8);
706 err = uverbs_ex_cmd_table[command](file,
707 &ucore,
708 &uhw);
710 if (err)
711 return err;
713 return written_count;
716 return -ENOSYS;
719 static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
721 struct ib_uverbs_file *file = filp->private_data;
723 if (!file->ucontext)
724 return -ENODEV;
725 else
726 return file->device->ib_dev->mmap(file->ucontext, vma);
730 * ib_uverbs_open() does not need the BKL:
732 * - the ib_uverbs_device structures are properly reference counted and
733 * everything else is purely local to the file being created, so
734 * races against other open calls are not a problem;
735 * - there is no ioctl method to race against;
736 * - the open method will either immediately run -ENXIO, or all
737 * required initialization will be done.
739 static int ib_uverbs_open(struct inode *inode, struct file *filp)
741 struct ib_uverbs_device *dev;
742 struct ib_uverbs_file *file;
743 int ret;
745 dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
746 if (dev)
747 kref_get(&dev->ref);
748 else
749 return -ENXIO;
751 if (!try_module_get(dev->ib_dev->owner)) {
752 ret = -ENODEV;
753 goto err;
756 file = kmalloc(sizeof *file, GFP_KERNEL);
757 if (!file) {
758 ret = -ENOMEM;
759 goto err_module;
762 file->device = dev;
763 file->ucontext = NULL;
764 file->async_file = NULL;
765 kref_init(&file->ref);
766 mutex_init(&file->mutex);
768 filp->private_data = file;
770 return nonseekable_open(inode, filp);
772 err_module:
773 module_put(dev->ib_dev->owner);
775 err:
776 kref_put(&dev->ref, ib_uverbs_release_dev);
777 return ret;
780 static int ib_uverbs_close(struct inode *inode, struct file *filp)
782 struct ib_uverbs_file *file = filp->private_data;
784 ib_uverbs_cleanup_ucontext(file, file->ucontext);
786 if (file->async_file)
787 kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
789 kref_put(&file->ref, ib_uverbs_release_file);
791 return 0;
794 static const struct file_operations uverbs_fops = {
795 .owner = THIS_MODULE,
796 .write = ib_uverbs_write,
797 .open = ib_uverbs_open,
798 .release = ib_uverbs_close,
799 .llseek = no_llseek,
802 static const struct file_operations uverbs_mmap_fops = {
803 .owner = THIS_MODULE,
804 .write = ib_uverbs_write,
805 .mmap = ib_uverbs_mmap,
806 .open = ib_uverbs_open,
807 .release = ib_uverbs_close,
808 .llseek = no_llseek,
811 static struct ib_client uverbs_client = {
812 .name = "uverbs",
813 .add = ib_uverbs_add_one,
814 .remove = ib_uverbs_remove_one
817 static ssize_t show_ibdev(struct device *device, struct device_attribute *attr,
818 char *buf)
820 struct ib_uverbs_device *dev = dev_get_drvdata(device);
822 if (!dev)
823 return -ENODEV;
825 return sprintf(buf, "%s\n", dev->ib_dev->name);
827 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
829 static ssize_t show_dev_abi_version(struct device *device,
830 struct device_attribute *attr, char *buf)
832 struct ib_uverbs_device *dev = dev_get_drvdata(device);
834 if (!dev)
835 return -ENODEV;
837 return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver);
839 static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
841 static CLASS_ATTR_STRING(abi_version, S_IRUGO,
842 __stringify(IB_USER_VERBS_ABI_VERSION));
844 static dev_t overflow_maj;
845 static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES);
848 * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by
849 * requesting a new major number and doubling the number of max devices we
850 * support. It's stupid, but simple.
852 static int find_overflow_devnum(void)
854 int ret;
856 if (!overflow_maj) {
857 ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES,
858 "infiniband_verbs");
859 if (ret) {
860 printk(KERN_ERR "user_verbs: couldn't register dynamic device number\n");
861 return ret;
865 ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES);
866 if (ret >= IB_UVERBS_MAX_DEVICES)
867 return -1;
869 return ret;
872 static void ib_uverbs_add_one(struct ib_device *device)
874 int devnum;
875 dev_t base;
876 struct ib_uverbs_device *uverbs_dev;
878 if (!device->alloc_ucontext)
879 return;
881 uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL);
882 if (!uverbs_dev)
883 return;
885 kref_init(&uverbs_dev->ref);
886 init_completion(&uverbs_dev->comp);
887 uverbs_dev->xrcd_tree = RB_ROOT;
888 mutex_init(&uverbs_dev->xrcd_tree_mutex);
890 spin_lock(&map_lock);
891 devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
892 if (devnum >= IB_UVERBS_MAX_DEVICES) {
893 spin_unlock(&map_lock);
894 devnum = find_overflow_devnum();
895 if (devnum < 0)
896 goto err;
898 spin_lock(&map_lock);
899 uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES;
900 base = devnum + overflow_maj;
901 set_bit(devnum, overflow_map);
902 } else {
903 uverbs_dev->devnum = devnum;
904 base = devnum + IB_UVERBS_BASE_DEV;
905 set_bit(devnum, dev_map);
907 spin_unlock(&map_lock);
909 uverbs_dev->ib_dev = device;
910 uverbs_dev->num_comp_vectors = device->num_comp_vectors;
912 cdev_init(&uverbs_dev->cdev, NULL);
913 uverbs_dev->cdev.owner = THIS_MODULE;
914 uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
915 kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
916 if (cdev_add(&uverbs_dev->cdev, base, 1))
917 goto err_cdev;
919 uverbs_dev->dev = device_create(uverbs_class, device->dma_device,
920 uverbs_dev->cdev.dev, uverbs_dev,
921 "uverbs%d", uverbs_dev->devnum);
922 if (IS_ERR(uverbs_dev->dev))
923 goto err_cdev;
925 if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev))
926 goto err_class;
927 if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
928 goto err_class;
930 ib_set_client_data(device, &uverbs_client, uverbs_dev);
932 return;
934 err_class:
935 device_destroy(uverbs_class, uverbs_dev->cdev.dev);
937 err_cdev:
938 cdev_del(&uverbs_dev->cdev);
939 if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
940 clear_bit(devnum, dev_map);
941 else
942 clear_bit(devnum, overflow_map);
944 err:
945 kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
946 wait_for_completion(&uverbs_dev->comp);
947 kfree(uverbs_dev);
948 return;
951 static void ib_uverbs_remove_one(struct ib_device *device)
953 struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client);
955 if (!uverbs_dev)
956 return;
958 dev_set_drvdata(uverbs_dev->dev, NULL);
959 device_destroy(uverbs_class, uverbs_dev->cdev.dev);
960 cdev_del(&uverbs_dev->cdev);
962 if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
963 clear_bit(uverbs_dev->devnum, dev_map);
964 else
965 clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
967 kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
968 wait_for_completion(&uverbs_dev->comp);
969 kfree(uverbs_dev);
972 static char *uverbs_devnode(struct device *dev, umode_t *mode)
974 if (mode)
975 *mode = 0666;
976 return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
979 static int __init ib_uverbs_init(void)
981 int ret;
983 ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
984 "infiniband_verbs");
985 if (ret) {
986 printk(KERN_ERR "user_verbs: couldn't register device number\n");
987 goto out;
990 uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
991 if (IS_ERR(uverbs_class)) {
992 ret = PTR_ERR(uverbs_class);
993 printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n");
994 goto out_chrdev;
997 uverbs_class->devnode = uverbs_devnode;
999 ret = class_create_file(uverbs_class, &class_attr_abi_version.attr);
1000 if (ret) {
1001 printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n");
1002 goto out_class;
1005 ret = ib_register_client(&uverbs_client);
1006 if (ret) {
1007 printk(KERN_ERR "user_verbs: couldn't register client\n");
1008 goto out_class;
1011 return 0;
1013 out_class:
1014 class_destroy(uverbs_class);
1016 out_chrdev:
1017 unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
1019 out:
1020 return ret;
1023 static void __exit ib_uverbs_cleanup(void)
1025 ib_unregister_client(&uverbs_client);
1026 class_destroy(uverbs_class);
1027 unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
1028 if (overflow_maj)
1029 unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES);
1030 idr_destroy(&ib_uverbs_pd_idr);
1031 idr_destroy(&ib_uverbs_mr_idr);
1032 idr_destroy(&ib_uverbs_mw_idr);
1033 idr_destroy(&ib_uverbs_ah_idr);
1034 idr_destroy(&ib_uverbs_cq_idr);
1035 idr_destroy(&ib_uverbs_qp_idr);
1036 idr_destroy(&ib_uverbs_srq_idr);
/* Module entry and exit points. */
module_init(ib_uverbs_init);
module_exit(ib_uverbs_cleanup);