2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * $Id: uverbs_main.c 2733 2005-06-28 19:14:34Z roland $
36 #include <linux/module.h>
37 #include <linux/init.h>
38 #include <linux/device.h>
39 #include <linux/err.h>
41 #include <linux/poll.h>
42 #include <linux/file.h>
43 #include <linux/mount.h>
45 #include <asm/uaccess.h>
49 MODULE_AUTHOR("Roland Dreier");
50 MODULE_DESCRIPTION("InfiniBand userspace verbs access");
51 MODULE_LICENSE("Dual BSD/GPL");
/* Superblock magic handed to get_sb_pseudo() for the event filesystem. */
#define INFINIBANDEVENTFS_MAGIC	0x49426576	/* "IBev" */

/* Character device numbering for the uverbs devices. */
enum {
	IB_UVERBS_MAJOR       = 231,
	IB_UVERBS_BASE_MINOR  = 192,
	IB_UVERBS_MAX_DEVICES = 32
};

/* First device number of the region; each device adds its devnum to it. */
#define IB_UVERBS_BASE_DEV	MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
63 DECLARE_MUTEX(ib_uverbs_idr_mutex
);
64 DEFINE_IDR(ib_uverbs_pd_idr
);
65 DEFINE_IDR(ib_uverbs_mr_idr
);
66 DEFINE_IDR(ib_uverbs_mw_idr
);
67 DEFINE_IDR(ib_uverbs_ah_idr
);
68 DEFINE_IDR(ib_uverbs_cq_idr
);
69 DEFINE_IDR(ib_uverbs_qp_idr
);
71 static spinlock_t map_lock
;
72 static DECLARE_BITMAP(dev_map
, IB_UVERBS_MAX_DEVICES
);
/*
 * Command dispatch table: maps each IB_USER_VERBS_CMD_* code to the
 * handler that implements it.  ib_uverbs_write() indexes this table with
 * the command number taken from the request header and calls the entry
 * with four arguments (file, payload pointer, and two lengths).
 *
 * NOTE(review): the embedded numbering jumps from 75 to 77 and stops at
 * 93, so the line carrying the final parameter and the opening of the
 * initializer, plus the closing line, appear to have been lost in
 * extraction -- restore them from the upstream file.
 */
74 static ssize_t (*uverbs_cmd_table
[])(struct ib_uverbs_file
*file
,
75 const char __user
*buf
, int in_len
,
77 [IB_USER_VERBS_CMD_QUERY_PARAMS
] = ib_uverbs_query_params
,
78 [IB_USER_VERBS_CMD_GET_CONTEXT
] = ib_uverbs_get_context
,
79 [IB_USER_VERBS_CMD_QUERY_DEVICE
] = ib_uverbs_query_device
,
80 [IB_USER_VERBS_CMD_QUERY_PORT
] = ib_uverbs_query_port
,
81 [IB_USER_VERBS_CMD_QUERY_GID
] = ib_uverbs_query_gid
,
82 [IB_USER_VERBS_CMD_QUERY_PKEY
] = ib_uverbs_query_pkey
,
83 [IB_USER_VERBS_CMD_ALLOC_PD
] = ib_uverbs_alloc_pd
,
84 [IB_USER_VERBS_CMD_DEALLOC_PD
] = ib_uverbs_dealloc_pd
,
85 [IB_USER_VERBS_CMD_REG_MR
] = ib_uverbs_reg_mr
,
86 [IB_USER_VERBS_CMD_DEREG_MR
] = ib_uverbs_dereg_mr
,
87 [IB_USER_VERBS_CMD_CREATE_CQ
] = ib_uverbs_create_cq
,
88 [IB_USER_VERBS_CMD_DESTROY_CQ
] = ib_uverbs_destroy_cq
,
89 [IB_USER_VERBS_CMD_CREATE_QP
] = ib_uverbs_create_qp
,
90 [IB_USER_VERBS_CMD_MODIFY_QP
] = ib_uverbs_modify_qp
,
91 [IB_USER_VERBS_CMD_DESTROY_QP
] = ib_uverbs_destroy_qp
,
92 [IB_USER_VERBS_CMD_ATTACH_MCAST
] = ib_uverbs_attach_mcast
,
93 [IB_USER_VERBS_CMD_DETACH_MCAST
] = ib_uverbs_detach_mcast
,
/* Internal mount of the event filesystem; backs every event file. */
static struct vfsmount *uverbs_event_mnt;

/* IB client callbacks, defined further down. */
static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device);
/*
 * Tear down the objects still attached to a userspace context.
 *
 * With ib_uverbs_idr_mutex held, walks context->qp_list, ->cq_list,
 * ->mr_list and ->pd_list in that order.  For every entry the object is
 * looked up in the matching idr, its id is removed from that idr and the
 * entry is unlinked from the list.  For memory regions the enclosing
 * ib_umem_object is recovered with container_of() and handed to
 * ib_umem_release_on_close().
 *
 * Returns whatever the device's dealloc_ucontext() method returns.
 *
 * NOTE(review): the looked-up qp/cq/pd pointers have no visible use and
 * the embedded numbering skips lines, so the per-object destroy/free
 * calls (and possibly other statements) were presumably lost in
 * extraction -- confirm against the upstream file.
 */
101 static int ib_dealloc_ucontext(struct ib_ucontext
*context
)
103 struct ib_uobject
*uobj
, *tmp
;
108 down(&ib_uverbs_idr_mutex
);
112 list_for_each_entry_safe(uobj
, tmp
, &context
->qp_list
, list
) {
113 struct ib_qp
*qp
= idr_find(&ib_uverbs_qp_idr
, uobj
->id
);
114 idr_remove(&ib_uverbs_qp_idr
, uobj
->id
);
116 list_del(&uobj
->list
);
120 list_for_each_entry_safe(uobj
, tmp
, &context
->cq_list
, list
) {
121 struct ib_cq
*cq
= idr_find(&ib_uverbs_cq_idr
, uobj
->id
);
122 idr_remove(&ib_uverbs_cq_idr
, uobj
->id
);
124 list_del(&uobj
->list
);
131 list_for_each_entry_safe(uobj
, tmp
, &context
->mr_list
, list
) {
132 struct ib_mr
*mr
= idr_find(&ib_uverbs_mr_idr
, uobj
->id
);
133 struct ib_umem_object
*memobj
;
135 idr_remove(&ib_uverbs_mr_idr
, uobj
->id
);
138 memobj
= container_of(uobj
, struct ib_umem_object
, uobject
);
139 ib_umem_release_on_close(mr
->device
, &memobj
->umem
);
141 list_del(&uobj
->list
);
145 list_for_each_entry_safe(uobj
, tmp
, &context
->pd_list
, list
) {
146 struct ib_pd
*pd
= idr_find(&ib_uverbs_pd_idr
, uobj
->id
);
147 idr_remove(&ib_uverbs_pd_idr
, uobj
->id
);
149 list_del(&uobj
->list
);
153 up(&ib_uverbs_idr_mutex
);
155 return context
->device
->dealloc_ucontext(context
);
/*
 * kref release callback for struct ib_uverbs_file (it is the function
 * passed to kref_put() on file->ref in this file).  Recovers the file
 * from its embedded ref and drops the module reference held on the IB
 * device's owner.
 *
 * NOTE(review): no free of the file itself is visible; the embedded
 * numbering suggests a statement after module_put() was lost in
 * extraction -- confirm against the upstream file.
 */
158 static void ib_uverbs_release_file(struct kref
*ref
)
160 struct ib_uverbs_file
*file
=
161 container_of(ref
, struct ib_uverbs_file
, ref
);
163 module_put(file
->device
->ib_dev
->owner
);
/*
 * read() handler for event files.
 *
 * Under file->lock, loops while the event list is empty and file->fd is
 * still >= 0.  Inside the loop the lock is dropped, O_NONBLOCK is
 * checked, and the caller sleeps interruptibly on file->poll_wait until
 * the list becomes non-empty (the rest of the wake-up condition is not
 * visible); the lock is retaken before the loop condition is re-tested.
 *
 * The head of event_list is then interpreted as an ib_uverbs_async_event
 * when file->is_async is set and as an ib_uverbs_comp_event otherwise,
 * with eventsz set to the size of the matching *_desc structure.
 * eventsz is compared against count, the head entry is removed with
 * list_del(), the lock is released, and the event is copied to @buf with
 * copy_to_user().
 *
 * NOTE(review): local declarations, return statements and error codes
 * are missing from the extracted text, so the exact branch structure
 * and return values cannot be confirmed from here.
 */
167 static ssize_t
ib_uverbs_event_read(struct file
*filp
, char __user
*buf
,
168 size_t count
, loff_t
*pos
)
170 struct ib_uverbs_event_file
*file
= filp
->private_data
;
175 spin_lock_irq(&file
->lock
);
177 while (list_empty(&file
->event_list
) && file
->fd
>= 0) {
178 spin_unlock_irq(&file
->lock
);
180 if (filp
->f_flags
& O_NONBLOCK
)
183 if (wait_event_interruptible(file
->poll_wait
,
184 !list_empty(&file
->event_list
) ||
188 spin_lock_irq(&file
->lock
);
192 spin_unlock_irq(&file
->lock
);
196 if (file
->is_async
) {
197 event
= list_entry(file
->event_list
.next
,
198 struct ib_uverbs_async_event
, list
);
199 eventsz
= sizeof (struct ib_uverbs_async_event_desc
);
201 event
= list_entry(file
->event_list
.next
,
202 struct ib_uverbs_comp_event
, list
);
203 eventsz
= sizeof (struct ib_uverbs_comp_event_desc
);
206 if (eventsz
> count
) {
210 list_del(file
->event_list
.next
);
212 spin_unlock_irq(&file
->lock
);
215 if (copy_to_user(buf
, event
, eventsz
))
/*
 * poll() handler for event files.  Registers the caller on
 * file->poll_wait, then, under file->lock, sets POLLIN | POLLRDNORM in
 * pollflags when the event list is non-empty.  pollflags starts at 0.
 *
 * NOTE(review): the visible "else if" implies an earlier branch that is
 * missing from the extracted text, as is the return statement.
 */
226 static unsigned int ib_uverbs_event_poll(struct file
*filp
,
227 struct poll_table_struct
*wait
)
229 unsigned int pollflags
= 0;
230 struct ib_uverbs_event_file
*file
= filp
->private_data
;
232 poll_wait(filp
, &file
->poll_wait
, wait
);
234 spin_lock_irq(&file
->lock
);
237 else if (!list_empty(&file
->event_list
))
238 pollflags
= POLLIN
| POLLRDNORM
;
239 spin_unlock_irq(&file
->lock
);
/*
 * Discard the events queued on an event file.  Under file->lock, and
 * only when file->fd is not -1, walks event_list with the _safe iterator
 * and kfree()s each entry, treating it as either an
 * ib_uverbs_async_event or an ib_uverbs_comp_event.
 *
 * Called from the event file's own release handler, from
 * ib_uverbs_close() and from the tail of ib_uverbs_open().
 *
 * NOTE(review): the lines between the fd test and the loop, and the
 * condition choosing between the two kfree() forms, are missing from
 * the extracted text.
 */
244 static void ib_uverbs_event_release(struct ib_uverbs_event_file
*file
)
246 struct list_head
*entry
, *tmp
;
248 spin_lock_irq(&file
->lock
);
249 if (file
->fd
!= -1) {
251 list_for_each_safe(entry
, tmp
, &file
->event_list
)
253 kfree(list_entry(entry
, struct ib_uverbs_async_event
, list
));
255 kfree(list_entry(entry
, struct ib_uverbs_comp_event
, list
));
257 spin_unlock_irq(&file
->lock
);
260 static int ib_uverbs_event_fasync(int fd
, struct file
*filp
, int on
)
262 struct ib_uverbs_event_file
*file
= filp
->private_data
;
264 return fasync_helper(fd
, filp
, on
, &file
->async_queue
);
/*
 * release() handler for event files.  Discards any queued events, calls
 * the fasync handler with fd == -1 and on == 0, and drops this event
 * file's reference on the parent ib_uverbs_file (the last reference
 * triggers ib_uverbs_release_file()).
 *
 * NOTE(review): the return statement is missing from the extracted text.
 */
267 static int ib_uverbs_event_close(struct inode
*inode
, struct file
*filp
)
269 struct ib_uverbs_event_file
*file
= filp
->private_data
;
271 ib_uverbs_event_release(file
);
272 ib_uverbs_event_fasync(-1, filp
, 0);
273 kref_put(&file
->uverbs_file
->ref
, ib_uverbs_release_file
);
278 static struct file_operations uverbs_event_fops
= {
280 * No .owner field since we artificially create event files,
281 * so there is no increment to the module reference count in
282 * the open path. All event files come from a uverbs command
283 * file, which already takes a module reference, so this is OK.
285 .read
= ib_uverbs_event_read
,
286 .poll
= ib_uverbs_event_poll
,
287 .release
= ib_uverbs_event_close
,
288 .fasync
= ib_uverbs_event_fasync
/*
 * Completion callback for a CQ; @cq_context is the owning ib_uverbs_file.
 *
 * Allocates a completion event with GFP_ATOMIC, records the CQ's
 * user-space handle in it, appends it to comp_file[0]'s event list under
 * that file's lock (irqsave variant), then wakes sleepers on the poll
 * wait queue and signals SIGIO/POLL_IN to fasync listeners.
 *
 * NOTE(review): the declaration of "flags" and whatever follows the
 * kmalloc() (e.g. a failure check) are missing from the extracted text.
 */
291 void ib_uverbs_comp_handler(struct ib_cq
*cq
, void *cq_context
)
293 struct ib_uverbs_file
*file
= cq_context
;
294 struct ib_uverbs_comp_event
*entry
;
297 entry
= kmalloc(sizeof *entry
, GFP_ATOMIC
);
301 entry
->desc
.cq_handle
= cq
->uobject
->user_handle
;
303 spin_lock_irqsave(&file
->comp_file
[0].lock
, flags
);
304 list_add_tail(&entry
->list
, &file
->comp_file
[0].event_list
);
305 spin_unlock_irqrestore(&file
->comp_file
[0].lock
, flags
);
307 wake_up_interruptible(&file
->comp_file
[0].poll_wait
);
308 kill_fasync(&file
->comp_file
[0].async_queue
, SIGIO
, POLL_IN
);
/*
 * Queue an asynchronous event on @file's async event file.
 *
 * Allocates an event with GFP_ATOMIC, fills in desc.element and
 * desc.event_type from the arguments, appends it to the async file's
 * event list under its lock (irqsave variant), then wakes sleepers on
 * the poll wait queue and signals SIGIO/POLL_IN to fasync listeners.
 *
 * Shared by the CQ, QP and device-level event handlers in this file.
 *
 * NOTE(review): the declaration of "flags" and whatever follows the
 * kmalloc() (e.g. a failure check) are missing from the extracted text.
 */
311 static void ib_uverbs_async_handler(struct ib_uverbs_file
*file
,
312 __u64 element
, __u64 event
)
314 struct ib_uverbs_async_event
*entry
;
317 entry
= kmalloc(sizeof *entry
, GFP_ATOMIC
);
321 entry
->desc
.element
= element
;
322 entry
->desc
.event_type
= event
;
324 spin_lock_irqsave(&file
->async_file
.lock
, flags
);
325 list_add_tail(&entry
->list
, &file
->async_file
.event_list
);
326 spin_unlock_irqrestore(&file
->async_file
.lock
, flags
);
328 wake_up_interruptible(&file
->async_file
.poll_wait
);
329 kill_fasync(&file
->async_file
.async_queue
, SIGIO
, POLL_IN
);
/*
 * Asynchronous event callback for CQs.  Forwards to
 * ib_uverbs_async_handler(), passing @context_ptr as the uverbs file and
 * the CQ's user-space handle as the element.
 *
 * NOTE(review): the final argument of the call is on a line missing
 * from the extracted text.
 */
332 void ib_uverbs_cq_event_handler(struct ib_event
*event
, void *context_ptr
)
334 ib_uverbs_async_handler(context_ptr
,
335 event
->element
.cq
->uobject
->user_handle
,
/*
 * Asynchronous event callback for QPs.  Forwards to
 * ib_uverbs_async_handler(), passing @context_ptr as the uverbs file and
 * the QP's user-space handle as the element.
 *
 * NOTE(review): the final argument of the call is on a line missing
 * from the extracted text.
 */
339 void ib_uverbs_qp_event_handler(struct ib_event
*event
, void *context_ptr
)
341 ib_uverbs_async_handler(context_ptr
,
342 event
->element
.qp
->uobject
->user_handle
,
346 static void ib_uverbs_event_handler(struct ib_event_handler
*handler
,
347 struct ib_event
*event
)
349 struct ib_uverbs_file
*file
=
350 container_of(handler
, struct ib_uverbs_file
, event_handler
);
352 ib_uverbs_async_handler(file
, event
->element
.port_num
, event
->event
);
/*
 * Initialise an event file and attach it to a new file descriptor.
 *
 * Sets up the lock, event list and poll wait queue, records the parent
 * @uverbs_file and clears async_queue.  Reserves a descriptor with
 * get_unused_fd() and obtains a struct file with get_empty_filp(); the
 * visible put_unused_fd() returns the descriptor on a failure path.  The
 * struct file is pointed at uverbs_event_fops, the internal event
 * filesystem mount and its root dentry, marked O_RDONLY/FMODE_READ, given
 * @file as private_data, and finally installed on the descriptor.
 *
 * NOTE(review): local declarations, the failure checks and the return
 * statements are missing from the extracted text.
 */
355 static int ib_uverbs_event_init(struct ib_uverbs_event_file
*file
,
356 struct ib_uverbs_file
*uverbs_file
)
360 spin_lock_init(&file
->lock
);
361 INIT_LIST_HEAD(&file
->event_list
);
362 init_waitqueue_head(&file
->poll_wait
);
363 file
->uverbs_file
= uverbs_file
;
364 file
->async_queue
= NULL
;
366 file
->fd
= get_unused_fd();
370 filp
= get_empty_filp();
372 put_unused_fd(file
->fd
);
376 filp
->f_op
= &uverbs_event_fops
;
377 filp
->f_vfsmnt
= mntget(uverbs_event_mnt
);
378 filp
->f_dentry
= dget(uverbs_event_mnt
->mnt_root
);
379 filp
->f_mapping
= filp
->f_dentry
->d_inode
->i_mapping
;
380 filp
->f_flags
= O_RDONLY
;
381 filp
->f_mode
= FMODE_READ
;
382 filp
->private_data
= file
;
384 fd_install(file
->fd
, filp
);
/*
 * write() handler for the uverbs command device.  Each write carries
 * exactly one command: a struct ib_uverbs_cmd_hdr followed by its
 * payload.
 *
 * Validation, in order:
 *   - @count must be at least the header size;
 *   - the header must be copyable from user space;
 *   - hdr.in_words * 4 must equal @count;
 *   - hdr.command must index inside uverbs_cmd_table;
 *   - while the file has no ucontext, only QUERY_PARAMS and GET_CONTEXT
 *     get past the check.
 *
 * On success the handler from uverbs_cmd_table is called with the
 * payload pointer (buf + sizeof hdr), hdr.in_words * 4 and
 * hdr.out_words * 4, and its return value is returned.
 *
 * NOTE(review): the return statements of the failing checks are missing
 * from the extracted text, so the error codes cannot be confirmed here.
 * NOTE(review): no check for a NULL table entry is visible; if the
 * IB_USER_VERBS_CMD_* values leave holes in the table this would call
 * through a NULL pointer -- confirm against the enum definition.
 * NOTE(review): if hdr.command is an unsigned type the "< 0" test is
 * always false -- confirm against the header definition.
 */
389 static ssize_t
ib_uverbs_write(struct file
*filp
, const char __user
*buf
,
390 size_t count
, loff_t
*pos
)
392 struct ib_uverbs_file
*file
= filp
->private_data
;
393 struct ib_uverbs_cmd_hdr hdr
;
395 if (count
< sizeof hdr
)
398 if (copy_from_user(&hdr
, buf
, sizeof hdr
))
401 if (hdr
.in_words
* 4 != count
)
404 if (hdr
.command
< 0 || hdr
.command
>= ARRAY_SIZE(uverbs_cmd_table
))
407 if (!file
->ucontext
&&
408 hdr
.command
!= IB_USER_VERBS_CMD_QUERY_PARAMS
&&
409 hdr
.command
!= IB_USER_VERBS_CMD_GET_CONTEXT
)
412 return uverbs_cmd_table
[hdr
.command
](file
, buf
+ sizeof hdr
,
413 hdr
.in_words
* 4, hdr
.out_words
* 4);
/*
 * mmap() handler for the uverbs command device.  Delegates to the IB
 * device's mmap method, passing the file's ucontext and @vma, and
 * returns its result.
 *
 * NOTE(review): the embedded numbering skips several lines before the
 * return, so any precondition checked there (for instance on
 * file->ucontext) is not visible -- confirm against the upstream file.
 */
416 static int ib_uverbs_mmap(struct file
*filp
, struct vm_area_struct
*vma
)
418 struct ib_uverbs_file
*file
= filp
->private_data
;
423 return file
->device
->ib_dev
->mmap(file
->ucontext
, vma
);
/*
 * open() handler for the uverbs command device.
 *
 * Finds the ib_uverbs_device from the inode's cdev and takes a module
 * reference on the IB device's owner.  Allocates the per-open
 * ib_uverbs_file with extra room for (num_comp - 1) completion event
 * files beyond what the structure itself holds, initialises its kref
 * and clears ucontext.
 *
 * The async event file is initialised first (is_async = 1), then each of
 * the num_comp completion event files (is_async = 0); every event file
 * that is set up takes an additional reference on the ib_uverbs_file.
 * The file is stored in filp->private_data and an event handler bound to
 * ib_uverbs_event_handler() is registered with the IB device.
 *
 * The ib_uverbs_event_release() calls and kref_put() at the end
 * presumably form the error unwind.
 *
 * NOTE(review): local declarations, the checks on each step's result,
 * labels and return statements are missing from the extracted text.
 */
426 static int ib_uverbs_open(struct inode
*inode
, struct file
*filp
)
428 struct ib_uverbs_device
*dev
=
429 container_of(inode
->i_cdev
, struct ib_uverbs_device
, dev
);
430 struct ib_uverbs_file
*file
;
434 if (!try_module_get(dev
->ib_dev
->owner
))
437 file
= kmalloc(sizeof *file
+
438 (dev
->num_comp
- 1) * sizeof (struct ib_uverbs_event_file
),
444 kref_init(&file
->ref
);
446 file
->ucontext
= NULL
;
448 ret
= ib_uverbs_event_init(&file
->async_file
, file
);
452 file
->async_file
.is_async
= 1;
454 kref_get(&file
->ref
);
456 for (i
= 0; i
< dev
->num_comp
; ++i
) {
457 ret
= ib_uverbs_event_init(&file
->comp_file
[i
], file
);
460 kref_get(&file
->ref
);
461 file
->comp_file
[i
].is_async
= 0;
465 filp
->private_data
= file
;
467 INIT_IB_EVENT_HANDLER(&file
->event_handler
, dev
->ib_dev
,
468 ib_uverbs_event_handler
);
469 if (ib_register_event_handler(&file
->event_handler
))
476 ib_uverbs_event_release(&file
->comp_file
[i
]);
478 ib_uverbs_event_release(&file
->async_file
);
481 kref_put(&file
->ref
, ib_uverbs_release_file
);
/*
 * release() handler for the uverbs command device.
 *
 * Unregisters the device event handler, discards events queued on the
 * async event file, tears down the user context through
 * ib_dealloc_ucontext(), discards events queued on every completion
 * event file, and finally drops the command file's reference on the
 * ib_uverbs_file.
 *
 * NOTE(review): the declaration of "i" and the return statement are
 * missing from the extracted text.
 */
486 static int ib_uverbs_close(struct inode
*inode
, struct file
*filp
)
488 struct ib_uverbs_file
*file
= filp
->private_data
;
491 ib_unregister_event_handler(&file
->event_handler
);
492 ib_uverbs_event_release(&file
->async_file
);
493 ib_dealloc_ucontext(file
->ucontext
);
495 for (i
= 0; i
< file
->device
->num_comp
; ++i
)
496 ib_uverbs_event_release(&file
->comp_file
[i
]);
498 kref_put(&file
->ref
, ib_uverbs_release_file
);
503 static struct file_operations uverbs_fops
= {
504 .owner
= THIS_MODULE
,
505 .write
= ib_uverbs_write
,
506 .open
= ib_uverbs_open
,
507 .release
= ib_uverbs_close
510 static struct file_operations uverbs_mmap_fops
= {
511 .owner
= THIS_MODULE
,
512 .write
= ib_uverbs_write
,
513 .mmap
= ib_uverbs_mmap
,
514 .open
= ib_uverbs_open
,
515 .release
= ib_uverbs_close
/*
 * IB client descriptor for this module: ib_uverbs_add_one() and
 * ib_uverbs_remove_one() are the add and remove callbacks.  It is
 * registered in ib_uverbs_init() and also serves as the key for the
 * per-device client data.
 *
 * NOTE(review): the embedded numbering skips a line after the opening
 * brace, so one initializer (and the closing line) appears to be missing
 * from the extracted text.
 */
518 static struct ib_client uverbs_client
= {
520 .add
= ib_uverbs_add_one
,
521 .remove
= ib_uverbs_remove_one
524 static ssize_t
show_ibdev(struct class_device
*class_dev
, char *buf
)
526 struct ib_uverbs_device
*dev
=
527 container_of(class_dev
, struct ib_uverbs_device
, class_dev
);
529 return sprintf(buf
, "%s\n", dev
->ib_dev
->name
);
531 static CLASS_DEVICE_ATTR(ibdev
, S_IRUGO
, show_ibdev
, NULL
);
/*
 * Release callback of the uverbs class (see uverbs_class.release).
 * Recovers the ib_uverbs_device from its embedded class_dev and clears
 * the device's number in dev_map so it can be handed out again.
 *
 * NOTE(review): the embedded numbering skips the lines immediately
 * before and after clear_bit(), so further teardown statements appear
 * to be missing from the extracted text -- confirm against the
 * upstream file.
 */
533 static void ib_uverbs_release_class_dev(struct class_device
*class_dev
)
535 struct ib_uverbs_device
*dev
=
536 container_of(class_dev
, struct ib_uverbs_device
, class_dev
);
539 clear_bit(dev
->devnum
, dev_map
);
543 static struct class uverbs_class
= {
544 .name
= "infiniband_verbs",
545 .release
= ib_uverbs_release_class_dev
548 static ssize_t
show_abi_version(struct class *class, char *buf
)
550 return sprintf(buf
, "%d\n", IB_USER_VERBS_ABI_VERSION
);
552 static CLASS_ATTR(abi_version
, S_IRUGO
, show_abi_version
, NULL
);
/*
 * IB client "add" callback: create the uverbs character device and
 * class device for a newly registered IB device.
 *
 * Steps visible here:
 *   - test device->alloc_ucontext;
 *   - allocate and zero an ib_uverbs_device;
 *   - under map_lock, pick the first free bit in dev_map as devnum and
 *     mark it used (the lock is also dropped on the "no free number"
 *     path);
 *   - record the IB device and set num_comp to 1;
 *   - initialise the cdev with either uverbs_mmap_fops or uverbs_fops
 *     (the condition selecting between them is not visible), name it
 *     "uverbs<devnum>" and add it at IB_UVERBS_BASE_DEV + devnum;
 *   - fill in and register the class device, then create its "ibdev"
 *     attribute;
 *   - store the ib_uverbs_device as this client's data for the device.
 *
 * The class_device_unregister()/cdev_del()/clear_bit() calls at the end
 * presumably form the error unwind.
 *
 * NOTE(review): the bodies of the failure branches, labels and return
 * statements are missing from the extracted text.
 */
554 static void ib_uverbs_add_one(struct ib_device
*device
)
556 struct ib_uverbs_device
*uverbs_dev
;
558 if (!device
->alloc_ucontext
)
561 uverbs_dev
= kmalloc(sizeof *uverbs_dev
, GFP_KERNEL
);
565 memset(uverbs_dev
, 0, sizeof *uverbs_dev
);
567 spin_lock(&map_lock
);
568 uverbs_dev
->devnum
= find_first_zero_bit(dev_map
, IB_UVERBS_MAX_DEVICES
);
569 if (uverbs_dev
->devnum
>= IB_UVERBS_MAX_DEVICES
) {
570 spin_unlock(&map_lock
);
573 set_bit(uverbs_dev
->devnum
, dev_map
);
574 spin_unlock(&map_lock
);
576 uverbs_dev
->ib_dev
= device
;
577 uverbs_dev
->num_comp
= 1;
580 cdev_init(&uverbs_dev
->dev
, &uverbs_mmap_fops
);
582 cdev_init(&uverbs_dev
->dev
, &uverbs_fops
);
583 uverbs_dev
->dev
.owner
= THIS_MODULE
;
584 kobject_set_name(&uverbs_dev
->dev
.kobj
, "uverbs%d", uverbs_dev
->devnum
);
585 if (cdev_add(&uverbs_dev
->dev
, IB_UVERBS_BASE_DEV
+ uverbs_dev
->devnum
, 1))
588 uverbs_dev
->class_dev
.class = &uverbs_class
;
589 uverbs_dev
->class_dev
.dev
= device
->dma_device
;
590 uverbs_dev
->class_dev
.devt
= uverbs_dev
->dev
.dev
;
591 snprintf(uverbs_dev
->class_dev
.class_id
, BUS_ID_SIZE
, "uverbs%d", uverbs_dev
->devnum
);
592 if (class_device_register(&uverbs_dev
->class_dev
))
595 if (class_device_create_file(&uverbs_dev
->class_dev
, &class_device_attr_ibdev
))
598 ib_set_client_data(device
, &uverbs_client
, uverbs_dev
);
603 class_device_unregister(&uverbs_dev
->class_dev
);
606 cdev_del(&uverbs_dev
->dev
);
607 clear_bit(uverbs_dev
->devnum
, dev_map
);
/*
 * IB client "remove" callback: looks up the ib_uverbs_device stored as
 * this client's data for @device and unregisters its class device.
 *
 * NOTE(review): the embedded numbering skips lines between the lookup
 * and the unregister call, so any check on the lookup result is not
 * visible -- confirm against the upstream file.
 */
614 static void ib_uverbs_remove_one(struct ib_device
*device
)
616 struct ib_uverbs_device
*uverbs_dev
= ib_get_client_data(device
, &uverbs_client
);
621 class_device_unregister(&uverbs_dev
->class_dev
);
624 static struct super_block
*uverbs_event_get_sb(struct file_system_type
*fs_type
, int flags
,
625 const char *dev_name
, void *data
)
627 return get_sb_pseudo(fs_type
, "infinibandevent:", NULL
,
628 INFINIBANDEVENTFS_MAGIC
);
631 static struct file_system_type uverbs_event_fs
= {
632 /* No owner field so module can be unloaded */
633 .name
= "infinibandeventfs",
634 .get_sb
= uverbs_event_get_sb
,
635 .kill_sb
= kill_litter_super
/*
 * Module initialisation.  Visible steps, in order:
 *   1. initialise map_lock;
 *   2. reserve IB_UVERBS_MAX_DEVICES device numbers starting at
 *      IB_UVERBS_BASE_DEV;
 *   3. register the uverbs class;
 *   4. create the class's abi_version attribute;
 *   5. register the event filesystem;
 *   6. mount it internally with kern_mount();
 *   7. register the IB client.
 * Each step logs a KERN_ERR message when it fails.  The mntput(),
 * unregister_filesystem(), class_unregister() and
 * unregister_chrdev_region() calls at the end undo the earlier steps in
 * reverse order and presumably form the error unwind.
 *
 * NOTE(review): the declaration of "ret", the failure tests, labels and
 * return statements are missing from the extracted text.
 */
638 static int __init
ib_uverbs_init(void)
642 spin_lock_init(&map_lock
);
644 ret
= register_chrdev_region(IB_UVERBS_BASE_DEV
, IB_UVERBS_MAX_DEVICES
,
647 printk(KERN_ERR
"user_verbs: couldn't register device number\n");
651 ret
= class_register(&uverbs_class
);
653 printk(KERN_ERR
"user_verbs: couldn't create class infiniband_verbs\n");
657 ret
= class_create_file(&uverbs_class
, &class_attr_abi_version
);
659 printk(KERN_ERR
"user_verbs: couldn't create abi_version attribute\n");
663 ret
= register_filesystem(&uverbs_event_fs
);
665 printk(KERN_ERR
"user_verbs: couldn't register infinibandeventfs\n");
669 uverbs_event_mnt
= kern_mount(&uverbs_event_fs
);
670 if (IS_ERR(uverbs_event_mnt
)) {
671 ret
= PTR_ERR(uverbs_event_mnt
);
672 printk(KERN_ERR
"user_verbs: couldn't mount infinibandeventfs\n");
676 ret
= ib_register_client(&uverbs_client
);
678 printk(KERN_ERR
"user_verbs: couldn't register client\n");
685 mntput(uverbs_event_mnt
);
688 unregister_filesystem(&uverbs_event_fs
);
691 class_unregister(&uverbs_class
);
694 unregister_chrdev_region(IB_UVERBS_BASE_DEV
, IB_UVERBS_MAX_DEVICES
);
700 static void __exit
ib_uverbs_cleanup(void)
702 ib_unregister_client(&uverbs_client
);
703 mntput(uverbs_event_mnt
);
704 unregister_filesystem(&uverbs_event_fs
);
705 class_unregister(&uverbs_class
);
706 unregister_chrdev_region(IB_UVERBS_BASE_DEV
, IB_UVERBS_MAX_DEVICES
);
/* Module entry and exit points. */
module_init(ib_uverbs_init);
module_exit(ib_uverbs_cleanup);