#include <linux/module.h>

#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/blk-mq.h>
#include <linux/hrtimer.h>
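/*
 * null_blk: a block device driver with no backing storage. Requests are
 * completed as soon as they are handled (inline, via softirq, or via an
 * hrtimer), which makes the driver useful for benchmarking the block layer.
 */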
struct nullb_cmd {
        struct list_head list;
        struct llist_node ll_list;
        struct call_single_data csd;
        struct request *rq;
        struct bio *bio;
        unsigned int tag;
        struct nullb_queue *nq;
};
struct nullb_queue {
        unsigned long *tag_map;
        wait_queue_head_t wait;
        unsigned int queue_depth;

        struct nullb_cmd *cmds;
};
struct nullb {
        struct list_head list;
        unsigned int index;
        struct request_queue *q;
        struct gendisk *disk;
        struct blk_mq_tag_set tag_set;
        unsigned int queue_depth;
        spinlock_t lock;

        struct nullb_queue *queues;
        unsigned int nr_queues;
};
static LIST_HEAD(nullb_list);
static struct mutex lock;
static int null_major;
static int nullb_indexes;
struct completion_queue {
        struct llist_head list;
        struct hrtimer timer;
};

/*
 * These are per-cpu for now, they will need to be configured by the
 * complete_queues parameter and appropriately mapped.
 */
static DEFINE_PER_CPU(struct completion_queue, completion_queues);
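/*
 * Completion and queue modes. The numeric values follow the module
 * parameter documentation below (0=bio, 1=rq, 2=multiqueue and
 * 0-none, 1-softirq, 2-timer).
 */
enum {
        NULL_IRQ_NONE           = 0,
        NULL_IRQ_SOFTIRQ        = 1,
        NULL_IRQ_TIMER          = 2,
};

enum {
        NULL_Q_BIO              = 0,
        NULL_Q_RQ               = 1,
        NULL_Q_MQ               = 2,
};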
static int submit_queues;
module_param(submit_queues, int, S_IRUGO);
MODULE_PARM_DESC(submit_queues, "Number of submission queues");

static int home_node = NUMA_NO_NODE;
module_param(home_node, int, S_IRUGO);
MODULE_PARM_DESC(home_node, "Home node for the device");

static int queue_mode = NULL_Q_MQ;
static int null_param_store_val(const char *str, int *val, int min, int max)
{
        int ret, new_val;

        ret = kstrtoint(str, 10, &new_val);
        if (ret)
                return -EINVAL;

        if (new_val < min || new_val > max)
                return -EINVAL;

        *val = new_val;
        return 0;
}
static int null_set_queue_mode(const char *str, const struct kernel_param *kp)
{
        return null_param_store_val(str, &queue_mode, NULL_Q_BIO, NULL_Q_MQ);
}

static const struct kernel_param_ops null_queue_mode_param_ops = {
        .set    = null_set_queue_mode,
        .get    = param_get_int,
};

device_param_cb(queue_mode, &null_queue_mode_param_ops, &queue_mode, S_IRUGO);
MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");
/* Default device size (GB) and block size (bytes); 250 and 512 are the driver's usual defaults. */
static int gb = 250;
module_param(gb, int, S_IRUGO);
MODULE_PARM_DESC(gb, "Size in GB");

static int bs = 512;
module_param(bs, int, S_IRUGO);
MODULE_PARM_DESC(bs, "Block size (in bytes)");

static int nr_devices = 2;
module_param(nr_devices, int, S_IRUGO);
MODULE_PARM_DESC(nr_devices, "Number of devices to register");
static int irqmode = NULL_IRQ_SOFTIRQ;

static int null_set_irqmode(const char *str, const struct kernel_param *kp)
{
        return null_param_store_val(str, &irqmode, NULL_IRQ_NONE,
                                        NULL_IRQ_TIMER);
}

static const struct kernel_param_ops null_irqmode_param_ops = {
        .set    = null_set_irqmode,
        .get    = param_get_int,
};

device_param_cb(irqmode, &null_irqmode_param_ops, &irqmode, S_IRUGO);
MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");
static int completion_nsec = 10000;
module_param(completion_nsec, int, S_IRUGO);
MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");

static int hw_queue_depth = 64;
module_param(hw_queue_depth, int, S_IRUGO);
MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");

static bool use_per_node_hctx = false;
module_param(use_per_node_hctx, bool, S_IRUGO);
MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
static void put_tag(struct nullb_queue *nq, unsigned int tag)
{
        clear_bit_unlock(tag, nq->tag_map);

        if (waitqueue_active(&nq->wait))
                wake_up(&nq->wait);
}
static unsigned int get_tag(struct nullb_queue *nq)
{
        unsigned int tag;

        do {
                tag = find_first_zero_bit(nq->tag_map, nq->queue_depth);
                if (tag >= nq->queue_depth)
                        return -1U;
        } while (test_and_set_bit_lock(tag, nq->tag_map));

        return tag;
}
static void free_cmd(struct nullb_cmd *cmd)
{
        put_tag(cmd->nq, cmd->tag);
}
static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
{
        struct nullb_cmd *cmd;
        unsigned int tag;

        tag = get_tag(nq);
        if (tag != -1U) {
                cmd = &nq->cmds[tag];
                cmd->tag = tag;
                cmd->nq = nq;
                return cmd;
        }

        return NULL;
}
static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
{
        struct nullb_cmd *cmd;
        DEFINE_WAIT(wait);

        cmd = __alloc_cmd(nq);
        if (cmd || !can_wait)
                return cmd;

        do {
                prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
                cmd = __alloc_cmd(nq);
                if (cmd)
                        break;

                io_schedule();
        } while (1);

        finish_wait(&nq->wait, &wait);
        return cmd;
}
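/*
 * end_cmd() finishes a command through whichever interface it arrived on:
 * blk-mq request, legacy request, or plain bio.
 */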
static void end_cmd(struct nullb_cmd *cmd)
{
        switch (queue_mode) {
        case NULL_Q_MQ:
                blk_mq_end_request(cmd->rq, 0);
                return;
        case NULL_Q_RQ:
                INIT_LIST_HEAD(&cmd->rq->queuelist);
                blk_end_request_all(cmd->rq, 0);
                break;
        case NULL_Q_BIO:
                bio_endio(cmd->bio, 0);
                break;
        }

        free_cmd(cmd);
}
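/*
 * Timer-mode completion: commands are parked on a per-CPU llist and the
 * hrtimer callback below drains that list, completing each command and
 * restarting a stopped legacy queue if needed.
 */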
static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
{
        struct completion_queue *cq;
        struct llist_node *entry;
        struct nullb_cmd *cmd;

        cq = &per_cpu(completion_queues, smp_processor_id());

        while ((entry = llist_del_all(&cq->list)) != NULL) {
                entry = llist_reverse_order(entry);
                do {
                        struct request_queue *q = NULL;

                        cmd = container_of(entry, struct nullb_cmd, ll_list);
                        entry = entry->next;
                        if (cmd->rq)
                                q = cmd->rq->q;
                        end_cmd(cmd);

                        if (q && !q->mq_ops && blk_queue_stopped(q)) {
                                spin_lock(q->queue_lock);
                                if (blk_queue_stopped(q))
                                        blk_start_queue(q);
                                spin_unlock(q->queue_lock);
                        }
                } while (entry);
        }

        return HRTIMER_NORESTART;
}
static void null_cmd_end_timer(struct nullb_cmd *cmd)
{
        struct completion_queue *cq = &per_cpu(completion_queues, get_cpu());

        cmd->ll_list.next = NULL;
        if (llist_add(&cmd->ll_list, &cq->list)) {
                ktime_t kt = ktime_set(0, completion_nsec);

                hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL_PINNED);
        }

        put_cpu();
}
static void null_softirq_done_fn(struct request *rq)
{
        if (queue_mode == NULL_Q_MQ)
                end_cmd(blk_mq_rq_to_pdu(rq));
        else
                end_cmd(rq->special);
}
static inline void null_handle_cmd(struct nullb_cmd *cmd)
{
        /* Complete IO by inline, softirq or timer */
        switch (irqmode) {
        case NULL_IRQ_SOFTIRQ:
                switch (queue_mode) {
                case NULL_Q_MQ:
                        blk_mq_complete_request(cmd->rq);
                        break;
                case NULL_Q_RQ:
                        blk_complete_request(cmd->rq);
                        break;
                case NULL_Q_BIO:
                        /*
                         * XXX: no proper submitting cpu information available.
                         */
                        end_cmd(cmd);
                        break;
                }
                break;
        case NULL_IRQ_NONE:
                end_cmd(cmd);
                break;
        case NULL_IRQ_TIMER:
                null_cmd_end_timer(cmd);
                break;
        }
}
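/*
 * Map the submitting CPU to one of the device's queues. With a single
 * queue everything lands on queue 0; otherwise CPUs are spread evenly
 * across the available queues.
 */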
static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
{
        int index = 0;

        if (nullb->nr_queues != 1)
                index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues);

        return &nullb->queues[index];
}
static void null_queue_bio(struct request_queue *q, struct bio *bio)
{
        struct nullb *nullb = q->queuedata;
        struct nullb_queue *nq = nullb_to_queue(nullb);
        struct nullb_cmd *cmd;

        cmd = alloc_cmd(nq, 1);
        cmd->bio = bio;

        null_handle_cmd(cmd);
}
static int null_rq_prep_fn(struct request_queue *q, struct request *req)
{
        struct nullb *nullb = q->queuedata;
        struct nullb_queue *nq = nullb_to_queue(nullb);
        struct nullb_cmd *cmd;

        cmd = alloc_cmd(nq, 0);
        if (cmd) {
                cmd->rq = req;
                req->special = cmd;
                return BLKPREP_OK;
        }

        return BLKPREP_DEFER;
}
static void null_request_fn(struct request_queue *q)
{
        struct request *rq;

        while ((rq = blk_fetch_request(q)) != NULL) {
                struct nullb_cmd *cmd = rq->special;

                spin_unlock_irq(q->queue_lock);
                null_handle_cmd(cmd);
                spin_lock_irq(q->queue_lock);
        }
}
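/*
 * blk-mq path: the per-request command lives in the request PDU; it is
 * tied to the hardware context's queue and handled immediately.
 */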
static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
                         const struct blk_mq_queue_data *bd)
{
        struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);

        cmd->rq = bd->rq;
        cmd->nq = hctx->driver_data;

        blk_mq_start_request(bd->rq);

        null_handle_cmd(cmd);
        return BLK_MQ_RQ_QUEUE_OK;
}
static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
{
        init_waitqueue_head(&nq->wait);
        nq->queue_depth = nullb->queue_depth;
}
static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
                          unsigned int index)
{
        struct nullb *nullb = data;
        struct nullb_queue *nq = &nullb->queues[index];

        hctx->driver_data = nq;
        null_init_queue(nullb, nq);
        nullb->nr_queues++;

        return 0;
}
static struct blk_mq_ops null_mq_ops = {
        .queue_rq       = null_queue_rq,
        .map_queue      = blk_mq_map_queue,
        .init_hctx      = null_init_hctx,
        .complete       = null_softirq_done_fn,
};
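/*
 * Tear down one device: unhook it from the global list, remove the disk,
 * release the request queue and (for blk-mq) the tag set.
 */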
static void null_del_dev(struct nullb *nullb)
{
        list_del_init(&nullb->list);

        del_gendisk(nullb->disk);
        blk_cleanup_queue(nullb->q);
        if (queue_mode == NULL_Q_MQ)
                blk_mq_free_tag_set(&nullb->tag_set);
        put_disk(nullb->disk);
        cleanup_queues(nullb);
        kfree(nullb);
}
static int null_open(struct block_device *bdev, fmode_t mode)
{
        return 0;
}

static void null_release(struct gendisk *disk, fmode_t mode)
{
}

static const struct block_device_operations null_fops = {
        .owner          = THIS_MODULE,
        .open           = null_open,
        .release        = null_release,
};
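/*
 * Per-queue setup: allocate the command array and the tag bitmap sized to
 * the configured queue depth.
 */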
static int setup_commands(struct nullb_queue *nq)
{
        struct nullb_cmd *cmd;
        int i, tag_size;

        nq->cmds = kzalloc(nq->queue_depth * sizeof(*cmd), GFP_KERNEL);
        if (!nq->cmds)
                return -ENOMEM;

        tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
        nq->tag_map = kzalloc(tag_size * sizeof(unsigned long), GFP_KERNEL);
        if (!nq->tag_map) {
                kfree(nq->cmds);
                return -ENOMEM;
        }

        for (i = 0; i < nq->queue_depth; i++) {
                cmd = &nq->cmds[i];
                INIT_LIST_HEAD(&cmd->list);
                cmd->ll_list.next = NULL;
                cmd->tag = -1U;
        }

        return 0;
}
static void cleanup_queue(struct nullb_queue *nq)
{
        kfree(nq->tag_map);
        kfree(nq->cmds);
}

static void cleanup_queues(struct nullb *nullb)
{
        int i;

        for (i = 0; i < nullb->nr_queues; i++)
                cleanup_queue(&nullb->queues[i]);

        kfree(nullb->queues);
}
static int setup_queues(struct nullb *nullb)
{
        nullb->queues = kzalloc(submit_queues * sizeof(struct nullb_queue),
                                GFP_KERNEL);
        if (!nullb->queues)
                return -ENOMEM;

        nullb->nr_queues = 0;
        nullb->queue_depth = hw_queue_depth;

        return 0;
}
static int init_driver_queues(struct nullb *nullb)
{
        struct nullb_queue *nq;
        int i, ret = 0;

        for (i = 0; i < submit_queues; i++) {
                nq = &nullb->queues[i];

                null_init_queue(nullb, nq);

                ret = setup_commands(nq);
                if (ret)
                        return ret;
                nullb->nr_queues++;
        }
        return 0;
}
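/*
 * Create one nullb device: allocate its queues, set up the request queue
 * for the selected queue_mode (bio, rq, or multiqueue), then allocate and
 * register the gendisk.
 */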
static int null_add_dev(void)
{
        struct gendisk *disk;
        struct nullb *nullb;
        sector_t size;
        int rv;

        nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node);
        if (!nullb) {
                rv = -ENOMEM;
                goto out;
        }

        spin_lock_init(&nullb->lock);

        if (queue_mode == NULL_Q_MQ && use_per_node_hctx)
                submit_queues = nr_online_nodes;

        rv = setup_queues(nullb);
        if (rv)
                goto out_free_nullb;

        if (queue_mode == NULL_Q_MQ) {
                nullb->tag_set.ops = &null_mq_ops;
                nullb->tag_set.nr_hw_queues = submit_queues;
                nullb->tag_set.queue_depth = hw_queue_depth;
                nullb->tag_set.numa_node = home_node;
                nullb->tag_set.cmd_size = sizeof(struct nullb_cmd);
                nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
                nullb->tag_set.driver_data = nullb;

                rv = blk_mq_alloc_tag_set(&nullb->tag_set);
                if (rv)
                        goto out_cleanup_queues;

                nullb->q = blk_mq_init_queue(&nullb->tag_set);
                if (IS_ERR(nullb->q)) {
                        rv = -ENOMEM;
                        goto out_cleanup_tags;
                }
        } else if (queue_mode == NULL_Q_BIO) {
                nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
                if (!nullb->q) {
                        rv = -ENOMEM;
                        goto out_cleanup_queues;
                }
                blk_queue_make_request(nullb->q, null_queue_bio);
                rv = init_driver_queues(nullb);
                if (rv)
                        goto out_cleanup_blk_queue;
        } else {
                nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node);
                if (!nullb->q) {
                        rv = -ENOMEM;
                        goto out_cleanup_queues;
                }
                blk_queue_prep_rq(nullb->q, null_rq_prep_fn);
                blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
                rv = init_driver_queues(nullb);
                if (rv)
                        goto out_cleanup_blk_queue;
        }

        nullb->q->queuedata = nullb;
        queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
        queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q);

        disk = nullb->disk = alloc_disk_node(1, home_node);
        if (!disk) {
                rv = -ENOMEM;
                goto out_cleanup_blk_queue;
        }

        mutex_lock(&lock);
        list_add_tail(&nullb->list, &nullb_list);
        nullb->index = nullb_indexes++;
        mutex_unlock(&lock);

        blk_queue_logical_block_size(nullb->q, bs);
        blk_queue_physical_block_size(nullb->q, bs);

        size = gb * 1024 * 1024 * 1024ULL;
        sector_div(size, bs);
        set_capacity(disk, size);

        disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO;
        disk->major = null_major;
        disk->first_minor = nullb->index;
        disk->fops = &null_fops;
        disk->private_data = nullb;
        disk->queue = nullb->q;
        sprintf(disk->disk_name, "nullb%d", nullb->index);
        add_disk(disk);
        return 0;

out_cleanup_blk_queue:
        blk_cleanup_queue(nullb->q);
out_cleanup_tags:
        if (queue_mode == NULL_Q_MQ)
                blk_mq_free_tag_set(&nullb->tag_set);
out_cleanup_queues:
        cleanup_queues(nullb);
out_free_nullb:
        kfree(nullb);
out:
        return rv;
}
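/*
 * Module init: validate the block size and submit_queues parameters,
 * initialize the per-CPU completion queues (and their hrtimers when
 * irqmode is timer), register the block major, and create nr_devices
 * devices.
 */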
static int __init null_init(void)
{
        unsigned int i;

        if (bs > PAGE_SIZE) {
                pr_warn("null_blk: invalid block size\n");
                pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE);
                bs = PAGE_SIZE;
        }

        if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
                if (submit_queues < nr_online_nodes) {
                        pr_warn("null_blk: submit_queues param is set to %u.",
                                        nr_online_nodes);
                        submit_queues = nr_online_nodes;
                }
        } else if (submit_queues > nr_cpu_ids)
                submit_queues = nr_cpu_ids;
        else if (!submit_queues)
                submit_queues = 1;

        mutex_init(&lock);

        /* Initialize a separate list for each CPU for issuing softirqs */
        for_each_possible_cpu(i) {
                struct completion_queue *cq = &per_cpu(completion_queues, i);

                init_llist_head(&cq->list);

                if (irqmode != NULL_IRQ_TIMER)
                        continue;

                hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
                cq->timer.function = null_cmd_timer_expired;
        }

        null_major = register_blkdev(0, "nullb");
        if (null_major < 0)
                return null_major;

        for (i = 0; i < nr_devices; i++) {
                if (null_add_dev()) {
                        unregister_blkdev(null_major, "nullb");
                        return -EINVAL;
                }
        }

        pr_info("null: module loaded\n");
        return 0;
}
static void __exit null_exit(void)
{
        struct nullb *nullb;

        unregister_blkdev(null_major, "nullb");

        mutex_lock(&lock);
        while (!list_empty(&nullb_list)) {
                nullb = list_entry(nullb_list.next, struct nullb, list);
                null_del_dev(nullb);
        }
        mutex_unlock(&lock);
}
module_init(null_init);
module_exit(null_exit);

MODULE_AUTHOR("Jens Axboe <jaxboe@fusionio.com>");
MODULE_LICENSE("GPL");