#include <linux/module.h>

#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/blk-mq.h>
#include <linux/hrtimer.h>
struct nullb_cmd {
        struct list_head list;
        struct llist_node ll_list;
        struct call_single_data csd;
        struct request *rq;
        struct bio *bio;
        unsigned int tag;
        struct nullb_queue *nq;
};

struct nullb_queue {
        unsigned long *tag_map;
        wait_queue_head_t wait;
        unsigned int queue_depth;

        struct nullb_cmd *cmds;
};
struct nullb {
        struct list_head list;
        unsigned int index;
        struct request_queue *q;
        struct gendisk *disk;
        unsigned int queue_depth;
        spinlock_t lock;

        struct nullb_queue *queues;
        unsigned int nr_queues;
};
static LIST_HEAD(nullb_list);
static struct mutex lock;
static int null_major;
static int nullb_indexes;
struct completion_queue {
        struct llist_head list;
        struct hrtimer timer;
};

/*
 * These are per-cpu for now, they will need to be configured by the
 * complete_queues parameter and appropriately mapped.
 */
static DEFINE_PER_CPU(struct completion_queue, completion_queues);

enum {
        NULL_IRQ_NONE           = 0,
        NULL_IRQ_SOFTIRQ        = 1,
        NULL_IRQ_TIMER          = 2,
};

enum {
        NULL_Q_BIO              = 0,
        NULL_Q_RQ               = 1,
        NULL_Q_MQ               = 2,
};
static int submit_queues;
module_param(submit_queues, int, S_IRUGO);
MODULE_PARM_DESC(submit_queues, "Number of submission queues");

static int home_node = NUMA_NO_NODE;
module_param(home_node, int, S_IRUGO);
MODULE_PARM_DESC(home_node, "Home node for the device");

static int queue_mode = NULL_Q_MQ;
module_param(queue_mode, int, S_IRUGO);
MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");
static int gb = 250;
module_param(gb, int, S_IRUGO);
MODULE_PARM_DESC(gb, "Size in GB");

static int bs = 512;
module_param(bs, int, S_IRUGO);
MODULE_PARM_DESC(bs, "Block size (in bytes)");

static int nr_devices = 2;
module_param(nr_devices, int, S_IRUGO);
MODULE_PARM_DESC(nr_devices, "Number of devices to register");
static int irqmode = NULL_IRQ_SOFTIRQ;
module_param(irqmode, int, S_IRUGO);
MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");

static int completion_nsec = 10000;
module_param(completion_nsec, int, S_IRUGO);
MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");

static int hw_queue_depth = 64;
module_param(hw_queue_depth, int, S_IRUGO);
MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");

static bool use_per_node_hctx = false;
module_param(use_per_node_hctx, bool, S_IRUGO);
MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
static void put_tag(struct nullb_queue *nq, unsigned int tag)
{
        clear_bit_unlock(tag, nq->tag_map);

        if (waitqueue_active(&nq->wait))
                wake_up(&nq->wait);
}

static unsigned int get_tag(struct nullb_queue *nq)
{
        unsigned int tag;

        do {
                tag = find_first_zero_bit(nq->tag_map, nq->queue_depth);
                if (tag >= nq->queue_depth)
                        return -1U;
        } while (test_and_set_bit_lock(tag, nq->tag_map));

        return tag;
}
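/*
 * Command allocation: __alloc_cmd() grabs a tag and hands out the
 * pre-allocated nullb_cmd for that slot; alloc_cmd() optionally sleeps
 * on nq->wait until a tag becomes free.
 */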
static void free_cmd(struct nullb_cmd *cmd)
{
        put_tag(cmd->nq, cmd->tag);
}

static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
{
        struct nullb_cmd *cmd;
        unsigned int tag;

        tag = get_tag(nq);
        if (tag != -1U) {
                cmd = &nq->cmds[tag];
                cmd->tag = tag;
                cmd->nq = nq;
                return cmd;
        }

        return NULL;
}

static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
{
        struct nullb_cmd *cmd;
        DEFINE_WAIT(wait);

        cmd = __alloc_cmd(nq);
        if (cmd || !can_wait)
                return cmd;

        do {
                prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
                cmd = __alloc_cmd(nq);
                if (cmd)
                        break;

                io_schedule();
        } while (1);

        finish_wait(&nq->wait, &wait);
        return cmd;
}
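/*
 * Completion: end_cmd() finishes the I/O through whichever interface
 * issued it (blk-mq request, legacy request, or bare bio) and returns
 * the command tag for the non-blk-mq modes.
 */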
static void end_cmd(struct nullb_cmd *cmd)
{
        if (cmd->rq) {
                if (queue_mode == NULL_Q_MQ)
                        blk_mq_end_io(cmd->rq, 0);
                else {
                        INIT_LIST_HEAD(&cmd->rq->queuelist);
                        blk_end_request_all(cmd->rq, 0);
                }
        } else if (cmd->bio)
                bio_endio(cmd->bio, 0);

        if (queue_mode != NULL_Q_MQ)
                free_cmd(cmd);
}
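/*
 * Timer completion mode: commands are queued on a per-cpu llist and
 * completed in batches when the hrtimer fires.
 */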
static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
{
        struct completion_queue *cq;
        struct llist_node *entry;
        struct nullb_cmd *cmd;

        cq = &per_cpu(completion_queues, smp_processor_id());

        while ((entry = llist_del_all(&cq->list)) != NULL) {
                do {
                        cmd = container_of(entry, struct nullb_cmd, ll_list);
                        end_cmd(cmd);
                        entry = entry->next;
                } while (entry);
        }

        return HRTIMER_NORESTART;
}
static void null_cmd_end_timer(struct nullb_cmd *cmd)
{
        struct completion_queue *cq = &per_cpu(completion_queues, get_cpu());

        cmd->ll_list.next = NULL;
        if (llist_add(&cmd->ll_list, &cq->list)) {
                ktime_t kt = ktime_set(0, completion_nsec);

                hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL);
        }

        put_cpu();
}
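/*
 * Softirq/IPI completion: on SMP, a completed command is pushed onto the
 * submitting cpu's per-cpu llist and finished from an IPI callback;
 * null_softirq_done_fn() is the softirq completion hook used by the
 * legacy request queue.
 */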
static void null_softirq_done_fn(struct request *rq)
{
        blk_end_request_all(rq, 0);
}
#ifdef CONFIG_SMP

static void null_ipi_cmd_end_io(void *data)
{
        struct completion_queue *cq;
        struct llist_node *entry, *next;
        struct nullb_cmd *cmd;

        cq = &per_cpu(completion_queues, smp_processor_id());

        entry = llist_del_all(&cq->list);

        while (entry) {
                next = entry->next;
                cmd = llist_entry(entry, struct nullb_cmd, ll_list);
                end_cmd(cmd);
                entry = next;
        }
}

static void null_cmd_end_ipi(struct nullb_cmd *cmd)
{
        struct call_single_data *data = &cmd->csd;
        int cpu = get_cpu();
        struct completion_queue *cq = &per_cpu(completion_queues, cpu);

        cmd->ll_list.next = NULL;

        if (llist_add(&cmd->ll_list, &cq->list)) {
                data->func = null_ipi_cmd_end_io;
                __smp_call_function_single(cpu, data, 0);
        }

        put_cpu();
}

#endif /* CONFIG_SMP */
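/*
 * Dispatch a completed command according to irqmode. Without CONFIG_SMP
 * the softirq/IPI path is unavailable and completion falls back to the
 * inline (NULL_IRQ_NONE) path.
 */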
static inline void null_handle_cmd(struct nullb_cmd *cmd)
{
        /* Complete IO by inline, softirq or timer */
        switch (irqmode) {
        case NULL_IRQ_NONE:
                end_cmd(cmd);
                break;
        case NULL_IRQ_SOFTIRQ:
#ifdef CONFIG_SMP
                null_cmd_end_ipi(cmd);
#else
                end_cmd(cmd);
#endif
                break;
        case NULL_IRQ_TIMER:
                null_cmd_end_timer(cmd);
                break;
        }
}
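/*
 * Map the submitting cpu to one of the device's submission queues by
 * dividing the cpu id space evenly across nr_queues.
 */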
static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
{
        int index = 0;

        if (nullb->nr_queues != 1)
                index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues);

        return &nullb->queues[index];
}
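/*
 * I/O entry points: null_queue_bio() for bio mode, null_rq_prep_fn() plus
 * null_request_fn() for the legacy request queue, and null_queue_rq()
 * for blk-mq.
 */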
static void null_queue_bio(struct request_queue *q, struct bio *bio)
{
        struct nullb *nullb = q->queuedata;
        struct nullb_queue *nq = nullb_to_queue(nullb);
        struct nullb_cmd *cmd;

        cmd = alloc_cmd(nq, 1);
        cmd->bio = bio;

        null_handle_cmd(cmd);
}
static int null_rq_prep_fn(struct request_queue *q, struct request *req)
{
        struct nullb *nullb = q->queuedata;
        struct nullb_queue *nq = nullb_to_queue(nullb);
        struct nullb_cmd *cmd;

        cmd = alloc_cmd(nq, 0);
        if (cmd) {
                cmd->rq = req;
                req->special = cmd;
                return BLKPREP_OK;
        }

        return BLKPREP_DEFER;
}
static void null_request_fn(struct request_queue *q)
{
        struct request *rq;

        while ((rq = blk_fetch_request(q)) != NULL) {
                struct nullb_cmd *cmd = rq->special;

                spin_unlock_irq(q->queue_lock);
                null_handle_cmd(cmd);
                spin_lock_irq(q->queue_lock);
        }
}
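/*
 * blk-mq ->queue_rq handler: the command block for the request is reached
 * through rq->special and dispatched like the other modes.
 */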
static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
{
        struct nullb_cmd *cmd = rq->special;

        cmd->rq = rq;
        cmd->nq = hctx->driver_data;

        null_handle_cmd(cmd);
        return BLK_MQ_RQ_QUEUE_OK;
}
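/*
 * Optional per-node allocation of hardware contexts, used when
 * use_per_node_hctx is set: hardware queues are spread across online
 * NUMA nodes and each hctx is allocated on its node.
 */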
static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_reg *reg, unsigned int hctx_index)
{
        int b_size = DIV_ROUND_UP(reg->nr_hw_queues, nr_online_nodes);
        int tip = (reg->nr_hw_queues % nr_online_nodes);
        int node = 0, i, n;

        /*
         * Split submit queues evenly wrt to the number of nodes. If uneven,
         * fill the first buckets with one extra, until the rest is filled with
         * no extra.
         */
        for (i = 0, n = 1; i < hctx_index; i++, n++) {
                if (n % b_size == 0) {
                        n = 0;
                        node++;

                        tip--;
                        if (!tip)
                                b_size = reg->nr_hw_queues / nr_online_nodes;
                }
        }

        /*
         * A node might not be online, therefore map the relative node id to the
         * real node id.
         */
        for_each_online_node(n) {
                if (!node)
                        break;
                node--;
        }

        return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, n);
}
static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index)
{
        kfree(hctx);
}
static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
{
        init_waitqueue_head(&nq->wait);
        nq->queue_depth = nullb->queue_depth;
}
static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
                          unsigned int index)
{
        struct nullb *nullb = data;
        struct nullb_queue *nq = &nullb->queues[index];

        hctx->driver_data = nq;
        null_init_queue(nullb, nq);
        nullb->nr_queues++;

        return 0;
}
static struct blk_mq_ops null_mq_ops = {
        .queue_rq       = null_queue_rq,
        .map_queue      = blk_mq_map_queue,
        .init_hctx      = null_init_hctx,
};

static struct blk_mq_reg null_mq_reg = {
        .ops            = &null_mq_ops,
        .cmd_size       = sizeof(struct nullb_cmd),
        .flags          = BLK_MQ_F_SHOULD_MERGE,
};
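/*
 * Tear down one device: drop it from nullb_list and release the gendisk,
 * the request queue and the nullb itself.
 */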
static void null_del_dev(struct nullb *nullb)
{
        list_del_init(&nullb->list);

        del_gendisk(nullb->disk);
        blk_cleanup_queue(nullb->q);
        put_disk(nullb->disk);
        kfree(nullb);
}
static int null_open(struct block_device *bdev, fmode_t mode)
{
        return 0;
}

static void null_release(struct gendisk *disk, fmode_t mode)
{
}

static const struct block_device_operations null_fops = {
        .owner          = THIS_MODULE,
        .open           = null_open,
        .release        = null_release,
};
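/*
 * Queue setup helpers: each nullb_queue gets a preallocated array of
 * queue_depth commands plus the tag bitmap used to hand them out.
 */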
static int setup_commands(struct nullb_queue *nq)
{
        struct nullb_cmd *cmd;
        int i, tag_size;

        nq->cmds = kzalloc(nq->queue_depth * sizeof(*cmd), GFP_KERNEL);
        if (!nq->cmds)
                return -ENOMEM;

        tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
        nq->tag_map = kzalloc(tag_size * sizeof(unsigned long), GFP_KERNEL);
        if (!nq->tag_map) {
                kfree(nq->cmds);
                return -ENOMEM;
        }

        for (i = 0; i < nq->queue_depth; i++) {
                cmd = &nq->cmds[i];
                INIT_LIST_HEAD(&cmd->list);
                cmd->ll_list.next = NULL;
                cmd->tag = -1U;
        }

        return 0;
}
static void cleanup_queue(struct nullb_queue *nq)
{
        kfree(nq->tag_map);
        kfree(nq->cmds);
}

static void cleanup_queues(struct nullb *nullb)
{
        int i;

        for (i = 0; i < nullb->nr_queues; i++)
                cleanup_queue(&nullb->queues[i]);

        kfree(nullb->queues);
}
static int setup_queues(struct nullb *nullb)
{
        nullb->queues = kzalloc(submit_queues * sizeof(struct nullb_queue),
                                GFP_KERNEL);
        if (!nullb->queues)
                return -ENOMEM;

        nullb->nr_queues = 0;
        nullb->queue_depth = hw_queue_depth;

        return 0;
}
static int init_driver_queues(struct nullb *nullb)
{
        struct nullb_queue *nq;
        int i, ret = 0;

        for (i = 0; i < submit_queues; i++) {
                nq = &nullb->queues[i];

                null_init_queue(nullb, nq);

                ret = setup_commands(nq);
                if (ret)
                        goto err_queue;
                nullb->nr_queues++;
        }

        return 0;
err_queue:
        cleanup_queues(nullb);
        return ret;
}
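/*
 * Create one nullb instance: allocate the per-device structure, set up
 * the submission queues, create the request queue for the selected
 * queue_mode (bio, rq or blk-mq), then allocate and register the gendisk.
 */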
static int null_add_dev(void)
{
        struct gendisk *disk;
        struct nullb *nullb;
        sector_t size;

        nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node);
        if (!nullb)
                return -ENOMEM;

        spin_lock_init(&nullb->lock);

        if (queue_mode == NULL_Q_MQ && use_per_node_hctx)
                submit_queues = nr_online_nodes;

        if (setup_queues(nullb))
                goto err;

        if (queue_mode == NULL_Q_MQ) {
                null_mq_reg.numa_node = home_node;
                null_mq_reg.queue_depth = hw_queue_depth;
                null_mq_reg.nr_hw_queues = submit_queues;

                if (use_per_node_hctx) {
                        null_mq_reg.ops->alloc_hctx = null_alloc_hctx;
                        null_mq_reg.ops->free_hctx = null_free_hctx;
                } else {
                        null_mq_reg.ops->alloc_hctx = blk_mq_alloc_single_hw_queue;
                        null_mq_reg.ops->free_hctx = blk_mq_free_single_hw_queue;
                }

                nullb->q = blk_mq_init_queue(&null_mq_reg, nullb);
        } else if (queue_mode == NULL_Q_BIO) {
                nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
                blk_queue_make_request(nullb->q, null_queue_bio);
                init_driver_queues(nullb);
        } else {
                nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node);
                blk_queue_prep_rq(nullb->q, null_rq_prep_fn);

                blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
                init_driver_queues(nullb);
        }

        if (!nullb->q)
                goto queue_fail;

        nullb->q->queuedata = nullb;
        queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);

        disk = nullb->disk = alloc_disk_node(1, home_node);
        if (!disk) {
queue_fail:
                blk_cleanup_queue(nullb->q);
                cleanup_queues(nullb);
err:
                kfree(nullb);
                return -ENOMEM;
        }

        mutex_lock(&lock);
        list_add_tail(&nullb->list, &nullb_list);
        nullb->index = nullb_indexes++;
        mutex_unlock(&lock);

        blk_queue_logical_block_size(nullb->q, bs);
        blk_queue_physical_block_size(nullb->q, bs);

        size = gb * 1024 * 1024 * 1024ULL;
        sector_div(size, bs);
        set_capacity(disk, size);

        disk->flags |= GENHD_FL_EXT_DEVT;
        disk->major = null_major;
        disk->first_minor = nullb->index;
        disk->fops = &null_fops;
        disk->private_data = nullb;
        disk->queue = nullb->q;
        sprintf(disk->disk_name, "nullb%d", nullb->index);
        add_disk(disk);
        return 0;
}
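/*
 * Module init: validate the module parameters, set up the per-cpu
 * completion queues, register the block major and create nr_devices
 * instances.
 */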
static int __init null_init(void)
{
        unsigned int i;

#if !defined(CONFIG_SMP)
        if (irqmode == NULL_IRQ_SOFTIRQ) {
                pr_warn("null_blk: softirq completions not available.\n");
                pr_warn("null_blk: using direct completions.\n");
                irqmode = NULL_IRQ_NONE;
        }
#endif

        if (bs > PAGE_SIZE) {
                pr_warn("null_blk: invalid block size\n");
                pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE);
                bs = PAGE_SIZE;
        }

        if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
                if (submit_queues < nr_online_nodes) {
                        pr_warn("null_blk: submit_queues param is set to %u.",
                                nr_online_nodes);
                        submit_queues = nr_online_nodes;
                }
        } else if (submit_queues > nr_cpu_ids)
                submit_queues = nr_cpu_ids;
        else if (!submit_queues)
                submit_queues = 1;

        /* Initialize a separate list for each CPU for issuing softirqs */
        for_each_possible_cpu(i) {
                struct completion_queue *cq = &per_cpu(completion_queues, i);

                init_llist_head(&cq->list);

                if (irqmode != NULL_IRQ_TIMER)
                        continue;

                hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
                cq->timer.function = null_cmd_timer_expired;
        }

        mutex_init(&lock);

        null_major = register_blkdev(0, "nullb");
        if (null_major < 0)
                return null_major;

        for (i = 0; i < nr_devices; i++) {
                if (null_add_dev()) {
                        unregister_blkdev(null_major, "nullb");
                        return -EINVAL;
                }
        }

        pr_info("null: module loaded\n");
        return 0;
}
static void __exit null_exit(void)
{
        struct nullb *nullb;

        unregister_blkdev(null_major, "nullb");

        mutex_lock(&lock);
        while (!list_empty(&nullb_list)) {
                nullb = list_entry(nullb_list.next, struct nullb, list);
                null_del_dev(nullb);
        }
        mutex_unlock(&lock);
}

module_init(null_init);
module_exit(null_exit);
MODULE_AUTHOR("Jens Axboe <jaxboe@fusionio.com>");
MODULE_LICENSE("GPL");