#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/blk-mq.h>
#include <linux/hrtimer.h>
#include <linux/lightnvm.h>
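
/*
 * null_blk: a "null" block device that accepts and completes requests
 * without transferring or storing any data. It is used to benchmark the
 * block layer (legacy request, bio-based and blk-mq paths) and, optionally,
 * the LightNVM subsystem.
 */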

struct nullb_cmd {
	struct list_head list;
	struct llist_node ll_list;
	struct call_single_data csd;
	struct request *rq;
	struct bio *bio;
	unsigned int tag;
	struct nullb_queue *nq;
	struct hrtimer timer;
};

struct nullb_queue {
	unsigned long *tag_map;
	wait_queue_head_t wait;
	unsigned int queue_depth;

	struct nullb_cmd *cmds;
};

struct nullb {
	struct list_head list;
	unsigned int index;
	struct request_queue *q;
	struct gendisk *disk;
	struct nvm_dev *ndev;
	struct blk_mq_tag_set *tag_set;
	struct blk_mq_tag_set __tag_set;
	unsigned int queue_depth;
	spinlock_t lock;

	struct nullb_queue *queues;
	unsigned int nr_queues;
	char disk_name[DISK_NAME_LEN];
};

static LIST_HEAD(nullb_list);
static struct mutex lock;
static int null_major;
static int nullb_indexes;
static struct kmem_cache *ppa_cache;
static struct blk_mq_tag_set tag_set;

enum {
	NULL_IRQ_NONE		= 0,
	NULL_IRQ_SOFTIRQ	= 1,
	NULL_IRQ_TIMER		= 2,
};

enum {
	NULL_Q_BIO		= 0,
	NULL_Q_RQ		= 1,
	NULL_Q_MQ		= 2,
};
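
/*
 * Module parameters. All are read-only via sysfs (S_IRUGO) and take effect
 * at load time; a typical invocation (values here are only an example) is:
 *
 *	modprobe null_blk nr_devices=2 queue_mode=2 hw_queue_depth=128 \
 *		irqmode=2 completion_nsec=50000
 */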

static int submit_queues;
module_param(submit_queues, int, S_IRUGO);
MODULE_PARM_DESC(submit_queues, "Number of submission queues");

static int home_node = NUMA_NO_NODE;
module_param(home_node, int, S_IRUGO);
MODULE_PARM_DESC(home_node, "Home node for the device");

static int queue_mode = NULL_Q_MQ;

static int null_param_store_val(const char *str, int *val, int min, int max)
{
	int ret, new_val;

	ret = kstrtoint(str, 10, &new_val);
	if (ret)
		return -EINVAL;

	if (new_val < min || new_val > max)
		return -EINVAL;

	*val = new_val;
	return 0;
}

static int null_set_queue_mode(const char *str, const struct kernel_param *kp)
{
	return null_param_store_val(str, &queue_mode, NULL_Q_BIO, NULL_Q_MQ);
}

static const struct kernel_param_ops null_queue_mode_param_ops = {
	.set	= null_set_queue_mode,
	.get	= param_get_int,
};

device_param_cb(queue_mode, &null_queue_mode_param_ops, &queue_mode, S_IRUGO);
MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");

static int gb = 250;
module_param(gb, int, S_IRUGO);
MODULE_PARM_DESC(gb, "Size in GB");

static int bs = 512;
module_param(bs, int, S_IRUGO);
MODULE_PARM_DESC(bs, "Block size (in bytes)");

static int nr_devices = 1;
module_param(nr_devices, int, S_IRUGO);
MODULE_PARM_DESC(nr_devices, "Number of devices to register");

static bool use_lightnvm;
module_param(use_lightnvm, bool, S_IRUGO);
MODULE_PARM_DESC(use_lightnvm, "Register as a LightNVM device");

static bool blocking;
module_param(blocking, bool, S_IRUGO);
MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");

static bool shared_tags;
module_param(shared_tags, bool, S_IRUGO);
MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");

static int irqmode = NULL_IRQ_SOFTIRQ;

static int null_set_irqmode(const char *str, const struct kernel_param *kp)
{
	return null_param_store_val(str, &irqmode, NULL_IRQ_NONE,
					NULL_IRQ_TIMER);
}

static const struct kernel_param_ops null_irqmode_param_ops = {
	.set	= null_set_irqmode,
	.get	= param_get_int,
};

device_param_cb(irqmode, &null_irqmode_param_ops, &irqmode, S_IRUGO);
MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");

static unsigned long completion_nsec = 10000;
module_param(completion_nsec, ulong, S_IRUGO);
MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");

static int hw_queue_depth = 64;
module_param(hw_queue_depth, int, S_IRUGO);
MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");

static bool use_per_node_hctx = false;
module_param(use_per_node_hctx, bool, S_IRUGO);
MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
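
/*
 * Tag management for the bio and legacy request paths: each nullb_queue
 * keeps a bitmap (tag_map) of free command slots. get_tag()/put_tag() hand
 * out and release slots, and waiters sleeping in alloc_cmd() are woken when
 * a tag is returned. The blk-mq path uses the block layer's own tags.
 */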

static void put_tag(struct nullb_queue *nq, unsigned int tag)
{
	clear_bit_unlock(tag, nq->tag_map);

	if (waitqueue_active(&nq->wait))
		wake_up(&nq->wait);
}

static unsigned int get_tag(struct nullb_queue *nq)
{
	unsigned int tag;

	do {
		tag = find_first_zero_bit(nq->tag_map, nq->queue_depth);
		if (tag >= nq->queue_depth)
			return -1U;
	} while (test_and_set_bit_lock(tag, nq->tag_map));

	return tag;
}

static void free_cmd(struct nullb_cmd *cmd)
{
	put_tag(cmd->nq, cmd->tag);
}

static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer);
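
/*
 * Commands are preallocated per queue (see setup_commands()) and indexed by
 * tag. __alloc_cmd() grabs a free tag; alloc_cmd() optionally sleeps on the
 * queue's waitqueue until one becomes available. With timer-based irqmode,
 * the per-command hrtimer is initialized here and armed later in
 * null_cmd_end_timer().
 */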

static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
{
	struct nullb_cmd *cmd;
	unsigned int tag;

	tag = get_tag(nq);
	if (tag != -1U) {
		cmd = &nq->cmds[tag];
		cmd->tag = tag;
		cmd->nq = nq;
		if (irqmode == NULL_IRQ_TIMER) {
			hrtimer_init(&cmd->timer, CLOCK_MONOTONIC,
				     HRTIMER_MODE_REL);
			cmd->timer.function = null_cmd_timer_expired;
		}
		return cmd;
	}

	return NULL;
}

static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
{
	struct nullb_cmd *cmd;
	DEFINE_WAIT(wait);

	cmd = __alloc_cmd(nq);
	if (cmd || !can_wait)
		return cmd;

	do {
		prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
		cmd = __alloc_cmd(nq);
		if (cmd)
			break;

		io_schedule();
	} while (1);

	finish_wait(&nq->wait, &wait);
	return cmd;
}
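
/*
 * Completion path: end_cmd() finishes the request according to queue_mode,
 * releases the driver tag and, for the legacy request path, restarts a
 * queue that was stopped while it was out of tags.
 */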

static void end_cmd(struct nullb_cmd *cmd)
{
	struct request_queue *q = NULL;

	if (cmd->rq)
		q = cmd->rq->q;

	switch (queue_mode) {
	case NULL_Q_MQ:
		blk_mq_end_request(cmd->rq, BLK_STS_OK);
		return;
	case NULL_Q_RQ:
		INIT_LIST_HEAD(&cmd->rq->queuelist);
		blk_end_request_all(cmd->rq, BLK_STS_OK);
		break;
	case NULL_Q_BIO:
		bio_endio(cmd->bio);
		break;
	}

	free_cmd(cmd);

	/* Restart queue if needed, as we are freeing a tag */
	if (queue_mode == NULL_Q_RQ && blk_queue_stopped(q)) {
		unsigned long flags;

		spin_lock_irqsave(q->queue_lock, flags);
		blk_start_queue_async(q);
		spin_unlock_irqrestore(q->queue_lock, flags);
	}
}

static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
{
	end_cmd(container_of(timer, struct nullb_cmd, timer));

	return HRTIMER_NORESTART;
}

static void null_cmd_end_timer(struct nullb_cmd *cmd)
{
	ktime_t kt = completion_nsec;

	hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL);
}

static void null_softirq_done_fn(struct request *rq)
{
	if (queue_mode == NULL_Q_MQ)
		end_cmd(blk_mq_rq_to_pdu(rq));
	else
		end_cmd(rq->special);
}
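
/*
 * null_handle_cmd() is the single "do the I/O" hook for all three queue
 * modes: depending on irqmode the command is completed inline, deferred to
 * the softirq completion path, or delayed by completion_nsec via hrtimer to
 * emulate device latency.
 */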

static inline void null_handle_cmd(struct nullb_cmd *cmd)
{
	/* Complete IO by inline, softirq or timer */
	switch (irqmode) {
	case NULL_IRQ_SOFTIRQ:
		switch (queue_mode) {
		case NULL_Q_MQ:
			blk_mq_complete_request(cmd->rq);
			break;
		case NULL_Q_RQ:
			blk_complete_request(cmd->rq);
			break;
		case NULL_Q_BIO:
			/*
			 * XXX: no proper submitting cpu information available.
			 */
			end_cmd(cmd);
			break;
		}
		break;
	case NULL_IRQ_NONE:
		end_cmd(cmd);
		break;
	case NULL_IRQ_TIMER:
		null_cmd_end_timer(cmd);
		break;
	}
}

static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
{
	int index = 0;

	if (nullb->nr_queues != 1)
		index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues);

	return &nullb->queues[index];
}

static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio)
{
	struct nullb *nullb = q->queuedata;
	struct nullb_queue *nq = nullb_to_queue(nullb);
	struct nullb_cmd *cmd;

	cmd = alloc_cmd(nq, 1);
	cmd->bio = bio;

	null_handle_cmd(cmd);
	return BLK_QC_T_NONE;
}

static int null_rq_prep_fn(struct request_queue *q, struct request *req)
{
	struct nullb *nullb = q->queuedata;
	struct nullb_queue *nq = nullb_to_queue(nullb);
	struct nullb_cmd *cmd;

	cmd = alloc_cmd(nq, 0);
	if (cmd) {
		cmd->rq = req;
		req->special = cmd;
		return BLKPREP_OK;
	}
	blk_stop_queue(q);

	return BLKPREP_DEFER;
}

static void null_request_fn(struct request_queue *q)
{
	struct request *rq;

	while ((rq = blk_fetch_request(q)) != NULL) {
		struct nullb_cmd *cmd = rq->special;

		spin_unlock_irq(q->queue_lock);
		null_handle_cmd(cmd);
		spin_lock_irq(q->queue_lock);
	}
}
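
/*
 * blk-mq path: null_queue_rq() runs once per request, with the command
 * payload embedded in the request PDU, so no driver-side tag map is needed.
 * BLK_MQ_F_BLOCKING (the "blocking" module parameter) allows this handler
 * to sleep.
 */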

static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
			 const struct blk_mq_queue_data *bd)
{
	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);

	might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);

	if (irqmode == NULL_IRQ_TIMER) {
		hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		cmd->timer.function = null_cmd_timer_expired;
	}
	cmd->rq = bd->rq;
	cmd->nq = hctx->driver_data;

	blk_mq_start_request(bd->rq);

	null_handle_cmd(cmd);
	return BLK_STS_OK;
}

static const struct blk_mq_ops null_mq_ops = {
	.queue_rq	= null_queue_rq,
	.complete	= null_softirq_done_fn,
};

static void cleanup_queue(struct nullb_queue *nq)
{
	kfree(nq->tag_map);
	kfree(nq->cmds);
}

static void cleanup_queues(struct nullb *nullb)
{
	int i;

	for (i = 0; i < nullb->nr_queues; i++)
		cleanup_queue(&nullb->queues[i]);

	kfree(nullb->queues);
}
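
/*
 * LightNVM support (use_lightnvm=1): instead of a gendisk, the device is
 * registered with the lightnvm core and services the identity, submit_io
 * and DMA pool callbacks below. This section is compiled only when
 * CONFIG_NVM is enabled; the stubs at its end cover the !CONFIG_NVM case.
 */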

#ifdef CONFIG_NVM

static void null_lnvm_end_io(struct request *rq, blk_status_t status)
{
	struct nvm_rq *rqd = rq->end_io_data;

	/* XXX: lightnvm core seems to expect NVM_RSP_* values here.. */
	rqd->error = status ? -EIO : 0;
	nvm_end_io(rqd);

	blk_put_request(rq);
}

static int null_lnvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
{
	struct request_queue *q = dev->q;
	struct request *rq;
	struct bio *bio = rqd->bio;

	rq = blk_mq_alloc_request(q,
		op_is_write(bio_op(bio)) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
	if (IS_ERR(rq))
		return -ENOMEM;

	blk_init_request_from_bio(rq, bio);

	rq->end_io_data = rqd;

	blk_execute_rq_nowait(q, NULL, rq, 0, null_lnvm_end_io);

	return 0;
}

static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
{
	sector_t size = gb * 1024 * 1024 * 1024ULL;
	sector_t blksize;
	struct nvm_id_group *grp;

	id->ppaf.blk_offset = 0;
	id->ppaf.blk_len = 16;
	id->ppaf.pg_offset = 16;
	id->ppaf.pg_len = 16;
	id->ppaf.sect_offset = 32;
	id->ppaf.sect_len = 8;
	id->ppaf.pln_offset = 40;
	id->ppaf.pln_len = 8;
	id->ppaf.lun_offset = 48;
	id->ppaf.lun_len = 8;
	id->ppaf.ch_offset = 56;
	id->ppaf.ch_len = 8;

	sector_div(size, bs); /* convert size to pages */
	size >>= 8; /* convert size to pages per block */
	grp = &id->grp;
	blksize = size;
	size >>= 16;
	grp->num_lun = size + 1;
	sector_div(blksize, grp->num_lun);
	grp->num_blk = blksize;

	grp->mpos = 0x010101; /* single plane rwe */
	grp->cpar = hw_queue_depth;

	return 0;
}

static void *null_lnvm_create_dma_pool(struct nvm_dev *dev, char *name)
{
	mempool_t *virtmem_pool;

	virtmem_pool = mempool_create_slab_pool(64, ppa_cache);
	if (!virtmem_pool) {
		pr_err("null_blk: Unable to create virtual memory pool\n");
		return NULL;
	}

	return virtmem_pool;
}

static void null_lnvm_destroy_dma_pool(void *pool)
{
	mempool_destroy(pool);
}

static void *null_lnvm_dev_dma_alloc(struct nvm_dev *dev, void *pool,
				gfp_t mem_flags, dma_addr_t *dma_handler)
{
	return mempool_alloc(pool, mem_flags);
}

static void null_lnvm_dev_dma_free(void *pool, void *entry,
							dma_addr_t dma_handler)
{
	mempool_free(entry, pool);
}

static struct nvm_dev_ops null_lnvm_dev_ops = {
	.identity		= null_lnvm_id,
	.submit_io		= null_lnvm_submit_io,

	.create_dma_pool	= null_lnvm_create_dma_pool,
	.destroy_dma_pool	= null_lnvm_destroy_dma_pool,
	.dev_dma_alloc		= null_lnvm_dev_dma_alloc,
	.dev_dma_free		= null_lnvm_dev_dma_free,

	/* Simulate nvme protocol restriction */
	.max_phys_sect		= 64,
};

static int null_nvm_register(struct nullb *nullb)
{
	struct nvm_dev *dev;
	int rv;

	dev = nvm_alloc_dev(0);
	if (!dev)
		return -ENOMEM;

	dev->q = nullb->q;
	memcpy(dev->name, nullb->disk_name, DISK_NAME_LEN);
	dev->ops = &null_lnvm_dev_ops;

	rv = nvm_register(dev);
	if (rv) {
		kfree(dev);
		return rv;
	}
	nullb->ndev = dev;
	return 0;
}

static void null_nvm_unregister(struct nullb *nullb)
{
	nvm_unregister(nullb->ndev);
}
#else
static int null_nvm_register(struct nullb *nullb)
{
	pr_err("null_blk: CONFIG_NVM needs to be enabled for LightNVM\n");
	return -EINVAL;
}
static void null_nvm_unregister(struct nullb *nullb) {}
#endif /* CONFIG_NVM */
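
/*
 * Device teardown: remove the device from the global list, unregister
 * either the LightNVM device or the gendisk, then release the queue, the
 * tag set (when it is private to this device) and the per-queue command
 * state.
 */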

static void null_del_dev(struct nullb *nullb)
{
	list_del_init(&nullb->list);

	if (use_lightnvm)
		null_nvm_unregister(nullb);
	else
		del_gendisk(nullb->disk);
	blk_cleanup_queue(nullb->q);
	if (queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
		blk_mq_free_tag_set(nullb->tag_set);
	if (!use_lightnvm)
		put_disk(nullb->disk);
	cleanup_queues(nullb);
	kfree(nullb);
}

static int null_open(struct block_device *bdev, fmode_t mode)
{
	return 0;
}

static void null_release(struct gendisk *disk, fmode_t mode)
{
}

static const struct block_device_operations null_fops = {
	.owner =	THIS_MODULE,
	.open =		null_open,
	.release =	null_release,
};
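
/*
 * Queue bring-up helpers: null_init_queue() seeds the per-queue waitqueue
 * and depth; for blk-mq, null_init_queues() walks the hardware contexts and
 * binds each one to a nullb_queue via hctx->driver_data.
 */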

static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
{
	init_waitqueue_head(&nq->wait);
	nq->queue_depth = nullb->queue_depth;
}

static void null_init_queues(struct nullb *nullb)
{
	struct request_queue *q = nullb->q;
	struct blk_mq_hw_ctx *hctx;
	struct nullb_queue *nq;
	int i;

	queue_for_each_hw_ctx(q, hctx, i) {
		if (!hctx->nr_ctx || !hctx->tags)
			continue;
		nq = &nullb->queues[i];
		hctx->driver_data = nq;
		null_init_queue(nullb, nq);
		nullb->nr_queues++;
	}
}

static int setup_commands(struct nullb_queue *nq)
{
	struct nullb_cmd *cmd;
	int i, tag_size;

	nq->cmds = kzalloc(nq->queue_depth * sizeof(*cmd), GFP_KERNEL);
	if (!nq->cmds)
		return -ENOMEM;

	tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
	nq->tag_map = kzalloc(tag_size * sizeof(unsigned long), GFP_KERNEL);
	if (!nq->tag_map) {
		kfree(nq->cmds);
		return -ENOMEM;
	}

	for (i = 0; i < nq->queue_depth; i++) {
		cmd = &nq->cmds[i];
		INIT_LIST_HEAD(&cmd->list);
		cmd->ll_list.next = NULL;
		cmd->tag = -1U;
	}

	return 0;
}

static int setup_queues(struct nullb *nullb)
{
	nullb->queues = kzalloc(submit_queues * sizeof(struct nullb_queue),
								GFP_KERNEL);
	if (!nullb->queues)
		return -ENOMEM;

	nullb->nr_queues = 0;
	nullb->queue_depth = hw_queue_depth;

	return 0;
}

static int init_driver_queues(struct nullb *nullb)
{
	struct nullb_queue *nq;
	int i, ret = 0;

	for (i = 0; i < submit_queues; i++) {
		nq = &nullb->queues[i];

		null_init_queue(nullb, nq);

		ret = setup_commands(nq);
		if (ret)
			return ret;
		nullb->nr_queues++;
	}
	return 0;
}

static int null_gendisk_register(struct nullb *nullb)
{
	struct gendisk *disk;
	sector_t size;

	disk = nullb->disk = alloc_disk_node(1, home_node);
	if (!disk)
		return -ENOMEM;
	size = gb * 1024 * 1024 * 1024ULL;
	set_capacity(disk, size >> 9);

	disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO;
	disk->major		= null_major;
	disk->first_minor	= nullb->index;
	disk->fops		= &null_fops;
	disk->private_data	= nullb;
	disk->queue		= nullb->q;
	strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);

	add_disk(disk);
	return 0;
}

static int null_init_tag_set(struct blk_mq_tag_set *set)
{
	set->ops = &null_mq_ops;
	set->nr_hw_queues = submit_queues;
	set->queue_depth = hw_queue_depth;
	set->numa_node = home_node;
	set->cmd_size	= sizeof(struct nullb_cmd);
	set->flags = BLK_MQ_F_SHOULD_MERGE;
	set->driver_data = NULL;

	if (blocking)
		set->flags |= BLK_MQ_F_BLOCKING;

	return blk_mq_alloc_tag_set(set);
}
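
/*
 * null_add_dev() builds one device: allocate the nullb, set up the
 * submission queues, create the request queue for the selected queue_mode
 * (blk-mq, bio-based or legacy request), apply the block-size limits and
 * finally register it either as a LightNVM device or as a gendisk named
 * nullb<index>.
 */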

static int null_add_dev(void)
{
	struct nullb *nullb;
	int rv;

	nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node);
	if (!nullb) {
		rv = -ENOMEM;
		goto out;
	}

	spin_lock_init(&nullb->lock);

	if (queue_mode == NULL_Q_MQ && use_per_node_hctx)
		submit_queues = nr_online_nodes;

	rv = setup_queues(nullb);
	if (rv)
		goto out_free_nullb;

	if (queue_mode == NULL_Q_MQ) {
		if (shared_tags) {
			nullb->tag_set = &tag_set;
			rv = 0;
		} else {
			nullb->tag_set = &nullb->__tag_set;
			rv = null_init_tag_set(nullb->tag_set);
		}

		if (rv)
			goto out_cleanup_queues;

		nullb->q = blk_mq_init_queue(nullb->tag_set);
		if (IS_ERR(nullb->q)) {
			rv = -ENOMEM;
			goto out_cleanup_tags;
		}
		null_init_queues(nullb);
	} else if (queue_mode == NULL_Q_BIO) {
		nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
		if (!nullb->q) {
			rv = -ENOMEM;
			goto out_cleanup_queues;
		}
		blk_queue_make_request(nullb->q, null_queue_bio);
		rv = init_driver_queues(nullb);
		if (rv)
			goto out_cleanup_blk_queue;
	} else {
		nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node);
		if (!nullb->q) {
			rv = -ENOMEM;
			goto out_cleanup_queues;
		}
		blk_queue_prep_rq(nullb->q, null_rq_prep_fn);
		blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
		rv = init_driver_queues(nullb);
		if (rv)
			goto out_cleanup_blk_queue;
	}

	nullb->q->queuedata = nullb;
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q);

	mutex_lock(&lock);
	nullb->index = nullb_indexes++;
	mutex_unlock(&lock);

	blk_queue_logical_block_size(nullb->q, bs);
	blk_queue_physical_block_size(nullb->q, bs);

	sprintf(nullb->disk_name, "nullb%d", nullb->index);

	if (use_lightnvm)
		rv = null_nvm_register(nullb);
	else
		rv = null_gendisk_register(nullb);

	if (rv)
		goto out_cleanup_blk_queue;

	mutex_lock(&lock);
	list_add_tail(&nullb->list, &nullb_list);
	mutex_unlock(&lock);

	return 0;
out_cleanup_blk_queue:
	blk_cleanup_queue(nullb->q);
out_cleanup_tags:
	if (queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set)
		blk_mq_free_tag_set(nullb->tag_set);
out_cleanup_queues:
	cleanup_queues(nullb);
out_free_nullb:
	kfree(nullb);
out:
	return rv;
}
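
/*
 * Module init: sanity-check and adjust the parameters (block size, LightNVM
 * constraints, submit_queues), register the block major, optionally create
 * the ppa_cache used by the LightNVM DMA pools, then add nr_devices devices.
 */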

static int __init null_init(void)
{
	int ret = 0;
	unsigned int i;
	struct nullb *nullb;

	if (bs > PAGE_SIZE) {
		pr_warn("null_blk: invalid block size\n");
		pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE);
		bs = PAGE_SIZE;
	}

	if (use_lightnvm && bs != 4096) {
		pr_warn("null_blk: LightNVM only supports 4k block size\n");
		pr_warn("null_blk: defaults block size to 4k\n");
		bs = 4096;
	}

	if (use_lightnvm && queue_mode != NULL_Q_MQ) {
		pr_warn("null_blk: LightNVM only supported for blk-mq\n");
		pr_warn("null_blk: defaults queue mode to blk-mq\n");
		queue_mode = NULL_Q_MQ;
	}

	if (queue_mode == NULL_Q_MQ && shared_tags)
		null_init_tag_set(&tag_set);

	if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
		if (submit_queues < nr_online_nodes) {
			pr_warn("null_blk: submit_queues param is set to %u.",
							nr_online_nodes);
			submit_queues = nr_online_nodes;
		}
	} else if (submit_queues > nr_cpu_ids)
		submit_queues = nr_cpu_ids;
	else if (!submit_queues)
		submit_queues = 1;

	mutex_init(&lock);

	null_major = register_blkdev(0, "nullb");
	if (null_major < 0)
		return null_major;

	if (use_lightnvm) {
		ppa_cache = kmem_cache_create("ppa_cache", 64 * sizeof(u64),
								0, 0, NULL);
		if (!ppa_cache) {
			pr_err("null_blk: unable to create ppa cache\n");
			ret = -ENOMEM;
			goto err_ppa;
		}
	}

	for (i = 0; i < nr_devices; i++) {
		ret = null_add_dev();
		if (ret)
			goto err_dev;
	}

	pr_info("null: module loaded\n");
	return 0;

err_dev:
	while (!list_empty(&nullb_list)) {
		nullb = list_entry(nullb_list.next, struct nullb, list);
		null_del_dev(nullb);
	}
	kmem_cache_destroy(ppa_cache);
err_ppa:
	unregister_blkdev(null_major, "nullb");
	return ret;
}

static void __exit null_exit(void)
{
	struct nullb *nullb;

	unregister_blkdev(null_major, "nullb");

	mutex_lock(&lock);
	while (!list_empty(&nullb_list)) {
		nullb = list_entry(nullb_list.next, struct nullb, list);
		null_del_dev(nullb);
	}
	mutex_unlock(&lock);

	if (queue_mode == NULL_Q_MQ && shared_tags)
		blk_mq_free_tag_set(&tag_set);

	kmem_cache_destroy(ppa_cache);
}

module_init(null_init);
module_exit(null_exit);

MODULE_AUTHOR("Jens Axboe <jaxboe@fusionio.com>");
MODULE_LICENSE("GPL");