/*
 * Block driver for s390 storage class memory.
 *
 * Copyright IBM Corp. 2012
 * Author(s): Sebastian Ott <sebott@linux.vnet.ibm.com>
 */

#define KMSG_COMPONENT "scm_block"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/genhd.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <asm/eadm.h>
#include "scm_blk.h"

debug_info_t *scm_debug;
static int scm_major;
static mempool_t *aidaw_pool;
static DEFINE_SPINLOCK(list_lock);
static LIST_HEAD(inactive_requests);
static unsigned int nr_requests = 64;
static unsigned int nr_requests_per_io = 8;
static atomic_t nr_devices = ATOMIC_INIT(0);
module_param(nr_requests, uint, S_IRUGO);
MODULE_PARM_DESC(nr_requests, "Number of parallel requests.");

module_param(nr_requests_per_io, uint, S_IRUGO);
MODULE_PARM_DESC(nr_requests_per_io, "Number of requests per IO.");

MODULE_DESCRIPTION("Block driver for s390 storage class memory.");
MODULE_LICENSE("GPL");
MODULE_ALIAS("scm:scmdev*");
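
/*
 * Requests are prebuilt at module load time: each scm_request carries an
 * AOB page plus an array of up to nr_requests_per_io block layer requests.
 * Unused scm_requests sit on the inactive_requests list protected by
 * list_lock; pages for additional aidaw lists come from aidaw_pool.
 */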
static void __scm_free_rq(struct scm_request *scmrq)
{
        struct aob_rq_header *aobrq = to_aobrq(scmrq);

        free_page((unsigned long) scmrq->aob);
        __scm_free_rq_cluster(scmrq);
        kfree(scmrq->request);
        kfree(aobrq);
}

static void scm_free_rqs(void)
{
        struct list_head *iter, *safe;
        struct scm_request *scmrq;

        spin_lock_irq(&list_lock);
        list_for_each_safe(iter, safe, &inactive_requests) {
                scmrq = list_entry(iter, struct scm_request, list);
                list_del(&scmrq->list);
                __scm_free_rq(scmrq);
        }
        spin_unlock_irq(&list_lock);

        mempool_destroy(aidaw_pool);
}

static int __scm_alloc_rq(void)
{
        struct aob_rq_header *aobrq;
        struct scm_request *scmrq;

        aobrq = kzalloc(sizeof(*aobrq) + sizeof(*scmrq), GFP_KERNEL);
        if (!aobrq)
                return -ENOMEM;

        scmrq = (void *) aobrq->data;
        scmrq->aob = (void *) get_zeroed_page(GFP_DMA);
        if (!scmrq->aob)
                goto free;

        scmrq->request = kcalloc(nr_requests_per_io, sizeof(scmrq->request[0]),
                                 GFP_KERNEL);
        if (!scmrq->request)
                goto free;

        if (__scm_alloc_rq_cluster(scmrq))
                goto free;

        INIT_LIST_HEAD(&scmrq->list);
        spin_lock_irq(&list_lock);
        list_add(&scmrq->list, &inactive_requests);
        spin_unlock_irq(&list_lock);

        return 0;
free:
        __scm_free_rq(scmrq);
        return -ENOMEM;
}

static int scm_alloc_rqs(unsigned int nrqs)
{
        int ret = 0;

        aidaw_pool = mempool_create_page_pool(max(nrqs/8, 1U), 0);
        if (!aidaw_pool)
                return -ENOMEM;

        while (nrqs-- && !ret)
                ret = __scm_alloc_rq();

        return ret;
}
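
/* Grab an unused request from the inactive list, or NULL if none is left. */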
static struct scm_request *scm_request_fetch(void)
{
        struct scm_request *scmrq = NULL;

        spin_lock(&list_lock);
        if (list_empty(&inactive_requests))
                goto out;
        scmrq = list_first_entry(&inactive_requests, struct scm_request, list);
        list_del(&scmrq->list);
out:
        spin_unlock(&list_lock);
        return scmrq;
}

static void scm_request_done(struct scm_request *scmrq)
{
        unsigned long flags;
        struct msb *msb;
        u64 aidaw;
        int i;

        for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) {
                msb = &scmrq->aob->msb[i];
                aidaw = msb->data_addr;

                if ((msb->flags & MSB_FLAG_IDA) && aidaw &&
                    IS_ALIGNED(aidaw, PAGE_SIZE))
                        mempool_free(virt_to_page(aidaw), aidaw_pool);
        }

        spin_lock_irqsave(&list_lock, flags);
        list_add(&scmrq->list, &inactive_requests);
        spin_unlock_irqrestore(&list_lock, flags);
}

static bool scm_permit_request(struct scm_blk_dev *bdev, struct request *req)
{
        return rq_data_dir(req) != WRITE || bdev->state != SCM_WR_PROHIBIT;
}

static inline struct aidaw *scm_aidaw_alloc(void)
{
        struct page *page = mempool_alloc(aidaw_pool, GFP_ATOMIC);

        return page ? page_address(page) : NULL;
}

static inline unsigned long scm_aidaw_bytes(struct aidaw *aidaw)
{
        unsigned long _aidaw = (unsigned long) aidaw;
        unsigned long bytes = ALIGN(_aidaw, PAGE_SIZE) - _aidaw;

        return (bytes / sizeof(*aidaw)) * PAGE_SIZE;
}
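
/*
 * Reuse the aidaw entries behind scmrq->next_aidaw if they can still
 * describe the requested number of bytes; otherwise pull a fresh, zeroed
 * page from aidaw_pool.
 */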
struct aidaw *scm_aidaw_fetch(struct scm_request *scmrq, unsigned int bytes)
{
        struct aidaw *aidaw;

        if (scm_aidaw_bytes(scmrq->next_aidaw) >= bytes)
                return scmrq->next_aidaw;

        aidaw = scm_aidaw_alloc();
        if (aidaw)
                memset(aidaw, 0, PAGE_SIZE);
        return aidaw;
}
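
/*
 * Translate the block layer request in the current msb slot into an msb:
 * set the SCM address, operation code and block count, and build the aidaw
 * list pointing at the data pages of the request's bio segments.
 */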
static int scm_request_prepare(struct scm_request *scmrq)
{
        struct scm_blk_dev *bdev = scmrq->bdev;
        struct scm_device *scmdev = bdev->gendisk->private_data;
        int pos = scmrq->aob->request.msb_count;
        struct msb *msb = &scmrq->aob->msb[pos];
        struct request *req = scmrq->request[pos];
        struct req_iterator iter;
        struct aidaw *aidaw;
        struct bio_vec bv;

        aidaw = scm_aidaw_fetch(scmrq, blk_rq_bytes(req));
        if (!aidaw)
                return -ENOMEM;

        msb->bs = MSB_BS_4K;
        scmrq->aob->request.msb_count++;
        msb->scm_addr = scmdev->address + ((u64) blk_rq_pos(req) << 9);
        msb->oc = (rq_data_dir(req) == READ) ? MSB_OC_READ : MSB_OC_WRITE;
        msb->flags |= MSB_FLAG_IDA;
        msb->data_addr = (u64) aidaw;

        rq_for_each_segment(bv, req, iter) {
                WARN_ON(bv.bv_offset);
                msb->blk_count += bv.bv_len >> 12;
                aidaw->data_addr = (u64) page_address(bv.bv_page);
                aidaw++;
        }

        scmrq->next_aidaw = aidaw;
        return 0;
}

static inline void scm_request_set(struct scm_request *scmrq,
                                   struct request *req)
{
        scmrq->request[scmrq->aob->request.msb_count] = req;
}
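
/*
 * Reset a fetched request for a new AOB: clear the request array and the
 * AOB, associate the request with its scm device, and place the aidaw area
 * right behind the last usable msb slot of the AOB page.
 */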
static inline void scm_request_init(struct scm_blk_dev *bdev,
                                    struct scm_request *scmrq)
{
        struct aob_rq_header *aobrq = to_aobrq(scmrq);
        struct aob *aob = scmrq->aob;

        memset(scmrq->request, 0,
               nr_requests_per_io * sizeof(scmrq->request[0]));
        memset(aob, 0, sizeof(*aob));
        aobrq->scmdev = bdev->scmdev;
        aob->request.cmd_code = ARQB_CMD_MOVE;
        aob->request.data = (u64) aobrq;
        scmrq->bdev = bdev;
        scmrq->retries = 4;
        scmrq->error = 0;
        /* We don't use all msbs - place aidaws at the end of the aob page. */
        scmrq->next_aidaw = (void *) &aob->msb[nr_requests_per_io];
        scm_request_cluster_init(scmrq);
}

static void scm_ensure_queue_restart(struct scm_blk_dev *bdev)
{
        if (atomic_read(&bdev->queued_reqs)) {
                /* Queue restart is triggered by the next interrupt. */
                return;
        }
        blk_delay_queue(bdev->rq, SCM_QUEUE_DELAY);
}
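
/*
 * Put every block layer request attached to this scm_request back on the
 * block queue and return the scm_request to the inactive pool; make sure
 * the queue gets restarted if nothing is in flight anymore.
 */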
void scm_request_requeue(struct scm_request *scmrq)
{
        struct scm_blk_dev *bdev = scmrq->bdev;
        int i;

        scm_release_cluster(scmrq);
        for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++)
                blk_requeue_request(bdev->rq, scmrq->request[i]);

        atomic_dec(&bdev->queued_reqs);
        scm_request_done(scmrq);
        scm_ensure_queue_restart(bdev);
}

void scm_request_finish(struct scm_request *scmrq)
{
        struct scm_blk_dev *bdev = scmrq->bdev;
        int i;

        scm_release_cluster(scmrq);
        for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++)
                blk_end_request_all(scmrq->request[i], scmrq->error);

        atomic_dec(&bdev->queued_reqs);
        scm_request_done(scmrq);
}
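
/*
 * Hand the AOB over to the EADM subchannel. If no msb has been set up or
 * the start fails, the attached requests are requeued and the error is
 * reported to the caller.
 */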
static int scm_request_start(struct scm_request *scmrq)
{
        struct scm_blk_dev *bdev = scmrq->bdev;
        int ret;

        atomic_inc(&bdev->queued_reqs);
        if (!scmrq->aob->request.msb_count) {
                scm_request_requeue(scmrq);
                return -EINVAL;
        }

        ret = eadm_start_aob(scmrq->aob);
        if (ret) {
                SCM_LOG(5, "no subchannel");
                scm_request_requeue(scmrq);
        }
        return ret;
}
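
/*
 * request_fn of the block queue: peel requests off the queue, pack up to
 * nr_requests_per_io of them into one AOB (one msb each) and start the AOB
 * on the EADM subchannel. Cluster writes are started separately; if no
 * scm_request or aidaw page is available, the queue is restarted later.
 */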
static void scm_blk_request(struct request_queue *rq)
{
        struct scm_device *scmdev = rq->queuedata;
        struct scm_blk_dev *bdev = dev_get_drvdata(&scmdev->dev);
        struct scm_request *scmrq = NULL;
        struct request *req;

        while ((req = blk_peek_request(rq))) {
                if (req->cmd_type != REQ_TYPE_FS) {
                        blk_start_request(req);
                        blk_dump_rq_flags(req, KMSG_COMPONENT " bad request");
                        blk_end_request_all(req, -EIO);
                        continue;
                }

                if (!scm_permit_request(bdev, req))
                        goto out;

                if (!scmrq) {
                        scmrq = scm_request_fetch();
                        if (!scmrq) {
                                SCM_LOG(5, "no request");
                                goto out;
                        }
                        scm_request_init(bdev, scmrq);
                }
                scm_request_set(scmrq, req);

                if (!scm_reserve_cluster(scmrq)) {
                        SCM_LOG(5, "cluster busy");
                        scm_request_set(scmrq, NULL);
                        if (scmrq->aob->request.msb_count)
                                goto out;

                        scm_request_done(scmrq);
                        return;
                }

                if (scm_need_cluster_request(scmrq)) {
                        if (scmrq->aob->request.msb_count) {
                                /* Start cluster requests separately. */
                                scm_request_set(scmrq, NULL);
                                if (scm_request_start(scmrq))
                                        return;
                        } else {
                                atomic_inc(&bdev->queued_reqs);
                                blk_start_request(req);
                                scm_initiate_cluster_request(scmrq);
                        }
                        scmrq = NULL;
                        continue;
                }

                if (scm_request_prepare(scmrq)) {
                        SCM_LOG(5, "aidaw alloc failed");
                        scm_request_set(scmrq, NULL);
                        goto out;
                }
                blk_start_request(req);

                if (scmrq->aob->request.msb_count < nr_requests_per_io)
                        continue;

                if (scm_request_start(scmrq))
                        return;

                scmrq = NULL;
        }
out:
        if (scmrq)
                scm_request_start(scmrq);
        else
                scm_ensure_queue_restart(bdev);
}

static void __scmrq_log_error(struct scm_request *scmrq)
{
        struct aob *aob = scmrq->aob;

        if (scmrq->error == -ETIMEDOUT)
                SCM_LOG(1, "Request timeout");
        else {
                SCM_LOG(1, "Request error");
                SCM_LOG_HEX(1, &aob->response, sizeof(aob->response));
        }
        if (scmrq->retries)
                SCM_LOG(1, "Retry request");
        else
                pr_err("An I/O operation to SCM failed with rc=%d\n",
                       scmrq->error);
}
void scm_blk_irq(struct scm_device *scmdev, void *data, int error)
{
        struct scm_request *scmrq = data;
        struct scm_blk_dev *bdev = scmrq->bdev;

        scmrq->error = error;
        if (error)
                __scmrq_log_error(scmrq);

        spin_lock(&bdev->lock);
        list_add_tail(&scmrq->list, &bdev->finished_requests);
        spin_unlock(&bdev->lock);
        tasklet_hi_schedule(&bdev->tasklet);
}
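
/*
 * Handle a failed scm_request: a write-prohibit response switches the
 * device to SCM_WR_PROHIBIT and requeues the request; all other errors are
 * retried by restarting the AOB, with a requeue as fallback.
 */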
static void scm_blk_handle_error(struct scm_request *scmrq)
{
        struct scm_blk_dev *bdev = scmrq->bdev;
        unsigned long flags;

        if (scmrq->error != -EIO)
                goto restart;

        /* For -EIO the response block is valid. */
        switch (scmrq->aob->response.eqc) {
        case EQC_WR_PROHIBIT:
                spin_lock_irqsave(&bdev->lock, flags);
                if (bdev->state != SCM_WR_PROHIBIT)
                        pr_info("%lx: Write access to the SCM increment is suspended\n",
                                (unsigned long) bdev->scmdev->address);
                bdev->state = SCM_WR_PROHIBIT;
                spin_unlock_irqrestore(&bdev->lock, flags);
                goto requeue;
        default:
                break;
        }

restart:
        if (!eadm_start_aob(scmrq->aob))
                return;

requeue:
        spin_lock_irqsave(&bdev->rq_lock, flags);
        scm_request_requeue(scmrq);
        spin_unlock_irqrestore(&bdev->rq_lock, flags);
}
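
/*
 * Tasklet worker: walk the finished_requests list, retry failed requests
 * while they still have retries left, let the cluster code handle its own
 * requests, and complete the rest before kicking the queue again.
 */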
static void scm_blk_tasklet(struct scm_blk_dev *bdev)
{
        struct scm_request *scmrq;
        unsigned long flags;

        spin_lock_irqsave(&bdev->lock, flags);
        while (!list_empty(&bdev->finished_requests)) {
                scmrq = list_first_entry(&bdev->finished_requests,
                                         struct scm_request, list);
                list_del(&scmrq->list);
                spin_unlock_irqrestore(&bdev->lock, flags);

                if (scmrq->error && scmrq->retries-- > 0) {
                        scm_blk_handle_error(scmrq);

                        /* Request restarted or requeued, handle next. */
                        spin_lock_irqsave(&bdev->lock, flags);
                        continue;
                }

                if (scm_test_cluster_request(scmrq)) {
                        scm_cluster_request_irq(scmrq);
                        spin_lock_irqsave(&bdev->lock, flags);
                        continue;
                }

                scm_request_finish(scmrq);
                spin_lock_irqsave(&bdev->lock, flags);
        }
        spin_unlock_irqrestore(&bdev->lock, flags);
        /* Look out for more requests. */
        blk_run_queue(bdev->rq);
}

static const struct block_device_operations scm_blk_devops = {
        .owner = THIS_MODULE,
};
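
/*
 * Set up the block device for one SCM increment: initialize locks, tasklet
 * and request queue, configure 4K logical blocks, allocate the gendisk and
 * name it scma..scmz, scmaa..scmzz based on the device index.
 */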
int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
{
        struct request_queue *rq;
        int len, ret = -ENOMEM;
        unsigned int devindex, nr_max_blk;

        devindex = atomic_inc_return(&nr_devices) - 1;
        /* scma..scmz + scmaa..scmzz */
        if (devindex > 701) {
                ret = -ENODEV;
                goto out;
        }

        bdev->scmdev = scmdev;
        bdev->state = SCM_OPER;
        spin_lock_init(&bdev->rq_lock);
        spin_lock_init(&bdev->lock);
        INIT_LIST_HEAD(&bdev->finished_requests);
        atomic_set(&bdev->queued_reqs, 0);
        tasklet_init(&bdev->tasklet,
                     (void (*)(unsigned long)) scm_blk_tasklet,
                     (unsigned long) bdev);

        rq = blk_init_queue(scm_blk_request, &bdev->rq_lock);
        if (!rq)
                goto out;

        bdev->rq = rq;
        nr_max_blk = min(scmdev->nr_max_block,
                         (unsigned int) (PAGE_SIZE / sizeof(struct aidaw)));

        blk_queue_logical_block_size(rq, 1 << 12);
        blk_queue_max_hw_sectors(rq, nr_max_blk << 3); /* 8 * 512 = blk_size */
        blk_queue_max_segments(rq, nr_max_blk);
        queue_flag_set_unlocked(QUEUE_FLAG_NONROT, rq);
        queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, rq);
        scm_blk_dev_cluster_setup(bdev);

        bdev->gendisk = alloc_disk(SCM_NR_PARTS);
        if (!bdev->gendisk)
                goto out_queue;

        rq->queuedata = scmdev;
        bdev->gendisk->driverfs_dev = &scmdev->dev;
        bdev->gendisk->private_data = scmdev;
        bdev->gendisk->fops = &scm_blk_devops;
        bdev->gendisk->queue = rq;
        bdev->gendisk->major = scm_major;
        bdev->gendisk->first_minor = devindex * SCM_NR_PARTS;

        len = snprintf(bdev->gendisk->disk_name, DISK_NAME_LEN, "scm");
        if (devindex > 25) {
                len += snprintf(bdev->gendisk->disk_name + len,
                                DISK_NAME_LEN - len, "%c",
                                'a' + (devindex / 26) - 1);
                devindex = devindex % 26;
        }
        snprintf(bdev->gendisk->disk_name + len, DISK_NAME_LEN - len, "%c",
                 'a' + devindex);

        /* 512 byte sectors */
        set_capacity(bdev->gendisk, scmdev->size >> 9);
        add_disk(bdev->gendisk);
        return 0;

out_queue:
        blk_cleanup_queue(rq);
out:
        atomic_dec(&nr_devices);
        return ret;
}

void scm_blk_dev_cleanup(struct scm_blk_dev *bdev)
{
        tasklet_kill(&bdev->tasklet);
        del_gendisk(bdev->gendisk);
        blk_cleanup_queue(bdev->gendisk->queue);
        put_disk(bdev->gendisk);
}

void scm_blk_set_available(struct scm_blk_dev *bdev)
{
        unsigned long flags;

        spin_lock_irqsave(&bdev->lock, flags);
        if (bdev->state == SCM_WR_PROHIBIT)
                pr_info("%lx: Write access to the SCM increment is restored\n",
                        (unsigned long) bdev->scmdev->address);
        bdev->state = SCM_OPER;
        spin_unlock_irqrestore(&bdev->lock, flags);
}

static bool __init scm_blk_params_valid(void)
{
        if (!nr_requests_per_io || nr_requests_per_io > 64)
                return false;

        return scm_cluster_size_valid();
}
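
/*
 * Module init: validate the module parameters, register the major number,
 * preallocate the request pool, set up the s390 debug feature and register
 * the driver, unwinding in reverse order on failure.
 */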
static int __init scm_blk_init(void)
{
        int ret = -EINVAL;

        if (!scm_blk_params_valid())
                goto out;

        ret = register_blkdev(0, "scm");
        if (ret < 0)
                goto out;

        scm_major = ret;
        ret = scm_alloc_rqs(nr_requests);
        if (ret)
                goto out_free;

        scm_debug = debug_register("scm_log", 16, 1, 16);
        if (!scm_debug) {
                ret = -ENOMEM;
                goto out_free;
        }

        debug_register_view(scm_debug, &debug_hex_ascii_view);
        debug_set_level(scm_debug, 2);

        ret = scm_drv_init();
        if (ret)
                goto out_dbf;

        return ret;

out_dbf:
        debug_unregister(scm_debug);
out_free:
        scm_free_rqs();
        unregister_blkdev(scm_major, "scm");
out:
        return ret;
}
module_init(scm_blk_init);

static void __exit scm_blk_cleanup(void)
{
        scm_drv_cleanup();
        debug_unregister(scm_debug);
        scm_free_rqs();
        unregister_blkdev(scm_major, "scm");
}
module_exit(scm_blk_cleanup);