// SPDX-License-Identifier: GPL-2.0
/*
 * Block driver for s390 storage class memory.
 *
 * Copyright IBM Corp. 2012
 * Author(s): Sebastian Ott <sebott@linux.vnet.ibm.com>
 */

#define KMSG_COMPONENT "scm_block"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/genhd.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <asm/eadm.h>
#include "scm_blk.h"

debug_info_t *scm_debug;
static int scm_major;
static mempool_t *aidaw_pool;
static DEFINE_SPINLOCK(list_lock);
static LIST_HEAD(inactive_requests);
static unsigned int nr_requests = 64;
static unsigned int nr_requests_per_io = 8;
static atomic_t nr_devices = ATOMIC_INIT(0);
module_param(nr_requests, uint, S_IRUGO);
MODULE_PARM_DESC(nr_requests, "Number of parallel requests.");

module_param(nr_requests_per_io, uint, S_IRUGO);
MODULE_PARM_DESC(nr_requests_per_io, "Number of requests per IO.");

MODULE_DESCRIPTION("Block driver for s390 storage class memory.");
MODULE_LICENSE("GPL");
MODULE_ALIAS("scm:scmdev*");

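/*
 * Usage sketch (hypothetical values), assuming the module is built as
 * "scm_block": both parameters can only be set at load time and are
 * read-only in sysfs (S_IRUGO), e.g.
 *
 *   modprobe scm_block nr_requests=32 nr_requests_per_io=4
 *
 * nr_requests sizes the preallocated request pool (and the number of
 * blk-mq hardware queues), nr_requests_per_io bounds how many block
 * layer requests are packed into a single AOB.
 */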
static void __scm_free_rq(struct scm_request *scmrq)
{
        struct aob_rq_header *aobrq = to_aobrq(scmrq);

        free_page((unsigned long) scmrq->aob);
        kfree(scmrq->request);
        kfree(aobrq);
}

static void scm_free_rqs(void)
{
        struct list_head *iter, *safe;
        struct scm_request *scmrq;

        spin_lock_irq(&list_lock);
        list_for_each_safe(iter, safe, &inactive_requests) {
                scmrq = list_entry(iter, struct scm_request, list);
                list_del(&scmrq->list);
                __scm_free_rq(scmrq);
        }
        spin_unlock_irq(&list_lock);

        mempool_destroy(aidaw_pool);
}

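/*
 * Allocate one scm_request together with its aob_rq_header, a zeroed
 * GFP_DMA page for the AOB and an array of nr_requests_per_io request
 * pointers, and park it on the inactive list.
 */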
static int __scm_alloc_rq(void)
{
        struct aob_rq_header *aobrq;
        struct scm_request *scmrq;

        aobrq = kzalloc(sizeof(*aobrq) + sizeof(*scmrq), GFP_KERNEL);
        if (!aobrq)
                return -ENOMEM;

        scmrq = (void *) aobrq->data;
        scmrq->aob = (void *) get_zeroed_page(GFP_DMA);
        if (!scmrq->aob)
                goto free;

        scmrq->request = kcalloc(nr_requests_per_io, sizeof(scmrq->request[0]),
                                 GFP_KERNEL);
        if (!scmrq->request)
                goto free;

        INIT_LIST_HEAD(&scmrq->list);
        spin_lock_irq(&list_lock);
        list_add(&scmrq->list, &inactive_requests);
        spin_unlock_irq(&list_lock);

        return 0;
free:
        __scm_free_rq(scmrq);
        return -ENOMEM;
}

static int scm_alloc_rqs(unsigned int nrqs)
{
        int ret = 0;

        aidaw_pool = mempool_create_page_pool(max(nrqs/8, 1U), 0);
        if (!aidaw_pool)
                return -ENOMEM;

        while (nrqs-- && !ret)
                ret = __scm_alloc_rq();

        return ret;
}

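/*
 * Preallocated scm_requests are kept on the inactive_requests list,
 * protected by list_lock; scm_request_fetch()/scm_request_done() simply
 * take a request off and put it back onto that list.
 */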
static struct scm_request *scm_request_fetch(void)
{
        struct scm_request *scmrq = NULL;

        spin_lock_irq(&list_lock);
        if (list_empty(&inactive_requests))
                goto out;
        scmrq = list_first_entry(&inactive_requests, struct scm_request, list);
        list_del(&scmrq->list);
out:
        spin_unlock_irq(&list_lock);
        return scmrq;
}

static void scm_request_done(struct scm_request *scmrq)
{
        unsigned long flags;
        struct msb *msb;
        u64 aidaw;
        int i;

        for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) {
                msb = &scmrq->aob->msb[i];
                aidaw = msb->data_addr;

                if ((msb->flags & MSB_FLAG_IDA) && aidaw &&
                    IS_ALIGNED(aidaw, PAGE_SIZE))
                        mempool_free(virt_to_page(aidaw), aidaw_pool);
        }

        spin_lock_irqsave(&list_lock, flags);
        list_add(&scmrq->list, &inactive_requests);
        spin_unlock_irqrestore(&list_lock, flags);
}

static bool scm_permit_request(struct scm_blk_dev *bdev, struct request *req)
{
        return rq_data_dir(req) != WRITE || bdev->state != SCM_WR_PROHIBIT;
}

static inline struct aidaw *scm_aidaw_alloc(void)
{
        struct page *page = mempool_alloc(aidaw_pool, GFP_ATOMIC);

        return page ? page_address(page) : NULL;
}

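/*
 * Number of payload bytes that can still be described by aidaw entries
 * between *aidaw and the end of its page (each aidaw covers one 4k block).
 */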
static inline unsigned long scm_aidaw_bytes(struct aidaw *aidaw)
{
        unsigned long _aidaw = (unsigned long) aidaw;
        unsigned long bytes = ALIGN(_aidaw, PAGE_SIZE) - _aidaw;

        return (bytes / sizeof(*aidaw)) * PAGE_SIZE;
}

struct aidaw *scm_aidaw_fetch(struct scm_request *scmrq, unsigned int bytes)
{
        struct aidaw *aidaw;

        if (scm_aidaw_bytes(scmrq->next_aidaw) >= bytes)
                return scmrq->next_aidaw;

        aidaw = scm_aidaw_alloc();
        if (aidaw)
                memset(aidaw, 0, PAGE_SIZE);
        return aidaw;
}

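/*
 * Map one block layer request onto the next free msb of the AOB: fill in
 * the msb and let its (indirect) data address point to a list of aidaws,
 * one per 4k segment of the request.
 */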
static int scm_request_prepare(struct scm_request *scmrq)
{
        struct scm_blk_dev *bdev = scmrq->bdev;
        struct scm_device *scmdev = bdev->gendisk->private_data;
        int pos = scmrq->aob->request.msb_count;
        struct msb *msb = &scmrq->aob->msb[pos];
        struct request *req = scmrq->request[pos];
        struct req_iterator iter;
        struct aidaw *aidaw;
        struct bio_vec bv;

        aidaw = scm_aidaw_fetch(scmrq, blk_rq_bytes(req));
        if (!aidaw)
                return -ENOMEM;

        msb->bs = MSB_BS_4K;
        scmrq->aob->request.msb_count++;
        msb->scm_addr = scmdev->address + ((u64) blk_rq_pos(req) << 9);
        msb->oc = (rq_data_dir(req) == READ) ? MSB_OC_READ : MSB_OC_WRITE;
        msb->flags |= MSB_FLAG_IDA;
        msb->data_addr = (u64) aidaw;

        rq_for_each_segment(bv, req, iter) {
                WARN_ON(bv.bv_offset);
                msb->blk_count += bv.bv_len >> 12;
                aidaw->data_addr = (u64) page_address(bv.bv_page);
                aidaw++;
        }

        scmrq->next_aidaw = aidaw;
        return 0;
}

static inline void scm_request_set(struct scm_request *scmrq,
                                   struct request *req)
{
        scmrq->request[scmrq->aob->request.msb_count] = req;
}

static inline void scm_request_init(struct scm_blk_dev *bdev,
                                    struct scm_request *scmrq)
{
        struct aob_rq_header *aobrq = to_aobrq(scmrq);
        struct aob *aob = scmrq->aob;

        memset(scmrq->request, 0,
               nr_requests_per_io * sizeof(scmrq->request[0]));
        memset(aob, 0, sizeof(*aob));
        aobrq->scmdev = bdev->scmdev;
        aob->request.cmd_code = ARQB_CMD_MOVE;
        aob->request.data = (u64) aobrq;
        scmrq->bdev = bdev;
        scmrq->retries = 4;
        scmrq->error = BLK_STS_OK;
        /* We don't use all msbs - place aidaws at the end of the aob page. */
        scmrq->next_aidaw = (void *) &aob->msb[nr_requests_per_io];
}

static void scm_request_requeue(struct scm_request *scmrq)
{
        struct scm_blk_dev *bdev = scmrq->bdev;
        int i;

        for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++)
                blk_mq_requeue_request(scmrq->request[i], false);

        atomic_dec(&bdev->queued_reqs);
        scm_request_done(scmrq);
        blk_mq_kick_requeue_list(bdev->rq);
}

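/*
 * Propagate the completion status to each packed request's per-request
 * data (see cmd_size in the tag set) and complete them towards the block
 * layer.
 */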
static void scm_request_finish(struct scm_request *scmrq)
{
        struct scm_blk_dev *bdev = scmrq->bdev;
        blk_status_t *error;
        int i;

        for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) {
                error = blk_mq_rq_to_pdu(scmrq->request[i]);
                *error = scmrq->error;
                if (likely(!blk_should_fake_timeout(scmrq->request[i]->q)))
                        blk_mq_complete_request(scmrq->request[i]);
        }

        atomic_dec(&bdev->queued_reqs);
        scm_request_done(scmrq);
}

static void scm_request_start(struct scm_request *scmrq)
{
        struct scm_blk_dev *bdev = scmrq->bdev;

        atomic_inc(&bdev->queued_reqs);
        if (eadm_start_aob(scmrq->aob)) {
                SCM_LOG(5, "no subchannel");
                scm_request_requeue(scmrq);
        }
}

struct scm_queue {
        struct scm_request *scmrq;
        spinlock_t lock;
};

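/*
 * queue_rq: requests are collected into the per-hctx scm_request (sq->scmrq)
 * until either the block layer signals the end of the batch (qd->last) or
 * all nr_requests_per_io msbs of the AOB are used; only then is the AOB
 * handed to the EADM subchannel.
 */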
static blk_status_t scm_blk_request(struct blk_mq_hw_ctx *hctx,
                                    const struct blk_mq_queue_data *qd)
{
        struct scm_device *scmdev = hctx->queue->queuedata;
        struct scm_blk_dev *bdev = dev_get_drvdata(&scmdev->dev);
        struct scm_queue *sq = hctx->driver_data;
        struct request *req = qd->rq;
        struct scm_request *scmrq;

        spin_lock(&sq->lock);
        if (!scm_permit_request(bdev, req)) {
                spin_unlock(&sq->lock);
                return BLK_STS_RESOURCE;
        }

        scmrq = sq->scmrq;
        if (!scmrq) {
                scmrq = scm_request_fetch();
                if (!scmrq) {
                        SCM_LOG(5, "no request");
                        spin_unlock(&sq->lock);
                        return BLK_STS_RESOURCE;
                }
                scm_request_init(bdev, scmrq);
                sq->scmrq = scmrq;
        }
        scm_request_set(scmrq, req);

        if (scm_request_prepare(scmrq)) {
                SCM_LOG(5, "aidaw alloc failed");
                scm_request_set(scmrq, NULL);

                if (scmrq->aob->request.msb_count)
                        scm_request_start(scmrq);

                sq->scmrq = NULL;
                spin_unlock(&sq->lock);
                return BLK_STS_RESOURCE;
        }
        blk_mq_start_request(req);

        if (qd->last || scmrq->aob->request.msb_count == nr_requests_per_io) {
                scm_request_start(scmrq);
                sq->scmrq = NULL;
        }
        spin_unlock(&sq->lock);
        return BLK_STS_OK;
}

static int scm_blk_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
                             unsigned int idx)
{
        struct scm_queue *qd = kzalloc(sizeof(*qd), GFP_KERNEL);

        if (!qd)
                return -ENOMEM;

        spin_lock_init(&qd->lock);
        hctx->driver_data = qd;

        return 0;
}

static void scm_blk_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int idx)
{
        struct scm_queue *qd = hctx->driver_data;

        WARN_ON(qd->scmrq);
        kfree(hctx->driver_data);
        hctx->driver_data = NULL;
}

static void __scmrq_log_error(struct scm_request *scmrq)
{
        struct aob *aob = scmrq->aob;

        if (scmrq->error == BLK_STS_TIMEOUT)
                SCM_LOG(1, "Request timeout");
        else {
                SCM_LOG(1, "Request error");
                SCM_LOG_HEX(1, &aob->response, sizeof(aob->response));
        }
        if (scmrq->retries)
                SCM_LOG(1, "Retry request");
        else
                pr_err("An I/O operation to SCM failed with rc=%d\n",
                       scmrq->error);
}

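/*
 * Handle a failed request that still has retries left: a write prohibit
 * response flags the device and requeues the requests, anything else
 * simply restarts the AOB (falling back to a requeue if that fails).
 */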
static void scm_blk_handle_error(struct scm_request *scmrq)
{
        struct scm_blk_dev *bdev = scmrq->bdev;
        unsigned long flags;

        if (scmrq->error != BLK_STS_IOERR)
                goto restart;

        /* For -EIO the response block is valid. */
        switch (scmrq->aob->response.eqc) {
        case EQC_WR_PROHIBIT:
                spin_lock_irqsave(&bdev->lock, flags);
                if (bdev->state != SCM_WR_PROHIBIT)
                        pr_info("%lx: Write access to the SCM increment is suspended\n",
                                (unsigned long) bdev->scmdev->address);
                bdev->state = SCM_WR_PROHIBIT;
                spin_unlock_irqrestore(&bdev->lock, flags);
                goto requeue;
        default:
                break;
        }

restart:
        if (!eadm_start_aob(scmrq->aob))
                return;

requeue:
        scm_request_requeue(scmrq);
}

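/*
 * Completion callback for a started AOB. Errors are retried a few times
 * before the failure is propagated to the block layer.
 */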
void scm_blk_irq(struct scm_device *scmdev, void *data, blk_status_t error)
{
        struct scm_request *scmrq = data;

        scmrq->error = error;
        if (error) {
                __scmrq_log_error(scmrq);
                if (scmrq->retries-- > 0) {
                        scm_blk_handle_error(scmrq);
                        return;
                }
        }

        scm_request_finish(scmrq);
}

static void scm_blk_request_done(struct request *req)
{
        blk_status_t *error = blk_mq_rq_to_pdu(req);

        blk_mq_end_request(req, *error);
}

static const struct block_device_operations scm_blk_devops = {
        .owner = THIS_MODULE,
};

static const struct blk_mq_ops scm_mq_ops = {
        .queue_rq = scm_blk_request,
        .complete = scm_blk_request_done,
        .init_hctx = scm_blk_init_hctx,
        .exit_hctx = scm_blk_exit_hctx,
};

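/*
 * Create one block device per SCM increment: one hardware queue per
 * preallocated scm_request, 4k logical blocks, and disk names scma..scmz
 * followed by scmaa..scmzz (hence the limit of 702 devices).
 */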
int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
{
        unsigned int devindex, nr_max_blk;
        struct request_queue *rq;
        int len, ret;

        devindex = atomic_inc_return(&nr_devices) - 1;
        /* scma..scmz + scmaa..scmzz */
        if (devindex > 701) {
                ret = -ENODEV;
                goto out;
        }

        bdev->scmdev = scmdev;
        bdev->state = SCM_OPER;
        spin_lock_init(&bdev->lock);
        atomic_set(&bdev->queued_reqs, 0);

        bdev->tag_set.ops = &scm_mq_ops;
        bdev->tag_set.cmd_size = sizeof(blk_status_t);
        bdev->tag_set.nr_hw_queues = nr_requests;
        bdev->tag_set.queue_depth = nr_requests_per_io * nr_requests;
        bdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
        bdev->tag_set.numa_node = NUMA_NO_NODE;

        ret = blk_mq_alloc_tag_set(&bdev->tag_set);
        if (ret)
                goto out;

        rq = blk_mq_init_queue(&bdev->tag_set);
        if (IS_ERR(rq)) {
                ret = PTR_ERR(rq);
                goto out_tag;
        }
        bdev->rq = rq;
        nr_max_blk = min(scmdev->nr_max_block,
                         (unsigned int) (PAGE_SIZE / sizeof(struct aidaw)));

        blk_queue_logical_block_size(rq, 1 << 12);
        blk_queue_max_hw_sectors(rq, nr_max_blk << 3); /* 8 * 512 = blk_size */
        blk_queue_max_segments(rq, nr_max_blk);
        blk_queue_flag_set(QUEUE_FLAG_NONROT, rq);
        blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, rq);

        bdev->gendisk = alloc_disk(SCM_NR_PARTS);
        if (!bdev->gendisk) {
                ret = -ENOMEM;
                goto out_queue;
        }
        rq->queuedata = scmdev;
        bdev->gendisk->private_data = scmdev;
        bdev->gendisk->fops = &scm_blk_devops;
        bdev->gendisk->queue = rq;
        bdev->gendisk->major = scm_major;
        bdev->gendisk->first_minor = devindex * SCM_NR_PARTS;

        len = snprintf(bdev->gendisk->disk_name, DISK_NAME_LEN, "scm");
        if (devindex > 25) {
                len += snprintf(bdev->gendisk->disk_name + len,
                                DISK_NAME_LEN - len, "%c",
                                'a' + (devindex / 26) - 1);
                devindex = devindex % 26;
        }
        snprintf(bdev->gendisk->disk_name + len, DISK_NAME_LEN - len, "%c",
                 'a' + devindex);

        /* 512 byte sectors */
        set_capacity(bdev->gendisk, scmdev->size >> 9);
        device_add_disk(&scmdev->dev, bdev->gendisk, NULL);
        return 0;

out_queue:
        blk_cleanup_queue(rq);
out_tag:
        blk_mq_free_tag_set(&bdev->tag_set);
out:
        atomic_dec(&nr_devices);
        return ret;
}

void scm_blk_dev_cleanup(struct scm_blk_dev *bdev)
{
        del_gendisk(bdev->gendisk);
        blk_cleanup_queue(bdev->gendisk->queue);
        blk_mq_free_tag_set(&bdev->tag_set);
        put_disk(bdev->gendisk);
}

void scm_blk_set_available(struct scm_blk_dev *bdev)
{
        unsigned long flags;

        spin_lock_irqsave(&bdev->lock, flags);
        if (bdev->state == SCM_WR_PROHIBIT)
                pr_info("%lx: Write access to the SCM increment is restored\n",
                        (unsigned long) bdev->scmdev->address);
        bdev->state = SCM_OPER;
        spin_unlock_irqrestore(&bdev->lock, flags);
}

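/*
 * Reject parameter values the driver cannot work with: between 1 and 64
 * requests (msbs) may be packed into a single AOB.
 */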
static bool __init scm_blk_params_valid(void)
{
        if (!nr_requests_per_io || nr_requests_per_io > 64)
                return false;

        return true;
}

static int __init scm_blk_init(void)
{
        int ret = -EINVAL;

        if (!scm_blk_params_valid())
                goto out;

        ret = register_blkdev(0, "scm");
        if (ret < 0)
                goto out;

        scm_major = ret;
        ret = scm_alloc_rqs(nr_requests);
        if (ret)
                goto out_free;

        scm_debug = debug_register("scm_log", 16, 1, 16);
        if (!scm_debug) {
                ret = -ENOMEM;
                goto out_free;
        }

        debug_register_view(scm_debug, &debug_hex_ascii_view);
        debug_set_level(scm_debug, 2);

        ret = scm_drv_init();
        if (ret)
                goto out_dbf;

        return ret;

out_dbf:
        debug_unregister(scm_debug);
out_free:
        scm_free_rqs();
        unregister_blkdev(scm_major, "scm");
out:
        return ret;
}
module_init(scm_blk_init);

static void __exit scm_blk_cleanup(void)
{
        scm_drv_cleanup();
        debug_unregister(scm_debug);
        scm_free_rqs();
        unregister_blkdev(scm_major, "scm");
}
module_exit(scm_blk_cleanup);