// SPDX-License-Identifier: GPL-2.0
/*
 * Block driver for s390 storage class memory.
 *
 * Copyright IBM Corp. 2012
 * Author(s): Sebastian Ott <sebott@linux.vnet.ibm.com>
 */

#define KMSG_COMPONENT "scm_block"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <asm/eadm.h>
#include "scm_blk.h"
debug_info_t *scm_debug;
static int scm_major;
static mempool_t *aidaw_pool;
static DEFINE_SPINLOCK(list_lock);
static LIST_HEAD(inactive_requests);
static unsigned int nr_requests = 64;
static unsigned int nr_requests_per_io = 8;
static atomic_t nr_devices = ATOMIC_INIT(0);
module_param(nr_requests, uint, S_IRUGO);
MODULE_PARM_DESC(nr_requests, "Number of parallel requests.");

module_param(nr_requests_per_io, uint, S_IRUGO);
MODULE_PARM_DESC(nr_requests_per_io, "Number of requests per IO.");

MODULE_DESCRIPTION("Block driver for s390 storage class memory.");
MODULE_LICENSE("GPL");
MODULE_ALIAS("scm:scmdev*");
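
/*
 * Each scm_request is allocated together with its aob_rq_header and owns one
 * zeroed page holding the AOB (with its msb array) plus an array of up to
 * nr_requests_per_io block layer requests. Inactive scm_requests are kept on
 * inactive_requests, protected by list_lock.
 */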
static void __scm_free_rq(struct scm_request *scmrq)
{
	struct aob_rq_header *aobrq = to_aobrq(scmrq);

	free_page((unsigned long) scmrq->aob);
	kfree(scmrq->request);
	kfree(aobrq);
}
static void scm_free_rqs(void)
{
	struct list_head *iter, *safe;
	struct scm_request *scmrq;

	spin_lock_irq(&list_lock);
	list_for_each_safe(iter, safe, &inactive_requests) {
		scmrq = list_entry(iter, struct scm_request, list);
		list_del(&scmrq->list);
		__scm_free_rq(scmrq);
	}
	spin_unlock_irq(&list_lock);

	mempool_destroy(aidaw_pool);
}
static int __scm_alloc_rq(void)
{
	struct aob_rq_header *aobrq;
	struct scm_request *scmrq;

	aobrq = kzalloc(sizeof(*aobrq) + sizeof(*scmrq), GFP_KERNEL);
	if (!aobrq)
		return -ENOMEM;

	scmrq = (void *) aobrq->data;
	scmrq->aob = (void *) get_zeroed_page(GFP_DMA);
	if (!scmrq->aob)
		goto free;

	scmrq->request = kcalloc(nr_requests_per_io, sizeof(scmrq->request[0]),
				 GFP_KERNEL);
	if (!scmrq->request)
		goto free;

	INIT_LIST_HEAD(&scmrq->list);
	spin_lock_irq(&list_lock);
	list_add(&scmrq->list, &inactive_requests);
	spin_unlock_irq(&list_lock);

	return 0;
free:
	__scm_free_rq(scmrq);
	return -ENOMEM;
}
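
/*
 * Called once at module load time: create the aidaw page mempool and
 * preallocate nrqs scm_requests so the I/O path can fetch them without
 * sleeping allocations.
 */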
static int scm_alloc_rqs(unsigned int nrqs)
{
	int ret = 0;

	aidaw_pool = mempool_create_page_pool(max(nrqs/8, 1U), 0);
	if (!aidaw_pool)
		return -ENOMEM;

	while (nrqs-- && !ret)
		ret = __scm_alloc_rq();

	return ret;
}
static struct scm_request *scm_request_fetch(void)
{
	struct scm_request *scmrq = NULL;

	spin_lock_irq(&list_lock);
	if (list_empty(&inactive_requests))
		goto out;
	scmrq = list_first_entry(&inactive_requests, struct scm_request, list);
	list_del(&scmrq->list);
out:
	spin_unlock_irq(&list_lock);
	return scmrq;
}
static void scm_request_done(struct scm_request *scmrq)
{
	unsigned long flags;
	struct msb *msb;
	u64 aidaw;
	int i;

	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) {
		msb = &scmrq->aob->msb[i];
		aidaw = (u64)dma64_to_virt(msb->data_addr);

		if ((msb->flags & MSB_FLAG_IDA) && aidaw &&
		    IS_ALIGNED(aidaw, PAGE_SIZE))
			mempool_free(virt_to_page((void *)aidaw), aidaw_pool);
	}

	spin_lock_irqsave(&list_lock, flags);
	list_add(&scmrq->list, &inactive_requests);
	spin_unlock_irqrestore(&list_lock, flags);
}
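
/* Refuse writes while the device is in the SCM_WR_PROHIBIT state. */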
static bool scm_permit_request(struct scm_blk_dev *bdev, struct request *req)
{
	return rq_data_dir(req) != WRITE || bdev->state != SCM_WR_PROHIBIT;
}
static inline struct aidaw *scm_aidaw_alloc(void)
{
	struct page *page = mempool_alloc(aidaw_pool, GFP_ATOMIC);

	return page ? page_address(page) : NULL;
}
static inline unsigned long scm_aidaw_bytes(struct aidaw *aidaw)
{
	unsigned long _aidaw = (unsigned long) aidaw;
	unsigned long bytes = ALIGN(_aidaw, PAGE_SIZE) - _aidaw;

	return (bytes / sizeof(*aidaw)) * PAGE_SIZE;
}
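
/*
 * Reuse the tail of the current aidaw page if the remaining designators can
 * still address "bytes" worth of data; otherwise take a fresh, zeroed page
 * from the aidaw mempool.
 */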
struct aidaw *scm_aidaw_fetch(struct scm_request *scmrq, unsigned int bytes)
{
	struct aidaw *aidaw;

	if (scm_aidaw_bytes(scmrq->next_aidaw) >= bytes)
		return scmrq->next_aidaw;

	aidaw = scm_aidaw_alloc();
	if (aidaw)
		memset(aidaw, 0, PAGE_SIZE);
	return aidaw;
}
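
/*
 * Fill the next msb of the AOB for one block layer request: set the SCM
 * address and operation code and translate each bio segment into an aidaw
 * entry (indirect data addressing, 4K blocks).
 */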
static int scm_request_prepare(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;
	struct scm_device *scmdev = bdev->gendisk->private_data;
	int pos = scmrq->aob->request.msb_count;
	struct msb *msb = &scmrq->aob->msb[pos];
	struct request *req = scmrq->request[pos];
	struct req_iterator iter;
	struct aidaw *aidaw;
	struct bio_vec bv;

	aidaw = scm_aidaw_fetch(scmrq, blk_rq_bytes(req));
	if (!aidaw)
		return -ENOMEM;

	msb->bs = MSB_BS_4K;
	scmrq->aob->request.msb_count++;
	msb->scm_addr = scmdev->address + ((u64) blk_rq_pos(req) << 9);
	msb->oc = (rq_data_dir(req) == READ) ? MSB_OC_READ : MSB_OC_WRITE;
	msb->flags |= MSB_FLAG_IDA;
	msb->data_addr = virt_to_dma64(aidaw);

	rq_for_each_segment(bv, req, iter) {
		WARN_ON(bv.bv_offset);
		msb->blk_count += bv.bv_len >> 12;
		aidaw->data_addr = virt_to_dma64(page_address(bv.bv_page));
		aidaw++;
	}

	scmrq->next_aidaw = aidaw;
	return 0;
}
static inline void scm_request_set(struct scm_request *scmrq,
				   struct request *req)
{
	scmrq->request[scmrq->aob->request.msb_count] = req;
}
static inline void scm_request_init(struct scm_blk_dev *bdev,
				    struct scm_request *scmrq)
{
	struct aob_rq_header *aobrq = to_aobrq(scmrq);
	struct aob *aob = scmrq->aob;

	memset(scmrq->request, 0,
	       nr_requests_per_io * sizeof(scmrq->request[0]));
	memset(aob, 0, sizeof(*aob));
	aobrq->scmdev = bdev->scmdev;
	aob->request.cmd_code = ARQB_CMD_MOVE;
	aob->request.data = (u64) aobrq;
	scmrq->bdev = bdev;
	scmrq->retries = 4;
	scmrq->error = BLK_STS_OK;
	/* We don't use all msbs - place aidaws at the end of the aob page. */
	scmrq->next_aidaw = (void *) &aob->msb[nr_requests_per_io];
}
static void scm_request_requeue(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;
	int i;

	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++)
		blk_mq_requeue_request(scmrq->request[i], false);

	atomic_dec(&bdev->queued_reqs);
	scm_request_done(scmrq);
	blk_mq_kick_requeue_list(bdev->rq);
}
static void scm_request_finish(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;
	blk_status_t *error;
	int i;

	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) {
		error = blk_mq_rq_to_pdu(scmrq->request[i]);
		*error = scmrq->error;
		if (likely(!blk_should_fake_timeout(scmrq->request[i]->q)))
			blk_mq_complete_request(scmrq->request[i]);
	}

	atomic_dec(&bdev->queued_reqs);
	scm_request_done(scmrq);
}
static void scm_request_start(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;

	atomic_inc(&bdev->queued_reqs);
	if (eadm_start_aob(scmrq->aob)) {
		SCM_LOG(5, "no subchannel");
		scm_request_requeue(scmrq);
	}
}
struct scm_queue {
	struct scm_request *scmrq;
	spinlock_t lock;
};
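
/*
 * blk-mq queue_rq path: requests arriving on the same hardware queue are
 * collected into one scm_request (one msb per request) until either the
 * queue data marks the last request or the AOB is full, at which point the
 * AOB is started on the EADM subchannel.
 */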
static blk_status_t scm_blk_request(struct blk_mq_hw_ctx *hctx,
				    const struct blk_mq_queue_data *qd)
{
	struct scm_device *scmdev = hctx->queue->queuedata;
	struct scm_blk_dev *bdev = dev_get_drvdata(&scmdev->dev);
	struct scm_queue *sq = hctx->driver_data;
	struct request *req = qd->rq;
	struct scm_request *scmrq;

	spin_lock(&sq->lock);
	if (!scm_permit_request(bdev, req)) {
		spin_unlock(&sq->lock);
		return BLK_STS_RESOURCE;
	}

	scmrq = sq->scmrq;
	if (!scmrq) {
		scmrq = scm_request_fetch();
		if (!scmrq) {
			SCM_LOG(5, "no request");
			spin_unlock(&sq->lock);
			return BLK_STS_RESOURCE;
		}
		scm_request_init(bdev, scmrq);
		sq->scmrq = scmrq;
	}
	scm_request_set(scmrq, req);

	if (scm_request_prepare(scmrq)) {
		SCM_LOG(5, "aidaw alloc failed");
		scm_request_set(scmrq, NULL);

		if (scmrq->aob->request.msb_count)
			scm_request_start(scmrq);

		sq->scmrq = NULL;
		spin_unlock(&sq->lock);
		return BLK_STS_RESOURCE;
	}
	blk_mq_start_request(req);

	if (qd->last || scmrq->aob->request.msb_count == nr_requests_per_io) {
		scm_request_start(scmrq);
		sq->scmrq = NULL;
	}
	spin_unlock(&sq->lock);
	return BLK_STS_OK;
}
static int scm_blk_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
			     unsigned int idx)
{
	struct scm_queue *qd = kzalloc(sizeof(*qd), GFP_KERNEL);

	if (!qd)
		return -ENOMEM;

	spin_lock_init(&qd->lock);
	hctx->driver_data = qd;

	return 0;
}
static void scm_blk_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int idx)
{
	struct scm_queue *qd = hctx->driver_data;

	WARN_ON(qd->scmrq);
	kfree(hctx->driver_data);
	hctx->driver_data = NULL;
}
static void __scmrq_log_error(struct scm_request *scmrq)
{
	struct aob *aob = scmrq->aob;

	if (scmrq->error == BLK_STS_TIMEOUT)
		SCM_LOG(1, "Request timeout");
	else {
		SCM_LOG(1, "Request error");
		SCM_LOG_HEX(1, &aob->response, sizeof(aob->response));
	}
	if (scmrq->retries)
		SCM_LOG(1, "Retry request");
	else
		pr_err("An I/O operation to SCM failed with rc=%d\n",
		       scmrq->error);
}
static void scm_blk_handle_error(struct scm_request *scmrq)
{
	struct scm_blk_dev *bdev = scmrq->bdev;
	unsigned long flags;

	if (scmrq->error != BLK_STS_IOERR)
		goto restart;

	/* For -EIO the response block is valid. */
	switch (scmrq->aob->response.eqc) {
	case EQC_WR_PROHIBIT:
		spin_lock_irqsave(&bdev->lock, flags);
		if (bdev->state != SCM_WR_PROHIBIT)
			pr_info("%lx: Write access to the SCM increment is suspended\n",
				(unsigned long) bdev->scmdev->address);
		bdev->state = SCM_WR_PROHIBIT;
		spin_unlock_irqrestore(&bdev->lock, flags);
		goto requeue;
	default:
		break;
	}

restart:
	if (!eadm_start_aob(scmrq->aob))
		return;

requeue:
	scm_request_requeue(scmrq);
}
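
/*
 * Completion callback for an AOB: failed requests are retried a limited
 * number of times before being finished with an error.
 */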
void scm_blk_irq(struct scm_device *scmdev, void *data, blk_status_t error)
{
	struct scm_request *scmrq = data;

	scmrq->error = error;
	if (error) {
		__scmrq_log_error(scmrq);
		if (scmrq->retries-- > 0) {
			scm_blk_handle_error(scmrq);
			return;
		}
	}

	scm_request_finish(scmrq);
}
static void scm_blk_request_done(struct request *req)
{
	blk_status_t *error = blk_mq_rq_to_pdu(req);

	blk_mq_end_request(req, *error);
}
static const struct block_device_operations scm_blk_devops = {
	.owner		= THIS_MODULE,
};
static const struct blk_mq_ops scm_mq_ops = {
	.queue_rq	= scm_blk_request,
	.complete	= scm_blk_request_done,
	.init_hctx	= scm_blk_init_hctx,
	.exit_hctx	= scm_blk_exit_hctx,
};
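
/*
 * Set up one block device per SCM increment: allocate the blk-mq tag set
 * (one hardware queue per parallel request), create the gendisk and derive
 * the disk name from the device index (scma..scmz, scmaa..scmzz).
 */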
int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
{
	struct queue_limits lim = {
		.logical_block_size	= 1 << 12,
	};
	unsigned int devindex;
	int len, ret;

	lim.max_segments = min(scmdev->nr_max_block,
			       (unsigned int) (PAGE_SIZE / sizeof(struct aidaw)));
	lim.max_hw_sectors = lim.max_segments << 3; /* 8 * 512 = blk_size */

	devindex = atomic_inc_return(&nr_devices) - 1;
	/* scma..scmz + scmaa..scmzz */
	if (devindex > 701) {
		ret = -ENODEV;
		goto out;
	}

	bdev->scmdev = scmdev;
	bdev->state = SCM_OPER;
	spin_lock_init(&bdev->lock);
	atomic_set(&bdev->queued_reqs, 0);

	bdev->tag_set.ops = &scm_mq_ops;
	bdev->tag_set.cmd_size = sizeof(blk_status_t);
	bdev->tag_set.nr_hw_queues = nr_requests;
	bdev->tag_set.queue_depth = nr_requests_per_io * nr_requests;
	bdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
	bdev->tag_set.numa_node = NUMA_NO_NODE;

	ret = blk_mq_alloc_tag_set(&bdev->tag_set);
	if (ret)
		goto out;

	bdev->gendisk = blk_mq_alloc_disk(&bdev->tag_set, &lim, scmdev);
	if (IS_ERR(bdev->gendisk)) {
		ret = PTR_ERR(bdev->gendisk);
		goto out_tag;
	}
	bdev->rq = bdev->gendisk->queue;
	bdev->gendisk->private_data = scmdev;
	bdev->gendisk->fops = &scm_blk_devops;
	bdev->gendisk->major = scm_major;
	bdev->gendisk->first_minor = devindex * SCM_NR_PARTS;
	bdev->gendisk->minors = SCM_NR_PARTS;

	len = snprintf(bdev->gendisk->disk_name, DISK_NAME_LEN, "scm");
	if (devindex > 25) {
		len += snprintf(bdev->gendisk->disk_name + len,
				DISK_NAME_LEN - len, "%c",
				'a' + (devindex / 26) - 1);
		devindex = devindex % 26;
	}
	snprintf(bdev->gendisk->disk_name + len, DISK_NAME_LEN - len, "%c",
		 'a' + devindex);

	/* 512 byte sectors */
	set_capacity(bdev->gendisk, scmdev->size >> 9);
	ret = device_add_disk(&scmdev->dev, bdev->gendisk, NULL);
	if (ret)
		goto out_cleanup_disk;

	return 0;

out_cleanup_disk:
	put_disk(bdev->gendisk);
out_tag:
	blk_mq_free_tag_set(&bdev->tag_set);
out:
	atomic_dec(&nr_devices);
	return ret;
}
void scm_blk_dev_cleanup(struct scm_blk_dev *bdev)
{
	del_gendisk(bdev->gendisk);
	put_disk(bdev->gendisk);
	blk_mq_free_tag_set(&bdev->tag_set);
}
void scm_blk_set_available(struct scm_blk_dev *bdev)
{
	unsigned long flags;

	spin_lock_irqsave(&bdev->lock, flags);
	if (bdev->state == SCM_WR_PROHIBIT)
		pr_info("%lx: Write access to the SCM increment is restored\n",
			(unsigned long) bdev->scmdev->address);
	bdev->state = SCM_OPER;
	spin_unlock_irqrestore(&bdev->lock, flags);
}
static bool __init scm_blk_params_valid(void)
{
	if (!nr_requests_per_io || nr_requests_per_io > 64)
		return false;

	return true;
}
static int __init scm_blk_init(void)
{
	int ret = -EINVAL;

	if (!scm_blk_params_valid())
		goto out;

	ret = register_blkdev(0, "scm");
	if (ret < 0)
		goto out;

	scm_major = ret;
	ret = scm_alloc_rqs(nr_requests);
	if (ret)
		goto out_free;

	scm_debug = debug_register("scm_log", 16, 1, 16);
	if (!scm_debug) {
		ret = -ENOMEM;
		goto out_free;
	}

	debug_register_view(scm_debug, &debug_hex_ascii_view);
	debug_set_level(scm_debug, 2);

	ret = scm_drv_init();
	if (ret)
		goto out_dbf;

	return ret;

out_dbf:
	debug_unregister(scm_debug);
out_free:
	scm_free_rqs();
	unregister_blkdev(scm_major, "scm");
out:
	return ret;
}
module_init(scm_blk_init);
static void __exit scm_blk_cleanup(void)
{
	scm_drv_cleanup();
	debug_unregister(scm_debug);
	scm_free_rqs();
	unregister_blkdev(scm_major, "scm");
}
module_exit(scm_blk_cleanup);