// SPDX-License-Identifier: GPL-2.0
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include "null_blk.h"

#define CREATE_TRACE_POINTS
#include "trace.h"

#define MB_TO_SECTS(mb) (((sector_t)mb * SZ_1M) >> SECTOR_SHIFT)

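/*
 * Zone sizes are restricted to powers of two (checked in
 * null_init_zoned_dev()), so a sector can be mapped to its zone index
 * with a single shift.
 */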
static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect)
{
	return sect >> ilog2(dev->zone_size_sects);
}

static inline void null_lock_zone_res(struct nullb_device *dev)
{
	if (dev->need_zone_res_mgmt)
		spin_lock_irq(&dev->zone_res_lock);
}

static inline void null_unlock_zone_res(struct nullb_device *dev)
{
	if (dev->need_zone_res_mgmt)
		spin_unlock_irq(&dev->zone_res_lock);
}

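/*
 * Memory-backed devices may sleep while handling a command, so their
 * per-zone state is protected by a mutex. Devices without memory backing
 * never sleep in the hot path and can use a spinlock instead.
 */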
static inline void null_init_zone_lock(struct nullb_device *dev,
				       struct nullb_zone *zone)
{
	if (!dev->memory_backed)
		spin_lock_init(&zone->spinlock);
	else
		mutex_init(&zone->mutex);
}

static inline void null_lock_zone(struct nullb_device *dev,
				  struct nullb_zone *zone)
{
	if (!dev->memory_backed)
		spin_lock_irq(&zone->spinlock);
	else
		mutex_lock(&zone->mutex);
}

static inline void null_unlock_zone(struct nullb_device *dev,
				    struct nullb_zone *zone)
{
	if (!dev->memory_backed)
		spin_unlock_irq(&zone->spinlock);
	else
		mutex_unlock(&zone->mutex);
}

int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
{
	sector_t dev_capacity_sects, zone_capacity_sects;
	struct nullb_zone *zone;
	sector_t sector = 0;
	unsigned int i;

	if (!is_power_of_2(dev->zone_size)) {
		pr_err("zone_size must be power-of-two\n");
		return -EINVAL;
	}
	if (dev->zone_size > dev->size) {
		pr_err("Zone size larger than device capacity\n");
		return -EINVAL;
	}

	if (!dev->zone_capacity)
		dev->zone_capacity = dev->zone_size;

	if (dev->zone_capacity > dev->zone_size) {
		pr_err("null_blk: zone capacity (%lu MB) larger than zone size (%lu MB)\n",
		       dev->zone_capacity, dev->zone_size);
		return -EINVAL;
	}

	zone_capacity_sects = MB_TO_SECTS(dev->zone_capacity);
	dev_capacity_sects = MB_TO_SECTS(dev->size);
	dev->zone_size_sects = MB_TO_SECTS(dev->zone_size);
	dev->nr_zones = dev_capacity_sects >> ilog2(dev->zone_size_sects);
	if (dev_capacity_sects & (dev->zone_size_sects - 1))
		dev->nr_zones++;

	dev->zones = kvmalloc_array(dev->nr_zones, sizeof(struct nullb_zone),
				    GFP_KERNEL | __GFP_ZERO);
	if (!dev->zones)
		return -ENOMEM;

	spin_lock_init(&dev->zone_res_lock);

	if (dev->zone_nr_conv >= dev->nr_zones) {
		dev->zone_nr_conv = dev->nr_zones - 1;
		pr_info("changed the number of conventional zones to %u",
			dev->zone_nr_conv);
	}

	/* Max active zones has to be < nbr of seq zones in order to be enforceable */
	if (dev->zone_max_active >= dev->nr_zones - dev->zone_nr_conv) {
		dev->zone_max_active = 0;
		pr_info("zone_max_active limit disabled, limit >= zone count\n");
	}

	/* Max open zones has to be <= max active zones */
	if (dev->zone_max_active && dev->zone_max_open > dev->zone_max_active) {
		dev->zone_max_open = dev->zone_max_active;
		pr_info("changed the maximum number of open zones to %u\n",
			dev->zone_max_open);
	} else if (dev->zone_max_open >= dev->nr_zones - dev->zone_nr_conv) {
		dev->zone_max_open = 0;
		pr_info("zone_max_open limit disabled, limit >= zone count\n");
	}
	dev->need_zone_res_mgmt = dev->zone_max_active || dev->zone_max_open;
	dev->imp_close_zone_no = dev->zone_nr_conv;

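	/* Conventional zones (if any) come first: no write pointer, always NOT_WP. */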
	for (i = 0; i < dev->zone_nr_conv; i++) {
		zone = &dev->zones[i];

		null_init_zone_lock(dev, zone);
		zone->start = sector;
		zone->len = dev->zone_size_sects;
		zone->capacity = zone->len;
		zone->wp = zone->start + zone->len;
		zone->type = BLK_ZONE_TYPE_CONVENTIONAL;
		zone->cond = BLK_ZONE_COND_NOT_WP;

		sector += dev->zone_size_sects;
	}

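	/*
	 * Sequential write required zones follow. The last zone may be
	 * smaller than zone_size if the device capacity is not a multiple
	 * of the zone size.
	 */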
	for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) {
		zone = &dev->zones[i];

		null_init_zone_lock(dev, zone);
		zone->start = zone->wp = sector;
		if (zone->start + dev->zone_size_sects > dev_capacity_sects)
			zone->len = dev_capacity_sects - zone->start;
		else
			zone->len = dev->zone_size_sects;
		zone->capacity =
			min_t(sector_t, zone->len, zone_capacity_sects);
		zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ;
		zone->cond = BLK_ZONE_COND_EMPTY;

		sector += dev->zone_size_sects;
	}

	q->limits.zoned = BLK_ZONED_HM;
	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
	blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);

	return 0;
}

int null_register_zoned_dev(struct nullb *nullb)
{
	struct nullb_device *dev = nullb->dev;
	struct request_queue *q = nullb->q;

	if (queue_is_mq(q)) {
		int ret = blk_revalidate_disk_zones(nullb->disk, NULL);

		if (ret)
			return ret;
	} else {
		blk_queue_chunk_sectors(q, dev->zone_size_sects);
		q->nr_zones = blkdev_nr_zones(nullb->disk);
	}

	blk_queue_max_zone_append_sectors(q, dev->zone_size_sects);
	blk_queue_max_open_zones(q, dev->zone_max_open);
	blk_queue_max_active_zones(q, dev->zone_max_active);

	return 0;
}

void null_free_zoned_dev(struct nullb_device *dev)
{
	/* Release the zone array allocated by null_init_zoned_dev(). */
	kvfree(dev->zones);
	dev->zones = NULL;
}

int null_report_zones(struct gendisk *disk, sector_t sector,
		      unsigned int nr_zones, report_zones_cb cb, void *data)
{
	struct nullb *nullb = disk->private_data;
	struct nullb_device *dev = nullb->dev;
	unsigned int first_zone, i;
	struct nullb_zone *zone;
	struct blk_zone blkz;
	int error;

	first_zone = null_zone_no(dev, sector);
	if (first_zone >= dev->nr_zones)
		return 0;

	nr_zones = min(nr_zones, dev->nr_zones - first_zone);
	trace_nullb_report_zones(nullb, nr_zones);

	memset(&blkz, 0, sizeof(struct blk_zone));
	zone = &dev->zones[first_zone];
	for (i = 0; i < nr_zones; i++, zone++) {
		/*
		 * Stacked DM target drivers will remap the zone information by
		 * modifying the zone information passed to the report callback.
		 * So use a local copy to avoid corruption of the device zone
		 * array.
		 */
		null_lock_zone(dev, zone);
		blkz.start = zone->start;
		blkz.len = zone->len;
		blkz.wp = zone->wp;
		blkz.type = zone->type;
		blkz.cond = zone->cond;
		blkz.capacity = zone->capacity;
		null_unlock_zone(dev, zone);

		error = cb(&blkz, i, data);
		if (error)
			return error;
	}

	return nr_zones;
}

/*
 * This is called in the case of memory backing from null_process_cmd()
 * with the target zone already locked.
 */
size_t null_zone_valid_read_len(struct nullb *nullb,
				sector_t sector, unsigned int len)
{
	struct nullb_device *dev = nullb->dev;
	struct nullb_zone *zone = &dev->zones[null_zone_no(dev, sector)];
	unsigned int nr_sectors = len >> SECTOR_SHIFT;

	/* Read must be below the write pointer position */
	if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL ||
	    sector + nr_sectors <= zone->wp)
		return len;

	if (sector > zone->wp)
		return 0;

	return (zone->wp - sector) << SECTOR_SHIFT;
}

static blk_status_t __null_close_zone(struct nullb_device *dev,
				      struct nullb_zone *zone)
{
	switch (zone->cond) {
	case BLK_ZONE_COND_CLOSED:
		/* close operation on closed is not an error */
		return BLK_STS_OK;
	case BLK_ZONE_COND_IMP_OPEN:
		dev->nr_zones_imp_open--;
		break;
	case BLK_ZONE_COND_EXP_OPEN:
		dev->nr_zones_exp_open--;
		break;
	case BLK_ZONE_COND_EMPTY:
	case BLK_ZONE_COND_FULL:
	default:
		return BLK_STS_IOERR;
	}

	if (zone->wp == zone->start) {
		zone->cond = BLK_ZONE_COND_EMPTY;
	} else {
		zone->cond = BLK_ZONE_COND_CLOSED;
		dev->nr_zones_closed++;
	}

	return BLK_STS_OK;
}

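/*
 * Close an implicitly open zone to free up a zone resource. The scan is
 * round-robin, starting from the zone following the last one closed here
 * (dev->imp_close_zone_no), so closures are spread across zones.
 */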
static void null_close_imp_open_zone(struct nullb_device *dev)
{
	struct nullb_zone *zone;
	unsigned int zno, i;

	zno = dev->imp_close_zone_no;
	if (zno >= dev->nr_zones)
		zno = dev->zone_nr_conv;

	for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) {
		zone = &dev->zones[zno];
		zno++;
		if (zno >= dev->nr_zones)
			zno = dev->zone_nr_conv;

		if (zone->cond == BLK_ZONE_COND_IMP_OPEN) {
			__null_close_zone(dev, zone);
			dev->imp_close_zone_no = zno;
			return;
		}
	}
}

static blk_status_t null_check_active(struct nullb_device *dev)
{
	if (!dev->zone_max_active)
		return BLK_STS_OK;

	if (dev->nr_zones_exp_open + dev->nr_zones_imp_open +
			dev->nr_zones_closed < dev->zone_max_active)
		return BLK_STS_OK;

	return BLK_STS_ZONE_ACTIVE_RESOURCE;
}

static blk_status_t null_check_open(struct nullb_device *dev)
{
	if (!dev->zone_max_open)
		return BLK_STS_OK;

	if (dev->nr_zones_exp_open + dev->nr_zones_imp_open < dev->zone_max_open)
		return BLK_STS_OK;

	if (dev->nr_zones_imp_open) {
		if (null_check_active(dev) == BLK_STS_OK) {
			null_close_imp_open_zone(dev);
			return BLK_STS_OK;
		}
	}

	return BLK_STS_ZONE_OPEN_RESOURCE;
}

/*
 * This function matches the manage open zone resources function in the ZBC standard,
 * with the addition of max active zones support (added in the ZNS standard).
 *
 * The function determines if a zone can transition to implicit open or explicit open,
 * while maintaining the max open zone (and max active zone) limit(s). It may close an
 * implicit open zone in order to make additional zone resources available.
 *
 * ZBC states that an implicit open zone shall be closed only if there is not
 * room within the open limit. However, with the addition of an active limit,
 * it is not certain that closing an implicit open zone will allow a new zone
 * to be opened, since we might already be at the active limit capacity.
 */
static blk_status_t null_check_zone_resources(struct nullb_device *dev,
					      struct nullb_zone *zone)
{
	blk_status_t ret;

	switch (zone->cond) {
	case BLK_ZONE_COND_EMPTY:
		ret = null_check_active(dev);
		if (ret != BLK_STS_OK)
			return ret;
		fallthrough;
	case BLK_ZONE_COND_CLOSED:
		return null_check_open(dev);
	default:
		/* Should never be called for other states */
		WARN_ON(1);
		return BLK_STS_IOERR;
	}
}

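/*
 * Handle a regular write or a zone append. A zone append is issued at the
 * current write pointer and the sector actually written is returned to the
 * caller through the request or BIO sector field.
 */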
static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
				    unsigned int nr_sectors, bool append)
{
	struct nullb_device *dev = cmd->nq->dev;
	unsigned int zno = null_zone_no(dev, sector);
	struct nullb_zone *zone = &dev->zones[zno];
	blk_status_t ret;

	trace_nullb_zone_op(cmd, zno, zone->cond);

	if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) {
		if (append)
			return BLK_STS_IOERR;
		return null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
	}

	null_lock_zone(dev, zone);

	if (zone->cond == BLK_ZONE_COND_FULL) {
		/* Cannot write to a full zone */
		ret = BLK_STS_IOERR;
		goto unlock;
	}

	/*
	 * Regular writes must be at the write pointer position.
	 * Zone append writes are automatically issued at the write
	 * pointer and the position returned using the request or BIO
	 * sector.
	 */
	if (append) {
		sector = zone->wp;
		if (dev->queue_mode == NULL_Q_BIO)
			cmd->bio->bi_iter.bi_sector = sector;
		else
			cmd->rq->__sector = sector;
	} else if (sector != zone->wp) {
		ret = BLK_STS_IOERR;
		goto unlock;
	}

	if (zone->wp + nr_sectors > zone->start + zone->capacity) {
		ret = BLK_STS_IOERR;
		goto unlock;
	}

	if (zone->cond == BLK_ZONE_COND_CLOSED ||
	    zone->cond == BLK_ZONE_COND_EMPTY) {
		null_lock_zone_res(dev);

		ret = null_check_zone_resources(dev, zone);
		if (ret != BLK_STS_OK) {
			null_unlock_zone_res(dev);
			goto unlock;
		}
		if (zone->cond == BLK_ZONE_COND_CLOSED) {
			dev->nr_zones_closed--;
			dev->nr_zones_imp_open++;
		} else if (zone->cond == BLK_ZONE_COND_EMPTY) {
			dev->nr_zones_imp_open++;
		}

		if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
			zone->cond = BLK_ZONE_COND_IMP_OPEN;

		null_unlock_zone_res(dev);
	}

	ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
	if (ret != BLK_STS_OK)
		goto unlock;

	zone->wp += nr_sectors;
	if (zone->wp == zone->start + zone->capacity) {
		null_lock_zone_res(dev);
		if (zone->cond == BLK_ZONE_COND_EXP_OPEN)
			dev->nr_zones_exp_open--;
		else if (zone->cond == BLK_ZONE_COND_IMP_OPEN)
			dev->nr_zones_imp_open--;
		zone->cond = BLK_ZONE_COND_FULL;
		null_unlock_zone_res(dev);
	}

	ret = BLK_STS_OK;

unlock:
	null_unlock_zone(dev, zone);

	return ret;
}

static blk_status_t null_open_zone(struct nullb_device *dev,
				   struct nullb_zone *zone)
{
	blk_status_t ret = BLK_STS_OK;

	if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
		return BLK_STS_IOERR;

	null_lock_zone_res(dev);

	switch (zone->cond) {
	case BLK_ZONE_COND_EXP_OPEN:
		/* open operation on exp open is not an error */
		goto unlock;
	case BLK_ZONE_COND_EMPTY:
		ret = null_check_zone_resources(dev, zone);
		if (ret != BLK_STS_OK)
			goto unlock;
		break;
	case BLK_ZONE_COND_IMP_OPEN:
		dev->nr_zones_imp_open--;
		break;
	case BLK_ZONE_COND_CLOSED:
		ret = null_check_zone_resources(dev, zone);
		if (ret != BLK_STS_OK)
			goto unlock;
		dev->nr_zones_closed--;
		break;
	case BLK_ZONE_COND_FULL:
	default:
		ret = BLK_STS_IOERR;
		goto unlock;
	}

	zone->cond = BLK_ZONE_COND_EXP_OPEN;
	dev->nr_zones_exp_open++;

unlock:
	null_unlock_zone_res(dev);

	return ret;
}

static blk_status_t null_close_zone(struct nullb_device *dev,
				    struct nullb_zone *zone)
{
	blk_status_t ret;

	if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
		return BLK_STS_IOERR;

	null_lock_zone_res(dev);
	ret = __null_close_zone(dev, zone);
	null_unlock_zone_res(dev);

	return ret;
}

static blk_status_t null_finish_zone(struct nullb_device *dev,
				     struct nullb_zone *zone)
{
	blk_status_t ret = BLK_STS_OK;

	if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
		return BLK_STS_IOERR;

	null_lock_zone_res(dev);

	switch (zone->cond) {
	case BLK_ZONE_COND_FULL:
		/* finish operation on full is not an error */
		goto unlock;
	case BLK_ZONE_COND_EMPTY:
		ret = null_check_zone_resources(dev, zone);
		if (ret != BLK_STS_OK)
			goto unlock;
		break;
	case BLK_ZONE_COND_IMP_OPEN:
		dev->nr_zones_imp_open--;
		break;
	case BLK_ZONE_COND_EXP_OPEN:
		dev->nr_zones_exp_open--;
		break;
	case BLK_ZONE_COND_CLOSED:
		ret = null_check_zone_resources(dev, zone);
		if (ret != BLK_STS_OK)
			goto unlock;
		dev->nr_zones_closed--;
		break;
	default:
		ret = BLK_STS_IOERR;
		goto unlock;
	}

	zone->cond = BLK_ZONE_COND_FULL;
	zone->wp = zone->start + zone->len;

unlock:
	null_unlock_zone_res(dev);

	return ret;
}

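/*
 * Resetting a zone rewinds its write pointer to the zone start. For
 * memory-backed devices, the zone data is also discarded.
 */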
static blk_status_t null_reset_zone(struct nullb_device *dev,
				    struct nullb_zone *zone)
{
	if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
		return BLK_STS_IOERR;

	null_lock_zone_res(dev);

	switch (zone->cond) {
	case BLK_ZONE_COND_EMPTY:
		/* reset operation on empty is not an error */
		null_unlock_zone_res(dev);
		return BLK_STS_OK;
	case BLK_ZONE_COND_IMP_OPEN:
		dev->nr_zones_imp_open--;
		break;
	case BLK_ZONE_COND_EXP_OPEN:
		dev->nr_zones_exp_open--;
		break;
	case BLK_ZONE_COND_CLOSED:
		dev->nr_zones_closed--;
		break;
	case BLK_ZONE_COND_FULL:
		break;
	default:
		null_unlock_zone_res(dev);
		return BLK_STS_IOERR;
	}

	zone->cond = BLK_ZONE_COND_EMPTY;
	zone->wp = zone->start;

	null_unlock_zone_res(dev);

	if (dev->memory_backed)
		return null_handle_discard(dev, zone->start, zone->len);

	return BLK_STS_OK;
}

static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
				   sector_t sector)
{
	struct nullb_device *dev = cmd->nq->dev;
	unsigned int zone_no;
	struct nullb_zone *zone;
	blk_status_t ret;
	size_t i;

	if (op == REQ_OP_ZONE_RESET_ALL) {
		for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) {
			zone = &dev->zones[i];
			null_lock_zone(dev, zone);
			if (zone->cond != BLK_ZONE_COND_EMPTY) {
				null_reset_zone(dev, zone);
				trace_nullb_zone_op(cmd, i, zone->cond);
			}
			null_unlock_zone(dev, zone);
		}
		return BLK_STS_OK;
	}

	zone_no = null_zone_no(dev, sector);
	zone = &dev->zones[zone_no];

	null_lock_zone(dev, zone);

	switch (op) {
	case REQ_OP_ZONE_RESET:
		ret = null_reset_zone(dev, zone);
		break;
	case REQ_OP_ZONE_OPEN:
		ret = null_open_zone(dev, zone);
		break;
	case REQ_OP_ZONE_CLOSE:
		ret = null_close_zone(dev, zone);
		break;
	case REQ_OP_ZONE_FINISH:
		ret = null_finish_zone(dev, zone);
		break;
	default:
		ret = BLK_STS_NOTSUPP;
		break;
	}

	if (ret == BLK_STS_OK)
		trace_nullb_zone_op(cmd, zone_no, zone->cond);

	null_unlock_zone(dev, zone);

	return ret;
}

blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op,
				    sector_t sector, sector_t nr_sectors)
{
	struct nullb_device *dev;
	struct nullb_zone *zone;
	blk_status_t sts;

	switch (op) {
	case REQ_OP_WRITE:
		return null_zone_write(cmd, sector, nr_sectors, false);
	case REQ_OP_ZONE_APPEND:
		return null_zone_write(cmd, sector, nr_sectors, true);
	case REQ_OP_ZONE_RESET:
	case REQ_OP_ZONE_RESET_ALL:
	case REQ_OP_ZONE_OPEN:
	case REQ_OP_ZONE_CLOSE:
	case REQ_OP_ZONE_FINISH:
		return null_zone_mgmt(cmd, op, sector);
	default:
		dev = cmd->nq->dev;
		zone = &dev->zones[null_zone_no(dev, sector)];

		null_lock_zone(dev, zone);
		sts = null_process_cmd(cmd, op, sector, nr_sectors);
		null_unlock_zone(dev, zone);
		return sts;
	}
}