// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2021 Western Digital Corporation or its affiliates.
 */

#include <linux/blkdev.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/bitmap.h>

#include "dm-core.h"

#define DM_MSG_PREFIX "zone"
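
/*
 * The zone report helpers below share their state through
 * struct dm_report_zones_args (declared in include/linux/device-mapper.h):
 * tgt and start identify the target currently being reported and its mapping
 * start sector, next_sector tracks report progress, zone_idx counts the zones
 * reported so far, and orig_cb/orig_data are the caller's callback and context.
 */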

/*
 * For internal zone reports bypassing the top BIO submission path.
 */
static int dm_blk_do_report_zones(struct mapped_device *md, struct dm_table *t,
				  sector_t sector, unsigned int nr_zones,
				  report_zones_cb cb, void *data)
{
	struct gendisk *disk = md->disk;
	int ret;
	struct dm_report_zones_args args = {
		.next_sector = sector,
		.orig_data = data,
		.orig_cb = cb,
	};

	do {
		struct dm_target *tgt;

		tgt = dm_table_find_target(t, args.next_sector);
		if (WARN_ON_ONCE(!tgt->type->report_zones))
			return -EIO;

		args.tgt = tgt;
		ret = tgt->type->report_zones(tgt, &args,
					      nr_zones - args.zone_idx);
		if (ret < 0)
			return ret;
	} while (args.zone_idx < nr_zones &&
		 args.next_sector < get_capacity(disk));

	return args.zone_idx;
}

/*
 * User facing dm device block device report zone operation. This calls the
 * report_zones operation for each target of a device table. This operation is
 * generally implemented by targets using dm_report_zones().
 */
int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
			unsigned int nr_zones, report_zones_cb cb, void *data)
{
	struct mapped_device *md = disk->private_data;
	struct dm_table *map;
	int srcu_idx, ret;

	if (!md->zone_revalidate_map) {
		/* Regular user context */
		if (dm_suspended_md(md))
			return -EAGAIN;

		map = dm_get_live_table(md, &srcu_idx);
		if (!map)
			return -EIO;
	} else {
		/* Zone revalidation during __bind() */
		map = md->zone_revalidate_map;
	}

	ret = dm_blk_do_report_zones(md, map, sector, nr_zones, cb, data);

	if (!md->zone_revalidate_map)
		dm_put_live_table(md, srcu_idx);

	return ret;
}

static int dm_report_zones_cb(struct blk_zone *zone, unsigned int idx,
			      void *data)
{
	struct dm_report_zones_args *args = data;
	sector_t sector_diff = args->tgt->begin - args->start;

	/*
	 * Ignore zones beyond the target range.
	 */
	if (zone->start >= args->start + args->tgt->len)
		return 0;

	/*
	 * Remap the start sector and write pointer position of the zone
	 * to match its position in the target range.
	 */
	zone->start += sector_diff;
	if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) {
		if (zone->cond == BLK_ZONE_COND_FULL)
			zone->wp = zone->start + zone->len;
		else if (zone->cond == BLK_ZONE_COND_EMPTY)
			zone->wp = zone->start;
		else
			zone->wp += sector_diff;
	}

	args->next_sector = zone->start + zone->len;
	return args->orig_cb(zone, args->zone_idx++, args->orig_data);
}

/*
 * Helper for drivers of zoned targets to implement struct target_type
 * report_zones operation.
 */
int dm_report_zones(struct block_device *bdev, sector_t start, sector_t sector,
		    struct dm_report_zones_args *args, unsigned int nr_zones)
{
	/*
	 * Set the target mapping start sector first so that
	 * dm_report_zones_cb() can correctly remap zone information.
	 */
	args->start = start;

	return blkdev_report_zones(bdev, sector, nr_zones,
				   dm_report_zones_cb, args);
}
EXPORT_SYMBOL_GPL(dm_report_zones);
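
/*
 * Example (illustrative sketch, not part of this file): a simple linear-style
 * target could wire its report_zones operation to the helper above roughly as
 * follows, remapping the report start sector from the target range to the
 * underlying device before calling dm_report_zones(). The names lc, lc->dev
 * and lc->start are hypothetical target state.
 *
 *	static int linear_report_zones(struct dm_target *ti,
 *			struct dm_report_zones_args *args,
 *			unsigned int nr_zones)
 *	{
 *		struct linear_c *lc = ti->private;
 *		sector_t sector = lc->start +
 *			dm_target_offset(ti, args->next_sector);
 *
 *		return dm_report_zones(lc->dev->bdev, lc->start, sector,
 *				       args, nr_zones);
 *	}
 */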

/*
 * Check if a BIO is a write operation with a payload directed at a zoned
 * mapped device.
 */
bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
{
	struct request_queue *q = md->queue;

	if (!blk_queue_is_zoned(q))
		return false;

	switch (bio_op(bio)) {
	case REQ_OP_WRITE_ZEROES:
	case REQ_OP_WRITE:
		return !op_is_flush(bio->bi_opf) && bio_sectors(bio);
	default:
		return false;
	}
}

/*
 * Revalidate the zones of a mapped device to initialize the resources
 * necessary for zone append emulation. Note that we cannot simply use the
 * block layer blk_revalidate_disk_zones() function here as the mapped device
 * is suspended (this is called from __bind() context).
 */
int dm_revalidate_zones(struct dm_table *t, struct request_queue *q)
{
	struct mapped_device *md = t->md;
	struct gendisk *disk = md->disk;
	int ret;

	if (!get_capacity(disk))
		return 0;

	/* Revalidate only if something changed. */
	if (!disk->nr_zones || disk->nr_zones != md->nr_zones) {
		DMINFO("%s using %s zone append",
		       disk->disk_name,
		       queue_emulates_zone_append(q) ? "emulated" : "native");
		md->nr_zones = 0;
	}

	if (md->nr_zones)
		return 0;

	/*
	 * Our table is not live yet. So the call to dm_get_live_table()
	 * in dm_blk_report_zones() will fail. Set a temporary pointer to
	 * our table for dm_blk_report_zones() to use directly.
	 */
	md->zone_revalidate_map = t;
	ret = blk_revalidate_disk_zones(disk);
	md->zone_revalidate_map = NULL;

	if (ret) {
		DMERR("Revalidate zones failed %d", ret);
		return ret;
	}

	md->nr_zones = disk->nr_zones;

	return 0;
}

static int device_not_zone_append_capable(struct dm_target *ti,
					  struct dm_dev *dev, sector_t start,
					  sector_t len, void *data)
{
	return !bdev_is_zoned(dev->bdev);
}
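
/*
 * A table can rely on native zone append only if no target requested zone
 * append emulation and every device used by every target is zoned.
 */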
static bool dm_table_supports_zone_append(struct dm_table *t)
{
	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		if (ti->emulate_zone_append)
			return false;

		if (!ti->type->iterate_devices ||
		    ti->type->iterate_devices(ti, device_not_zone_append_capable, NULL))
			return false;
	}

	return true;
}

struct dm_device_zone_count {
	sector_t start;
	sector_t len;
	unsigned int total_nr_seq_zones;
	unsigned int target_nr_seq_zones;
};

/*
 * Count the total number of and the number of mapped sequential zones of a
 * target zoned device.
 */
static int dm_device_count_zones_cb(struct blk_zone *zone,
				    unsigned int idx, void *data)
{
	struct dm_device_zone_count *zc = data;

	if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) {
		zc->total_nr_seq_zones++;
		if (zone->start >= zc->start &&
		    zone->start < zc->start + zc->len)
			zc->target_nr_seq_zones++;
	}

	return 0;
}

static int dm_device_count_zones(struct dm_dev *dev,
				 struct dm_device_zone_count *zc)
{
	int ret;

	ret = blkdev_report_zones(dev->bdev, 0, BLK_ALL_ZONES,
				  dm_device_count_zones_cb, zc);
	if (ret < 0)
		return ret;
	if (!ret)
		return -EIO;
	return 0;
}
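
/*
 * Zone resource limits accumulated while iterating over the devices of all
 * targets in dm_set_zones_restrictions(): the queue_limits being built, the
 * total number of sequential zones mapped, and whether the open/active zone
 * limits can be trusted (that is, no sharing of device zone resources with
 * zones that are not mapped by this table).
 */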
struct dm_zone_resource_limits {
	unsigned int mapped_nr_seq_zones;
	struct queue_limits *lim;
	bool reliable_limits;
};

static int device_get_zone_resource_limits(struct dm_target *ti,
					   struct dm_dev *dev, sector_t start,
					   sector_t len, void *data)
{
	struct dm_zone_resource_limits *zlim = data;
	struct gendisk *disk = dev->bdev->bd_disk;
	unsigned int max_open_zones, max_active_zones;
	int ret;
	struct dm_device_zone_count zc = {
		.start = start,
		.len = len,
	};

	/*
	 * If the target is not the whole device, the device zone resources may
	 * be shared between different targets. Check this by counting the
	 * number of mapped sequential zones: if this number is smaller than
	 * the total number of sequential zones of the target device, then
	 * resource sharing may happen and the zone limits will not be
	 * reliable.
	 */
	ret = dm_device_count_zones(dev, &zc);
	if (ret) {
		DMERR("Count %s zones failed %d", disk->disk_name, ret);
		return ret;
	}

	/*
	 * If the target does not map any sequential zones, then we do not need
	 * any zone resource limits.
	 */
	if (!zc.target_nr_seq_zones)
		return 0;

	/*
	 * If the target does not map all sequential zones, the limits
	 * will not be reliable and we cannot use REQ_OP_ZONE_RESET_ALL.
	 */
	if (zc.target_nr_seq_zones < zc.total_nr_seq_zones) {
		zlim->reliable_limits = false;
		ti->zone_reset_all_supported = false;
	}

	/*
	 * If the target maps fewer sequential zones than the limit values,
	 * then we do not have limits for this target.
	 */
	max_active_zones = disk->queue->limits.max_active_zones;
	if (max_active_zones >= zc.target_nr_seq_zones)
		max_active_zones = 0;
	zlim->lim->max_active_zones =
		min_not_zero(max_active_zones, zlim->lim->max_active_zones);

	max_open_zones = disk->queue->limits.max_open_zones;
	if (max_open_zones >= zc.target_nr_seq_zones)
		max_open_zones = 0;
	zlim->lim->max_open_zones =
		min_not_zero(max_open_zones, zlim->lim->max_open_zones);
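
	/*
	 * Worked example with hypothetical numbers: a device advertising
	 * max_open_zones = 128 of which this target maps only 64 sequential
	 * zones contributes no limit (128 >= 64, so the value is cleared
	 * above), while a device limited to 32 open zones with 1000 mapped
	 * sequential zones contributes 32; min_not_zero() then keeps the
	 * smallest non-zero limit seen across all inspected devices.
	 */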

	/*
	 * Also count the total number of sequential zones for the mapped
	 * device so that when we are done inspecting all its targets, we are
	 * able to check if the mapped device actually has any sequential
	 * zones.
	 */
	zlim->mapped_nr_seq_zones += zc.target_nr_seq_zones;

	return 0;
}

int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
			      struct queue_limits *lim)
{
	struct mapped_device *md = t->md;
	struct gendisk *disk = md->disk;
	struct dm_zone_resource_limits zlim = {
		.reliable_limits = true,
		.lim = lim,
	};

	/*
	 * Check if zone append is natively supported, and if not, set the
	 * mapped device queue as needing zone append emulation.
	 */
	WARN_ON_ONCE(queue_is_mq(q));
	if (dm_table_supports_zone_append(t)) {
		clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
	} else {
		set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
		lim->max_hw_zone_append_sectors = 0;
	}
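
	/*
	 * Note (assumption based on the block layer zone write plugging
	 * behavior, not on code in this file): a zero
	 * max_hw_zone_append_sectors limit advertises that the mapped device
	 * has no native zone append support, in which case REQ_OP_ZONE_APPEND
	 * BIOs are emulated using regular writes.
	 */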

	/*
	 * Determine the max open and max active zone limits for the mapped
	 * device by inspecting the zone resource limits and the zones mapped
	 * by each target.
	 */
	for (unsigned int i = 0; i < t->num_targets; i++) {
		struct dm_target *ti = dm_table_get_target(t, i);

		/*
		 * Assume that the target can accept REQ_OP_ZONE_RESET_ALL.
		 * device_get_zone_resource_limits() may adjust this if one of
		 * the devices used by the target does not have all of its
		 * sequential write required zones mapped.
		 */
		ti->zone_reset_all_supported = true;

		if (!ti->type->iterate_devices ||
		    ti->type->iterate_devices(ti,
				device_get_zone_resource_limits, &zlim)) {
			DMERR("Could not determine %s zone resource limits",
			      disk->disk_name);
			return -ENODEV;
		}
	}

	/*
	 * If we only have conventional zones mapped, expose the mapped device
	 * as a regular device.
	 */
	if (!zlim.mapped_nr_seq_zones) {
		lim->max_open_zones = 0;
		lim->max_active_zones = 0;
		lim->max_hw_zone_append_sectors = 0;
		lim->zone_write_granularity = 0;
		lim->chunk_sectors = 0;
		lim->features &= ~BLK_FEAT_ZONED;
		clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
		md->nr_zones = 0;
		disk->nr_zones = 0;
		return 0;
	}

	/*
	 * Warn once (when the capacity is not yet set) if the mapped device is
	 * partially using zone resources of the target devices, as that leads
	 * to unreliable limits: if another mapped device uses the same
	 * underlying devices, we cannot enforce zone limits to guarantee that
	 * writing will not lead to errors. Note that we really should return
	 * an error in such a case, but there is no easy way to find out if
	 * another mapped device uses the same underlying zoned devices.
	 */
	if (!get_capacity(disk) && !zlim.reliable_limits)
		DMWARN("%s zone resource limits may be unreliable",
		       disk->disk_name);

	if (lim->features & BLK_FEAT_ZONED &&
	    !static_key_enabled(&zoned_enabled.key))
		static_branch_enable(&zoned_enabled);

	return 0;
}

/*
 * IO completion callback called from clone_endio().
 */
void dm_zone_endio(struct dm_io *io, struct bio *clone)
{
	struct mapped_device *md = io->md;
	struct gendisk *disk = md->disk;
	struct bio *orig_bio = io->orig_bio;

	/*
	 * Get the offset within the zone of the written sector
	 * and add that to the original bio sector position.
	 */
	if (clone->bi_status == BLK_STS_OK &&
	    bio_op(clone) == REQ_OP_ZONE_APPEND) {
		sector_t mask = bdev_zone_sectors(disk->part0) - 1;
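
		/*
		 * Worked example with hypothetical numbers: for 524288-sector
		 * zones (256 MiB), mask = 0x7ffff. A clone completing at
		 * bi_sector = 1049600 (zone start 1048576 + offset 1024)
		 * yields 1049600 & mask = 1024, which is added to the original
		 * BIO sector that still points at the start of the target
		 * zone in the mapped device.
		 */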
		orig_bio->bi_iter.bi_sector += clone->bi_iter.bi_sector & mask;
	}
}

static int dm_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx,
				 void *data)
{
	/*
	 * For an all-zones reset, ignore conventional, empty, read-only
	 * and offline zones.
	 */
	switch (zone->cond) {
	case BLK_ZONE_COND_NOT_WP:
	case BLK_ZONE_COND_EMPTY:
	case BLK_ZONE_COND_READONLY:
	case BLK_ZONE_COND_OFFLINE:
		return 0;
	default:
		set_bit(idx, (unsigned long *)data);
		return 0;
	}
}

int dm_zone_get_reset_bitmap(struct mapped_device *md, struct dm_table *t,
			     sector_t sector, unsigned int nr_zones,
			     unsigned long *need_reset)
{
	int ret;

	ret = dm_blk_do_report_zones(md, t, sector, nr_zones,
				     dm_zone_need_reset_cb, need_reset);
	if (ret != nr_zones) {
		DMERR("Get %s zone reset bitmap failed\n",
		      md->disk->disk_name);
		return -EIO;
	}

	return 0;
}
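
/*
 * Note (assumption about the caller, which lives in dm.c rather than here):
 * the need_reset bitmap built above is used when dm core emulates
 * REQ_OP_ZONE_RESET_ALL for a table where a target cannot accept it,
 * issuing individual zone resets only for the zones marked in the bitmap.
 */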
);