/*
 * Zoned block device handling
 *
 * Copyright (c) 2015, Hannes Reinecke
 * Copyright (c) 2015, SUSE Linux GmbH
 *
 * Copyright (c) 2016, Damien Le Moal
 * Copyright (c) 2016, Western Digital
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/rbtree.h>
#include <linux/blkdev.h>

static inline sector_t blk_zone_start(struct request_queue *q,
				      sector_t sector)
{
	sector_t zone_mask = blk_queue_zone_sectors(q) - 1;

	return sector & ~zone_mask;
}
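/*
 * Illustrative note (not part of the original file): because the block layer
 * requires the zone size to be a power-of-two number of sectors, masking off
 * the low bits rounds a sector down to the start of its zone. For example,
 * with 524288-sector (256 MiB) zones:
 *
 *	blk_zone_start(q, 530000) == 524288
 *	blk_zone_start(q, 524288) == 524288
 */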
/*
 * Return true if a request is a write request that needs zone write locking.
 */
bool blk_req_needs_zone_write_lock(struct request *rq)
{
	if (!rq->q->seq_zones_wlock)
		return false;

	if (blk_rq_is_passthrough(rq))
		return false;

	switch (req_op(rq)) {
	case REQ_OP_WRITE_ZEROES:
	case REQ_OP_WRITE_SAME:
	case REQ_OP_WRITE:
		return blk_rq_zone_is_seq(rq);
	default:
		return false;
	}
}
EXPORT_SYMBOL_GPL(blk_req_needs_zone_write_lock);

void __blk_req_zone_write_lock(struct request *rq)
{
	if (WARN_ON_ONCE(test_and_set_bit(blk_rq_zone_no(rq),
					  rq->q->seq_zones_wlock)))
		return;

	WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED);
	rq->rq_flags |= RQF_ZONE_WRITE_LOCKED;
}
EXPORT_SYMBOL_GPL(__blk_req_zone_write_lock);

void __blk_req_zone_write_unlock(struct request *rq)
{
	rq->rq_flags &= ~RQF_ZONE_WRITE_LOCKED;
	if (rq->q->seq_zones_wlock)
		WARN_ON_ONCE(!test_and_clear_bit(blk_rq_zone_no(rq),
						 rq->q->seq_zones_wlock));
}
EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
/*
 * Check that a zone report belongs to the partition.
 * If yes, fix its start sector and write pointer, copy it in the
 * zone information array and return true. Return false otherwise.
 */
static bool blkdev_report_zone(struct block_device *bdev,
			       struct blk_zone *rep,
			       struct blk_zone *zone)
{
	sector_t offset = get_start_sect(bdev);

	if (rep->start < offset)
		return false;

	rep->start -= offset;
	if (rep->start + rep->len > bdev->bd_part->nr_sects)
		return false;

	if (rep->type == BLK_ZONE_TYPE_CONVENTIONAL)
		rep->wp = rep->start + rep->len;
	else
		rep->wp -= offset;
	memcpy(zone, rep, sizeof(struct blk_zone));

	return true;
}
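/*
 * Worked example (illustrative, not in the original file): for a partition
 * starting at device sector 262144, a device-absolute zone reported as
 * start = 524288, len = 524288, wp = 530000 is rewritten to
 * start = 262144, wp = 267856 so that the caller sees partition-relative
 * sectors. A zone that starts before the partition, or extends past its
 * last sector, is dropped and the function returns false.
 */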
/**
 * blkdev_report_zones - Get zones information
 * @bdev:	Target block device
 * @sector:	Sector from which to report zones
 * @zones:	Array of zone structures where to return the zones information
 * @nr_zones:	Number of zone structures in the zone array
 * @gfp_mask:	Memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Get zone information starting from the zone containing @sector.
 *    The number of zone information reported may be less than the number
 *    requested by @nr_zones. The number of zones actually reported is
 *    returned in @nr_zones.
 */
int blkdev_report_zones(struct block_device *bdev,
			sector_t sector,
			struct blk_zone *zones,
			unsigned int *nr_zones,
			gfp_t gfp_mask)
{
	struct request_queue *q = bdev_get_queue(bdev);
	struct blk_zone_report_hdr *hdr;
	unsigned int nrz = *nr_zones;
	size_t rep_bytes;
	unsigned int nr_pages;
	struct bio *bio;
	struct bio_vec *bv;
	struct page *page;
	unsigned int i, n, nz;
	unsigned int nr_rep = 0;
	unsigned int ofst;
	void *addr;
	int ret;

	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -EOPNOTSUPP;

	if (!nrz)
		return 0;

	if (sector > bdev->bd_part->nr_sects) {
		/* Out of range */
		*nr_zones = 0;
		return 0;
	}

	/*
	 * The zone report has a header. So make room for it in the
	 * payload. Also make sure that the report fits in a single BIO
	 * that will not be split down the stack.
	 */
	rep_bytes = sizeof(struct blk_zone_report_hdr) +
		sizeof(struct blk_zone) * nrz;
	rep_bytes = (rep_bytes + PAGE_SIZE - 1) & PAGE_MASK;
	if (rep_bytes > (queue_max_sectors(q) << 9))
		rep_bytes = queue_max_sectors(q) << 9;

	nr_pages = min_t(unsigned int, BIO_MAX_PAGES,
			 rep_bytes >> PAGE_SHIFT);
	nr_pages = min_t(unsigned int, nr_pages,
			 queue_max_segments(q));

	bio = bio_alloc(gfp_mask, nr_pages);
	if (!bio)
		return -ENOMEM;

	bio_set_dev(bio, bdev);
	bio->bi_iter.bi_sector = blk_zone_start(q, sector);
	bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0);

	for (i = 0; i < nr_pages; i++) {
		page = alloc_page(gfp_mask);
		if (!page) {
			ret = -ENOMEM;
			goto out;
		}
		if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
			__free_page(page);
			break;
		}
	}

	if (i == 0)
		ret = -ENOMEM;
	else
		ret = submit_bio_wait(bio);
	if (ret)
		goto out;

	/*
	 * Process the report result: skip the header and go through the
	 * reported zones to fix up the zone information for partitions.
	 * At the same time, return the zone information into the zone array.
	 */
	n = 0;
	nz = 0;
	bio_for_each_segment_all(bv, bio, i) {

		addr = kmap_atomic(bv->bv_page);

		/* Get header in the first page */
		ofst = 0;
		if (!nr_rep) {
			hdr = addr;
			nr_rep = hdr->nr_zones;
			ofst = sizeof(struct blk_zone_report_hdr);
		}

		/* Fixup and report zones */
		while (ofst < bv->bv_len &&
		       n < nr_rep && nz < nrz) {
			if (blkdev_report_zone(bdev, addr + ofst, &zones[nz]))
				nz++;
			ofst += sizeof(struct blk_zone);
			n++;
		}

		kunmap_atomic(addr);

		if (n >= nr_rep || nz >= nrz)
			break;

	}

	*nr_zones = nz;
out:
	bio_for_each_segment_all(bv, bio, i)
		__free_page(bv->bv_page);
	bio_put(bio);

	return ret;
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);
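/*
 * Illustrative sketch (not part of the original file): an in-kernel caller
 * passes an array and gets back how many entries were actually filled, so a
 * full device can be walked chunk by chunk. The function name and array size
 * below are hypothetical.
 */
#if 0
static int example_count_sequential_zones(struct block_device *bdev)
{
	struct blk_zone zones[16];
	unsigned int i, nr_zones;
	sector_t sector = 0;
	int ret, count = 0;

	do {
		nr_zones = ARRAY_SIZE(zones);
		ret = blkdev_report_zones(bdev, sector, zones, &nr_zones,
					  GFP_KERNEL);
		if (ret)
			return ret;
		for (i = 0; i < nr_zones; i++) {
			if (zones[i].type == BLK_ZONE_TYPE_SEQWRITE_REQ)
				count++;
			/* Continue from the end of the last reported zone. */
			sector = zones[i].start + zones[i].len;
		}
	} while (nr_zones);

	return count;
}
#endif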
/**
 * blkdev_reset_zones - Reset zones write pointer
 * @bdev:	Target block device
 * @sector:	Start sector of the first zone to reset
 * @nr_sectors:	Number of sectors, at least the length of one zone
 * @gfp_mask:	Memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Reset the write pointer of the zones contained in the range
 *    @sector..@sector+@nr_sectors. Specifying the entire disk sector range
 *    is valid, but the specified range should not contain conventional zones.
 */
int blkdev_reset_zones(struct block_device *bdev,
		       sector_t sector, sector_t nr_sectors,
		       gfp_t gfp_mask)
{
	struct request_queue *q = bdev_get_queue(bdev);
	sector_t zone_sectors;
	sector_t end_sector = sector + nr_sectors;
	struct bio *bio;
	int ret;

	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -EOPNOTSUPP;

	if (end_sector > bdev->bd_part->nr_sects)
		/* Out of range */
		return -EINVAL;

	/* Check alignment (handle a possibly smaller last zone) */
	zone_sectors = blk_queue_zone_sectors(q);
	if (sector & (zone_sectors - 1))
		return -EINVAL;

	if ((nr_sectors & (zone_sectors - 1)) &&
	    end_sector != bdev->bd_part->nr_sects)
		return -EINVAL;

	while (sector < end_sector) {

		bio = bio_alloc(gfp_mask, 0);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);

		ret = submit_bio_wait(bio);
		bio_put(bio);

		if (ret)
			return ret;

		sector += zone_sectors;

		/* This may take a while, so be nice to others */
		cond_resched();
	}

	return 0;
}
EXPORT_SYMBOL_GPL(blkdev_reset_zones);
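/*
 * Illustrative sketch (not part of the original file): resetting every zone
 * of a zoned block device in one call. The helper name is hypothetical.
 */
#if 0
static int example_reset_all_zones(struct block_device *bdev)
{
	/*
	 * The full device sector range is allowed as long as it contains
	 * no conventional zones.
	 */
	return blkdev_reset_zones(bdev, 0, bdev->bd_part->nr_sects,
				  GFP_KERNEL);
}
#endif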
/*
 * BLKREPORTZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
			      unsigned int cmd, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct request_queue *q;
	struct blk_zone_report rep;
	struct blk_zone *zones;
	int ret;

	if (!argp)
		return -EINVAL;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -ENOTTY;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
		return -EFAULT;

	if (!rep.nr_zones)
		return -EINVAL;

	if (rep.nr_zones > INT_MAX / sizeof(struct blk_zone))
		return -ERANGE;

	zones = kvmalloc_array(rep.nr_zones, sizeof(struct blk_zone),
			       GFP_KERNEL | __GFP_ZERO);
	if (!zones)
		return -ENOMEM;

	ret = blkdev_report_zones(bdev, rep.sector,
				  zones, &rep.nr_zones,
				  GFP_KERNEL);
	if (ret)
		goto out;

	if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) {
		ret = -EFAULT;
		goto out;
	}

	if (rep.nr_zones) {
		if (copy_to_user(argp + sizeof(struct blk_zone_report), zones,
				 sizeof(struct blk_zone) * rep.nr_zones))
			ret = -EFAULT;
	}

out:
	kvfree(zones);

	return ret;
}
/*
 * BLKRESETZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
			     unsigned int cmd, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct request_queue *q;
	struct blk_zone_range zrange;

	if (!argp)
		return -EINVAL;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -ENOTTY;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	if (!(mode & FMODE_WRITE))
		return -EBADF;

	if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
		return -EFAULT;

	return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
				  GFP_KERNEL);
}
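/*
 * Illustrative userspace usage (not part of the original file): the two
 * ioctls above back the BLKREPORTZONE and BLKRESETZONE commands defined in
 * <linux/blkzoned.h>. A minimal sketch, assuming a zoned device node opened
 * read-write on file descriptor fd and a hypothetical 524288-sector range:
 *
 *	struct blk_zone_range range = { .sector = 0, .nr_sectors = 524288 };
 *
 *	if (ioctl(fd, BLKRESETZONE, &range) < 0)
 *		perror("BLKRESETZONE");
 *
 * BLKREPORTZONE takes a struct blk_zone_report header immediately followed
 * in memory by nr_zones struct blk_zone entries, matching the layout copied
 * out by blkdev_report_zones_ioctl() above.
 */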