Linux 4.19-rc7
[linux-2.6/btrfs-unstable.git] / block / blk-zoned.c
blobc461cf63f1f40d4e2d3c2c80727411de66f8af06
1 /*
2 * Zoned block device handling
4 * Copyright (c) 2015, Hannes Reinecke
5 * Copyright (c) 2015, SUSE Linux GmbH
7 * Copyright (c) 2016, Damien Le Moal
8 * Copyright (c) 2016, Western Digital
9 */
11 #include <linux/kernel.h>
12 #include <linux/module.h>
13 #include <linux/rbtree.h>
14 #include <linux/blkdev.h>
16 static inline sector_t blk_zone_start(struct request_queue *q,
17 sector_t sector)
19 sector_t zone_mask = blk_queue_zone_sectors(q) - 1;
21 return sector & ~zone_mask;
25 * Return true if a request is a write requests that needs zone write locking.
27 bool blk_req_needs_zone_write_lock(struct request *rq)
29 if (!rq->q->seq_zones_wlock)
30 return false;
32 if (blk_rq_is_passthrough(rq))
33 return false;
35 switch (req_op(rq)) {
36 case REQ_OP_WRITE_ZEROES:
37 case REQ_OP_WRITE_SAME:
38 case REQ_OP_WRITE:
39 return blk_rq_zone_is_seq(rq);
40 default:
41 return false;
44 EXPORT_SYMBOL_GPL(blk_req_needs_zone_write_lock);
46 void __blk_req_zone_write_lock(struct request *rq)
48 if (WARN_ON_ONCE(test_and_set_bit(blk_rq_zone_no(rq),
49 rq->q->seq_zones_wlock)))
50 return;
52 WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED);
53 rq->rq_flags |= RQF_ZONE_WRITE_LOCKED;
55 EXPORT_SYMBOL_GPL(__blk_req_zone_write_lock);
57 void __blk_req_zone_write_unlock(struct request *rq)
59 rq->rq_flags &= ~RQF_ZONE_WRITE_LOCKED;
60 if (rq->q->seq_zones_wlock)
61 WARN_ON_ONCE(!test_and_clear_bit(blk_rq_zone_no(rq),
62 rq->q->seq_zones_wlock));
64 EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
67 * Check that a zone report belongs to the partition.
68 * If yes, fix its start sector and write pointer, copy it in the
69 * zone information array and return true. Return false otherwise.
71 static bool blkdev_report_zone(struct block_device *bdev,
72 struct blk_zone *rep,
73 struct blk_zone *zone)
75 sector_t offset = get_start_sect(bdev);
77 if (rep->start < offset)
78 return false;
80 rep->start -= offset;
81 if (rep->start + rep->len > bdev->bd_part->nr_sects)
82 return false;
84 if (rep->type == BLK_ZONE_TYPE_CONVENTIONAL)
85 rep->wp = rep->start + rep->len;
86 else
87 rep->wp -= offset;
88 memcpy(zone, rep, sizeof(struct blk_zone));
90 return true;
93 /**
94 * blkdev_report_zones - Get zones information
95 * @bdev: Target block device
96 * @sector: Sector from which to report zones
97 * @zones: Array of zone structures where to return the zones information
98 * @nr_zones: Number of zone structures in the zone array
99 * @gfp_mask: Memory allocation flags (for bio_alloc)
101 * Description:
102 * Get zone information starting from the zone containing @sector.
103 * The number of zone information reported may be less than the number
104 * requested by @nr_zones. The number of zones actually reported is
105 * returned in @nr_zones.
107 int blkdev_report_zones(struct block_device *bdev,
108 sector_t sector,
109 struct blk_zone *zones,
110 unsigned int *nr_zones,
111 gfp_t gfp_mask)
113 struct request_queue *q = bdev_get_queue(bdev);
114 struct blk_zone_report_hdr *hdr;
115 unsigned int nrz = *nr_zones;
116 struct page *page;
117 unsigned int nr_rep;
118 size_t rep_bytes;
119 unsigned int nr_pages;
120 struct bio *bio;
121 struct bio_vec *bv;
122 unsigned int i, n, nz;
123 unsigned int ofst;
124 void *addr;
125 int ret;
127 if (!q)
128 return -ENXIO;
130 if (!blk_queue_is_zoned(q))
131 return -EOPNOTSUPP;
133 if (!nrz)
134 return 0;
136 if (sector > bdev->bd_part->nr_sects) {
137 *nr_zones = 0;
138 return 0;
142 * The zone report has a header. So make room for it in the
143 * payload. Also make sure that the report fits in a single BIO
144 * that will not be split down the stack.
146 rep_bytes = sizeof(struct blk_zone_report_hdr) +
147 sizeof(struct blk_zone) * nrz;
148 rep_bytes = (rep_bytes + PAGE_SIZE - 1) & PAGE_MASK;
149 if (rep_bytes > (queue_max_sectors(q) << 9))
150 rep_bytes = queue_max_sectors(q) << 9;
152 nr_pages = min_t(unsigned int, BIO_MAX_PAGES,
153 rep_bytes >> PAGE_SHIFT);
154 nr_pages = min_t(unsigned int, nr_pages,
155 queue_max_segments(q));
157 bio = bio_alloc(gfp_mask, nr_pages);
158 if (!bio)
159 return -ENOMEM;
161 bio_set_dev(bio, bdev);
162 bio->bi_iter.bi_sector = blk_zone_start(q, sector);
163 bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0);
165 for (i = 0; i < nr_pages; i++) {
166 page = alloc_page(gfp_mask);
167 if (!page) {
168 ret = -ENOMEM;
169 goto out;
171 if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
172 __free_page(page);
173 break;
177 if (i == 0)
178 ret = -ENOMEM;
179 else
180 ret = submit_bio_wait(bio);
181 if (ret)
182 goto out;
185 * Process the report result: skip the header and go through the
186 * reported zones to fixup and fixup the zone information for
187 * partitions. At the same time, return the zone information into
188 * the zone array.
190 n = 0;
191 nz = 0;
192 nr_rep = 0;
193 bio_for_each_segment_all(bv, bio, i) {
195 if (!bv->bv_page)
196 break;
198 addr = kmap_atomic(bv->bv_page);
200 /* Get header in the first page */
201 ofst = 0;
202 if (!nr_rep) {
203 hdr = addr;
204 nr_rep = hdr->nr_zones;
205 ofst = sizeof(struct blk_zone_report_hdr);
208 /* Fixup and report zones */
209 while (ofst < bv->bv_len &&
210 n < nr_rep && nz < nrz) {
211 if (blkdev_report_zone(bdev, addr + ofst, &zones[nz]))
212 nz++;
213 ofst += sizeof(struct blk_zone);
214 n++;
217 kunmap_atomic(addr);
219 if (n >= nr_rep || nz >= nrz)
220 break;
224 *nr_zones = nz;
225 out:
226 bio_for_each_segment_all(bv, bio, i)
227 __free_page(bv->bv_page);
228 bio_put(bio);
230 return ret;
232 EXPORT_SYMBOL_GPL(blkdev_report_zones);
235 * blkdev_reset_zones - Reset zones write pointer
236 * @bdev: Target block device
237 * @sector: Start sector of the first zone to reset
238 * @nr_sectors: Number of sectors, at least the length of one zone
239 * @gfp_mask: Memory allocation flags (for bio_alloc)
241 * Description:
242 * Reset the write pointer of the zones contained in the range
243 * @sector..@sector+@nr_sectors. Specifying the entire disk sector range
244 * is valid, but the specified range should not contain conventional zones.
246 int blkdev_reset_zones(struct block_device *bdev,
247 sector_t sector, sector_t nr_sectors,
248 gfp_t gfp_mask)
250 struct request_queue *q = bdev_get_queue(bdev);
251 sector_t zone_sectors;
252 sector_t end_sector = sector + nr_sectors;
253 struct bio *bio;
254 int ret;
256 if (!q)
257 return -ENXIO;
259 if (!blk_queue_is_zoned(q))
260 return -EOPNOTSUPP;
262 if (end_sector > bdev->bd_part->nr_sects)
263 /* Out of range */
264 return -EINVAL;
266 /* Check alignment (handle eventual smaller last zone) */
267 zone_sectors = blk_queue_zone_sectors(q);
268 if (sector & (zone_sectors - 1))
269 return -EINVAL;
271 if ((nr_sectors & (zone_sectors - 1)) &&
272 end_sector != bdev->bd_part->nr_sects)
273 return -EINVAL;
275 while (sector < end_sector) {
277 bio = bio_alloc(gfp_mask, 0);
278 bio->bi_iter.bi_sector = sector;
279 bio_set_dev(bio, bdev);
280 bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);
282 ret = submit_bio_wait(bio);
283 bio_put(bio);
285 if (ret)
286 return ret;
288 sector += zone_sectors;
290 /* This may take a while, so be nice to others */
291 cond_resched();
295 return 0;
297 EXPORT_SYMBOL_GPL(blkdev_reset_zones);
300 * BLKREPORTZONE ioctl processing.
301 * Called from blkdev_ioctl.
303 int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
304 unsigned int cmd, unsigned long arg)
306 void __user *argp = (void __user *)arg;
307 struct request_queue *q;
308 struct blk_zone_report rep;
309 struct blk_zone *zones;
310 int ret;
312 if (!argp)
313 return -EINVAL;
315 q = bdev_get_queue(bdev);
316 if (!q)
317 return -ENXIO;
319 if (!blk_queue_is_zoned(q))
320 return -ENOTTY;
322 if (!capable(CAP_SYS_ADMIN))
323 return -EACCES;
325 if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
326 return -EFAULT;
328 if (!rep.nr_zones)
329 return -EINVAL;
331 if (rep.nr_zones > INT_MAX / sizeof(struct blk_zone))
332 return -ERANGE;
334 zones = kvmalloc_array(rep.nr_zones, sizeof(struct blk_zone),
335 GFP_KERNEL | __GFP_ZERO);
336 if (!zones)
337 return -ENOMEM;
339 ret = blkdev_report_zones(bdev, rep.sector,
340 zones, &rep.nr_zones,
341 GFP_KERNEL);
342 if (ret)
343 goto out;
345 if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) {
346 ret = -EFAULT;
347 goto out;
350 if (rep.nr_zones) {
351 if (copy_to_user(argp + sizeof(struct blk_zone_report), zones,
352 sizeof(struct blk_zone) * rep.nr_zones))
353 ret = -EFAULT;
356 out:
357 kvfree(zones);
359 return ret;
363 * BLKRESETZONE ioctl processing.
364 * Called from blkdev_ioctl.
366 int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
367 unsigned int cmd, unsigned long arg)
369 void __user *argp = (void __user *)arg;
370 struct request_queue *q;
371 struct blk_zone_range zrange;
373 if (!argp)
374 return -EINVAL;
376 q = bdev_get_queue(bdev);
377 if (!q)
378 return -ENXIO;
380 if (!blk_queue_is_zoned(q))
381 return -ENOTTY;
383 if (!capable(CAP_SYS_ADMIN))
384 return -EACCES;
386 if (!(mode & FMODE_WRITE))
387 return -EBADF;
389 if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
390 return -EFAULT;
392 return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
393 GFP_KERNEL);