1 // SPDX-License-Identifier: GPL-2.0
3 * Code extracted from drivers/block/genhd.c
4 * Copyright (C) 1991-1998 Linus Torvalds
5 * Re-organised Feb 1998 Russell King
7 * We now have independent partition support from the
8 * block drivers, which allows all the partition code to
9 * be grouped in one location, and it to be mostly self
13 #include <linux/init.h>
14 #include <linux/module.h>
16 #include <linux/slab.h>
17 #include <linux/kmod.h>
18 #include <linux/ctype.h>
19 #include <linux/genhd.h>
20 #include <linux/blktrace_api.h>
22 #include "partitions/check.h"
/*
 * Declaration of the MD autodetect hook, visible only when
 * CONFIG_BLK_DEV_MD is set.  It is called further down in this file for
 * partitions whose flags include ADDPART_FLAG_RAID.
 */
24 #ifdef CONFIG_BLK_DEV_MD
25 extern void md_autodetect_dev(dev_t dev
);
29 * disk_name() is used by partition check code and the genhd driver.
30 * It formats the devicename of the indicated disk into
31 * the supplied buffer (of size at least 32), and returns
32 * a pointer to that same buffer (for convenience).
/*
 * disk_name - format the name of a disk or of one of its partitions.
 * @hd:     gendisk whose ->disk_name is the base of the result
 * @partno: partition number appended in the partition forms
 * @buf:    destination; every write is bounded by BDEVNAME_SIZE
 *
 * Three output forms are produced:
 *   "<disk_name>"            - bare disk name
 *   "<disk_name>p<partno>"   - when the last character of the disk name
 *                              is a digit (a 'p' separator is inserted)
 *   "<disk_name><partno>"    - remaining case
 *
 * NOTE(review): the test that selects the bare-name form is not visible
 * in this excerpt; presumably it is "partno == 0" - confirm.
 */
35 char *disk_name(struct gendisk
*hd
, int partno
, char *buf
)
/* bare disk name, no partition suffix */
38 snprintf(buf
, BDEVNAME_SIZE
, "%s", hd
->disk_name
);
/* base name ends in a digit: separate the partition number with 'p' */
39 else if (isdigit(hd
->disk_name
[strlen(hd
->disk_name
)-1]))
40 snprintf(buf
, BDEVNAME_SIZE
, "%sp%d", hd
->disk_name
, partno
);
/* partition number appended directly to the base name */
42 snprintf(buf
, BDEVNAME_SIZE
, "%s%d", hd
->disk_name
, partno
);
/*
 * bdevname - name of a block device, formatted into @buf.
 * @bdev: block device; its ->bd_disk and ->bd_part->partno are used
 * @buf:  destination buffer handed through to disk_name()
 *
 * Returns whatever disk_name() returns for that disk/partition pair.
 * The symbol is exported for use by modules.
 */
47 const char *bdevname(struct block_device
*bdev
, char *buf
)
49 return disk_name(bdev
->bd_disk
, bdev
->bd_part
->partno
, buf
);
52 EXPORT_SYMBOL(bdevname
);
/*
 * bio_devname - name of the device a bio is aimed at, formatted into @buf.
 * @bio: bio; its ->bi_disk and ->bi_partno are used
 * @buf: destination buffer handed through to disk_name()
 *
 * Returns whatever disk_name() returns for that disk/partition pair.
 * The symbol is exported for use by modules.
 */
54 const char *bio_devname(struct bio
*bio
, char *buf
)
56 return disk_name(bio
->bi_disk
, bio
->bi_partno
, buf
);
58 EXPORT_SYMBOL(bio_devname
);
61 * There's very little reason to use this, you should really
62 * have a struct block_device just about everywhere and use
/*
 * __bdevname - describe a device number when only a dev_t is at hand.
 * @dev:    device number; split with MAJOR() / MINOR()
 * @buffer: destination; the write is bounded by BDEVNAME_SIZE
 *
 * Writes "unknown-block(<major>,<minor>)" into @buffer.  The return
 * statement is not visible in this excerpt.  The symbol is exported.
 */
65 const char *__bdevname(dev_t dev
, char *buffer
)
67 scnprintf(buffer
, BDEVNAME_SIZE
, "unknown-block(%u,%u)",
68 MAJOR(dev
), MINOR(dev
));
72 EXPORT_SYMBOL(__bdevname
);
/*
 * Show handler for the "partition" device attribute: prints the
 * partition number (p->partno) as a decimal followed by a newline and
 * returns the sprintf() result.
 */
74 static ssize_t
part_partition_show(struct device
*dev
,
75 struct device_attribute
*attr
, char *buf
)
77 struct hd_struct
*p
= dev_to_part(dev
);
79 return sprintf(buf
, "%d\n", p
->partno
);
/*
 * Show handler for the "start" device attribute: prints p->start_sect
 * as an unsigned decimal followed by a newline.  The value is cast to
 * unsigned long long so that "%llu" matches the argument type.
 */
82 static ssize_t
part_start_show(struct device
*dev
,
83 struct device_attribute
*attr
, char *buf
)
85 struct hd_struct
*p
= dev_to_part(dev
);
87 return sprintf(buf
, "%llu\n",(unsigned long long)p
->start_sect
);
/*
 * Show handler for the "size" device attribute: prints the value
 * returned by part_nr_sects_read() for this partition as an unsigned
 * decimal followed by a newline.  Not static, so it can be referenced
 * from outside this file.
 */
90 ssize_t
part_size_show(struct device
*dev
,
91 struct device_attribute
*attr
, char *buf
)
93 struct hd_struct
*p
= dev_to_part(dev
);
94 return sprintf(buf
, "%llu\n",(unsigned long long)part_nr_sects_read(p
));
/*
 * Show handler for the "ro" device attribute: prints "1" when
 * p->policy is non-zero and "0" otherwise, followed by a newline.
 */
97 static ssize_t
part_ro_show(struct device
*dev
,
98 struct device_attribute
*attr
, char *buf
)
100 struct hd_struct
*p
= dev_to_part(dev
);
101 return sprintf(buf
, "%d\n", p
->policy
? 1 : 0);
/*
 * Show handler for the "alignment_offset" device attribute: prints
 * p->alignment_offset as an unsigned decimal followed by a newline.
 */
104 static ssize_t
part_alignment_offset_show(struct device
*dev
,
105 struct device_attribute
*attr
, char *buf
)
107 struct hd_struct
*p
= dev_to_part(dev
);
108 return sprintf(buf
, "%llu\n", (unsigned long long)p
->alignment_offset
);
/*
 * Show handler for the "discard_alignment" device attribute: prints
 * p->discard_alignment as an unsigned decimal followed by a newline.
 */
111 static ssize_t
part_discard_alignment_show(struct device
*dev
,
112 struct device_attribute
*attr
, char *buf
)
114 struct hd_struct
*p
= dev_to_part(dev
);
115 return sprintf(buf
, "%u\n", p
->discard_alignment
);
/*
 * Show handler for the "stat" device attribute.
 *
 * Looks up the partition and its disk's request queue, samples the
 * in-flight count with part_in_flight(), and supplies the per-partition
 * I/O statistics as format arguments in this order:
 *   READ:    ios, merges, sectors, milliseconds
 *   WRITE:   ios, merges, sectors, milliseconds
 *   io_ticks and time_in_queue, each converted with jiffies_to_msecs()
 *   DISCARD: ios, merges, sectors, milliseconds
 *   FLUSH:   ios, milliseconds
 *
 * NOTE(review): the formatting call itself, part of the format string,
 * and the argument that would carry `inflight` are not visible in this
 * excerpt; confirm against the full source.
 */
118 ssize_t
part_stat_show(struct device
*dev
,
119 struct device_attribute
*attr
, char *buf
)
121 struct hd_struct
*p
= dev_to_part(dev
);
122 struct request_queue
*q
= part_to_disk(p
)->queue
;
123 unsigned int inflight
;
125 inflight
= part_in_flight(q
, p
);
127 "%8lu %8lu %8llu %8u "
128 "%8lu %8lu %8llu %8u "
130 "%8lu %8lu %8llu %8u "
133 part_stat_read(p
, ios
[STAT_READ
]),
134 part_stat_read(p
, merges
[STAT_READ
]),
135 (unsigned long long)part_stat_read(p
, sectors
[STAT_READ
]),
136 (unsigned int)part_stat_read_msecs(p
, STAT_READ
),
137 part_stat_read(p
, ios
[STAT_WRITE
]),
138 part_stat_read(p
, merges
[STAT_WRITE
]),
139 (unsigned long long)part_stat_read(p
, sectors
[STAT_WRITE
]),
140 (unsigned int)part_stat_read_msecs(p
, STAT_WRITE
),
142 jiffies_to_msecs(part_stat_read(p
, io_ticks
)),
143 jiffies_to_msecs(part_stat_read(p
, time_in_queue
)),
144 part_stat_read(p
, ios
[STAT_DISCARD
]),
145 part_stat_read(p
, merges
[STAT_DISCARD
]),
146 (unsigned long long)part_stat_read(p
, sectors
[STAT_DISCARD
]),
147 (unsigned int)part_stat_read_msecs(p
, STAT_DISCARD
),
148 part_stat_read(p
, ios
[STAT_FLUSH
]),
149 (unsigned int)part_stat_read_msecs(p
, STAT_FLUSH
));
/*
 * Show handler for the "inflight" device attribute.
 *
 * part_in_flight_rw() fills a two-element counter array for this
 * partition; both elements are printed as "%8u %8u" followed by a
 * newline.
 * NOTE(review): presumably element 0 is reads and element 1 is writes;
 * that mapping is decided inside part_in_flight_rw() - confirm.
 */
152 ssize_t
part_inflight_show(struct device
*dev
, struct device_attribute
*attr
,
155 struct hd_struct
*p
= dev_to_part(dev
);
156 struct request_queue
*q
= part_to_disk(p
)->queue
;
157 unsigned int inflight
[2];
159 part_in_flight_rw(q
, p
, inflight
);
160 return sprintf(buf
, "%8u %8u\n", inflight
[0], inflight
[1]);
163 #ifdef CONFIG_FAIL_MAKE_REQUEST
/*
 * Show handler for the fault-injection attribute (this region is under
 * CONFIG_FAIL_MAKE_REQUEST): prints p->make_it_fail as a decimal
 * followed by a newline.
 */
164 ssize_t
part_fail_show(struct device
*dev
,
165 struct device_attribute
*attr
, char *buf
)
167 struct hd_struct
*p
= dev_to_part(dev
);
169 return sprintf(buf
, "%d\n", p
->make_it_fail
);
/*
 * Store handler for the fault-injection attribute.
 * @buf:   text written by the user
 * @count: number of bytes in @buf
 *
 * When @count is non-zero and an integer can be parsed from @buf, the
 * partition's make_it_fail flag is normalised to 0 (parsed value is 0)
 * or 1 (any other value).  Input that does not parse leaves the flag
 * untouched.
 * NOTE(review): the declaration of `i` and the return statement are not
 * visible in this excerpt.
 */
172 ssize_t
part_fail_store(struct device
*dev
,
173 struct device_attribute
*attr
,
174 const char *buf
, size_t count
)
176 struct hd_struct
*p
= dev_to_part(dev
);
179 if (count
> 0 && sscanf(buf
, "%d", &i
) > 0)
180 p
->make_it_fail
= (i
== 0) ? 0 : 1;
/*
 * Device attribute definitions for partition devices.  Each DEVICE_ATTR
 * below is mode 0444 with a show handler and no store handler (NULL).
 */
186 static DEVICE_ATTR(partition
, 0444, part_partition_show
, NULL
);
187 static DEVICE_ATTR(start
, 0444, part_start_show
, NULL
);
188 static DEVICE_ATTR(size
, 0444, part_size_show
, NULL
);
189 static DEVICE_ATTR(ro
, 0444, part_ro_show
, NULL
);
190 static DEVICE_ATTR(alignment_offset
, 0444, part_alignment_offset_show
, NULL
);
191 static DEVICE_ATTR(discard_alignment
, 0444, part_discard_alignment_show
, NULL
);
192 static DEVICE_ATTR(stat
, 0444, part_stat_show
, NULL
);
193 static DEVICE_ATTR(inflight
, 0444, part_inflight_show
, NULL
);
/*
 * The fault-injection attribute is spelled out with __ATTR() rather
 * than DEVICE_ATTR(): its name contains dashes ("make-it-fail"), while
 * the variable is named dev_attr_fail.  It is mode 0644 and has both a
 * show and a store handler.
 */
194 #ifdef CONFIG_FAIL_MAKE_REQUEST
195 static struct device_attribute dev_attr_fail
=
196 __ATTR(make
-it
-fail
, 0644, part_fail_show
, part_fail_store
);
/*
 * Table of attribute pointers for partition devices, built from the
 * dev_attr_* objects defined above.
 * NOTE(review): some entries and the terminator are not visible in this
 * excerpt; the trailing #ifdef guards an entry that is present only
 * with CONFIG_FAIL_MAKE_REQUEST.
 */
199 static struct attribute
*part_attrs
[] = {
200 &dev_attr_partition
.attr
,
201 &dev_attr_start
.attr
,
204 &dev_attr_alignment_offset
.attr
,
205 &dev_attr_discard_alignment
.attr
,
207 &dev_attr_inflight
.attr
,
208 #ifdef CONFIG_FAIL_MAKE_REQUEST
/*
 * part_attr_group: attribute group for partition devices (its
 * initializer is not visible in this excerpt).
 */
214 static struct attribute_group part_attr_group
= {
/*
 * part_attr_groups: list of attribute groups referenced by part_type
 * (.groups).  blk_trace_attr_group is included only when
 * CONFIG_BLK_DEV_IO_TRACE is set.
 */
218 static const struct attribute_group
*part_attr_groups
[] = {
220 #ifdef CONFIG_BLK_DEV_IO_TRACE
221 &blk_trace_attr_group
,
/*
 * Release callback wired into part_type (.release).  Gives the
 * partition's device number back with blk_free_devt().
 * NOTE(review): the remaining teardown that uses `p` is not visible in
 * this excerpt.
 */
226 static void part_release(struct device
*dev
)
228 struct hd_struct
*p
= dev_to_part(dev
);
229 blk_free_devt(dev
->devt
);
/*
 * Uevent callback wired into part_type (.uevent).  Adds variables to
 * the environment of every uevent sent for a partition device:
 *   PARTN=<partno>      always
 *   PARTNAME=<volname>  only when the partition has meta info and its
 *                       volume name is non-empty
 * The return statement is not visible in this excerpt.
 */
234 static int part_uevent(struct device
*dev
, struct kobj_uevent_env
*env
)
236 struct hd_struct
*part
= dev_to_part(dev
);
238 add_uevent_var(env
, "PARTN=%u", part
->partno
);
/* volname[0] != 0 means the volume name is a non-empty string */
239 if (part
->info
&& part
->info
->volname
[0])
240 add_uevent_var(env
, "PARTNAME=%s", part
->info
->volname
);
/*
 * Device type shared by all partition devices: supplies their attribute
 * groups, release callback and uevent callback.  add_partition()
 * assigns it to each new partition device (pdev->type).
 */
244 struct device_type part_type
= {
246 .groups
= part_attr_groups
,
247 .release
= part_release
,
248 .uevent
= part_uevent
,
/*
 * Work function queued by __delete_partition() through the RCU work
 * mechanism.  Recovers the hd_struct from the work item, clears its
 * start sector, zeroes all of its statistics and drops a reference on
 * the partition's device with put_device().
 * NOTE(review): the member name passed to container_of() and any
 * further field resets are not visible in this excerpt.
 */
251 static void delete_partition_work_fn(struct work_struct
*work
)
253 struct hd_struct
*part
= container_of(to_rcu_work(work
), struct hd_struct
,
256 part
->start_sect
= 0;
258 part_stat_set_all(part
, 0);
259 put_device(part_to_dev(part
));
/*
 * __delete_partition - defer the final teardown of a partition.
 * @ref: the percpu_ref embedded in the hd_struct (member "ref")
 *
 * Recovers the hd_struct from @ref, then initialises its rcu_work with
 * delete_partition_work_fn() and queues it on system_wq via
 * queue_rcu_work(), so the teardown does not run in the caller's
 * context.
 * NOTE(review): presumably installed as the percpu_ref release
 * callback by hd_ref_init() - confirm.
 */
262 void __delete_partition(struct percpu_ref
*ref
)
264 struct hd_struct
*part
= container_of(ref
, struct hd_struct
, ref
);
265 INIT_RCU_WORK(&part
->rcu_work
, delete_partition_work_fn
);
266 queue_rcu_work(system_wq
, &part
->rcu_work
);
270 * Must be called either with bd_mutex held, before a disk can be opened or
271 * after all disk users are gone.
/*
 * delete_partition - unpublish and tear down partition @partno of @disk.
 * @disk:   disk owning the partition table
 * @partno: index into the partition table
 *
 * Visible steps, in order:
 *   - bounds-check @partno against the table length;
 *   - clear the table slot and the last_lookup cache with
 *     rcu_assign_pointer(), so new lookups no longer find the partition;
 *   - drop the "holders" kobject and remove the device with device_del();
 *   - invalidate the device number and kill the partition's reference
 *     with hd_struct_kill().
 *
 * rcu_dereference_protected(..., 1) is used because the caller provides
 * the exclusion described in the locking comment above.
 * NOTE(review): the early-return statements (out-of-range @partno,
 * presumably also an empty slot) are not visible in this excerpt.
 */
273 void delete_partition(struct gendisk
*disk
, int partno
)
275 struct disk_part_tbl
*ptbl
=
276 rcu_dereference_protected(disk
->part_tbl
, 1);
277 struct hd_struct
*part
;
279 if (partno
>= ptbl
->len
)
282 part
= rcu_dereference_protected(ptbl
->part
[partno
], 1);
/* unpublish before tearing anything down */
286 rcu_assign_pointer(ptbl
->part
[partno
], NULL
);
287 rcu_assign_pointer(ptbl
->last_lookup
, NULL
);
288 kobject_put(part
->holder_dir
);
289 device_del(part_to_dev(part
));
292 * Remove gendisk pointer from idr so that it cannot be looked up
293 * while RCU period before freeing gendisk is running to prevent
294 * use-after-free issues. Note that the device number stays
295 * "in-use" until we really free the gendisk.
297 blk_invalidate_devt(part_devt(part
));
298 hd_struct_kill(part
);
/*
 * Show handler for the "whole_disk" device attribute, defined just
 * below as mode 0444 with no store handler.  add_partition() creates
 * this attribute only for partitions flagged ADDPART_FLAG_WHOLEDISK.
 * NOTE(review): the function body is not visible in this excerpt.
 */
301 static ssize_t
whole_disk_show(struct device
*dev
,
302 struct device_attribute
*attr
, char *buf
)
306 static DEVICE_ATTR(whole_disk
, 0444, whole_disk_show
, NULL
);
309 * Must be called either with bd_mutex held, before a disk can be opened or
310 * after all disk users are gone.
/*
 * add_partition - create and register partition @partno on @disk.
 * @disk:   disk that gets the new partition
 * @partno: slot in the disk's partition table
 * @start:  first sector; stored in ->start_sect and used to derive the
 *          alignment offsets from the queue limits
 * @len:    partition length (its use is not visible in this excerpt)
 * @flags:  ADDPART_FLAG_* bits; ADDPART_FLAG_WHOLEDISK adds the
 *          "whole_disk" attribute
 * @info:   optional partition meta info, copied into a fresh allocation
 *
 * Returns the new hd_struct or an ERR_PTR().  Visible error returns are
 * -ENXIO (zoned device that cannot carry partitions) and -EBUSY.
 *
 * Visible sequence:
 *   1. Inspect the queue's zoned model.  Per the log messages, a
 *      host-managed device is rejected and a host-aware device has its
 *      zoned support switched off (BLK_ZONED_NONE).
 *   2. Grow the partition table to hold @partno; fail if the slot is
 *      already taken.
 *   3. Allocate and initialise the hd_struct (stats, seqcount, start
 *      sector, alignment offsets, read-only policy, meta info).
 *   4. Name the device "<disk>p<N>" when the disk name ends in a digit,
 *      "<disk><N>" otherwise; initialise it with block_class/part_type.
 *   5. Allocate a device number and add the device with its uevent
 *      suppressed, so the "holders" directory exists before userspace
 *      is notified.
 *   6. Initialise the reference counter, publish the partition in the
 *      table, then emit KOBJ_ADD unless the disk suppresses uevents.
 *
 * NOTE(review): case labels, several error checks, goto labels and the
 * final return are not visible in this excerpt.
 */
312 struct hd_struct
*add_partition(struct gendisk
*disk
, int partno
,
313 sector_t start
, sector_t len
, int flags
,
314 struct partition_meta_info
*info
)
317 dev_t devt
= MKDEV(0, 0);
318 struct device
*ddev
= disk_to_dev(disk
);
320 struct disk_part_tbl
*ptbl
;
325 * Partitions are not supported on zoned block devices that are used as
328 switch (disk
->queue
->limits
.zoned
) {
330 pr_warn("%s: partitions not supported on host managed zoned block device\n",
332 return ERR_PTR(-ENXIO
);
334 pr_info("%s: disabling host aware zoned block device support due to partitions\n",
336 disk
->queue
->limits
.zoned
= BLK_ZONED_NONE
;
342 err
= disk_expand_part_tbl(disk
, partno
);
345 ptbl
= rcu_dereference_protected(disk
->part_tbl
, 1);
/* slot already occupied by another partition */
347 if (ptbl
->part
[partno
])
348 return ERR_PTR(-EBUSY
);
350 p
= kzalloc(sizeof(*p
), GFP_KERNEL
);
/*
 * NOTE(review): this return directly follows the kzalloc(); if it is
 * the allocation-failure path, -ENOMEM would be the conventional error
 * code rather than -EBUSY - confirm against the full source.
 */
352 return ERR_PTR(-EBUSY
);
354 if (!init_part_stats(p
)) {
359 seqcount_init(&p
->nr_sects_seq
);
360 pdev
= part_to_dev(p
);
362 p
->start_sect
= start
;
363 p
->alignment_offset
=
364 queue_limit_alignment_offset(&disk
->queue
->limits
, start
);
365 p
->discard_alignment
=
366 queue_limit_discard_alignment(&disk
->queue
->limits
, start
);
/* a new partition starts with the disk's read-only setting */
369 p
->policy
= get_disk_ro(disk
);
372 struct partition_meta_info
*pinfo
= alloc_part_info(disk
);
377 memcpy(pinfo
, info
, sizeof(*info
));
/* same naming rule as disk_name(): 'p' separator after a trailing digit */
381 dname
= dev_name(ddev
);
382 if (isdigit(dname
[strlen(dname
) - 1]))
383 dev_set_name(pdev
, "%sp%d", dname
, partno
);
385 dev_set_name(pdev
, "%s%d", dname
, partno
);
387 device_initialize(pdev
);
388 pdev
->class = &block_class
;
389 pdev
->type
= &part_type
;
392 err
= blk_alloc_devt(p
, &devt
);
397 /* delay uevent until 'holders' subdir is created */
398 dev_set_uevent_suppress(pdev
, 1);
399 err
= device_add(pdev
);
404 p
->holder_dir
= kobject_create_and_add("holders", &pdev
->kobj
);
408 dev_set_uevent_suppress(pdev
, 0);
409 if (flags
& ADDPART_FLAG_WHOLEDISK
) {
410 err
= device_create_file(pdev
, &dev_attr_whole_disk
);
415 err
= hd_ref_init(p
);
/* the whole_disk file only exists, and so only needs removing, with this flag */
417 if (flags
& ADDPART_FLAG_WHOLEDISK
)
418 goto out_remove_file
;
422 /* everything is up and running, commence */
423 rcu_assign_pointer(ptbl
->part
[partno
], p
);
425 /* suppress uevent if the disk suppresses it */
426 if (!dev_get_uevent_suppress(ddev
))
427 kobject_uevent(&pdev
->kobj
, KOBJ_ADD
);
/* error unwinding: undo the steps above in reverse order */
438 device_remove_file(pdev
, &dev_attr_whole_disk
);
440 kobject_put(p
->holder_dir
);
/*
 * disk_unlock_native_capacity - try to expose the disk's native capacity.
 * @disk: disk whose driver may implement ->unlock_native_capacity()
 *
 * When the driver provides the hook and GENHD_FL_NATIVE_CAPACITY is not
 * yet set, the hook is called and the flag is set so that the unlock is
 * attempted only once.  Otherwise "truncated" is printed.
 *
 * Both messages use KERN_CONT: they finish a line that the caller has
 * already started printing.
 * NOTE(review): the return statements are not visible in this excerpt;
 * callers test the result as a boolean.
 */
447 static bool disk_unlock_native_capacity(struct gendisk
*disk
)
449 const struct block_device_operations
*bdops
= disk
->fops
;
451 if (bdops
->unlock_native_capacity
&&
452 !(disk
->flags
& GENHD_FL_NATIVE_CAPACITY
)) {
453 printk(KERN_CONT
"enabling native capacity\n");
454 bdops
->unlock_native_capacity(disk
);
455 disk
->flags
|= GENHD_FL_NATIVE_CAPACITY
;
458 printk(KERN_CONT
"truncated\n");
/*
 * blk_drop_partitions - delete every partition of @disk.
 * @disk: disk whose partition table is emptied
 * @bdev: block device for @disk; checked for open partitions
 *        (->bd_part_count) and a mounted superblock (->bd_super)
 *
 * After the preliminary checks and invalidate_partition(disk, 0), every
 * partition returned by the iterator (DISK_PITER_INCL_EMPTY) is passed
 * to delete_partition().
 * NOTE(review): the return values of the early-exit checks and the
 * handling of `res` are not visible in this excerpt.
 */
463 int blk_drop_partitions(struct gendisk
*disk
, struct block_device
*bdev
)
465 struct disk_part_iter piter
;
466 struct hd_struct
*part
;
469 if (!disk_part_scan_enabled(disk
))
471 if (bdev
->bd_part_count
|| bdev
->bd_super
)
473 res
= invalidate_partition(disk
, 0);
477 disk_part_iter_init(&piter
, disk
, DISK_PITER_INCL_EMPTY
);
478 while ((part
= disk_part_iter_next(&piter
)))
479 delete_partition(disk
, part
->partno
);
480 disk_part_iter_exit(&piter
);
/*
 * blk_add_partition - validate and register one parsed partition.
 * @disk:  disk receiving the partition
 * @bdev:  block device of the whole disk
 * @state: parsed partition table; entry @p supplies from/size/flags/info
 * @p:     index of the entry, also used as the partition number
 *
 * Two sanity checks against the disk capacity are visible:
 *   - a start sector at or beyond the end of the device;
 *   - a start + size that extends beyond the end of the device, in
 *     which case the size is clamped to the remaining capacity.
 * In both cases disk_unlock_native_capacity() is tried first.
 *
 * The partition is then created with add_partition().  Failures other
 * than -ENXIO are logged; -ENXIO is deliberately not logged.  With
 * CONFIG_BLK_DEV_MD, partitions flagged ADDPART_FLAG_RAID are handed to
 * md_autodetect_dev().
 * NOTE(review): the return statements and the printk() heads of the two
 * EOD messages are not visible in this excerpt.
 */
485 static bool blk_add_partition(struct gendisk
*disk
, struct block_device
*bdev
,
486 struct parsed_partitions
*state
, int p
)
488 sector_t size
= state
->parts
[p
].size
;
489 sector_t from
= state
->parts
[p
].from
;
490 struct hd_struct
*part
;
/* partition starts at or past the end of the device */
495 if (from
>= get_capacity(disk
)) {
497 "%s: p%d start %llu is beyond EOD, ",
498 disk
->disk_name
, p
, (unsigned long long) from
);
499 if (disk_unlock_native_capacity(disk
))
/* partition starts inside the device but runs past its end */
504 if (from
+ size
> get_capacity(disk
)) {
506 "%s: p%d size %llu extends beyond EOD, ",
507 disk
->disk_name
, p
, (unsigned long long) size
);
509 if (disk_unlock_native_capacity(disk
))
513 * We can not ignore partitions of broken tables created by for
514 * example camera firmware, but we limit them to the end of the
515 * disk to avoid creating invalid block devices.
517 size
= get_capacity(disk
) - from
;
520 part
= add_partition(disk
, p
, from
, size
, state
->parts
[p
].flags
,
521 &state
->parts
[p
].info
);
/* the error code is negated so the log shows it as a positive number */
522 if (IS_ERR(part
) && PTR_ERR(part
) != -ENXIO
) {
523 printk(KERN_ERR
" %s: p%d could not be added: %ld\n",
524 disk
->disk_name
, p
, -PTR_ERR(part
));
528 #ifdef CONFIG_BLK_DEV_MD
529 if (state
->parts
[p
].flags
& ADDPART_FLAG_RAID
)
530 md_autodetect_dev(part_to_dev(part
)->devt
);
/*
 * blk_add_partitions - scan @disk's partition table and add partitions.
 * @disk: disk to scan
 * @bdev: block device of the whole disk, passed to check_partition()
 *
 * Visible flow:
 *   - nothing is scanned when partition scanning is disabled;
 *   - check_partition() parses the table; an -ENOSPC result is treated
 *     as a read beyond the end of the device, and unlocking the native
 *     capacity is tried;
 *   - a table on a host-managed zoned device is ignored with a warning;
 *   - a table read partially beyond EOD also triggers the native
 *     capacity unlock;
 *   - userspace is told with a KOBJ_CHANGE uevent;
 *   - the partition table is pre-sized for the highest used entry, then
 *     entries 1 .. limit-1 are added with blk_add_partition();
 *   - the parsed state is released with free_partitions().
 *
 * `ret` starts as -EAGAIN.
 * NOTE(review): the return/goto statements, and therefore which paths
 * yield -EAGAIN, are not visible in this excerpt.
 */
535 int blk_add_partitions(struct gendisk
*disk
, struct block_device
*bdev
)
537 struct parsed_partitions
*state
;
538 int ret
= -EAGAIN
, p
, highest
;
540 if (!disk_part_scan_enabled(disk
))
543 state
= check_partition(disk
, bdev
);
548 * I/O error reading the partition table. If we tried to read
549 * beyond EOD, retry after unlocking the native capacity.
551 if (PTR_ERR(state
) == -ENOSPC
) {
552 printk(KERN_WARNING
"%s: partition table beyond EOD, ",
554 if (disk_unlock_native_capacity(disk
))
561 * Partitions are not supported on host managed zoned block devices.
563 if (disk
->queue
->limits
.zoned
== BLK_ZONED_HM
) {
564 pr_warn("%s: ignoring partition table on host managed zoned block device\n",
571 * If we read beyond EOD, try unlocking native capacity even if the
572 * partition table was successfully read as we could be missing some
575 if (state
->access_beyond_eod
) {
577 "%s: partition table partially beyond EOD, ",
579 if (disk_unlock_native_capacity(disk
))
583 /* tell userspace that the media / partition table may have changed */
584 kobject_uevent(&disk_to_dev(disk
)->kobj
, KOBJ_CHANGE
);
587 * Detect the highest partition number and preallocate disk->part_tbl.
588 * This is an optimization and not strictly necessary.
/* both loops start at 1; entry 0 is not scanned */
590 for (p
= 1, highest
= 0; p
< state
->limit
; p
++)
591 if (state
->parts
[p
].size
)
593 disk_expand_part_tbl(disk
, highest
);
595 for (p
= 1; p
< state
->limit
; p
++)
596 if (!blk_add_partition(disk
, bdev
, state
, p
))
601 free_partitions(state
);
/*
 * read_dev_sector - read one 512-byte sector of @bdev via the page cache.
 * @bdev: block device; its inode's address_space mapping is read
 * @n:    sector number in 512-byte units
 * @p:    caller-provided Sector handle (its use is not visible in this
 *        excerpt)
 *
 * The page holding sector @n is fetched with read_mapping_page(); the
 * page index is n >> (PAGE_SHIFT - 9), i.e. the sector number divided
 * by the number of sectors per page.  On the visible success path the
 * return value points at the sector inside that page: the low bits of
 * @n select the sector within the page and "<< 9" turns that into a
 * byte offset.
 * NOTE(review): error handling for the page read and the failure return
 * are not visible in this excerpt.  The symbol is exported.
 */
605 unsigned char *read_dev_sector(struct block_device
*bdev
, sector_t n
, Sector
*p
)
607 struct address_space
*mapping
= bdev
->bd_inode
->i_mapping
;
610 page
= read_mapping_page(mapping
, (pgoff_t
)(n
>> (PAGE_SHIFT
-9)), NULL
);
615 return (unsigned char *)page_address(page
) + ((n
& ((1 << (PAGE_SHIFT
- 9)) - 1)) << 9);
623 EXPORT_SYMBOL(read_dev_sector
);