1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (C) 1991-1998 Linus Torvalds
4 * Re-organised Feb 1998 Russell King
5 * Copyright (C) 2020 Christoph Hellwig
8 #include <linux/slab.h>
9 #include <linux/ctype.h>
10 #include <linux/genhd.h>
11 #include <linux/vmalloc.h>
12 #include <linux/blktrace_api.h>
13 #include <linux/raid/detect.h>
/*
 * check_part: table of partition-format probe functions.  Each entry takes a
 * struct parsed_partitions pointer and returns int.  check_partition() walks
 * this array in order and stops at the first probe that returns non-zero or
 * at a NULL entry, so the ordering constraints noted in the entries below
 * matter.  Each probe is only compiled in when the matching
 * CONFIG_*_PARTITION option is set.
 */
16 static int (*check_part
[])(struct parsed_partitions
*) = {
18 * Probe partition formats with tables at disk address 0
19 * that also have an ADFS boot block at 0xdc0.
21 #ifdef CONFIG_ACORN_PARTITION_ICS
24 #ifdef CONFIG_ACORN_PARTITION_POWERTEC
25 adfspart_check_POWERTEC
,
27 #ifdef CONFIG_ACORN_PARTITION_EESOX
32 * Now move on to formats that only have partition info at
33 * disk address 0xdc0. Since these may also have stale
34 * PC/BIOS partition tables, they need to come before
37 #ifdef CONFIG_ACORN_PARTITION_CUMANA
38 adfspart_check_CUMANA
,
40 #ifdef CONFIG_ACORN_PARTITION_ADFS
44 #ifdef CONFIG_CMDLINE_PARTITION
47 #ifdef CONFIG_EFI_PARTITION
48 efi_partition
, /* this must come before msdos */
50 #ifdef CONFIG_SGI_PARTITION
53 #ifdef CONFIG_LDM_PARTITION
54 ldm_partition
, /* this must come before msdos */
56 #ifdef CONFIG_MSDOS_PARTITION
59 #ifdef CONFIG_OSF_PARTITION
62 #ifdef CONFIG_SUN_PARTITION
65 #ifdef CONFIG_AMIGA_PARTITION
68 #ifdef CONFIG_ATARI_PARTITION
71 #ifdef CONFIG_MAC_PARTITION
74 #ifdef CONFIG_ULTRIX_PARTITION
77 #ifdef CONFIG_IBM_PARTITION
80 #ifdef CONFIG_KARMA_PARTITION
83 #ifdef CONFIG_SYSV68_PARTITION
89 static void bdev_set_nr_sectors(struct block_device
*bdev
, sector_t sectors
)
91 spin_lock(&bdev
->bd_size_lock
);
92 i_size_write(bdev
->bd_inode
, (loff_t
)sectors
<< SECTOR_SHIFT
);
93 spin_unlock(&bdev
->bd_size_lock
);
/*
 * Allocate the scratch state used while probing the partition table of @hd.
 * The state structure itself comes from kzalloc() and its parts array from
 * vzalloc(), sized for disk_max_parts(@hd) entries with array_size() doing
 * the count * element-size multiplication.  Both allocations are zeroed.
 * NOTE(review): allocation-failure handling and the return statement are
 * not visible in this listing - confirm against the full source.
 */
96 static struct parsed_partitions
*allocate_partitions(struct gendisk
*hd
)
98 struct parsed_partitions
*state
;
101 state
= kzalloc(sizeof(*state
), GFP_KERNEL
);
105 nr
= disk_max_parts(hd
);
106 state
->parts
= vzalloc(array_size(nr
, sizeof(state
->parts
[0])));
/*
 * Release a parsed_partitions state.  Callers in this file invoke it on the
 * failure and exit paths of check_partition() and at the end of
 * blk_add_partitions().
 * NOTE(review): the body is not visible in this listing; presumably it frees
 * both the parts array and the state allocated by allocate_partitions() -
 * confirm.
 */
117 static void free_partitions(struct parsed_partitions
*state
)
/*
 * Probe @bdev on disk @hd for a recognised partition table.
 *
 * Visible steps:
 *  - allocate the parse state and one page for the pp_buf log buffer; the
 *    state is freed again if the page allocation fails;
 *  - start pp_buf with the disk name, and overwrite state->name with a
 *    single p when the disk name ends in a digit;
 *  - call each check_part[] probe in turn, zeroing state->parts before every
 *    attempt, until one returns non-zero or the table ends;
 *  - print pp_buf at KERN_INFO and free the page; on the failure path the
 *    unable-to-read message is appended to pp_buf before it is printed, and
 *    the state is freed as well.
 * state->access_beyond_eod is consulted after probing.
 * NOTE(review): the branch conditions and return values between these steps
 * are not visible in this listing - confirm against the full source.
 */
123 static struct parsed_partitions
*check_partition(struct gendisk
*hd
,
124 struct block_device
*bdev
)
126 struct parsed_partitions
*state
;
129 state
= allocate_partitions(hd
);
132 state
->pp_buf
= (char *)__get_free_page(GFP_KERNEL
);
133 if (!state
->pp_buf
) {
134 free_partitions(state
);
137 state
->pp_buf
[0] = '\0';
140 disk_name(hd
, 0, state
->name
);
141 snprintf(state
->pp_buf
, PAGE_SIZE
, " %s:", state
->name
);
142 if (isdigit(state
->name
[strlen(state
->name
)-1]))
143 sprintf(state
->name
, "p");
146 while (!res
&& check_part
[i
]) {
147 memset(state
->parts
, 0, state
->limit
* sizeof(state
->parts
[0]));
148 res
= check_part
[i
++](state
);
151 * We have hit an I/O error which we don't report now.
152 * But record it, and let the others do their job.
160 printk(KERN_INFO
"%s", state
->pp_buf
);
162 free_page((unsigned long)state
->pp_buf
);
165 if (state
->access_beyond_eod
)
168 * The partition is unrecognized. So report I/O errors if there were any
173 strlcat(state
->pp_buf
,
174 " unable to read partition table\n", PAGE_SIZE
);
175 printk(KERN_INFO
"%s", state
->pp_buf
);
178 free_page((unsigned long)state
->pp_buf
);
179 free_partitions(state
);
183 static ssize_t
part_partition_show(struct device
*dev
,
184 struct device_attribute
*attr
, char *buf
)
186 return sprintf(buf
, "%d\n", dev_to_bdev(dev
)->bd_partno
);
189 static ssize_t
part_start_show(struct device
*dev
,
190 struct device_attribute
*attr
, char *buf
)
192 return sprintf(buf
, "%llu\n", dev_to_bdev(dev
)->bd_start_sect
);
195 static ssize_t
part_ro_show(struct device
*dev
,
196 struct device_attribute
*attr
, char *buf
)
198 return sprintf(buf
, "%d\n", dev_to_bdev(dev
)->bd_read_only
);
201 static ssize_t
part_alignment_offset_show(struct device
*dev
,
202 struct device_attribute
*attr
, char *buf
)
204 struct block_device
*bdev
= dev_to_bdev(dev
);
206 return sprintf(buf
, "%u\n",
207 queue_limit_alignment_offset(&bdev
->bd_disk
->queue
->limits
,
208 bdev
->bd_start_sect
));
211 static ssize_t
part_discard_alignment_show(struct device
*dev
,
212 struct device_attribute
*attr
, char *buf
)
214 struct block_device
*bdev
= dev_to_bdev(dev
);
216 return sprintf(buf
, "%u\n",
217 queue_limit_discard_alignment(&bdev
->bd_disk
->queue
->limits
,
218 bdev
->bd_start_sect
));
/*
 * sysfs attributes for partition devices.  Every DEVICE_ATTR() below is
 * world-readable (0444), has a show handler and no store handler (NULL).
 * The make-it-fail attribute is the only writable one (0644, with both show
 * and store handlers) and is declared under CONFIG_FAIL_MAKE_REQUEST.
 */
221 static DEVICE_ATTR(partition
, 0444, part_partition_show
, NULL
);
222 static DEVICE_ATTR(start
, 0444, part_start_show
, NULL
);
223 static DEVICE_ATTR(size
, 0444, part_size_show
, NULL
);
224 static DEVICE_ATTR(ro
, 0444, part_ro_show
, NULL
);
225 static DEVICE_ATTR(alignment_offset
, 0444, part_alignment_offset_show
, NULL
);
226 static DEVICE_ATTR(discard_alignment
, 0444, part_discard_alignment_show
, NULL
);
227 static DEVICE_ATTR(stat
, 0444, part_stat_show
, NULL
);
228 static DEVICE_ATTR(inflight
, 0444, part_inflight_show
, NULL
);
229 #ifdef CONFIG_FAIL_MAKE_REQUEST
230 static struct device_attribute dev_attr_fail
=
231 __ATTR(make
-it
-fail
, 0644, part_fail_show
, part_fail_store
);
/*
 * Attribute pointer array for partition devices, built from the dev_attr_*
 * objects declared earlier in this file.  An additional entry is guarded by
 * CONFIG_FAIL_MAKE_REQUEST.
 * NOTE(review): not every entry, nor the terminator, is visible in this
 * listing - confirm against the full source.
 */
234 static struct attribute
*part_attrs
[] = {
235 &dev_attr_partition
.attr
,
236 &dev_attr_start
.attr
,
239 &dev_attr_alignment_offset
.attr
,
240 &dev_attr_discard_alignment
.attr
,
242 &dev_attr_inflight
.attr
,
243 #ifdef CONFIG_FAIL_MAKE_REQUEST
/*
 * Attribute group for partition devices.
 * NOTE(review): the initializer fields are not visible in this listing;
 * presumably the group wraps part_attrs - confirm.
 */
249 static struct attribute_group part_attr_group
= {
/*
 * Attribute group list installed as part_type.groups.  The blktrace group is
 * included only when CONFIG_BLK_DEV_IO_TRACE is set.
 * NOTE(review): the remaining entries and the terminator are not visible in
 * this listing.
 */
253 static const struct attribute_group
*part_attr_groups
[] = {
255 #ifdef CONFIG_BLK_DEV_IO_TRACE
256 &blk_trace_attr_group
,
261 static void part_release(struct device
*dev
)
263 blk_free_devt(dev
->devt
);
264 bdput(dev_to_bdev(dev
));
/*
 * uevent callback for partition devices.  Always adds a PARTN variable with
 * the partition number.  Adds a PARTNAME variable only when the partition
 * has meta info whose volname is non-empty.
 * NOTE(review): the return statement is not visible in this listing.
 */
267 static int part_uevent(struct device
*dev
, struct kobj_uevent_env
*env
)
269 struct block_device
*part
= dev_to_bdev(dev
);
271 add_uevent_var(env
, "PARTN=%u", part
->bd_partno
);
272 if (part
->bd_meta_info
&& part
->bd_meta_info
->volname
[0])
273 add_uevent_var(env
, "PARTNAME=%s", part
->bd_meta_info
->volname
);
/*
 * Device type assigned to every partition device in add_partition().  It
 * wires up the sysfs attribute groups, the release callback and the uevent
 * callback defined in this file.
 */
277 struct device_type part_type
= {
279 .groups
= part_attr_groups
,
280 .release
= part_release
,
281 .uevent
= part_uevent
,
285 * Must be called either with bd_mutex held, before a disk can be opened or
286 * after all disk users are gone.
288 void delete_partition(struct block_device
*part
)
290 struct gendisk
*disk
= part
->bd_disk
;
291 struct disk_part_tbl
*ptbl
=
292 rcu_dereference_protected(disk
->part_tbl
, 1);
294 rcu_assign_pointer(ptbl
->part
[part
->bd_partno
], NULL
);
295 rcu_assign_pointer(ptbl
->last_lookup
, NULL
);
297 kobject_put(part
->bd_holder_dir
);
298 device_del(&part
->bd_device
);
301 * Remove the block device from the inode hash, so that it cannot be
302 * looked up any more even when openers still hold references.
304 remove_inode_hash(part
->bd_inode
);
306 put_device(&part
->bd_device
);
/*
 * Show handler and read-only (0444) attribute named whole_disk.
 * add_partition() creates this file on partitions flagged with
 * ADDPART_FLAG_WHOLEDISK.
 * NOTE(review): the handler body is not visible in this listing.
 */
309 static ssize_t
whole_disk_show(struct device
*dev
,
310 struct device_attribute
*attr
, char *buf
)
314 static DEVICE_ATTR(whole_disk
, 0444, whole_disk_show
, NULL
);
/*
 * add_partition - create and register partition @partno on @disk, covering
 * @len sectors from sector @start.  @flags carries ADDPART_FLAG_* bits and
 * @info is optional meta information that is duplicated with kmemdup().
 *
 * Visible steps, in order:
 *  - zoned handling: one branch warns that partitions are unsupported on
 *    host managed devices and returns ERR_PTR(-ENXIO); another announces
 *    that host aware support is disabled and sets limits.zoned to
 *    BLK_ZONED_NONE;
 *  - grow the partition table to hold @partno and return ERR_PTR(-EBUSY) if
 *    that slot is already occupied;
 *  - allocate the block device (an ERR_PTR(-ENOMEM) return follows) and fill
 *    in its start sector, size and read-only state, the latter copied from
 *    the disk;
 *  - name the device after the parent, inserting a p before the number when
 *    the parent name ends in a digit;
 *  - initialise the device, allocate a devt and add the device with uevents
 *    suppressed until the holders directory exists;
 *  - optionally create the whole_disk file, then publish the block device in
 *    the partition table and send KOBJ_ADD unless the parent disk
 *    suppresses uevents.
 * NOTE(review): the error-unwind labels and several conditions are not
 * visible in this listing - confirm against the full source.
 */
317 * Must be called either with bd_mutex held, before a disk can be opened or
318 * after all disk users are gone.
320 static struct block_device
*add_partition(struct gendisk
*disk
, int partno
,
321 sector_t start
, sector_t len
, int flags
,
322 struct partition_meta_info
*info
)
324 dev_t devt
= MKDEV(0, 0);
325 struct device
*ddev
= disk_to_dev(disk
);
327 struct block_device
*bdev
;
328 struct disk_part_tbl
*ptbl
;
333 * Partitions are not supported on zoned block devices that are used as
336 switch (disk
->queue
->limits
.zoned
) {
338 pr_warn("%s: partitions not supported on host managed zoned block device\n",
340 return ERR_PTR(-ENXIO
);
342 pr_info("%s: disabling host aware zoned block device support due to partitions\n",
344 disk
->queue
->limits
.zoned
= BLK_ZONED_NONE
;
350 err
= disk_expand_part_tbl(disk
, partno
);
353 ptbl
= rcu_dereference_protected(disk
->part_tbl
, 1);
355 if (ptbl
->part
[partno
])
356 return ERR_PTR(-EBUSY
);
358 bdev
= bdev_alloc(disk
, partno
);
360 return ERR_PTR(-ENOMEM
);
362 bdev
->bd_start_sect
= start
;
363 bdev_set_nr_sectors(bdev
, len
);
364 bdev
->bd_read_only
= get_disk_ro(disk
);
368 bdev
->bd_meta_info
= kmemdup(info
, sizeof(*info
), GFP_KERNEL
);
369 if (!bdev
->bd_meta_info
)
373 pdev
= &bdev
->bd_device
;
374 dname
= dev_name(ddev
);
375 if (isdigit(dname
[strlen(dname
) - 1]))
376 dev_set_name(pdev
, "%sp%d", dname
, partno
);
378 dev_set_name(pdev
, "%s%d", dname
, partno
);
380 device_initialize(pdev
);
381 pdev
->class = &block_class
;
382 pdev
->type
= &part_type
;
385 err
= blk_alloc_devt(bdev
, &devt
);
390 /* delay uevent until 'holders' subdir is created */
391 dev_set_uevent_suppress(pdev
, 1);
392 err
= device_add(pdev
);
397 bdev
->bd_holder_dir
= kobject_create_and_add("holders", &pdev
->kobj
);
398 if (!bdev
->bd_holder_dir
)
401 dev_set_uevent_suppress(pdev
, 0);
402 if (flags
& ADDPART_FLAG_WHOLEDISK
) {
403 err
= device_create_file(pdev
, &dev_attr_whole_disk
);
408 /* everything is up and running, commence */
409 bdev_add(bdev
, devt
);
410 rcu_assign_pointer(ptbl
->part
[partno
], bdev
);
412 /* suppress uevent if the disk suppresses it */
413 if (!dev_get_uevent_suppress(ddev
))
414 kobject_uevent(&pdev
->kobj
, KOBJ_ADD
);
421 kobject_put(bdev
->bd_holder_dir
);
/*
 * Check whether the sector range starting at @start and spanning @length
 * sectors collides with an existing partition of @disk.  The walk uses
 * DISK_PITER_INCL_EMPTY.  The visible condition singles out partitions that
 * cannot collide: the one numbered @skip_partno, any partition that ends at
 * or before @start, and any partition that begins at or after
 * @start + @length.
 * NOTE(review): the statements that act on this condition and the return
 * are not visible in this listing; presumably the result is the overlap
 * flag - confirm.
 */
428 static bool partition_overlaps(struct gendisk
*disk
, sector_t start
,
429 sector_t length
, int skip_partno
)
431 struct disk_part_iter piter
;
432 struct block_device
*part
;
433 bool overlap
= false;
435 disk_part_iter_init(&piter
, disk
, DISK_PITER_INCL_EMPTY
);
436 while ((part
= disk_part_iter_next(&piter
))) {
437 if (part
->bd_partno
== skip_partno
||
438 start
>= part
->bd_start_sect
+ bdev_nr_sectors(part
) ||
439 start
+ length
<= part
->bd_start_sect
)
445 disk_part_iter_exit(&piter
);
/*
 * Add partition @partno covering @length sectors from @start to the disk of
 * @bdev.  Runs under bdev->bd_mutex.  The overlap check is made against all
 * partitions (skip number -1); when an overlap is found the mutex is
 * released before leaving.  Otherwise the partition is created with
 * ADDPART_FLAG_NONE and no meta info, and the result of add_partition() is
 * converted with PTR_ERR_OR_ZERO(): 0 on success, the error code otherwise.
 * NOTE(review): the value returned on overlap is not visible in this
 * listing.
 */
449 int bdev_add_partition(struct block_device
*bdev
, int partno
,
450 sector_t start
, sector_t length
)
452 struct block_device
*part
;
454 mutex_lock(&bdev
->bd_mutex
);
455 if (partition_overlaps(bdev
->bd_disk
, start
, length
, -1)) {
456 mutex_unlock(&bdev
->bd_mutex
);
460 part
= add_partition(bdev
->bd_disk
, partno
, start
, length
,
461 ADDPART_FLAG_NONE
, NULL
);
462 mutex_unlock(&bdev
->bd_mutex
);
463 return PTR_ERR_OR_ZERO(part
);
/*
 * Delete partition @partno from the disk of @bdev.  The partition is looked
 * up with bdget_disk().  Locking order: the partition mutex first, then the
 * whole-device mutex with lockdep nesting level 1.  bd_openers of the
 * partition is tested before the partition is invalidated and deleted.
 * Both mutexes are released afterwards, whole device first.
 * NOTE(review): the return values, the action taken when bd_openers is
 * non-zero and the release of the looked-up reference are not visible in
 * this listing.
 */
466 int bdev_del_partition(struct block_device
*bdev
, int partno
)
468 struct block_device
*part
;
471 part
= bdget_disk(bdev
->bd_disk
, partno
);
475 mutex_lock(&part
->bd_mutex
);
476 mutex_lock_nested(&bdev
->bd_mutex
, 1);
479 if (part
->bd_openers
)
483 invalidate_bdev(part
);
485 delete_partition(part
);
488 mutex_unlock(&bdev
->bd_mutex
);
489 mutex_unlock(&part
->bd_mutex
);
/*
 * Resize partition @partno on the disk of @bdev to @length sectors.  @start
 * is compared with the current start sector of the partition, and the new
 * range is checked for overlap with every other partition (the partition
 * itself is skipped) before the size is updated through
 * bdev_set_nr_sectors().  Locking order matches bdev_del_partition():
 * partition mutex first, then the whole-device mutex with nesting level 1.
 * NOTE(review): here the partition mutex is released before the
 * whole-device mutex, the opposite unlock order to bdev_del_partition().
 * The error codes for the two checks are not visible in this listing.
 */
494 int bdev_resize_partition(struct block_device
*bdev
, int partno
,
495 sector_t start
, sector_t length
)
497 struct block_device
*part
;
500 part
= bdget_disk(bdev
->bd_disk
, partno
);
504 mutex_lock(&part
->bd_mutex
);
505 mutex_lock_nested(&bdev
->bd_mutex
, 1);
507 if (start
!= part
->bd_start_sect
)
511 if (partition_overlaps(bdev
->bd_disk
, start
, length
, partno
))
514 bdev_set_nr_sectors(part
, length
);
518 mutex_unlock(&part
->bd_mutex
);
519 mutex_unlock(&bdev
->bd_mutex
);
/*
 * Try to unlock the native capacity of @disk.  This is attempted only when
 * the driver provides an unlock_native_capacity operation and
 * GENHD_FL_NATIVE_CAPACITY is not yet set; the flag is set after the call,
 * so the unlock happens at most once per disk.  Both outcomes finish a
 * message begun by the caller, using KERN_CONT.
 * NOTE(review): the return statements are not visible in this listing;
 * callers treat the result as a boolean.
 */
524 static bool disk_unlock_native_capacity(struct gendisk
*disk
)
526 const struct block_device_operations
*bdops
= disk
->fops
;
528 if (bdops
->unlock_native_capacity
&&
529 !(disk
->flags
& GENHD_FL_NATIVE_CAPACITY
)) {
530 printk(KERN_CONT
"enabling native capacity\n");
531 bdops
->unlock_native_capacity(disk
);
532 disk
->flags
|= GENHD_FL_NATIVE_CAPACITY
;
535 printk(KERN_CONT
"truncated\n");
/*
 * Drop every partition of the disk behind @bdev.  bd_part_count is tested
 * first.  The whole device is then invalidated and each partition found by
 * the iterator (DISK_PITER_INCL_EMPTY) is passed to delete_partition().
 * NOTE(review): the return values, including the one used when
 * bd_part_count is non-zero, are not visible in this listing.
 */
540 int blk_drop_partitions(struct block_device
*bdev
)
542 struct disk_part_iter piter
;
543 struct block_device
*part
;
545 if (bdev
->bd_part_count
)
549 invalidate_bdev(bdev
);
551 disk_part_iter_init(&piter
, bdev
->bd_disk
, DISK_PITER_INCL_EMPTY
);
552 while ((part
= disk_part_iter_next(&piter
)))
553 delete_partition(part
);
554 disk_part_iter_exit(&piter
);
559 /* for historic reasons in the DASD driver */
560 EXPORT_SYMBOL_GPL(blk_drop_partitions
);
/*
 * Register parsed partition number @p from @state on @disk.
 *
 * Visible behaviour:
 *  - a start at or beyond the disk capacity is reported, followed by an
 *    attempt to unlock the native capacity;
 *  - a range that extends past the capacity is reported, an unlock is
 *    attempted, and the size is then clamped to end at the capacity;
 *  - the partition is created with add_partition(); failures other than
 *    -ENXIO are logged at KERN_ERR with the error printed as a positive
 *    number;
 *  - when MD is built in and the entry carries ADDPART_FLAG_RAID, the new
 *    device number is handed to md_autodetect_dev().
 * NOTE(review): the return values are not visible in this listing; the
 * caller tests the result with a logical not.
 */
563 static bool blk_add_partition(struct gendisk
*disk
, struct block_device
*bdev
,
564 struct parsed_partitions
*state
, int p
)
566 sector_t size
= state
->parts
[p
].size
;
567 sector_t from
= state
->parts
[p
].from
;
568 struct block_device
*part
;
573 if (from
>= get_capacity(disk
)) {
575 "%s: p%d start %llu is beyond EOD, ",
576 disk
->disk_name
, p
, (unsigned long long) from
);
577 if (disk_unlock_native_capacity(disk
))
582 if (from
+ size
> get_capacity(disk
)) {
584 "%s: p%d size %llu extends beyond EOD, ",
585 disk
->disk_name
, p
, (unsigned long long) size
);
587 if (disk_unlock_native_capacity(disk
))
591 * We can not ignore partitions of broken tables created by for
592 * example camera firmware, but we limit them to the end of the
593 * disk to avoid creating invalid block devices.
595 size
= get_capacity(disk
) - from
;
598 part
= add_partition(disk
, p
, from
, size
, state
->parts
[p
].flags
,
599 &state
->parts
[p
].info
);
600 if (IS_ERR(part
) && PTR_ERR(part
) != -ENXIO
) {
601 printk(KERN_ERR
" %s: p%d could not be added: %ld\n",
602 disk
->disk_name
, p
, -PTR_ERR(part
));
606 if (IS_BUILTIN(CONFIG_BLK_DEV_MD
) &&
607 (state
->parts
[p
].flags
& ADDPART_FLAG_RAID
))
608 md_autodetect_dev(part
->bd_dev
);
/*
 * Scan @bdev for a partition table and register every partition found on
 * @disk.  ret starts out as -EAGAIN.
 *
 * Visible steps:
 *  - test whether partition scanning is enabled for the disk;
 *  - run check_partition(); an -ENOSPC error pointer is reported as a table
 *    beyond end of device, followed by an attempt to unlock the native
 *    capacity;
 *  - a table found on a host managed zoned device triggers a warning that
 *    it is being ignored;
 *  - if the probe read beyond end of device, an unlock of the native
 *    capacity is attempted even though a table was read;
 *  - emit KOBJ_CHANGE for the disk, size the partition table in advance for
 *    the highest used slot, then add partitions 1 .. state->limit - 1;
 *  - free the parse state.
 * NOTE(review): the jumps, the return values and the meaning of the initial
 * -EAGAIN are not visible in this listing - confirm against the full source.
 */
613 int blk_add_partitions(struct gendisk
*disk
, struct block_device
*bdev
)
615 struct parsed_partitions
*state
;
616 int ret
= -EAGAIN
, p
, highest
;
618 if (!disk_part_scan_enabled(disk
))
621 state
= check_partition(disk
, bdev
);
626 * I/O error reading the partition table. If we tried to read
627 * beyond EOD, retry after unlocking the native capacity.
629 if (PTR_ERR(state
) == -ENOSPC
) {
630 printk(KERN_WARNING
"%s: partition table beyond EOD, ",
632 if (disk_unlock_native_capacity(disk
))
639 * Partitions are not supported on host managed zoned block devices.
641 if (disk
->queue
->limits
.zoned
== BLK_ZONED_HM
) {
642 pr_warn("%s: ignoring partition table on host managed zoned block device\n",
649 * If we read beyond EOD, try unlocking native capacity even if the
650 * partition table was successfully read as we could be missing some
653 if (state
->access_beyond_eod
) {
655 "%s: partition table partially beyond EOD, ",
657 if (disk_unlock_native_capacity(disk
))
661 /* tell userspace that the media / partition table may have changed */
662 kobject_uevent(&disk_to_dev(disk
)->kobj
, KOBJ_CHANGE
);
665 * Detect the highest partition number and preallocate disk->part_tbl.
666 * This is an optimization and not strictly necessary.
668 for (p
= 1, highest
= 0; p
< state
->limit
; p
++)
669 if (state
->parts
[p
].size
)
671 disk_expand_part_tbl(disk
, highest
);
673 for (p
= 1; p
< state
->limit
; p
++)
674 if (!blk_add_partition(disk
, bdev
, state
, p
))
679 free_partitions(state
);
/*
 * Read sector @n of the device being probed through the page cache of its
 * inode.  A request at or beyond the disk capacity sets
 * state->access_beyond_eod.  Otherwise the page holding the sector is read
 * with read_mapping_page() and the returned pointer is the page address
 * plus the byte offset of the sector within that page.
 * NOTE(review): the shift by (PAGE_SHIFT - 9) converts a sector index into
 * a page index; the literal 9 presumably equals the SECTOR_SHIFT used for
 * the byte offset - confirm, and consider using one named constant for
 * both.  The handling of @p and the failure return are not visible in this
 * listing.
 */
683 void *read_part_sector(struct parsed_partitions
*state
, sector_t n
, Sector
*p
)
685 struct address_space
*mapping
= state
->bdev
->bd_inode
->i_mapping
;
688 if (n
>= get_capacity(state
->bdev
->bd_disk
)) {
689 state
->access_beyond_eod
= true;
693 page
= read_mapping_page(mapping
,
694 (pgoff_t
)(n
>> (PAGE_SHIFT
- 9)), NULL
);
701 return (unsigned char *)page_address(page
) +
702 ((n
& ((1 << (PAGE_SHIFT
- 9)) - 1)) << SECTOR_SHIFT
);