1 /* $NetBSD: subr_disk.c,v 1.98 2009/11/27 11:23:50 tsutsui Exp $ */
4 * Copyright (c) 1996, 1997, 1999, 2000, 2009 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
34 * Copyright (c) 1982, 1986, 1988, 1993
35 * The Regents of the University of California. All rights reserved.
36 * (c) UNIX System Laboratories, Inc.
37 * All or some portions of this file are derived from material licensed
38 * to the University of California by American Telephone and Telegraph
39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
40 * the permission of UNIX System Laboratories, Inc.
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
45 * 1. Redistributions of source code must retain the above copyright
46 * notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 * notice, this list of conditions and the following disclaimer in the
49 * documentation and/or other materials provided with the distribution.
50 * 3. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66 * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94
69 #include <sys/cdefs.h>
70 __KERNEL_RCSID(0, "$NetBSD: subr_disk.c,v 1.98 2009/11/27 11:23:50 tsutsui Exp $");
72 #include <sys/param.h>
73 #include <sys/kernel.h>
76 #include <sys/syslog.h>
77 #include <sys/disklabel.h>
79 #include <sys/sysctl.h>
80 #include <lib/libkern/libkern.h>
83 * Compute checksum for disk label.
86 dkcksum(struct disklabel
*lp
)
89 return dkcksum_sized(lp
, lp
->d_npartitions
);
93 dkcksum_sized(struct disklabel
*lp
, size_t npartitions
)
95 uint16_t *start
, *end
;
98 start
= (uint16_t *)lp
;
99 end
= (uint16_t *)&lp
->d_partitions
[npartitions
];
106 * Disk error is the preface to plaintive error messages
107 * about failing disk transfers. It prints messages of the form
109 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d)
111 * if the offset of the error in the transfer and a disk label
112 * are both available. blkdone should be -1 if the position of the error
113 * is unknown; the disklabel pointer may be null from drivers that have not
114 * been converted to use them. The message is printed with printf
115 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority.
116 * The message should be completed (with at least a newline) with printf
117 * or addlog, respectively. There is no trailing space.
120 #define PRIdaddr PRId64
123 diskerr(const struct buf
*bp
, const char *dname
, const char *what
, int pri
,
124 int blkdone
, const struct disklabel
*lp
)
126 int unit
= DISKUNIT(bp
->b_dev
), part
= DISKPART(bp
->b_dev
);
127 void (*pr
)(const char *, ...);
128 char partname
= 'a' + part
;
132 /* The compiler will report an error here if the format is wrong... */
133 printf("%" PRIdaddr
, bp
->b_blkno
);
135 if (pri
!= LOG_PRINTF
) {
136 static const char fmt
[] = "";
141 (*pr
)("%s%d%c: %s %sing fsbn ", dname
, unit
, partname
, what
,
142 bp
->b_flags
& B_READ
? "read" : "writ");
144 if (bp
->b_bcount
<= DEV_BSIZE
)
145 (*pr
)("%" PRIdaddr
, sn
);
149 (*pr
)("%" PRIdaddr
" of ", sn
);
151 (*pr
)("%" PRIdaddr
"-%" PRIdaddr
"", bp
->b_blkno
,
152 bp
->b_blkno
+ (bp
->b_bcount
- 1) / DEV_BSIZE
);
154 if (lp
&& (blkdone
>= 0 || bp
->b_bcount
<= lp
->d_secsize
)) {
155 sn
+= lp
->d_partitions
[part
].p_offset
;
156 (*pr
)(" (%s%d bn %" PRIdaddr
"; cn %" PRIdaddr
"",
157 dname
, unit
, sn
, sn
/ lp
->d_secpercyl
);
158 sn
%= lp
->d_secpercyl
;
159 (*pr
)(" tn %" PRIdaddr
" sn %" PRIdaddr
")",
160 sn
/ lp
->d_nsectors
, sn
% lp
->d_nsectors
);
165 * Searches the iostatlist for the disk corresponding to the given name.
169 disk_find(const char *name
)
171 struct io_stats
*stat
;
173 stat
= iostat_find(name
);
175 if ((stat
!= NULL
) && (stat
->io_type
== IOSTAT_DISK
))
176 return stat
->io_parent
;
182 disk_init(struct disk
*diskp
, const char *name
, const struct dkdriver
*driver
)
186 * Initialize the wedge-related locks and other fields.
188 mutex_init(&diskp
->dk_rawlock
, MUTEX_DEFAULT
, IPL_NONE
);
189 mutex_init(&diskp
->dk_openlock
, MUTEX_DEFAULT
, IPL_NONE
);
190 LIST_INIT(&diskp
->dk_wedges
);
191 diskp
->dk_nwedges
= 0;
192 diskp
->dk_labelsector
= LABELSECTOR
;
193 disk_blocksize(diskp
, DEV_BSIZE
);
194 diskp
->dk_name
= name
;
195 diskp
->dk_driver
= driver
;
202 disk_attach(struct disk
*diskp
)
206 * Allocate and initialize the disklabel structures.
208 diskp
->dk_label
= kmem_zalloc(sizeof(struct disklabel
), KM_SLEEP
);
209 diskp
->dk_cpulabel
= kmem_zalloc(sizeof(struct cpu_disklabel
),
211 if ((diskp
->dk_label
== NULL
) || (diskp
->dk_cpulabel
== NULL
))
212 panic("disk_attach: can't allocate storage for disklabel");
215 * Set up the stats collection.
217 diskp
->dk_stats
= iostat_alloc(IOSTAT_DISK
, diskp
, diskp
->dk_name
);
221 disk_begindetach(struct disk
*dk
, int (*lastclose
)(device_t
),
222 device_t self
, int flags
)
227 mutex_enter(&dk
->dk_openlock
);
228 if (dk
->dk_openmask
== 0)
229 ; /* nothing to do */
230 else if ((flags
& DETACH_FORCE
) == 0)
232 else if (lastclose
!= NULL
)
233 rc
= (*lastclose
)(self
);
234 mutex_exit(&dk
->dk_openlock
);
243 disk_detach(struct disk
*diskp
)
247 * Remove from the drivelist.
249 iostat_free(diskp
->dk_stats
);
252 * Release the disk-info dictionary.
254 if (diskp
->dk_info
) {
255 prop_object_release(diskp
->dk_info
);
256 diskp
->dk_info
= NULL
;
260 * Free the space used by the disklabel structures.
262 kmem_free(diskp
->dk_label
, sizeof(*diskp
->dk_label
));
263 kmem_free(diskp
->dk_cpulabel
, sizeof(*diskp
->dk_cpulabel
));
267 disk_destroy(struct disk
*diskp
)
270 mutex_destroy(&diskp
->dk_openlock
);
271 mutex_destroy(&diskp
->dk_rawlock
);
275 * Mark the disk as busy for metrics collection.
278 disk_busy(struct disk
*diskp
)
281 iostat_busy(diskp
->dk_stats
);
285 * Finished disk operations, gather metrics.
288 disk_unbusy(struct disk
*diskp
, long bcount
, int read
)
291 iostat_unbusy(diskp
->dk_stats
, bcount
, read
);
295 * Return true if disk has an I/O operation in flight.
298 disk_isbusy(struct disk
*diskp
)
301 return iostat_isbusy(diskp
->dk_stats
);
305 * Set the physical blocksize of a disk, in bytes.
306 * Only necessary if blocksize != DEV_BSIZE.
309 disk_blocksize(struct disk
*diskp
, int blocksize
)
312 diskp
->dk_blkshift
= DK_BSIZE2BLKSHIFT(blocksize
);
313 diskp
->dk_byteshift
= DK_BSIZE2BYTESHIFT(blocksize
);
317 * Bounds checking against the media size, used for the raw partition.
318 * secsize, mediasize and b_blkno must all be in the same units.
319 * Possibly this has to be DEV_BSIZE (512).
322 bounds_check_with_mediasize(struct buf
*bp
, int secsize
, uint64_t mediasize
)
326 sz
= howmany(bp
->b_bcount
, secsize
);
328 if (bp
->b_blkno
+ sz
> mediasize
) {
329 sz
= mediasize
- bp
->b_blkno
;
331 /* If exactly at end of disk, return EOF. */
332 bp
->b_resid
= bp
->b_bcount
;
336 /* If past end of disk, return EINVAL. */
337 bp
->b_error
= EINVAL
;
340 /* Otherwise, truncate request. */
341 bp
->b_bcount
= sz
* secsize
;
348 * Determine the size of the transfer, and make sure it is
349 * within the boundaries of the partition. Adjust transfer
350 * if needed, and signal errors or early completion.
353 bounds_check_with_label(struct disk
*dk
, struct buf
*bp
, int wlabel
)
355 struct disklabel
*lp
= dk
->dk_label
;
356 struct partition
*p
= lp
->d_partitions
+ DISKPART(bp
->b_dev
);
357 uint64_t p_size
, p_offset
, labelsector
;
360 /* Protect against division by zero. XXX: Should never happen?!?! */
361 if (lp
->d_secpercyl
== 0) {
362 bp
->b_error
= EINVAL
;
366 p_size
= p
->p_size
<< dk
->dk_blkshift
;
367 p_offset
= p
->p_offset
<< dk
->dk_blkshift
;
369 labelsector
= lp
->d_partitions
[2].p_offset
;
371 labelsector
= lp
->d_partitions
[RAW_PART
].p_offset
;
373 labelsector
= (labelsector
+ dk
->dk_labelsector
) << dk
->dk_blkshift
;
375 sz
= howmany(bp
->b_bcount
, DEV_BSIZE
);
376 if ((bp
->b_blkno
+ sz
) > p_size
) {
377 sz
= p_size
- bp
->b_blkno
;
379 /* If exactly at end of disk, return EOF. */
380 bp
->b_resid
= bp
->b_bcount
;
384 /* If past end of disk, return EINVAL. */
385 bp
->b_error
= EINVAL
;
388 /* Otherwise, truncate request. */
389 bp
->b_bcount
= sz
<< DEV_BSHIFT
;
392 /* Overwriting disk label? */
393 if (bp
->b_blkno
+ p_offset
<= labelsector
&&
394 bp
->b_blkno
+ p_offset
+ sz
> labelsector
&&
395 (bp
->b_flags
& B_READ
) == 0 && !wlabel
) {
400 /* calculate cylinder for disksort to order transfers with */
401 bp
->b_cylinder
= (bp
->b_blkno
+ p
->p_offset
) /
402 (lp
->d_secsize
/ DEV_BSIZE
) / lp
->d_secpercyl
;
407 disk_read_sectors(void (*strat
)(struct buf
*), const struct disklabel
*lp
,
408 struct buf
*bp
, unsigned int sector
, int count
)
410 bp
->b_blkno
= sector
;
411 bp
->b_bcount
= count
* lp
->d_secsize
;
412 bp
->b_flags
= (bp
->b_flags
& ~B_WRITE
) | B_READ
;
413 bp
->b_oflags
&= ~BO_DONE
;
414 bp
->b_cylinder
= sector
/ lp
->d_secpercyl
;
420 convertdisklabel(struct disklabel
*lp
, void (*strat
)(struct buf
*),
421 struct buf
*bp
, uint32_t secperunit
)
423 struct partition rp
, *altp
, *p
;
426 memset(&rp
, 0, sizeof(rp
));
427 rp
.p_size
= secperunit
;
428 rp
.p_fstype
= FS_UNUSED
;
430 /* If we can seek to d_secperunit - 1, believe the disk geometry. */
431 if (secperunit
!= 0 &&
432 disk_read_sectors(strat
, lp
, bp
, secperunit
- 1, 1) == 0)
438 printf("%s: secperunit (%" PRIu32
") %s\n", __func__
,
439 secperunit
, geom_ok
? "ok" : "not ok");
442 p
= &lp
->d_partitions
[RAW_PART
];
443 if (RAW_PART
== 'c' - 'a')
444 altp
= &lp
->d_partitions
['d' - 'a'];
446 altp
= &lp
->d_partitions
['c' - 'a'];
448 if (lp
->d_npartitions
> RAW_PART
&& p
->p_offset
== 0 && p
->p_size
!= 0)
449 ; /* already a raw partition */
450 else if (lp
->d_npartitions
> MAX('c', 'd') - 'a' &&
451 altp
->p_offset
== 0 && altp
->p_size
!= 0) {
452 /* alternate partition ('c' or 'd') is suitable for raw slot,
453 * swap with 'd' or 'c'.
458 } else if (lp
->d_npartitions
<= RAW_PART
&&
459 lp
->d_npartitions
> 'c' - 'a') {
460 /* No raw partition is present, but the alternate is present.
461 * Copy alternate to raw partition.
463 lp
->d_npartitions
= RAW_PART
+ 1;
466 return "no raw partition and disk reports bad geometry";
467 else if (lp
->d_npartitions
<= RAW_PART
) {
468 memset(&lp
->d_partitions
[lp
->d_npartitions
], 0,
469 sizeof(struct partition
) * (RAW_PART
- lp
->d_npartitions
));
471 lp
->d_npartitions
= RAW_PART
+ 1;
472 } else if (lp
->d_npartitions
< MAXPARTITIONS
) {
474 sizeof(struct partition
) * (lp
->d_npartitions
- RAW_PART
));
478 return "no raw partition and partition table is full";
484 * Generic disk ioctl handling.
487 disk_ioctl(struct disk
*diskp
, u_long cmd
, void *data
, int flag
,
495 struct plistref
*pref
= (struct plistref
*) data
;
497 if (diskp
->dk_info
== NULL
)
500 error
= prop_dictionary_copyout_ioctl(pref
, cmd
,
506 error
= EPASSTHROUGH
;