4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #include <sys/param.h>
28 #include <sys/systm.h>
36 #include <sys/vnode.h>
41 #include <sys/statvfs.h>
42 #include <sys/mount.h>
43 #include <sys/pathname.h>
44 #include <sys/cmn_err.h>
45 #include <sys/debug.h>
46 #include <sys/sysmacros.h>
48 #include <sys/mkdev.h>
50 #include <sys/sunddi.h>
51 #include <sys/sunldi.h>
52 #include <sys/dktp/fdisk.h>
53 #include <sys/fs/pc_label.h>
54 #include <sys/fs/pc_fs.h>
55 #include <sys/fs/pc_dir.h>
56 #include <sys/fs/pc_node.h>
57 #include <sys/fs_subr.h>
58 #include <sys/modctl.h>
61 #include <sys/mntent.h>
62 #include <sys/policy.h>
63 #include <sys/atomic.h>
67 * The majority of PC media use a 512-byte sector size, but
68 * occasionally you will run across a 1k sector size.
69 * For media with a 1k sector size, fd_strategy() requires
70 * the I/O size to be a 1k multiple; so when the sector size
71 * is not yet known, always read 1k.
73 #define PC_SAFESECSIZE (PC_SECSIZE * 2)
/* Registers the PCFS vfsops for the given filesystem type index. */
static int pcfsinit(int fstype, char *name);
76 static int pcfs_mount(struct vfs
*, struct vnode
*, struct mounta
*,
78 static int pcfs_unmount(struct vfs
*, int, struct cred
*);
79 static int pcfs_root(struct vfs
*, struct vnode
**);
80 static int pcfs_statvfs(struct vfs
*, struct statvfs64
*);
81 static int pc_syncfsnodes(struct pcfs
*);
82 static int pcfs_sync(struct vfs
*, short, struct cred
*);
83 static int pcfs_vget(struct vfs
*vfsp
, struct vnode
**vpp
, struct fid
*fidp
);
84 static void pcfs_freevfs(vfs_t
*vfsp
);
86 static int pc_readfat(struct pcfs
*fsp
, uchar_t
*fatp
);
87 static int pc_writefat(struct pcfs
*fsp
, daddr_t start
);
89 static int pc_getfattype(struct pcfs
*fsp
);
90 static void pcfs_parse_mntopts(struct pcfs
*fsp
);
94 * pcfs mount options table
97 static char *nohidden_cancel
[] = { MNTOPT_PCFS_HIDDEN
, NULL
};
98 static char *hidden_cancel
[] = { MNTOPT_PCFS_NOHIDDEN
, NULL
};
99 static char *nofoldcase_cancel
[] = { MNTOPT_PCFS_FOLDCASE
, NULL
};
100 static char *foldcase_cancel
[] = { MNTOPT_PCFS_NOFOLDCASE
, NULL
};
101 static char *clamptime_cancel
[] = { MNTOPT_PCFS_NOCLAMPTIME
, NULL
};
102 static char *noclamptime_cancel
[] = { MNTOPT_PCFS_CLAMPTIME
, NULL
};
103 static char *atime_cancel
[] = { MNTOPT_NOATIME
, NULL
};
104 static char *noatime_cancel
[] = { MNTOPT_ATIME
, NULL
};
106 static mntopt_t mntopts
[] = {
108 * option name cancel option default arg flags opt data
110 { MNTOPT_PCFS_NOHIDDEN
, nohidden_cancel
, NULL
, 0, NULL
},
111 { MNTOPT_PCFS_HIDDEN
, hidden_cancel
, NULL
, MO_DEFAULT
, NULL
},
112 { MNTOPT_PCFS_NOFOLDCASE
, nofoldcase_cancel
, NULL
, MO_DEFAULT
, NULL
},
113 { MNTOPT_PCFS_FOLDCASE
, foldcase_cancel
, NULL
, 0, NULL
},
114 { MNTOPT_PCFS_CLAMPTIME
, clamptime_cancel
, NULL
, MO_DEFAULT
, NULL
},
115 { MNTOPT_PCFS_NOCLAMPTIME
, noclamptime_cancel
, NULL
, 0, NULL
},
116 { MNTOPT_NOATIME
, noatime_cancel
, NULL
, 0, NULL
},
117 { MNTOPT_ATIME
, atime_cancel
, NULL
, 0, NULL
},
118 { MNTOPT_PCFS_TIMEZONE
, NULL
, "+0", MO_DEFAULT
| MO_HASVALUE
, NULL
},
119 { MNTOPT_PCFS_SECSIZE
, NULL
, NULL
, MO_HASVALUE
, NULL
}
122 static mntopts_t pcfs_mntopts
= {
123 sizeof (mntopts
) / sizeof (mntopt_t
),
127 int pcfsdebuglevel
= 0;
130 * pcfslock: protects the list of mounted pc filesystems "pc_mounttab".
131 * pcfs_lock: (inside per filesystem structure "pcfs")
132 * per filesystem lock. Most of the vfsops and vnodeops are
133 * protected by this lock.
134 * pcnodes_lock: protects the pcnode hash table "pcdhead", "pcfhead".
136 * Lock hierarchy: pcfslock > pcfs_lock > pcnodes_lock
138 * pcfs_mountcount: used to prevent module unloads while there is still
139 * pcfs state from a former mount hanging around. With
140 * forced umount support, the filesystem module must not
141 * be allowed to go away before the last VFS_FREEVFS()
142 * call has been made.
143 * Since this is just an atomic counter, there's no need
147 krwlock_t pcnodes_lock
;
148 uint32_t pcfs_mountcount
;
152 static vfsdef_t vfw
= {
156 VSW_HASPROTO
|VSW_CANREMOUNT
|VSW_STATS
|VSW_CANLOFI
,
160 extern struct mod_ops mod_fsops
;
162 static struct modlfs modlfs
= {
168 static struct modlinkage modlinkage
= {
179 /* make sure the on-disk structures are sane */
180 ASSERT(sizeof (struct pcdir
) == 32);
181 ASSERT(sizeof (struct pcdir_lfn
) == 32);
182 mutex_init(&pcfslock
, NULL
, MUTEX_DEFAULT
, NULL
);
183 rw_init(&pcnodes_lock
, NULL
, RW_DEFAULT
, NULL
);
184 error
= mod_install(&modlinkage
);
186 mutex_destroy(&pcfslock
);
187 rw_destroy(&pcnodes_lock
);
198 * If a forcibly unmounted instance is still hanging around,
199 * we cannot allow the module to be unloaded because that would
200 * cause panics once the VFS framework decides it's time to call
201 * into VFS_FREEVFS().
206 error
= mod_remove(&modlinkage
);
209 mutex_destroy(&pcfslock
);
210 rw_destroy(&pcnodes_lock
);
212 * Tear down the operations vector
214 (void) vfs_freevfsops_by_type(pcfstype
);
219 _info(struct modinfo
*modinfop
)
221 return (mod_info(&modlinkage
, modinfop
));
224 static const struct vfsops pcfs_vfsops
= {
225 .vfs_mount
= pcfs_mount
,
226 .vfs_unmount
= pcfs_unmount
,
227 .vfs_root
= pcfs_root
,
228 .vfs_statvfs
= pcfs_statvfs
,
229 .vfs_sync
= pcfs_sync
,
230 .vfs_vget
= pcfs_vget
,
231 .vfs_freevfs
= pcfs_freevfs
,
236 pcfsinit(int fstype
, char *name
)
240 error
= vfs_setfsops(fstype
, &pcfs_vfsops
);
242 cmn_err(CE_WARN
, "pcfsinit: bad fstype");
252 static struct pcfs
*pc_mounttab
= NULL
;
254 extern struct pcfs_args pc_tz
;
257 pcfs_device_identify(
263 struct pathname special
;
264 struct vnode
*svp
= NULL
;
265 struct vnode
*lvp
= NULL
;
270 * Resolve path name of special file being mounted.
272 if (error
= pn_get(uap
->spec
, UIO_USERSPACE
, &special
)) {
276 error
= lookupname(special
.pn_path
, UIO_SYSSPACE
, FOLLOW
, NULLVPP
, &svp
);
283 * Verify caller's permission to open the device special file.
285 if ((vfsp
->vfs_flag
& VFS_RDONLY
) != 0 ||
286 ((uap
->flags
& MS_RDONLY
) != 0)) {
290 oflag
= FREAD
| FWRITE
;
291 aflag
= VREAD
| VWRITE
;
294 error
= vfs_get_lofi(vfsp
, &lvp
);
300 } else if (error
== 0) {
305 if (svp
->v_type
!= VBLK
) {
310 if ((error
= secpolicy_spec_open(cr
, svp
, oflag
)) != 0)
314 if (getmajor(*xdev
) >= devcnt
) {
319 if ((error
= fop_access(svp
, aflag
, 0, cr
, NULL
)) != 0)
331 pcfs_device_ismounted(
337 int remount
= *remounting
;
340 * Ensure that this drive isn't already mounted, unless this is a
345 if (vfs_devmounting(*pseudodev
, vfsp
))
348 if (vfs_devismounted(*pseudodev
))
349 return (remount
? 0 : EBUSY
);
352 * This is not a remount. Even if MS_REMOUNT was requested,
353 * the caller needs to proceed as it would on an ordinary
363 * Get the PCFS-specific mount options from the VFS framework.
364 * For "timezone" and "secsize", we need to parse the number
365 * ourselves and ensure its validity.
366 * Note: "secsize" is deliberately undocumented at this time,
367 * it's a workaround for devices (particularly: lofi image files)
368 * that don't support the DKIOCGMEDIAINFO ioctl for autodetection.
371 pcfs_parse_mntopts(struct pcfs
*fsp
)
376 struct vfs
*vfsp
= fsp
->pcfs_vfs
;
378 ASSERT(fsp
->pcfs_secondswest
== 0);
379 ASSERT(fsp
->pcfs_secsize
== 0);
381 if (vfs_optionisset(vfsp
, MNTOPT_PCFS_HIDDEN
, NULL
))
382 fsp
->pcfs_flags
|= PCFS_HIDDEN
;
383 if (vfs_optionisset(vfsp
, MNTOPT_PCFS_FOLDCASE
, NULL
))
384 fsp
->pcfs_flags
|= PCFS_FOLDCASE
;
385 if (vfs_optionisset(vfsp
, MNTOPT_PCFS_NOCLAMPTIME
, NULL
))
386 fsp
->pcfs_flags
|= PCFS_NOCLAMPTIME
;
387 if (vfs_optionisset(vfsp
, MNTOPT_NOATIME
, NULL
))
388 fsp
->pcfs_flags
|= PCFS_NOATIME
;
390 if (vfs_optionisset(vfsp
, MNTOPT_PCFS_TIMEZONE
, &c
)) {
391 if (ddi_strtol(c
, &endptr
, 10, &l
) == 0 &&
392 endptr
== c
+ strlen(c
)) {
394 * A number alright - in the allowed range ?
396 if (l
<= -12*3600 || l
>= 12*3600) {
397 cmn_err(CE_WARN
, "!pcfs: invalid use of "
398 "'timezone' mount option - %ld "
399 "is out of range. Assuming 0.", l
);
403 cmn_err(CE_WARN
, "!pcfs: invalid use of "
404 "'timezone' mount option - argument %s "
405 "is not a valid number. Assuming 0.", c
);
408 fsp
->pcfs_secondswest
= l
;
412 * The "secsize=..." mount option is a workaround for the lack of
413 * lofi(7d) support for DKIOCGMEDIAINFO. If PCFS wants to parse the
414 * partition table of a disk image and it has been partitioned with
415 * sector sizes other than 512 bytes, we'd fail on loopback'ed disk
417 * That should really be fixed in lofi ... this is a workaround.
419 if (vfs_optionisset(vfsp
, MNTOPT_PCFS_SECSIZE
, &c
)) {
420 if (ddi_strtol(c
, &endptr
, 10, &l
) == 0 &&
421 endptr
== c
+ strlen(c
)) {
423 * A number alright - a valid sector size as well ?
425 if (!VALID_SECSIZE(l
)) {
426 cmn_err(CE_WARN
, "!pcfs: invalid use of "
427 "'secsize' mount option - %ld is "
428 "unsupported. Autodetecting.", l
);
432 cmn_err(CE_WARN
, "!pcfs: invalid use of "
433 "'secsize' mount option - argument %s "
434 "is not a valid number. Autodetecting.", c
);
437 fsp
->pcfs_secsize
= l
;
438 fsp
->pcfs_sdshift
= ddi_ffs(l
/ DEV_BSIZE
) - 1;
447 * pcfs_mount - backend for VFS_MOUNT() on PCFS.
463 if ((error
= secpolicy_fs_mount(cr
, mvp
, vfsp
)) != 0)
466 if (mvp
->v_type
!= VDIR
)
469 mutex_enter(&mvp
->v_lock
);
470 if ((uap
->flags
& MS_REMOUNT
) == 0 &&
471 (uap
->flags
& MS_OVERLAY
) == 0 &&
472 (mvp
->v_count
!= 1 || (mvp
->v_flag
& VROOT
))) {
473 mutex_exit(&mvp
->v_lock
);
476 mutex_exit(&mvp
->v_lock
);
479 * PCFS doesn't do mount arguments anymore - everything's a mount
480 * option these days. In order not to break existing callers, we
481 * don't reject it yet, just warn that the data (if any) is ignored.
483 if (uap
->datalen
!= 0)
484 cmn_err(CE_WARN
, "!pcfs: deprecated use of mount(2) with "
485 "mount argument structures instead of mount options. "
486 "Ignoring mount(2) 'dataptr' argument.");
489 * This is needed early, to make sure the access / open calls
490 * are done using the correct mode. Processing this mount option
491 * only when calling pcfs_parse_mntopts() would lead us to attempt
492 * a read/write access to a possibly writeprotected device, and
493 * a readonly mount attempt might fail because of that.
495 if (uap
->flags
& MS_RDONLY
) {
496 vfsp
->vfs_flag
|= VFS_RDONLY
;
497 vfs_setmntopt(vfsp
, MNTOPT_RO
, NULL
, 0);
501 * lookupname() + some extra checks
503 if (error
= pcfs_device_identify(vfsp
, uap
, cr
, &xdev
))
507 * Check that the device isn't already mounted.
509 remounting
= (uap
->flags
& MS_REMOUNT
);
511 if (error
= pcfs_device_ismounted(vfsp
, xdev
, &remounting
,
519 * Mount the filesystem.
520 * An instance structure is required before the attempt to locate
521 * and parse the FAT BPB. This is because mount options may change
522 * the behaviour of the filesystem type matching code. Precreate
523 * it and fill it in to a degree that allows parsing the mount
526 devvp
= makespecvp(xdev
, VBLK
);
527 if (IS_SWAPVP(devvp
)) {
531 error
= fop_open(&devvp
,
532 (vfsp
->vfs_flag
& VFS_RDONLY
) ? FREAD
: FREAD
| FWRITE
, cr
, NULL
);
538 fsp
= kmem_zalloc(sizeof (*fsp
), KM_SLEEP
);
539 fsp
->pcfs_vfs
= vfsp
;
540 fsp
->pcfs_xdev
= xdev
;
541 fsp
->pcfs_devvp
= devvp
;
542 mutex_init(&fsp
->pcfs_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
544 pcfs_parse_mntopts(fsp
);
547 * This is the actual "mount" - the PCFS superblock check.
549 * Find the requested FAT BPB.
550 * Check device type and flag the instance if media is removeable.
552 * Initializes most members of the filesystem instance structure.
553 * Returns EINVAL if no valid BPB can be found. Other errors may
554 * occur after I/O failures are encountered.
556 if (error
= pc_getfattype(fsp
))
560 * Now that the BPB has been parsed, this structural information
561 * is available and known to be valid. Initialize the VFS.
563 vfsp
->vfs_data
= fsp
;
564 vfsp
->vfs_dev
= pseudodev
;
565 vfsp
->vfs_fstype
= pcfstype
;
566 vfs_make_fsid(&vfsp
->vfs_fsid
, pseudodev
, pcfstype
);
567 vfsp
->vfs_bcount
= 0;
568 vfsp
->vfs_bsize
= fsp
->pcfs_clsize
;
571 * Validate that we can access the FAT and that it is, to the
572 * degree we can verify here, self-consistent.
574 if (error
= pc_verify(fsp
))
578 * Record the time of the mount, to return as an "approximate"
579 * timestamp for the FAT root directory. Since FAT roots don't
580 * have timestamps, this is less confusing to the user than
581 * claiming "zero" / Jan/01/1970.
583 gethrestime(&fsp
->pcfs_mounttime
);
586 * Fix up the mount options. Because "noatime" is made default on
587 * removeable media only, a fixed disk will have neither "atime"
588 * nor "noatime" set. We set the options explicitly depending on
589 * the PCFS_NOATIME flag, to inform the user of what applies.
590 * Mount option cancellation will take care that the mutually
591 * exclusive 'other' is cleared.
594 fsp
->pcfs_flags
& PCFS_NOATIME
? MNTOPT_NOATIME
: MNTOPT_ATIME
,
598 * All clear - insert the FS instance into PCFS' list.
600 mutex_enter(&pcfslock
);
601 fsp
->pcfs_nxt
= pc_mounttab
;
603 mutex_exit(&pcfslock
);
604 atomic_inc_32(&pcfs_mountcount
);
608 (void) fop_close(devvp
,
609 vfsp
->vfs_flag
& VFS_RDONLY
? FREAD
: FREAD
| FWRITE
,
610 1, (offset_t
)0, cr
, NULL
);
612 mutex_destroy(&fsp
->pcfs_lock
);
613 kmem_free(fsp
, sizeof (*fsp
));
624 struct pcfs
*fsp
, *fsp1
;
626 if (secpolicy_fs_unmount(cr
, vfsp
) != 0)
629 fsp
= VFSTOPCFS(vfsp
);
632 * We don't have to lock fsp because the VVFSLOCK in vfs layer will
633 * prevent lookuppn from crossing the mount point.
634 * If this is not a forced umount request and there's ongoing I/O,
635 * don't allow the umount to proceed.
638 vfsp
->vfs_flag
|= VFS_UNMOUNTED
;
639 else if (fsp
->pcfs_nrefs
)
642 mutex_enter(&pcfslock
);
645 * If this is a forced umount request or if the fs instance has
646 * been marked as beyond recovery, allow the umount to proceed
647 * regardless of state. pc_diskchanged() forcibly releases all
648 * inactive vnodes/pcnodes.
650 if (flag
& MS_FORCE
|| fsp
->pcfs_flags
& PCFS_IRRECOV
) {
651 rw_enter(&pcnodes_lock
, RW_WRITER
);
653 rw_exit(&pcnodes_lock
);
656 /* now there should be no pcp node on pcfhead or pcdhead. */
658 if (fsp
== pc_mounttab
) {
659 pc_mounttab
= fsp
->pcfs_nxt
;
661 for (fsp1
= pc_mounttab
; fsp1
!= NULL
; fsp1
= fsp1
->pcfs_nxt
)
662 if (fsp1
->pcfs_nxt
== fsp
)
663 fsp1
->pcfs_nxt
= fsp
->pcfs_nxt
;
666 mutex_exit(&pcfslock
);
669 * Since we support VFS_FREEVFS(), there's no need to
670 * free the fsp right now. The framework will tell us
671 * when the right time to do so has arrived by calling
689 fsp
= VFSTOPCFS(vfsp
);
690 if (error
= pc_lockfs(fsp
, 0, 0))
693 pcp
= pc_getnode(fsp
, (daddr_t
)0, 0, NULL
);
696 pcp
->pc_flags
|= PC_EXTERNAL
;
701 * Get file system statistics.
706 struct statvfs64
*sp
)
712 fsp
= VFSTOPCFS(vfsp
);
713 error
= pc_getfat(fsp
);
716 bzero(sp
, sizeof (*sp
));
717 sp
->f_bsize
= sp
->f_frsize
= fsp
->pcfs_clsize
;
718 sp
->f_blocks
= (fsblkcnt64_t
)fsp
->pcfs_ncluster
;
719 sp
->f_bavail
= sp
->f_bfree
= (fsblkcnt64_t
)pc_freeclusters(fsp
);
720 sp
->f_files
= (fsfilcnt64_t
)-1;
721 sp
->f_ffree
= (fsfilcnt64_t
)-1;
722 sp
->f_favail
= (fsfilcnt64_t
)-1;
724 (void) cmpldev(&d32
, fsp
->pcfs_devvp
->v_rdev
);
726 (void) cmpldev(&d32
, vfsp
->vfs_dev
);
728 (void) strcpy(sp
->f_basetype
, vfssw
[vfsp
->vfs_fstype
].vsw_name
);
729 sp
->f_flag
= vf_to_stf(vfsp
->vfs_flag
);
730 sp
->f_namemax
= PCMAXNAMLEN
;
735 pc_syncfsnodes(struct pcfs
*fsp
)
741 if (error
= pc_lockfs(fsp
, 0, 0))
744 if (!(error
= pc_syncfat(fsp
))) {
746 while (hp
< & pcfhead
[ NPCHASH
]) {
747 rw_enter(&pcnodes_lock
, RW_READER
);
749 while (pcp
!= (struct pcnode
*)hp
) {
750 if (VFSTOPCFS(PCTOV(pcp
) -> v_vfsp
) == fsp
)
751 if (error
= pc_nodesync(pcp
))
753 pcp
= pcp
-> pc_forw
;
755 rw_exit(&pcnodes_lock
);
766 * Flush any pending I/O.
778 /* this prevents the filesystem from being umounted. */
779 mutex_enter(&pcfslock
);
781 fsp
= VFSTOPCFS(vfsp
);
782 if (!(fsp
->pcfs_flags
& PCFS_IRRECOV
)) {
783 error
= pc_syncfsnodes(fsp
);
785 rw_enter(&pcnodes_lock
, RW_WRITER
);
787 rw_exit(&pcnodes_lock
);
792 while (fsp
!= NULL
) {
793 if (fsp
->pcfs_flags
& PCFS_IRRECOV
) {
794 rw_enter(&pcnodes_lock
, RW_WRITER
);
796 rw_exit(&pcnodes_lock
);
800 error
= pc_syncfsnodes(fsp
);
805 mutex_exit(&pcfslock
);
810 pc_lockfs(struct pcfs
*fsp
, int diskchanged
, int releasing
)
814 if ((fsp
->pcfs_flags
& PCFS_IRRECOV
) && !releasing
)
817 if ((fsp
->pcfs_flags
& PCFS_LOCKED
) && (fsp
->pcfs_owner
== curthread
)) {
820 mutex_enter(&fsp
->pcfs_lock
);
821 if (fsp
->pcfs_flags
& PCFS_LOCKED
)
824 * We check the IRRECOV bit again just in case somebody
825 * snuck past the initial check but then got held up before
826 * they could grab the lock. (And in the meantime someone
827 * had grabbed the lock and set the bit)
829 if (!diskchanged
&& !(fsp
->pcfs_flags
& PCFS_IRRECOV
)) {
830 if ((err
= pc_getfat(fsp
))) {
831 mutex_exit(&fsp
->pcfs_lock
);
835 fsp
->pcfs_flags
|= PCFS_LOCKED
;
836 fsp
->pcfs_owner
= curthread
;
843 pc_unlockfs(struct pcfs
*fsp
)
846 if ((fsp
->pcfs_flags
& PCFS_LOCKED
) == 0)
847 panic("pc_unlockfs");
848 if (--fsp
->pcfs_count
< 0)
849 panic("pc_unlockfs: count");
850 if (fsp
->pcfs_count
== 0) {
851 fsp
->pcfs_flags
&= ~PCFS_LOCKED
;
853 mutex_exit(&fsp
->pcfs_lock
);
858 pc_syncfat(struct pcfs
*fsp
)
863 struct fat_od_fsi
*fsinfo_disk
;
865 if ((fsp
->pcfs_fatp
== (uchar_t
*)0) ||
866 !(fsp
->pcfs_flags
& PCFS_FATMOD
))
869 * write out all copies of FATs
871 fsp
->pcfs_flags
&= ~PCFS_FATMOD
;
872 fsp
->pcfs_fattime
= gethrestime_sec() + PCFS_DISKTIMEOUT
;
873 for (nfat
= 0; nfat
< fsp
->pcfs_numfat
; nfat
++) {
874 error
= pc_writefat(fsp
, pc_dbdaddr(fsp
,
875 fsp
->pcfs_fatstart
+ nfat
* fsp
->pcfs_fatsec
));
877 pc_mark_irrecov(fsp
);
881 pc_clear_fatchanges(fsp
);
884 * Write out fsinfo sector.
887 bp
= bread(fsp
->pcfs_xdev
,
888 pc_dbdaddr(fsp
, fsp
->pcfs_fsistart
), fsp
->pcfs_secsize
);
889 if (bp
->b_flags
& (B_ERROR
| B_STALE
)) {
890 error
= geterror(bp
);
892 fsinfo_disk
= (fat_od_fsi_t
*)(bp
->b_un
.b_addr
);
893 if (!error
&& FSISIG_OK(fsinfo_disk
)) {
894 fsinfo_disk
->fsi_incore
.fs_free_clusters
=
895 LE_32(fsp
->pcfs_fsinfo
.fs_free_clusters
);
896 fsinfo_disk
->fsi_incore
.fs_next_free
=
897 LE_32(FSINFO_UNKNOWN
);
899 error
= geterror(bp
);
903 pc_mark_irrecov(fsp
);
911 pc_invalfat(struct pcfs
*fsp
)
916 if (fsp
->pcfs_fatp
== (uchar_t
*)0)
917 panic("pc_invalfat");
921 kmem_free(fsp
->pcfs_fatp
, fsp
->pcfs_fatsec
* fsp
->pcfs_secsize
);
922 fsp
->pcfs_fatp
= NULL
;
923 kmem_free(fsp
->pcfs_fat_changemap
, fsp
->pcfs_fat_changemapsize
);
924 fsp
->pcfs_fat_changemap
= NULL
;
926 * Invalidate all the blocks associated with the device.
927 * Not needed if stateless.
929 for (xfsp
= pc_mounttab
; xfsp
; xfsp
= xfsp
->pcfs_nxt
)
930 if (xfsp
!= fsp
&& xfsp
->pcfs_xdev
== fsp
->pcfs_xdev
)
934 binval(fsp
->pcfs_xdev
);
936 * close mounted device
938 (void) fop_close(fsp
->pcfs_devvp
,
939 (PCFSTOVFS(fsp
)->vfs_flag
& VFS_RDONLY
) ? FREAD
: FREAD
|FWRITE
,
940 1, (offset_t
)0, CRED(), NULL
);
944 pc_badfs(struct pcfs
*fsp
)
946 cmn_err(CE_WARN
, "corrupted PC file system on dev (%x.%x)\n",
947 getmajor(fsp
->pcfs_devvp
->v_rdev
),
948 getminor(fsp
->pcfs_devvp
->v_rdev
));
952 * The problem with supporting NFS on the PCFS filesystem is that there
953 * is no good place to keep the generation number. The only possible
954 * place is inside a directory entry. There are a few words that we
955 * don't use - they store NT & OS/2 attributes, and the creation/last access
956 * time of the file - but it seems wrong to use them. In addition, directory
957 * entries come and go. If a directory is removed completely, its directory
958 * blocks are freed and the generation numbers are lost. Whereas in ufs,
959 * inode blocks are dedicated for inodes, so the generation numbers are
960 * permanently kept on the disk.
963 pcfs_vget(struct vfs
*vfsp
, struct vnode
**vpp
, struct fid
*fidp
)
966 struct pc_fid
*pcfid
;
975 pcfid
= (struct pc_fid
*)fidp
;
976 fsp
= VFSTOPCFS(vfsp
);
978 error
= pc_lockfs(fsp
, 0, 0);
984 if (pcfid
->pcfid_block
== 0) {
985 pcp
= pc_getnode(fsp
, (daddr_t
)0, 0, NULL
);
986 pcp
->pc_flags
|= PC_EXTERNAL
;
991 eblkno
= pcfid
->pcfid_block
;
992 eoffset
= pcfid
->pcfid_offset
;
995 eblkno
- fsp
->pcfs_dosstart
) >= fsp
->pcfs_ncluster
) ||
996 (eoffset
> fsp
->pcfs_clsize
)) {
1002 if (eblkno
>= fsp
->pcfs_datastart
|| (eblkno
- fsp
->pcfs_rdirstart
)
1003 < (fsp
->pcfs_rdirsec
& ~(fsp
->pcfs_spcl
- 1))) {
1004 bp
= bread(fsp
->pcfs_xdev
, pc_dbdaddr(fsp
, eblkno
),
1008 * This is an access "backwards" into the FAT12/FAT16
1009 * root directory. A better code structure would
1010 * significantly improve maintainability here ...
1012 bp
= bread(fsp
->pcfs_xdev
, pc_dbdaddr(fsp
, eblkno
),
1013 (int)(fsp
->pcfs_datastart
- eblkno
) * fsp
->pcfs_secsize
);
1015 if (bp
->b_flags
& (B_ERROR
| B_STALE
)) {
1016 error
= geterror(bp
);
1019 pc_mark_irrecov(fsp
);
1024 ep
= (struct pcdir
*)(bp
->b_un
.b_addr
+ eoffset
);
1026 * Ok, if this is a valid file handle that we gave out,
1027 * then simply ensuring that the creation time matches,
1028 * the entry has not been deleted, and it has a valid first
1029 * character should be enough.
1031 * Unfortunately, verifying that the <blkno, offset> _still_
1032 * refers to a directory entry is not easy, since we'd have
1033 * to search _all_ directories starting from root to find it.
1034 * That's a high price to pay just in case somebody is forging
1035 * file handles. So instead we verify that as much of the
1036 * entry is valid as we can:
1038 * 1. The starting cluster is 0 (unallocated) or valid
1039 * 2. It is not an LFN entry
1040 * 3. It is not hidden (unless mounted as such)
1041 * 4. It is not the label
1043 cn
= pc_getstartcluster(fsp
, ep
);
1045 * if the starting cluster is valid, but not valid according
1046 * to pc_validcl(), force it to be to simplify the following if.
1049 cn
= PCF_FIRSTCLUSTER
;
1050 if (IS_FAT32(fsp
)) {
1051 if (cn
>= PCF_LASTCLUSTER32
)
1052 cn
= PCF_FIRSTCLUSTER
;
1054 if (cn
>= PCF_LASTCLUSTER
)
1055 cn
= PCF_FIRSTCLUSTER
;
1057 if ((!pc_validcl(fsp
, cn
)) ||
1058 (PCDL_IS_LFN(ep
)) ||
1059 (PCA_IS_HIDDEN(fsp
, ep
->pcd_attr
)) ||
1060 ((ep
->pcd_attr
& PCA_LABEL
) == PCA_LABEL
)) {
1061 bp
->b_flags
|= B_STALE
| B_AGE
;
1066 if ((ep
->pcd_crtime
.pct_time
== pcfid
->pcfid_ctime
) &&
1067 (ep
->pcd_filename
[0] != PCD_ERASED
) &&
1068 (pc_validchar(ep
->pcd_filename
[0]) ||
1069 (ep
->pcd_filename
[0] == '.' && ep
->pcd_filename
[1] == '.'))) {
1070 pcp
= pc_getnode(fsp
, eblkno
, eoffset
, ep
);
1071 pcp
->pc_flags
|= PC_EXTERNAL
;
1076 bp
->b_flags
|= B_STALE
| B_AGE
;
1083 * Unfortunately, FAT32 FATs can be pretty big (on a 1 GB Jaz drive, about
1084 * a megabyte), so we can't bread() it all in at once. This routine reads a
1085 * FAT a chunk at a time.
1088 pc_readfat(struct pcfs
*fsp
, uchar_t
*fatp
)
1094 size_t fatsize
= fsp
->pcfs_fatsec
* fsp
->pcfs_secsize
;
1095 daddr_t start
= fsp
->pcfs_fatstart
;
1097 readsize
= fsp
->pcfs_clsize
;
1098 for (off
= 0; off
< fatsize
; off
+= readsize
, fatp
+= readsize
) {
1099 if (readsize
> (fatsize
- off
))
1100 readsize
= fatsize
- off
;
1101 diskblk
= pc_dbdaddr(fsp
, start
+
1102 pc_cltodb(fsp
, pc_lblkno(fsp
, off
)));
1103 bp
= bread(fsp
->pcfs_xdev
, diskblk
, readsize
);
1104 if (bp
->b_flags
& (B_ERROR
| B_STALE
)) {
1108 bp
->b_flags
|= B_STALE
| B_AGE
;
1109 bcopy(bp
->b_un
.b_addr
, fatp
, readsize
);
1116 * We write the FAT out a _lot_, in order to make sure that it
1117 * is up-to-date. But on a FAT32 system (large drive, small clusters)
1118 * the FAT might be a couple of megabytes, and writing it all out just
1119 * because we created or deleted a small file is painful (especially
1120 * since we do it for each alternate FAT too). So instead, for FAT16 and
1121 * FAT32 we only write out the bit that has changed. We don't clear
1122 * the 'updated' fields here because the caller might be writing out
1123 * several FATs, so the caller must use pc_clear_fatchanges() after
1124 * all FATs have been updated.
1125 * This function doesn't take "start" from fsp->pcfs_fatstart because
1126 * callers can use it to write either the primary or any of the alternate
1130 pc_writefat(struct pcfs
*fsp
, daddr_t start
)
1136 uchar_t
*fatp
= fsp
->pcfs_fatp
;
1137 size_t fatsize
= fsp
->pcfs_fatsec
* fsp
->pcfs_secsize
;
1139 writesize
= fsp
->pcfs_clsize
;
1140 for (off
= 0; off
< fatsize
; off
+= writesize
, fatp
+= writesize
) {
1141 if (writesize
> (fatsize
- off
))
1142 writesize
= fatsize
- off
;
1143 if (!pc_fat_is_changed(fsp
, pc_lblkno(fsp
, off
))) {
1146 bp
= ngeteblk(writesize
);
1147 bp
->b_edev
= fsp
->pcfs_xdev
;
1148 bp
->b_dev
= cmpdev(bp
->b_edev
);
1149 bp
->b_blkno
= pc_dbdaddr(fsp
, start
+
1150 pc_cltodb(fsp
, pc_lblkno(fsp
, off
)));
1151 bcopy(fatp
, bp
->b_un
.b_addr
, writesize
);
1153 error
= geterror(bp
);
1163 * Mark the FAT cluster that 'cn' is stored in as modified.
1166 pc_mark_fat_updated(struct pcfs
*fsp
, pc_cluster32_t cn
)
1171 /* which fat block is the cluster number stored in? */
1172 if (IS_FAT32(fsp
)) {
1173 size
= sizeof (pc_cluster32_t
);
1174 bn
= pc_lblkno(fsp
, cn
* size
);
1175 fsp
->pcfs_fat_changemap
[bn
] = 1;
1176 } else if (IS_FAT16(fsp
)) {
1177 size
= sizeof (pc_cluster16_t
);
1178 bn
= pc_lblkno(fsp
, cn
* size
);
1179 fsp
->pcfs_fat_changemap
[bn
] = 1;
1184 ASSERT(IS_FAT12(fsp
));
1185 off
= cn
+ (cn
>> 1);
1186 bn
= pc_lblkno(fsp
, off
);
1187 fsp
->pcfs_fat_changemap
[bn
] = 1;
1188 /* does this field wrap into the next fat cluster? */
1189 nbn
= pc_lblkno(fsp
, off
+ 1);
1191 fsp
->pcfs_fat_changemap
[nbn
] = 1;
1197 * return whether the FAT cluster 'bn' is updated and needs to
1201 pc_fat_is_changed(struct pcfs
*fsp
, pc_cluster32_t bn
)
1203 return (fsp
->pcfs_fat_changemap
[bn
] == 1);
1207 * Implementation of VFS_FREEVFS() to support forced umounts.
1208 * This is called by the vfs framework after umount, to trigger
1209 * the release of any resources still associated with the given
1210 * vfs_t once the need to keep them has gone away.
1213 pcfs_freevfs(vfs_t
*vfsp
)
1215 struct pcfs
*fsp
= VFSTOPCFS(vfsp
);
1217 mutex_enter(&pcfslock
);
1219 * Purging the FAT closes the device - can't do any more
1222 if (fsp
->pcfs_fatp
!= (uchar_t
*)0)
1224 mutex_exit(&pcfslock
);
1226 VN_RELE(fsp
->pcfs_devvp
);
1227 mutex_destroy(&fsp
->pcfs_lock
);
1228 kmem_free(fsp
, sizeof (*fsp
));
1231 * Allow _fini() to succeed now, if so desired.
1233 atomic_dec_32(&pcfs_mountcount
);
1238 secondaryBPBChecks(struct pcfs
*fsp
, uchar_t
*bpb
, size_t secsize
)
1240 uint32_t ncl
= fsp
->pcfs_ncluster
;
1243 if (bpb_get_FatSz16(bpb
) == 0)
1244 return (FAT_UNKNOWN
);
1246 if (bpb_get_FatSz16(bpb
) * secsize
< ncl
* 2 &&
1247 bpb_get_FatSz16(bpb
) * secsize
>= (3 * ncl
/ 2))
1249 if (bcmp(bpb_FilSysType16(bpb
), "FAT12", 5) == 0)
1251 if (bcmp(bpb_FilSysType16(bpb
), "FAT16", 5) == 0)
1254 switch (bpb_get_Media(bpb
)) {
1262 * Is this reliable - all floppies are FAT12 ?
1267 * Is this reliable - disks are always FAT16 ?
1273 } else if (ncl
<= 65536) {
1274 if (bpb_get_FatSz16(bpb
) == 0 && bpb_get_FatSz32(bpb
) > 0)
1276 if (VALID_BOOTSIG(bpb_get_BootSig32(bpb
)))
1278 if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb
)))
1281 if (VALID_BOOTSIG(bpb_get_BootSig16(bpb
)))
1283 if (bpb_get_FatSz16(bpb
) * secsize
< ncl
* 4)
1290 return (FAT_UNKNOWN
);
1294 * Check to see if the BPB we found is correct.
1296 * This looks far more complicated than it needs to be for pure structural
1297 * validation. The reason for this is that parseBPB() is also used for
1298 * debugging purposes (mdb dcmd) and we therefore want a bitmap of which
1299 * BPB fields (do not) have 'known good' values, even if we (do not) reject
1300 * the BPB when attempting to mount the filesystem.
1302 * Real-world usage of FAT shows there are a lot of corner-case situations
1303 * and, following the specification strictly, invalid filesystems out there.
1304 * Known are situations such as:
1305 * - FAT12/FAT16 filesystems with garbage in either totsec16/32
1306 * instead of the zero in one of the fields mandated by the spec
1307 * - filesystems that claim to be larger than the partition they're in
1308 * - filesystems without valid media descriptor
1309 * - FAT32 filesystems with RootEntCnt != 0
1310 * - FAT32 filesystems with less than 65526 clusters
1311 * - FAT32 filesystems without valid FSI sector
1312 * - FAT32 filesystems with FAT size in fatsec16 instead of fatsec32
1314 * Such filesystems are accessible by PCFS - if it knew from the start that
1315 * the filesystem should be treated as a specific FAT type. Before S10, it
1316 * relied on the PC/fdisk partition type for the purpose and almost completely
1317 * ignored the BPB; now it ignores the partition type for everything.
1319 * Without a "force this fs as FAT{12,16,32}" tunable or mount option, it's
1320 * not possible to allow all such mostly-compliant filesystems in unless one
1321 * accepts false positives (definitely invalid filesystems that cause problems
1322 * later). This at least allows to pinpoint why the mount failed.
1324 * Due to the use of FAT on removeable media, all relaxations of the rules
1325 * here need to be carefully evaluated with respect to potential effects on PCFS
1326 * resilience. A faulty/"mis-crafted" filesystem must not cause a panic, so
1330 parseBPB(struct pcfs
*fsp
, uchar_t
*bpb
, int *valid
)
1334 uint32_t ncl
; /* number of clusters in file area */
1337 uint32_t fsisec
, bkbootsec
;
1338 blkcnt_t totsec
, totsec16
, totsec32
, datasec
;
1339 size_t fatsec
, fatsec16
, fatsec32
, rdirsec
;
1342 uint64_t validflags
= 0;
1344 if (VALID_BPBSIG(bpb_get_BPBSig(bpb
)))
1345 validflags
|= BPB_BPBSIG_OK
;
1347 rec
= bpb_get_RootEntCnt(bpb
);
1348 reserved
= bpb_get_RsvdSecCnt(bpb
);
1349 fsisec
= bpb_get_FSInfo32(bpb
);
1350 bkbootsec
= bpb_get_BkBootSec32(bpb
);
1351 totsec16
= (blkcnt_t
)bpb_get_TotSec16(bpb
);
1352 totsec32
= (blkcnt_t
)bpb_get_TotSec32(bpb
);
1353 fatsec16
= bpb_get_FatSz16(bpb
);
1354 fatsec32
= bpb_get_FatSz32(bpb
);
1356 totsec
= totsec16
? totsec16
: totsec32
;
1357 fatsec
= fatsec16
? fatsec16
: fatsec32
;
1359 secsize
= bpb_get_BytesPerSec(bpb
);
1360 if (!VALID_SECSIZE(secsize
))
1361 secsize
= fsp
->pcfs_secsize
;
1362 if (secsize
!= fsp
->pcfs_secsize
) {
1363 PC_DPRINTF2(3, "!pcfs: parseBPB, device (%x.%x):\n",
1364 getmajor(fsp
->pcfs_xdev
),
1365 getminor(fsp
->pcfs_xdev
));
1366 PC_DPRINTF2(3, "!BPB secsize %d != "
1367 "autodetected media block size %d\n",
1368 (int)secsize
, (int)fsp
->pcfs_secsize
);
1370 * This allows mounting lofi images of PCFS partitions
1371 * with sectorsize != DEV_BSIZE. We can't parse the
1372 * partition table on whole-disk images unless the
1373 * (undocumented) "secsize=..." mount option is used,
1374 * but at least this allows us to mount if we have
1375 * an image of a partition.
1378 "!pcfs: Using BPB secsize %d\n", (int)secsize
);
1381 if (fsp
->pcfs_mediasize
== 0) {
1382 mediasize
= (len_t
)totsec
* (len_t
)secsize
;
1384 * This is not an error because not all devices support the
1385 * dkio(7i) mediasize queries, and/or not all devices are
1386 * partitioned. If we have not been able to figure out the
1387 * size of the underlaying medium, we have to trust the BPB.
1389 PC_DPRINTF3(3, "!pcfs: parseBPB: mediasize autodetect failed "
1390 "on device (%x.%x), trusting BPB totsec (%lld Bytes)\n",
1391 getmajor(fsp
->pcfs_xdev
), getminor(fsp
->pcfs_xdev
),
1392 (long long)fsp
->pcfs_mediasize
);
1393 } else if ((len_t
)totsec
* (len_t
)secsize
> fsp
->pcfs_mediasize
) {
1395 "!pcfs: autodetected mediasize (%lld Bytes) smaller than "
1396 "FAT BPB mediasize (%lld Bytes).\n"
1397 "truncated filesystem on device (%x.%x), access errors "
1399 (long long)fsp
->pcfs_mediasize
,
1400 (long long)(totsec
* (blkcnt_t
)secsize
),
1401 getmajor(fsp
->pcfs_xdev
), getminor(fsp
->pcfs_xdev
));
1402 mediasize
= fsp
->pcfs_mediasize
;
1405 * This is actually ok. A FAT needs not occupy the maximum
1406 * space available in its partition, it can be shorter.
1408 mediasize
= (len_t
)totsec
* (len_t
)secsize
;
1412 * Since we let just about anything pass through this function,
1413 * fence against divide-by-zero here.
1416 rdirsec
= roundup(rec
* 32, secsize
) / secsize
;
1421 * This assignment is necessary before pc_dbdaddr() can first be
1422 * used. Must initialize the value here.
1424 fsp
->pcfs_secsize
= secsize
;
1425 fsp
->pcfs_sdshift
= ddi_ffs(secsize
/ DEV_BSIZE
) - 1;
1427 fsp
->pcfs_mediasize
= mediasize
;
1429 fsp
->pcfs_spcl
= bpb_get_SecPerClus(bpb
);
1430 fsp
->pcfs_numfat
= bpb_get_NumFATs(bpb
);
1431 fsp
->pcfs_mediadesc
= bpb_get_Media(bpb
);
1432 fsp
->pcfs_clsize
= secsize
* fsp
->pcfs_spcl
;
1433 fsp
->pcfs_rdirsec
= rdirsec
;
1436 * Remember: All PCFS offset calculations in sectors. Before I/O
1437 * is done, convert to DEV_BSIZE units via pc_dbdaddr(). This is
1438 * necessary so that media with > 512Byte sector sizes work correctly.
1440 fsp
->pcfs_fatstart
= fsp
->pcfs_dosstart
+ reserved
;
1441 fsp
->pcfs_rdirstart
= fsp
->pcfs_fatstart
+ fsp
->pcfs_numfat
* fatsec
;
1442 fsp
->pcfs_datastart
= fsp
->pcfs_rdirstart
+ rdirsec
;
1444 (blkcnt_t
)fatsec
* fsp
->pcfs_numfat
-
1448 DTRACE_PROBE4(fatgeometry
,
1449 blkcnt_t
, totsec
, size_t, fatsec
,
1450 size_t, rdirsec
, blkcnt_t
, datasec
);
1453 * 'totsec' is taken directly from the BPB and guaranteed to fit
1454 * into a 32bit unsigned integer. The calculation of 'datasec',
1455 * on the other hand, could underflow for incorrect values in
1456 * rdirsec/reserved/fatsec. Check for that.
1457 * We also check that the BPB conforms to the FAT specification's
1458 * requirement that either of the 16/32bit total sector counts
1462 (totsec16
== totsec32
|| totsec16
== 0 || totsec32
== 0) &&
1463 datasec
< totsec
&& datasec
<= UINT32_MAX
)
1464 validflags
|= BPB_TOTSEC_OK
;
1466 if ((len_t
)totsec
* (len_t
)secsize
<= mediasize
)
1467 validflags
|= BPB_MEDIASZ_OK
;
1469 if (VALID_SECSIZE(secsize
))
1470 validflags
|= BPB_SECSIZE_OK
;
1471 if (VALID_SPCL(fsp
->pcfs_spcl
))
1472 validflags
|= BPB_SECPERCLUS_OK
;
1473 if (VALID_CLSIZE(fsp
->pcfs_clsize
))
1474 validflags
|= BPB_CLSIZE_OK
;
1475 if (VALID_NUMFATS(fsp
->pcfs_numfat
))
1476 validflags
|= BPB_NUMFAT_OK
;
1477 if (VALID_RSVDSEC(reserved
) && reserved
< totsec
)
1478 validflags
|= BPB_RSVDSECCNT_OK
;
1479 if (VALID_MEDIA(fsp
->pcfs_mediadesc
))
1480 validflags
|= BPB_MEDIADESC_OK
;
1481 if (VALID_BOOTSIG(bpb_get_BootSig16(bpb
)))
1482 validflags
|= BPB_BOOTSIG16_OK
;
1483 if (VALID_BOOTSIG(bpb_get_BootSig32(bpb
)))
1484 validflags
|= BPB_BOOTSIG32_OK
;
1485 if (VALID_FSTYPSTR16(bpb_FilSysType16(bpb
)))
1486 validflags
|= BPB_FSTYPSTR16_OK
;
1487 if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb
)))
1488 validflags
|= BPB_FSTYPSTR32_OK
;
1489 if (VALID_OEMNAME(bpb_OEMName(bpb
)))
1490 validflags
|= BPB_OEMNAME_OK
;
1491 if (bkbootsec
> 0 && bkbootsec
<= reserved
&& fsisec
!= bkbootsec
)
1492 validflags
|= BPB_BKBOOTSEC_OK
;
1493 if (fsisec
> 0 && fsisec
<= reserved
)
1494 validflags
|= BPB_FSISEC_OK
;
1495 if (VALID_JMPBOOT(bpb_jmpBoot(bpb
)))
1496 validflags
|= BPB_JMPBOOT_OK
;
1497 if (VALID_FSVER32(bpb_get_FSVer32(bpb
)))
1498 validflags
|= BPB_FSVER_OK
;
1499 if (VALID_VOLLAB(bpb_VolLab16(bpb
)))
1500 validflags
|= BPB_VOLLAB16_OK
;
1501 if (VALID_VOLLAB(bpb_VolLab32(bpb
)))
1502 validflags
|= BPB_VOLLAB32_OK
;
1503 if (VALID_EXTFLAGS(bpb_get_ExtFlags32(bpb
)))
1504 validflags
|= BPB_EXTFLAGS_OK
;
1507 * Try to determine which FAT format to use.
1509 * Calculate the number of clusters in order to determine
1510 * the type of FAT we are looking at. This is the only
1511 * recommended way of determining FAT type, though there
1512 * are other hints in the data, this is the best way.
1514 * Since we let just about "anything" pass through this function
1515 * without early exits, fence against divide-by-zero here.
1517 * datasec was already validated against UINT32_MAX so we know
1518 * the result will not overflow the 32bit calculation.
1521 ncl
= (uint32_t)datasec
/ fsp
->pcfs_spcl
;
1525 fsp
->pcfs_ncluster
= ncl
;
1528 * From the Microsoft FAT specification:
1529 * In the following example, when it says <, it does not mean <=.
1530 * Note also that the numbers are correct. The first number for
1531 * FAT12 is 4085; the second number for FAT16 is 65525. These numbers
1532 * and the '<' signs are not wrong.
1534 * We "specialdetect" the corner cases, and use at least one "extra"
1535 * criterion to decide whether it's FAT16 or FAT32 if the cluster
1536 * count is dangerously close to the boundaries.
1539 if (ncl
<= PCF_FIRSTCLUSTER
) {
1541 } else if (ncl
< 4085) {
1543 } else if (ncl
<= 4096) {
1544 type
= FAT_QUESTIONABLE
;
1545 } else if (ncl
< 65525) {
1547 } else if (ncl
<= 65536) {
1548 type
= FAT_QUESTIONABLE
;
1549 } else if (ncl
< PCF_LASTCLUSTER32
) {
1555 DTRACE_PROBE4(parseBPB__initial
,
1556 struct pcfs
*, fsp
, unsigned char *, bpb
,
1557 int, validflags
, fattype_t
, type
);
1560 fsp
->pcfs_fatsec
= fatsec
;
1562 /* Do some final sanity checks for each specific type of FAT */
1566 validflags
|= BPB_ROOTENTCNT_OK
;
1567 if ((blkcnt_t
)bpb_get_TotSec16(bpb
) == totsec
||
1568 bpb_get_TotSec16(bpb
) == 0)
1569 validflags
|= BPB_TOTSEC16_OK
;
1570 if ((blkcnt_t
)bpb_get_TotSec32(bpb
) == totsec
||
1571 bpb_get_TotSec32(bpb
) == 0)
1572 validflags
|= BPB_TOTSEC32_OK
;
1573 if (bpb_get_FatSz16(bpb
) == fatsec
)
1574 validflags
|= BPB_FATSZ16_OK
;
1575 if (fatsec
* secsize
>= (ncl
+ PCF_FIRSTCLUSTER
)
1577 validflags
|= BPB_FATSZ_OK
;
1579 validflags
|= BPB_NCLUSTERS_OK
;
1581 fsp
->pcfs_lastclmark
= (PCF_LASTCLUSTER
& 0xfff);
1582 fsp
->pcfs_rootblksize
=
1583 fsp
->pcfs_rdirsec
* secsize
;
1584 fsp
->pcfs_fsistart
= 0;
1586 if ((validflags
& FAT12_VALIDMSK
) != FAT12_VALIDMSK
)
1591 validflags
|= BPB_ROOTENTCNT_OK
;
1592 if ((blkcnt_t
)bpb_get_TotSec16(bpb
) == totsec
||
1593 bpb_get_TotSec16(bpb
) == 0)
1594 validflags
|= BPB_TOTSEC16_OK
;
1595 if ((blkcnt_t
)bpb_get_TotSec32(bpb
) == totsec
||
1596 bpb_get_TotSec32(bpb
) == 0)
1597 validflags
|= BPB_TOTSEC32_OK
;
1598 if (bpb_get_FatSz16(bpb
) == fatsec
)
1599 validflags
|= BPB_FATSZ16_OK
;
1600 if (fatsec
* secsize
>= (ncl
+ PCF_FIRSTCLUSTER
) * 2)
1601 validflags
|= BPB_FATSZ_OK
;
1602 if (ncl
>= 4085 && ncl
< 65525)
1603 validflags
|= BPB_NCLUSTERS_OK
;
1605 fsp
->pcfs_lastclmark
= PCF_LASTCLUSTER
;
1606 fsp
->pcfs_rootblksize
=
1607 fsp
->pcfs_rdirsec
* secsize
;
1608 fsp
->pcfs_fsistart
= 0;
1610 if ((validflags
& FAT16_VALIDMSK
) != FAT16_VALIDMSK
)
1615 validflags
|= BPB_ROOTENTCNT_OK
;
1616 if (bpb_get_TotSec16(bpb
) == 0)
1617 validflags
|= BPB_TOTSEC16_OK
;
1618 if ((blkcnt_t
)bpb_get_TotSec32(bpb
) == totsec
)
1619 validflags
|= BPB_TOTSEC32_OK
;
1620 if (bpb_get_FatSz16(bpb
) == 0)
1621 validflags
|= BPB_FATSZ16_OK
;
1622 if (bpb_get_FatSz32(bpb
) == fatsec
)
1623 validflags
|= BPB_FATSZ32_OK
;
1624 if (fatsec
* secsize
>= (ncl
+ PCF_FIRSTCLUSTER
) * 4)
1625 validflags
|= BPB_FATSZ_OK
;
1626 if (ncl
>= 65525 && ncl
< PCF_LASTCLUSTER32
)
1627 validflags
|= BPB_NCLUSTERS_OK
;
1629 fsp
->pcfs_lastclmark
= PCF_LASTCLUSTER32
;
1630 fsp
->pcfs_rootblksize
= fsp
->pcfs_clsize
;
1631 fsp
->pcfs_fsistart
= fsp
->pcfs_dosstart
+ fsisec
;
1632 if (validflags
& BPB_FSISEC_OK
)
1633 fsp
->pcfs_flags
|= PCFS_FSINFO_OK
;
1634 fsp
->pcfs_rootclnum
= bpb_get_RootClus32(bpb
);
1635 if (pc_validcl(fsp
, fsp
->pcfs_rootclnum
))
1636 validflags
|= BPB_ROOTCLUSTER_OK
;
1639 * Current PCFS code only works if 'pcfs_rdirstart'
1640 * contains the root cluster number on FAT32.
1641 * That's a mis-use and would better be changed.
1643 fsp
->pcfs_rdirstart
= (daddr_t
)fsp
->pcfs_rootclnum
;
1645 if ((validflags
& FAT32_VALIDMSK
) != FAT32_VALIDMSK
)
1648 case FAT_QUESTIONABLE
:
1649 type
= secondaryBPBChecks(fsp
, bpb
, secsize
);
1652 ASSERT(type
== FAT_UNKNOWN
);
1656 ASSERT(type
!= FAT_QUESTIONABLE
);
1658 fsp
->pcfs_fattype
= type
;
1661 *valid
= validflags
;
1663 DTRACE_PROBE4(parseBPB__final
,
1664 struct pcfs
*, fsp
, unsigned char *, bpb
,
1665 int, validflags
, fattype_t
, type
);
1667 if (type
!= FAT_UNKNOWN
) {
1668 ASSERT((secsize
& (DEV_BSIZE
- 1)) == 0);
1669 ASSERT(ISP2(secsize
/ DEV_BSIZE
));
1678 * Detect the device's native block size (sector size).
1680 * Test whether the device is:
1681 * - a floppy device from a known controller type via DKIOCINFO
1682 * - a real floppy using the fd(7d) driver and capable of fdio(7I) ioctls
1683 * - a USB floppy drive (identified by drive geometry)
1685 * Detecting a floppy will make PCFS metadata updates on such media synchronous,
1686 * to minimize risks due to slow I/O and user hotplugging / device ejection.
1688 * This might be a bit wasteful on kernel stack space; if anyone's
1689 * bothered by this, kmem_alloc/kmem_free the ioctl arguments...
1692 pcfs_device_getinfo(struct pcfs
*fsp
)
1694 dev_t rdev
= fsp
->pcfs_xdev
;
1701 } arg
; /* save stackspace ... */
1702 intptr_t argp
= (intptr_t)&arg
;
1705 int isfloppy
, isremoveable
, ishotpluggable
;
1706 cred_t
*cr
= CRED();
1708 if (ldi_ident_from_dev(rdev
, &li
))
1711 error
= ldi_open_by_dev(&rdev
, OTYP_CHR
, FREAD
, cr
, &lh
, li
);
1712 ldi_ident_release(li
);
1717 * Not sure if this could possibly happen. It'd be a bit like
1718 * fop_open() changing the passed-in vnode ptr. We're just not
1719 * expecting it, needs some thought if triggered ...
1721 ASSERT(fsp
->pcfs_xdev
== rdev
);
1724 * Check for removeable/hotpluggable media.
1726 if (ldi_ioctl(lh
, DKIOCREMOVABLE
,
1727 (intptr_t)&isremoveable
, FKIOCTL
, cr
, NULL
)) {
1730 if (ldi_ioctl(lh
, DKIOCHOTPLUGGABLE
,
1731 (intptr_t)&ishotpluggable
, FKIOCTL
, cr
, NULL
)) {
1736 * Make sure we don't use "half-initialized" values if the ioctls fail.
1738 if (ldi_ioctl(lh
, DKIOCGMEDIAINFO
, argp
, FKIOCTL
, cr
, NULL
)) {
1739 bzero(&arg
, sizeof (arg
));
1740 fsp
->pcfs_mediasize
= 0;
1742 fsp
->pcfs_mediasize
=
1743 (len_t
)arg
.mi
.dki_lbsize
*
1744 (len_t
)arg
.mi
.dki_capacity
;
1747 if (VALID_SECSIZE(arg
.mi
.dki_lbsize
)) {
1748 if (fsp
->pcfs_secsize
== 0) {
1749 fsp
->pcfs_secsize
= arg
.mi
.dki_lbsize
;
1751 ddi_ffs(arg
.mi
.dki_lbsize
/ DEV_BSIZE
) - 1;
1753 PC_DPRINTF4(1, "!pcfs: autodetected media block size "
1754 "%d, device (%x.%x), different from user-provided "
1755 "%d. User override - ignoring autodetect result.\n",
1757 getmajor(fsp
->pcfs_xdev
), getminor(fsp
->pcfs_xdev
),
1760 } else if (arg
.mi
.dki_lbsize
) {
1761 PC_DPRINTF3(1, "!pcfs: autodetected media block size "
1762 "%d, device (%x.%x), invalid (not 512, 1024, 2048, 4096). "
1763 "Ignoring autodetect result.\n",
1765 getmajor(fsp
->pcfs_xdev
), getminor(fsp
->pcfs_xdev
));
1769 * We treat the following media types as a floppy by default.
1772 (arg
.mi
.dki_media_type
== DK_FLOPPY
||
1773 arg
.mi
.dki_media_type
== DK_ZIP
||
1774 arg
.mi
.dki_media_type
== DK_JAZ
);
1777 * if this device understands fdio(7I) requests it's
1778 * obviously a floppy drive.
1781 !ldi_ioctl(lh
, FDIOGCHAR
, argp
, FKIOCTL
, cr
, NULL
))
1785 * some devices we like to treat as floppies, but they don't
1786 * understand fdio(7I) requests.
1789 !ldi_ioctl(lh
, DKIOCINFO
, argp
, FKIOCTL
, cr
, NULL
) &&
1790 (arg
.ci
.dki_ctype
== DKC_WDC2880
||
1791 arg
.ci
.dki_ctype
== DKC_NCRFLOPPY
||
1792 arg
.ci
.dki_ctype
== DKC_SMSFLOPPY
||
1793 arg
.ci
.dki_ctype
== DKC_INTEL82077
))
1797 * This is the "final fallback" test - media with
1798 * 2 heads and 80 cylinders are assumed to be floppies.
1799 * This is normally true for USB floppy drives ...
1802 !ldi_ioctl(lh
, DKIOCGGEOM
, argp
, FKIOCTL
, cr
, NULL
) &&
1803 (arg
.gi
.dkg_ncyl
== 80 && arg
.gi
.dkg_nhead
== 2))
1807 * This is similar to the "old" PCFS code that sets this flag
1808 * just based on the media descriptor being 0xf8 (MD_FIXED).
1809 * Should be re-worked. We really need some specialcasing for
1813 fsp
->pcfs_flags
|= PCFS_NOCHK
;
1817 * We automatically disable access time updates if the medium is
1818 * removeable and/or hotpluggable, and the admin did not explicitly
1819 * request access time updates (via the "atime" mount option).
1820 * The majority of flash-based media should fit this category.
1821 * Minimizing write access extends the lifetime of your memory stick !
1823 if (!vfs_optionisset(fsp
->pcfs_vfs
, MNTOPT_ATIME
, NULL
) &&
1824 (isremoveable
|| ishotpluggable
| isfloppy
)) {
1825 fsp
->pcfs_flags
|= PCFS_NOATIME
;
1828 (void) ldi_close(lh
, FREAD
, cr
);
1830 if (fsp
->pcfs_secsize
== 0) {
1831 PC_DPRINTF3(1, "!pcfs: media block size autodetection "
1832 "device (%x.%x) failed, no user-provided fallback. "
1833 "Using %d bytes.\n",
1834 getmajor(fsp
->pcfs_xdev
), getminor(fsp
->pcfs_xdev
),
1836 fsp
->pcfs_secsize
= DEV_BSIZE
;
1837 fsp
->pcfs_sdshift
= 0;
1839 ASSERT(fsp
->pcfs_secsize
% DEV_BSIZE
== 0);
1840 ASSERT(VALID_SECSIZE(fsp
->pcfs_secsize
));
1844 * Get the FAT type for the DOS medium.
1846 * -------------------------
1847 * According to Microsoft:
1848 * The FAT type one of FAT12, FAT16, or FAT32 is determined by the
1849 * count of clusters on the volume and nothing else.
1850 * -------------------------
1854 pc_getfattype(struct pcfs
*fsp
)
1858 struct vnode
*devvp
= fsp
->pcfs_devvp
;
1859 dev_t dev
= devvp
->v_rdev
;
1862 * Detect the native block size of the medium, and attempt to
1863 * detect whether the medium is removeable.
1864 * We do treat removable media (floppies, USB and FireWire disks)
1865 * differently wrt. to the frequency and synchronicity of FAT updates.
1866 * We need to know the media block size in order to be able to
1867 * parse the partition table.
1869 pcfs_device_getinfo(fsp
);
1872 * Unpartitioned media (floppies and some removeable devices)
1873 * don't have a partition table, the FAT BPB is at disk block 0.
1874 * Start out by reading block 0.
1876 fsp
->pcfs_dosstart
= 0;
1877 bp
= bread(dev
, pc_dbdaddr(fsp
, fsp
->pcfs_dosstart
), fsp
->pcfs_secsize
);
1879 if (error
= geterror(bp
))
1883 * Validate the BPB and fill in the instance structure.
1885 if (!parseBPB(fsp
, (uchar_t
*)bp
->b_un
.b_addr
, NULL
)) {
1886 PC_DPRINTF3(1, "!pcfs: pc_getfattype: No FAT BPB on "
1887 "device (%x.%x), disk LBA %u\n",
1888 getmajor(dev
), getminor(dev
),
1889 (uint_t
)pc_dbdaddr(fsp
, fsp
->pcfs_dosstart
));
1894 ASSERT(fsp
->pcfs_fattype
!= FAT_UNKNOWN
);
1898 * Release the buffer used
1907 * Get the file allocation table.
1908 * If there is an old FAT, invalidate it.
1911 pc_getfat(struct pcfs
*fsp
)
1913 struct buf
*bp
= NULL
;
1914 uchar_t
*fatp
= NULL
;
1915 uchar_t
*fat_changemap
= NULL
;
1917 int fat_changemapsize
;
1920 int altfat_mustmatch
= 0;
1921 int fatsize
= fsp
->pcfs_fatsec
* fsp
->pcfs_secsize
;
1923 if (fsp
->pcfs_fatp
) {
1925 * There is a FAT in core.
1926 * If there are open file pcnodes or we have modified it or
1927 * it hasn't timed out yet use the in core FAT.
1928 * Otherwise invalidate it and get a new one
1931 if (fsp
->pcfs_frefs
||
1932 (fsp
->pcfs_flags
& PCFS_FATMOD
) ||
1933 (gethrestime_sec() < fsp
->pcfs_fattime
)) {
1936 mutex_enter(&pcfslock
);
1938 mutex_exit(&pcfslock
);
1945 * Get FAT and check it for validity
1947 fatp
= kmem_alloc(fatsize
, KM_SLEEP
);
1948 error
= pc_readfat(fsp
, fatp
);
1953 fat_changemapsize
= (fatsize
/ fsp
->pcfs_clsize
) + 1;
1954 fat_changemap
= kmem_zalloc(fat_changemapsize
, KM_SLEEP
);
1955 fsp
->pcfs_fatp
= fatp
;
1956 fsp
->pcfs_fat_changemapsize
= fat_changemapsize
;
1957 fsp
->pcfs_fat_changemap
= fat_changemap
;
1960 * The only definite signature check is that the
1961 * media descriptor byte should match the first byte
1964 if (fatp
[0] != fsp
->pcfs_mediadesc
) {
1965 cmn_err(CE_NOTE
, "!pcfs: FAT signature mismatch, "
1966 "media descriptor %x, FAT[0] lowbyte %x\n",
1967 (uint32_t)fsp
->pcfs_mediadesc
, (uint32_t)fatp
[0]);
1968 cmn_err(CE_NOTE
, "!pcfs: Enforcing alternate FAT validation\n");
1969 altfat_mustmatch
= 1;
1973 * Get alternate FATs and check for consistency
1974 * This is an inlined version of pc_readfat().
1975 * Since we're only comparing FAT and alternate FAT,
1976 * there's no reason to let pc_readfat() copy data out
1977 * of the buf. Instead, compare in-situ, one cluster
1980 for (nfat
= 1; nfat
< fsp
->pcfs_numfat
; nfat
++) {
1984 startsec
= pc_dbdaddr(fsp
,
1985 fsp
->pcfs_fatstart
+ nfat
* fsp
->pcfs_fatsec
);
1987 for (off
= 0; off
< fatsize
; off
+= fsp
->pcfs_clsize
) {
1988 daddr_t fatblk
= startsec
+ pc_dbdaddr(fsp
,
1989 pc_cltodb(fsp
, pc_lblkno(fsp
, off
)));
1991 bp
= bread(fsp
->pcfs_xdev
, fatblk
,
1992 MIN(fsp
->pcfs_clsize
, fatsize
- off
));
1993 if (bp
->b_flags
& (B_ERROR
| B_STALE
)) {
1995 "!pcfs: alternate FAT #%d (start LBA %p)"
1996 " read error at offset %ld on device"
1998 nfat
, (void *)(uintptr_t)startsec
, off
,
1999 getmajor(fsp
->pcfs_xdev
),
2000 getminor(fsp
->pcfs_xdev
));
2005 bp
->b_flags
|= B_STALE
| B_AGE
;
2006 if (bcmp(bp
->b_un
.b_addr
, fatp
+ off
,
2007 MIN(fsp
->pcfs_clsize
, fatsize
- off
))) {
2009 "!pcfs: alternate FAT #%d (start LBA %p)"
2010 " corrupted at offset %ld on device"
2012 nfat
, (void *)(uintptr_t)startsec
, off
,
2013 getmajor(fsp
->pcfs_xdev
),
2014 getminor(fsp
->pcfs_xdev
));
2015 if (altfat_mustmatch
) {
2022 bp
= NULL
; /* prevent double release */
2026 fsp
->pcfs_fattime
= gethrestime_sec() + PCFS_DISKTIMEOUT
;
2027 fsp
->pcfs_fatjustread
= 1;
2030 * Retrieve FAT32 fsinfo sector.
2031 * A failure to read this is not fatal to accessing the volume.
2032 * It simply means operations that count or search free blocks
2033 * will have to do a full FAT walk, vs. a possibly quicker lookup
2034 * of the summary information.
2035 * Hence, we log a message but return success overall after this point.
2037 if (IS_FAT32(fsp
) && (fsp
->pcfs_flags
& PCFS_FSINFO_OK
)) {
2038 struct fat_od_fsi
*fsinfo_disk
;
2040 bp
= bread(fsp
->pcfs_xdev
,
2041 pc_dbdaddr(fsp
, fsp
->pcfs_fsistart
), fsp
->pcfs_secsize
);
2042 fsinfo_disk
= (struct fat_od_fsi
*)bp
->b_un
.b_addr
;
2043 if (bp
->b_flags
& (B_ERROR
| B_STALE
) ||
2044 !FSISIG_OK(fsinfo_disk
)) {
2046 "!pcfs: error reading fat32 fsinfo from "
2047 "device (%x.%x), block %lld",
2048 getmajor(fsp
->pcfs_xdev
), getminor(fsp
->pcfs_xdev
),
2049 (long long)pc_dbdaddr(fsp
, fsp
->pcfs_fsistart
));
2050 fsp
->pcfs_flags
&= ~PCFS_FSINFO_OK
;
2051 fsp
->pcfs_fsinfo
.fs_free_clusters
= FSINFO_UNKNOWN
;
2052 fsp
->pcfs_fsinfo
.fs_next_free
= FSINFO_UNKNOWN
;
2054 bp
->b_flags
|= B_STALE
| B_AGE
;
2055 fsinfo_disk
= (fat_od_fsi_t
*)(bp
->b_un
.b_addr
);
2056 fsp
->pcfs_fsinfo
.fs_free_clusters
=
2057 LE_32(fsinfo_disk
->fsi_incore
.fs_free_clusters
);
2058 fsp
->pcfs_fsinfo
.fs_next_free
=
2059 LE_32(fsinfo_disk
->fsi_incore
.fs_next_free
);
2065 if (pc_validcl(fsp
, (pc_cluster32_t
)fsp
->pcfs_fsinfo
.fs_next_free
))
2066 fsp
->pcfs_nxfrecls
= fsp
->pcfs_fsinfo
.fs_next_free
;
2068 fsp
->pcfs_nxfrecls
= PCF_FIRSTCLUSTER
;
2073 cmn_err(CE_NOTE
, "!pcfs: illegal disk format");
2077 kmem_free(fatp
, fatsize
);
2079 kmem_free(fat_changemap
, fat_changemapsize
);
2082 pc_mark_irrecov(fsp
);