4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 * Copyright (c) 2017 by Delphix. All rights reserved.
29 #include <sys/param.h>
30 #include <sys/t_lock.h>
31 #include <sys/errno.h>
32 #include <sys/sysmacros.h>
34 #include <sys/systm.h>
36 #include <sys/vnode.h>
40 #include <sys/cmn_err.h>
41 #include <sys/debug.h>
43 #include <sys/fs/pc_label.h>
44 #include <sys/fs/pc_fs.h>
45 #include <sys/fs/pc_dir.h>
46 #include <sys/fs/pc_node.h>
47 #include <sys/dirent.h>
52 struct pchead pcfhead
[NPCHASH
];
53 struct pchead pcdhead
[NPCHASH
];
55 extern krwlock_t pcnodes_lock
;
57 static int pc_getentryblock(struct pcnode
*, struct buf
**);
58 static int syncpcp(struct pcnode
*, int);
61 * fake entry for root directory, since this does not have a parent
64 struct pcdir pcfs_rootdirentry
= {
73 struct pchead
*hdp
, *hfp
;
75 for (i
= 0; i
< NPCHASH
; i
++) {
78 hdp
->pch_forw
= (struct pcnode
*)hdp
;
79 hdp
->pch_back
= (struct pcnode
*)hdp
;
80 hfp
->pch_forw
= (struct pcnode
*)hfp
;
81 hfp
->pch_back
= (struct pcnode
*)hfp
;
87 struct pcfs
*fsp
, /* filesystem for node */
88 daddr_t blkno
, /* phys block no of dir entry */
89 int offset
, /* offset of dir entry in block */
90 struct pcdir
*ep
) /* node dir entry */
95 pc_cluster32_t scluster
;
97 ASSERT(fsp
->pcfs_flags
& PCFS_LOCKED
);
99 ep
= &pcfs_rootdirentry
;
102 scluster
= pc_getstartcluster(fsp
, ep
);
105 * First look for active nodes.
106 * A file node is identified by the location (blkno, offset) of
107 * its directory entry.
108 * Directory nodes are identified by the starting cluster number
111 if (ep
->pcd_attr
& PCA_DIR
) {
112 hp
= &pcdhead
[PCDHASH(fsp
, scluster
)];
113 rw_enter(&pcnodes_lock
, RW_READER
);
114 for (pcp
= hp
->pch_forw
;
115 pcp
!= (struct pcnode
*)hp
; pcp
= pcp
->pc_forw
) {
116 if ((fsp
== VFSTOPCFS(PCTOV(pcp
)->v_vfsp
)) &&
117 (scluster
== pcp
->pc_scluster
)) {
119 rw_exit(&pcnodes_lock
);
123 rw_exit(&pcnodes_lock
);
125 hp
= &pcfhead
[PCFHASH(fsp
, blkno
, offset
)];
126 rw_enter(&pcnodes_lock
, RW_READER
);
127 for (pcp
= hp
->pch_forw
;
128 pcp
!= (struct pcnode
*)hp
; pcp
= pcp
->pc_forw
) {
129 if ((fsp
== VFSTOPCFS(PCTOV(pcp
)->v_vfsp
)) &&
130 ((pcp
->pc_flags
& PC_INVAL
) == 0) &&
131 (blkno
== pcp
->pc_eblkno
) &&
132 (offset
== pcp
->pc_eoffset
)) {
134 rw_exit(&pcnodes_lock
);
138 rw_exit(&pcnodes_lock
);
141 * Cannot find node in active list. Allocate memory for a new node,
142 * initialize it, and put it on the active list.
144 pcp
= kmem_zalloc(sizeof (struct pcnode
), KM_SLEEP
);
145 vp
= vn_alloc(KM_SLEEP
);
148 pcp
->pc_eblkno
= blkno
;
149 pcp
->pc_eoffset
= offset
;
150 pcp
->pc_scluster
= scluster
;
151 pcp
->pc_lcluster
= scluster
;
154 if (ep
->pcd_attr
& PCA_DIR
) {
155 vn_setops(vp
, &pcfs_dvnodeops
);
161 pc_cluster32_t ncl
= 0;
163 scluster
= fsp
->pcfs_rdirstart
;
164 if (pc_fileclsize(fsp
, scluster
, &ncl
)) {
165 PC_DPRINTF1(2, "cluster chain "
166 "corruption, scluster=%d\n",
168 pcp
->pc_flags
|= PC_INVAL
;
170 pcp
->pc_size
= fsp
->pcfs_clsize
* ncl
;
173 fsp
->pcfs_rdirsec
* fsp
->pcfs_secsize
;
176 pc_cluster32_t ncl
= 0;
178 if (pc_fileclsize(fsp
, scluster
, &ncl
)) {
179 PC_DPRINTF1(2, "cluster chain corruption, "
180 "scluster=%d\n", scluster
);
181 pcp
->pc_flags
|= PC_INVAL
;
183 pcp
->pc_size
= fsp
->pcfs_clsize
* ncl
;
186 vn_setops(vp
, &pcfs_fvnodeops
);
188 vp
->v_flag
= VNOSWAP
;
190 pcp
->pc_size
= ltohi(ep
->pcd_size
);
193 VFS_HOLD(PCFSTOVFS(fsp
));
194 vp
->v_data
= (caddr_t
)pcp
;
195 vp
->v_vfsp
= PCFSTOVFS(fsp
);
197 rw_enter(&pcnodes_lock
, RW_WRITER
);
199 rw_exit(&pcnodes_lock
);
204 syncpcp(struct pcnode
*pcp
, int flags
)
207 if (!vn_has_cached_data(PCTOV(pcp
)))
210 err
= fop_putpage(PCTOV(pcp
), 0, 0, flags
,
217 pc_rele(struct pcnode
*pcp
)
224 PC_DPRINTF1(8, "pc_rele vp=0x%p\n", (void *)vp
);
226 fsp
= VFSTOPCFS(vp
->v_vfsp
);
227 ASSERT(fsp
->pcfs_flags
& PCFS_LOCKED
);
229 rw_enter(&pcnodes_lock
, RW_WRITER
);
230 pcp
->pc_flags
|= PC_RELEHOLD
;
233 if (vp
->v_type
!= VDIR
&& (pcp
->pc_flags
& PC_INVAL
) == 0) {
235 * If the file was removed while active it may be safely
239 if (pcp
->pc_entry
.pcd_filename
[0] == PCD_ERASED
) {
240 (void) pc_truncate(pcp
, 0);
241 } else if (pcp
->pc_flags
& PC_CHG
) {
242 (void) pc_nodeupdate(pcp
);
244 err
= syncpcp(pcp
, B_INVAL
);
246 (void) syncpcp(pcp
, B_INVAL
| B_FORCE
);
249 if (vn_has_cached_data(vp
)) {
251 * pvn_vplist_dirty will abort all old pages
253 (void) pvn_vplist_dirty(vp
, 0,
254 pcfs_putapage
, B_INVAL
, NULL
);
257 (void) pc_syncfat(fsp
);
258 mutex_enter(&vp
->v_lock
);
259 if (vn_has_cached_data(vp
)) {
260 mutex_exit(&vp
->v_lock
);
263 ASSERT(!vn_has_cached_data(vp
));
266 if (vp
->v_count
> 0) { /* Is this check still needed? */
267 PC_DPRINTF1(3, "pc_rele: pcp=0x%p HELD AGAIN!\n", (void *)pcp
);
268 mutex_exit(&vp
->v_lock
);
269 pcp
->pc_flags
&= ~PC_RELEHOLD
;
270 rw_exit(&pcnodes_lock
);
275 rw_exit(&pcnodes_lock
);
277 * XXX - old code had a check for !(pcp->pc_flags & PC_INVAL)
278 * here. Seems superfluous/incorrect, but then earlier on PC_INVAL
279 * was never set anywhere in PCFS. Now it is, and we _have_ to drop
280 * the file reference here. Else, we'd screw up umount/modunload.
282 if ((vp
->v_type
== VREG
)) {
286 VFS_RELE(vp
->v_vfsp
);
288 if (fsp
->pcfs_nrefs
< 0) {
289 panic("pc_rele: nrefs count");
291 if (fsp
->pcfs_frefs
< 0) {
292 panic("pc_rele: frefs count");
295 mutex_exit(&vp
->v_lock
);
298 kmem_free(pcp
, sizeof (struct pcnode
));
302 * Mark a pcnode as modified with the current time.
306 pc_mark_mod(struct pcfs
*fsp
, struct pcnode
*pcp
)
310 if (PCTOV(pcp
)->v_type
== VDIR
)
313 ASSERT(PCTOV(pcp
)->v_type
== VREG
);
316 if (pc_tvtopct(&now
, &pcp
->pc_entry
.pcd_mtime
))
317 PC_DPRINTF1(2, "pc_mark_mod failed timestamp "
318 "conversion, curtime = %lld\n",
319 (long long)now
.tv_sec
);
321 pcp
->pc_flags
|= PC_CHG
;
325 * Mark a pcnode as accessed with the current time.
328 pc_mark_acc(struct pcfs
*fsp
, struct pcnode
*pcp
)
330 struct pctime pt
= { 0, 0 };
333 if (fsp
->pcfs_flags
& PCFS_NOATIME
|| PCTOV(pcp
)->v_type
== VDIR
)
336 ASSERT(PCTOV(pcp
)->v_type
== VREG
);
339 if (pc_tvtopct(&now
, &pt
)) {
340 PC_DPRINTF1(2, "pc_mark_acc failed timestamp "
341 "conversion, curtime = %lld\n",
342 (long long)now
.tv_sec
);
347 * We don't really want to write the adate for every access
348 * on flash media; make sure it really changed !
350 if (pcp
->pc_entry
.pcd_ladate
!= pt
.pct_date
) {
351 pcp
->pc_entry
.pcd_ladate
= pt
.pct_date
;
352 pcp
->pc_flags
|= (PC_CHG
| PC_ACC
);
357 * Truncate a file to a length.
358 * Node must be locked.
361 pc_truncate(struct pcnode
*pcp
, uint_t length
)
367 PC_DPRINTF3(4, "pc_truncate pcp=0x%p, len=%u, size=%u\n",
368 (void *)pcp
, length
, pcp
->pc_size
);
370 if (pcp
->pc_flags
& PC_INVAL
)
372 fsp
= VFSTOPCFS(vp
->v_vfsp
);
374 * directories are always truncated to zero and are not marked
376 if (vp
->v_type
== VDIR
) {
377 error
= pc_bfree(pcp
, 0);
381 * If length is the same as the current size
382 * just mark the pcnode and return.
384 if (length
> pcp
->pc_size
) {
386 uint_t llcn
= howmany((offset_t
)length
, fsp
->pcfs_clsize
);
389 * We are extending a file.
390 * Extend it with _one_ call to pc_balloc (no holes)
391 * since we don't need to use the block number(s).
393 if ((daddr_t
)howmany((offset_t
)pcp
->pc_size
, fsp
->pcfs_clsize
) <
395 error
= pc_balloc(pcp
, (daddr_t
)(llcn
- 1), 1, &bno
);
398 pc_cluster32_t ncl
= 0;
399 PC_DPRINTF1(2, "pc_truncate: error=%d\n", error
);
401 * probably ran out of disk space;
402 * determine current file size
404 if (pc_fileclsize(fsp
, pcp
->pc_scluster
, &ncl
)) {
405 PC_DPRINTF1(2, "cluster chain corruption, "
406 "scluster=%d\n", pcp
->pc_scluster
);
407 pcp
->pc_flags
|= PC_INVAL
;
409 pcp
->pc_size
= fsp
->pcfs_clsize
* ncl
;
411 pcp
->pc_size
= length
;
413 } else if (length
< pcp
->pc_size
) {
415 * We are shrinking a file.
416 * Free blocks after the block that length points to.
418 if (pc_blkoff(fsp
, length
) == 0) {
420 * Truncation to a block (cluster size) boundary only
421 * requires us to invalidate everything after the new
424 (void) pvn_vplist_dirty(PCTOV(pcp
), (uoff_t
)length
,
425 pcfs_putapage
, B_INVAL
| B_TRUNC
, CRED());
428 * pvn_vpzero() cannot deal with more than MAXBSIZE
429 * chunks. Since the FAT clustersize can get larger
430 * than that, we'll zero from the new length to the
431 * end of the cluster for clustersizes smaller than
432 * MAXBSIZE - or the end of the MAXBSIZE block in
433 * case we've got a large clustersize.
436 roundup(length
, MIN(fsp
->pcfs_clsize
, MAXBSIZE
)) -
439 pvn_vpzero(PCTOV(pcp
), (uoff_t
)length
, nbytes
);
440 (void) pvn_vplist_dirty(PCTOV(pcp
),
441 (uoff_t
)length
+ nbytes
,
442 pcfs_putapage
, B_INVAL
| B_TRUNC
, CRED());
444 error
= pc_bfree(pcp
, (pc_cluster32_t
)
445 howmany((offset_t
)length
, fsp
->pcfs_clsize
));
446 pcp
->pc_size
= length
;
450 * This is the only place in PCFS code where pc_mark_mod() is called
451 * without setting PC_MOD. May be a historical artifact ...
453 pc_mark_mod(fsp
, pcp
);
458 * Get block for entry.
461 pc_getentryblock(struct pcnode
*pcp
, struct buf
**bpp
)
465 fsp
= VFSTOPCFS(PCTOV(pcp
)->v_vfsp
);
466 if (pcp
->pc_eblkno
>= fsp
->pcfs_datastart
||
467 (pcp
->pc_eblkno
- fsp
->pcfs_rdirstart
) <
468 (fsp
->pcfs_rdirsec
& ~(fsp
->pcfs_spcl
- 1))) {
469 *bpp
= bread(fsp
->pcfs_xdev
,
470 pc_dbdaddr(fsp
, pcp
->pc_eblkno
), fsp
->pcfs_clsize
);
472 *bpp
= bread(fsp
->pcfs_xdev
,
473 pc_dbdaddr(fsp
, pcp
->pc_eblkno
),
474 (int)(fsp
->pcfs_datastart
- pcp
->pc_eblkno
) *
477 if ((*bpp
)->b_flags
& B_ERROR
) {
479 pc_mark_irrecov(fsp
);
486 * Sync all data associated with a file.
487 * Flush all the blocks in the buffer cache out to disk, sync the FAT and
488 * update the directory entry.
491 pc_nodesync(struct pcnode
*pcp
)
498 fsp
= VFSTOPCFS(vp
->v_vfsp
);
500 if (pcp
->pc_flags
& PC_MOD
) {
502 * Flush all data blocks from buffer cache and
503 * update the FAT which points to the data.
505 if (err
= syncpcp(pcp
, 0)) { /* %% ?? how to handle error? */
509 pc_mark_irrecov(fsp
);
513 pcp
->pc_flags
&= ~PC_MOD
;
516 * update the directory entry
518 if (pcp
->pc_flags
& PC_CHG
)
519 (void) pc_nodeupdate(pcp
);
524 * Update the node's directory entry.
527 pc_nodeupdate(struct pcnode
*pcp
)
535 fsp
= VFSTOPCFS(vp
->v_vfsp
);
536 if (IS_FAT32(fsp
) && (vp
->v_flag
& VROOT
)) {
537 /* no node to update */
538 pcp
->pc_flags
&= ~(PC_CHG
| PC_MOD
| PC_ACC
);
541 if (vp
->v_flag
& VROOT
) {
542 panic("pc_nodeupdate");
544 if (pcp
->pc_flags
& PC_INVAL
)
546 PC_DPRINTF3(7, "pc_nodeupdate pcp=0x%p, bn=%ld, off=%d\n", (void *)pcp
,
547 pcp
->pc_eblkno
, pcp
->pc_eoffset
);
549 if (error
= pc_getentryblock(pcp
, &bp
)) {
552 if (vp
->v_type
== VREG
) {
553 if (pcp
->pc_flags
& PC_CHG
)
554 pcp
->pc_entry
.pcd_attr
|= PCA_ARCH
;
555 pcp
->pc_entry
.pcd_size
= htoli(pcp
->pc_size
);
557 pc_setstartcluster(fsp
, &pcp
->pc_entry
, pcp
->pc_scluster
);
558 *((struct pcdir
*)(bp
->b_un
.b_addr
+ pcp
->pc_eoffset
)) = pcp
->pc_entry
;
560 error
= geterror(bp
);
564 pc_mark_irrecov(VFSTOPCFS(vp
->v_vfsp
));
566 pcp
->pc_flags
&= ~(PC_CHG
| PC_MOD
| PC_ACC
);
571 * Verify that the disk in the drive is the same one that we
572 * got the pcnode from.
573 * MUST be called with node unlocked.
576 pc_verify(struct pcfs
*fsp
)
581 if (!fsp
|| fsp
->pcfs_flags
& PCFS_IRRECOV
)
584 if (!(fsp
->pcfs_flags
& PCFS_NOCHK
) && fsp
->pcfs_fatp
) {
586 * This "has it been removed" check should be
587 * modified for removable media that are not floppies.
588 * dkio-managed devices such as USB/firewire external
589 * disks/memory sticks/floppies (gasp) do not understand
592 PC_DPRINTF1(4, "pc_verify fsp=0x%p\n", (void *)fsp
);
593 error
= cdev_ioctl(fsp
->pcfs_vfs
->vfs_dev
,
594 FDGETCHANGE
, (intptr_t)&fdstatus
, FNATIVE
| FKIOCTL
,
598 if (error
== ENOTTY
|| error
== ENXIO
) {
600 * See comment above. This is a workaround
601 * for removable media that don't understand
607 "pc_verify: FDGETCHANGE ioctl failed: %d\n",
609 pc_mark_irrecov(fsp
);
611 } else if (fsp
->pcfs_fatjustread
) {
613 * Ignore the results of the ioctl if we just
614 * read the FAT. There is a good chance that
615 * the disk changed bit will be on, because
616 * we've just mounted and we don't want to
617 * give a false positive that the sky is falling.
619 fsp
->pcfs_fatjustread
= 0;
622 * Oddly enough we can't check just one flag here. The
623 * x86 floppy driver sets a different flag
624 * (FDGC_DETECTED) than the sparc driver does.
625 * I think this MAY be a bug, and I filed 4165938
626 * to get someone to look at the behavior
627 * a bit more closely. In the meantime, my testing and
628 * code examination seem to indicate it is safe to
629 * check for either bit being set.
631 if (fdstatus
& (FDGC_HISTORY
| FDGC_DETECTED
)) {
632 PC_DPRINTF0(1, "pc_verify: change detected\n");
633 pc_mark_irrecov(fsp
);
637 if (error
== 0 && fsp
->pcfs_fatp
== NULL
) {
638 error
= pc_getfat(fsp
);
645 * The disk has changed, pulling the rug out from beneath us.
646 * Mark the FS as being in an irrecoverable state.
647 * In a short while we'll clean up.
650 pc_mark_irrecov(struct pcfs
*fsp
)
652 if (!(fsp
->pcfs_flags
& PCFS_NOCHK
)) {
653 if (pc_lockfs(fsp
, 1, 0)) {
655 * Locking failed, which currently would
656 * only happen if the FS were already
657 * marked as hosed. If another reason for
658 * failure were to arise in the future, this
659 * routine would have to change.
664 fsp
->pcfs_flags
|= PCFS_IRRECOV
;
666 "Disk was changed during an update or\n"
667 "an irrecoverable error was encountered.\n"
668 "File damage is possible. To prevent further\n"
669 "damage, this pcfs instance will now be frozen.\n"
670 "Use umount(1M) to release the instance.\n");
671 (void) pc_unlockfs(fsp
);
676 * The disk has been changed!
679 pc_diskchanged(struct pcfs
*fsp
)
681 struct pcnode
*pcp
, *npcp
= NULL
;
684 extern vfs_t EIO_vfs
;
688 * Eliminate all pcnodes (dir & file) associated with this fs.
689 * If the node is internal, i.e., no references outside of
690 * pcfs itself, then release the associated vnode structure.
691 * Invalidate the in core FAT.
692 * Invalidate cached data blocks and blocks waiting for I/O.
694 PC_DPRINTF1(1, "pc_diskchanged fsp=0x%p\n", (void *)fsp
);
696 vfsp
= PCFSTOVFS(fsp
);
698 for (hp
= pcdhead
; hp
< &pcdhead
[NPCHASH
]; hp
++) {
699 for (pcp
= hp
->pch_forw
;
700 pcp
!= (struct pcnode
*)hp
; pcp
= npcp
) {
701 npcp
= pcp
-> pc_forw
;
703 if ((vp
->v_vfsp
== vfsp
) &&
704 !(pcp
->pc_flags
& PC_RELEHOLD
)) {
705 mutex_enter(&(vp
)->v_lock
);
706 if (vp
->v_count
> 0) {
707 mutex_exit(&(vp
)->v_lock
);
710 mutex_exit(&(vp
)->v_lock
);
714 vp
->v_vfsp
= &EIO_vfs
;
717 if (!(pcp
->pc_flags
& PC_EXTERNAL
)) {
718 (void) pvn_vplist_dirty(vp
,
720 B_INVAL
| B_TRUNC
, NULL
);
723 kmem_free(pcp
, sizeof (struct pcnode
));
729 for (hp
= pcfhead
; fsp
->pcfs_frefs
&& hp
< &pcfhead
[NPCHASH
]; hp
++) {
730 for (pcp
= hp
->pch_forw
; fsp
->pcfs_frefs
&&
731 pcp
!= (struct pcnode
*)hp
; pcp
= npcp
) {
732 npcp
= pcp
-> pc_forw
;
734 if ((vp
->v_vfsp
== vfsp
) &&
735 !(pcp
->pc_flags
& PC_RELEHOLD
)) {
736 mutex_enter(&(vp
)->v_lock
);
737 if (vp
->v_count
> 0) {
738 mutex_exit(&(vp
)->v_lock
);
741 mutex_exit(&(vp
)->v_lock
);
745 vp
->v_vfsp
= &EIO_vfs
;
748 if (!(pcp
->pc_flags
& PC_EXTERNAL
)) {
749 (void) pvn_vplist_dirty(vp
,
751 B_INVAL
| B_TRUNC
, NULL
);
754 kmem_free(pcp
, sizeof (struct pcnode
));
762 if (fsp
->pcfs_frefs
) {
763 rw_exit(&pcnodes_lock
);
764 panic("pc_diskchanged: frefs");
766 if (fsp
->pcfs_nrefs
) {
767 rw_exit(&pcnodes_lock
);
768 panic("pc_diskchanged: nrefs");
771 if (!(vfsp
->vfs_flag
& VFS_UNMOUNTED
) &&
772 fsp
->pcfs_fatp
!= (uchar_t
*)0) {
775 binval(fsp
->pcfs_xdev
);