/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */
#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/vnode.h>
#include <sys/fs/snode.h>
#include <sys/fs/fifonode.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/termios.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/autoconf.h>
#include <sys/esunddi.h>
#include <sys/flock.h>
#include <sys/modctl.h>
struct kmem_cache *snode_cache;

static struct snode *sfind(dev_t, vtype_t, struct vnode *);
static struct vnode *get_cvp(dev_t, vtype_t, struct snode *, int *);
static void sinsert(struct snode *);
struct vnode *
specvp_devfs(
	struct vnode	*realvp,
	dev_t		dev,
	vtype_t		vtyp,
	struct cred	*cr,
	dev_info_t	*dip)
{
	struct vnode	*vp;

	ASSERT(realvp && dip);
	vp = specvp(realvp, dev, vtyp, cr);
	ASSERT(vp);

	/* associate a dip hold with the common snode's s_dip pointer */
	spec_assoc_vp_with_devi(vp, dip);
	return (vp);
}
/*
 * Return a shadow special vnode for the given dev.
 * If no snode exists for this dev create one and put it
 * in a table hashed by <dev, realvp>.  If the snode for
 * this dev is already in the table return it (ref count is
 * incremented by sfind).  The snode will be flushed from the
 * table when spec_inactive calls sdelete.
 *
 * The fsid is inherited from the real vnode so that clones
 * can be found.
 */
struct vnode *
specvp(
	struct vnode	*vp,
	dev_t		dev,
	vtype_t		type,
	struct cred	*cr)
{
	struct snode *sp;
	struct snode *nsp;
	struct snode *csp;
	struct vnode *svp;
	struct vattr va;
	int rc;
	int used_csp = 0;		/* Did we use pre-allocated csp */

	if (vp == NULL)
		return (NULL);
	if (vp->v_type == VFIFO)
		return (fifovp(vp, cr));

	ASSERT(vp->v_type == type);
	ASSERT(vp->v_rdev == dev);

	/*
	 * Pre-allocate snodes before holding any locks in case we block
	 */
	nsp = kmem_cache_alloc(snode_cache, KM_SLEEP);
	csp = kmem_cache_alloc(snode_cache, KM_SLEEP);

	/*
	 * Get the time attributes outside of the stable lock since
	 * this operation may block.  Unfortunately, it may not have
	 * been required if the snode is in the cache.
	 */
	va.va_mask = AT_FSID | AT_TIMES;
	rc = fop_getattr(vp, &va, 0, cr, NULL);	/* XXX may block! */

	mutex_enter(&stable_lock);
	if ((sp = sfind(dev, type, vp)) == NULL) {
		struct vnode *cvp;

		sp = nsp;	/* Use pre-allocated snode */
		svp = STOV(sp);

		sp->s_realvp = vp;
		VN_HOLD(vp);
		sp->s_commonvp = NULL;

		/*
		 * Set times in snode to those in the vnode.
		 */
		if (rc == 0) {
			sp->s_fsid = va.va_fsid;
			sp->s_atime = va.va_atime.tv_sec;
			sp->s_mtime = va.va_mtime.tv_sec;
			sp->s_ctime = va.va_ctime.tv_sec;
		} else {
			sp->s_fsid = specdev;
		}

		svp->v_flag = (vp->v_flag & VROOT);
		svp->v_vfsp = vp->v_vfsp;
		VFS_HOLD(svp->v_vfsp);
		svp->v_type = type;
		svp->v_rdev = dev;
		(void) vn_copypath(vp, svp);
		if (type == VBLK || type == VCHR) {
			cvp = get_cvp(dev, type, csp, &used_csp);
			svp->v_stream = cvp->v_stream;

			sp->s_commonvp = cvp;
		}
		vn_exists(svp);
		sinsert(sp);
		mutex_exit(&stable_lock);
		if (used_csp == 0) {
			/* Didn't use pre-allocated snode so free it */
			kmem_cache_free(snode_cache, csp);
		}
	} else {
		mutex_exit(&stable_lock);
		/* free unused snode memory */
		kmem_cache_free(snode_cache, nsp);
		kmem_cache_free(snode_cache, csp);
	}
	return (svp);
}
/*
 * Return a special vnode for the given dev; no vnode is supplied
 * for it to shadow.  Always create a new snode and put it in the
 * table hashed by <dev, NULL>.  The snode will be flushed from the
 * table when spec_inactive() calls sdelete().  The association of
 * this node with an attached instance of hardware is not made until
 * spec_open time.
 *
 * N.B. Assumes caller takes on responsibility of making sure no one
 * else is creating a snode for (dev, type) at this time.
 */
struct vnode *
makespecvp(dev_t dev, vtype_t type)
{
	struct snode *sp;
	struct vnode *svp, *cvp;
	time_t now;

	sp = kmem_cache_alloc(snode_cache, KM_SLEEP);
	svp = STOV(sp);
	cvp = commonvp(dev, type);
	now = gethrestime_sec();

	sp->s_realvp = NULL;
	sp->s_commonvp = cvp;
	sp->s_fsid = specdev;
	sp->s_atime = now;
	sp->s_mtime = now;
	sp->s_ctime = now;

	svp->v_vfsp = &spec_vfs;
	svp->v_stream = cvp->v_stream;
	svp->v_type = type;
	svp->v_rdev = dev;

	vn_exists(svp);
	mutex_enter(&stable_lock);
	sinsert(sp);
	mutex_exit(&stable_lock);

	return (svp);
}
/*
 * This function is called from spec_assoc_vp_with_devi().  That function
 * associates a "new" dip with a common snode, releasing (any) old dip
 * in the process.  This function (spec_assoc_fence()) looks at the "new dip"
 * and determines whether the snode should be fenced off or not.  As the
 * table below indicates, the value of old-dip is a don't care for all cases.
 *
 * old-dip		new-dip		common-snode
 * =========================================
 * Don't care		NULL		unfence
 * Don't care		retired		fence
 * Don't care		not-retired	unfence
 *
 * Since the old-dip value is a "don't care", it is not passed into this
 * function.
 */
static void
spec_assoc_fence(dev_info_t *ndip, vnode_t *vp)
{
	struct snode *csp;
	int fence;

	ASSERT(vp);
	ASSERT(vn_matchops(vp, spec_getvnodeops()));

	fence = 0;
	if (ndip != NULL) {
		mutex_enter(&DEVI(ndip)->devi_lock);
		if (DEVI(ndip)->devi_flags & DEVI_RETIRED)
			fence = 1;
		mutex_exit(&DEVI(ndip)->devi_lock);
	}

	csp = VTOS(VTOS(vp)->s_commonvp);
	ASSERT(csp);

	/* SFENCED flag only set on common snode */
	mutex_enter(&csp->s_lock);
	if (fence)
		csp->s_flag |= SFENCED;
	else
		csp->s_flag &= ~SFENCED;
	mutex_exit(&csp->s_lock);

	FENDBG((CE_NOTE, "%sfenced common snode (%p) for new dip=%p",
	    fence ? "" : "un", (void *)csp, (void *)ndip));
}
/*
 * Associate the common snode with a devinfo node.  This is called from:
 *
 *   1) specvp_devfs to associate a specfs node with the dip attached
 *	to the devfs node.
 *
 *   2) spec_open after path reconstruction and attach.
 *
 *   3) From dacf processing to associate a makespecvp node with
 *	the dip that dacf postattach processing is being performed on.
 *	This association is made prior to open to avoid recursion issues.
 *
 *   4) From ddi_assoc_queue_with_devi to change vnode association as part of
 *	DL_ATTACH/DL_DETACH processing (SDIPSET already set).  The call
 *	from ddi_assoc_queue_with_devi may specify a NULL dip.
 *
 * We put an extra hold on the devinfo node passed in as we establish it as
 * the new s_dip pointer.  Any hold associated with the prior s_dip pointer
 * is released.  The new hold will stay active until another call to
 * spec_assoc_vp_with_devi or until the common snode is destroyed by
 * spec_inactive after the last VN_RELE of the common node.  This devinfo hold
 * transfers across a clone open except in the clone_dev case, where the clone
 * driver is no longer required after open.
 *
 * When SDIPSET is set and s_dip is NULL, the vnode has an association with
 * the driver even though there is currently no association with a specific
 * hardware instance.
 */
void
spec_assoc_vp_with_devi(struct vnode *vp, dev_info_t *dip)
{
	struct snode *csp;
	dev_info_t *olddip;

	ASSERT(vp);

	/*
	 * Don't establish a NULL association for a vnode associated with the
	 * clone driver.  The qassociate(, -1) call from a streams driver's
	 * open implementation to indicate support for qassociate has the
	 * side-effect of this type of spec_assoc_vp_with_devi call.  This
	 * call should not change the association of the pre-clone vnode
	 * associated with the clone driver; the post-clone newdev
	 * association will be established later by spec_clone().
	 */
	if ((dip == NULL) && (getmajor(vp->v_rdev) == clone_major))
		return;

	/* hold the new */
	if (dip)
		e_ddi_hold_devi(dip);

	csp = VTOS(VTOS(vp)->s_commonvp);
	mutex_enter(&csp->s_lock);
	olddip = csp->s_dip;
	csp->s_dip = dip;
	csp->s_flag |= SDIPSET;

	/* If association changes then invalidate cached size */
	if (olddip != dip)
		csp->s_flag &= ~SSIZEVALID;
	mutex_exit(&csp->s_lock);

	spec_assoc_fence(dip, vp);

	/* release the old */
	if (olddip)
		ddi_release_devi(olddip);
}
/*
 * Return the held dip associated with the specified snode.
 */
dev_info_t *
spec_hold_devi_by_vp(struct vnode *vp)
{
	struct snode *csp;
	dev_info_t *dip;

	ASSERT(vn_matchops(vp, spec_getvnodeops()));

	csp = VTOS(VTOS(vp)->s_commonvp);
	dip = csp->s_dip;
	if (dip)
		e_ddi_hold_devi(dip);
	return (dip);
}
/*
 * Find a special vnode that refers to the given device
 * of the given type.  Never return a "common" vnode.
 * Return NULL if a special vnode does not exist.
 * HOLD the vnode before returning it.
 */
struct vnode *
specfind(dev_t dev, vtype_t type)
{
	struct snode *st;
	struct vnode *nvp;

	mutex_enter(&stable_lock);
	st = stable[STABLEHASH(dev)];
	while (st != NULL) {
		if (st->s_dev == dev) {
			nvp = STOV(st);
			if (nvp->v_type == type && st->s_commonvp != nvp) {
				VN_HOLD(nvp);
				mutex_exit(&stable_lock);
				return (nvp);
			}
		}
		st = st->s_next;
	}
	mutex_exit(&stable_lock);
	return (NULL);
}
/*
 * Loop through the snode cache looking for snodes referencing dip.
 *
 * This function determines if a devinfo node is "BUSY" from the perspective
 * of having an active vnode associated with the device, which represents a
 * dependency on the device's services.  This function is needed because a
 * devinfo node can have a non-zero devi_ref and still NOT be "BUSY" when,
 * for instance, the framework is manipulating the node (has an open
 * ndi_hold_devi).
 *
 * Returns:
 *	DEVI_REFERENCED		- if dip is referenced
 *	DEVI_NOT_REFERENCED	- if dip is not referenced
 */
int
devi_stillreferenced(dev_info_t *dip)
{
	struct snode *sp;
	int i;

	/* if no hold then there can't be an snode with s_dip == dip */
	if (e_ddi_devi_holdcnt(dip) == 0)
		return (DEVI_NOT_REFERENCED);

	mutex_enter(&stable_lock);
	for (i = 0; i < STABLESIZE; i++) {
		for (sp = stable[i]; sp != NULL; sp = sp->s_next) {
			if (sp->s_dip == dip) {
				mutex_exit(&stable_lock);
				return (DEVI_REFERENCED);
			}
		}
	}
	mutex_exit(&stable_lock);
	return (DEVI_NOT_REFERENCED);
}
/*
 * Given an snode, returns the open count and the dip
 * associated with that snode.
 * Assumes the caller holds the appropriate locks
 * to prevent snode and/or dip from going away.
 * Returns:
 *	-1	No associated dip
 *	>= 0	Number of opens.
 */
int
spec_devi_open_count(struct snode *sp, dev_info_t **dipp)
{
	dev_info_t *dip;
	uint_t count;
	struct vnode *vp;

	ASSERT(sp);
	vp = STOV(sp);
	ASSERT(vp);

	*dipp = NULL;

	/*
	 * We are only interested in common snodes.  Only common snodes
	 * get their s_count fields bumped up on opens.
	 */
	if (sp->s_commonvp != vp || (dip = sp->s_dip) == NULL)
		return (-1);

	mutex_enter(&sp->s_lock);
	count = sp->s_count + sp->s_mapcnt;
	if (sp->s_flag & SLOCKED)
		count++;
	mutex_exit(&sp->s_lock);

	*dipp = dip;

	return (count);
}
/*
 * Given a device vnode, return the common
 * vnode associated with it.
 */
struct vnode *
common_specvp(struct vnode *vp)
{
	struct snode *sp;

	if ((vp->v_type != VBLK) && (vp->v_type != VCHR) ||
	    !vn_matchops(vp, spec_getvnodeops()))
		return (vp);
	sp = VTOS(vp);
	return (sp->s_commonvp);
}
/*
 * Returns a special vnode for the given dev.  The vnode is the
 * one which is "common" to all the snodes which represent the
 * same device.
 * Similar to commonvp() but doesn't acquire the stable_lock, and
 * may use a pre-allocated snode provided by caller.
 */
static struct vnode *
get_cvp(
	dev_t		dev,
	vtype_t		type,
	struct snode	*nsp,		/* pre-allocated snode */
	int		*used_nsp)	/* flag indicating if we use nsp */
{
	struct snode *sp;
	struct vnode *svp;

	ASSERT(MUTEX_HELD(&stable_lock));
	if ((sp = sfind(dev, type, NULL)) == NULL) {
		sp = nsp;		/* Use pre-allocated snode */
		*used_nsp = 1;		/* return value */
		svp = STOV(sp);

		sp->s_realvp = NULL;
		sp->s_commonvp = svp;	/* points to itself */
		sp->s_size = UNKNOWN_SIZE;
		sp->s_fsid = specdev;

		svp->v_vfsp = &spec_vfs;
		svp->v_type = type;
		svp->v_rdev = dev;
		vn_exists(svp);
		sinsert(sp);
	} else
		*used_nsp = 0;
	return (STOV(sp));
}
/*
 * Returns a special vnode for the given dev.  The vnode is the
 * one which is "common" to all the snodes which represent the
 * same device.  For use ONLY by SPECFS.
 */
struct vnode *
commonvp(dev_t dev, vtype_t type)
{
	struct snode *sp, *nsp;
	struct vnode *svp;

	/* Pre-allocate snode in case we might block */
	nsp = kmem_cache_alloc(snode_cache, KM_SLEEP);

	mutex_enter(&stable_lock);
	if ((sp = sfind(dev, type, NULL)) == NULL) {
		sp = nsp;		/* Use pre-alloced snode */
		svp = STOV(sp);

		sp->s_realvp = NULL;
		sp->s_commonvp = svp;	/* points to itself */
		sp->s_size = UNKNOWN_SIZE;
		sp->s_fsid = specdev;

		svp->v_vfsp = &spec_vfs;
		svp->v_type = type;
		svp->v_rdev = dev;
		vn_exists(svp);
		sinsert(sp);
		mutex_exit(&stable_lock);
	} else {
		mutex_exit(&stable_lock);
		/* Didn't need the pre-allocated snode */
		kmem_cache_free(snode_cache, nsp);
	}
	return (STOV(sp));
}
/*
 * Snode lookup stuff.
 * These routines maintain a table of snodes hashed by dev so
 * that the snode for a dev can be found if it already exists.
 */
struct snode *stable[STABLESIZE];
int stablesz = STABLESIZE;
kmutex_t stable_lock;
/*
 * Put a snode in the table.
 */
static void
sinsert(struct snode *sp)
{
	ASSERT(MUTEX_HELD(&stable_lock));
	sp->s_next = stable[STABLEHASH(sp->s_dev)];
	stable[STABLEHASH(sp->s_dev)] = sp;
}
/*
 * Remove an snode from the hash table.
 * The realvp is not released here because spec_inactive() still
 * needs it to do a spec_fsync().
 */
void
sdelete(struct snode *sp)
{
	struct snode *st;
	struct snode *stprev = NULL;

	ASSERT(MUTEX_HELD(&stable_lock));
	st = stable[STABLEHASH(sp->s_dev)];
	while (st != NULL) {
		if (st == sp) {
			if (stprev == NULL)
				stable[STABLEHASH(sp->s_dev)] = st->s_next;
			else
				stprev->s_next = st->s_next;
			break;
		}
		stprev = st;
		st = st->s_next;
	}
}
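/*
 * sinsert()/sdelete() implement a chained hash table with head insertion
 * and a single-pass unlink.  A self-contained userland sketch of the same
 * structure (illustrative only; node_t, HASHSZ and HASH() are hypothetical,
 * not the specfs STABLEHASH machinery):
 */
#if 0
typedef struct node {
	int key;
	struct node *next;
} node_t;

#define	HASHSZ	256			/* must be a power of two */
#define	HASH(k)	((unsigned)(k) & (HASHSZ - 1))

static node_t *table[HASHSZ];

static void
node_insert(node_t *n)
{
	n->next = table[HASH(n->key)];	/* insert at chain head */
	table[HASH(n->key)] = n;
}

static void
node_delete(node_t *n)
{
	node_t *p, *prev = NULL;

	for (p = table[HASH(n->key)]; p != NULL; prev = p, p = p->next) {
		if (p == n) {
			if (prev == NULL)
				table[HASH(n->key)] = p->next;
			else
				prev->next = p->next;
			break;
		}
	}
}
#endif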
/*
 * Lookup an snode by <dev, type, vp>.
 * ONLY looks for snodes with non-NULL s_realvp members and
 * common snodes (with s_commonvp pointing to its vnode).
 *
 * If vp is NULL, only return commonvp.  Otherwise return
 * shadow vp with both shadow and common vp's VN_HELD.
 */
static struct snode *
sfind(
	dev_t	dev,
	vtype_t	type,
	struct vnode	*vp)
{
	struct snode *st;
	struct vnode *svp;

	ASSERT(MUTEX_HELD(&stable_lock));
	st = stable[STABLEHASH(dev)];
	while (st != NULL) {
		svp = STOV(st);
		if (st->s_dev == dev && svp->v_type == type &&
		    VN_CMP(st->s_realvp, vp) &&
		    (vp != NULL || st->s_commonvp == svp) &&
		    (vp == NULL || st->s_realvp->v_vfsp == vp->v_vfsp)) {
			VN_HOLD(svp);
			return (st);
		}
		st = st->s_next;
	}
	return (NULL);
}
/*
 * Mark the accessed, updated, or changed times in an snode
 * with the current time.
 */
void
smark(struct snode *sp, int flag)
{
	time_t now = gethrestime_sec();

	/* check for change to avoid unnecessary locking */
	ASSERT((flag & ~(SACC|SUPD|SCHG)) == 0);
	if (((flag & sp->s_flag) != flag) ||
	    ((flag & SACC) && (sp->s_atime != now)) ||
	    ((flag & SUPD) && (sp->s_mtime != now)) ||
	    ((flag & SCHG) && (sp->s_ctime != now))) {
		/* lock and update */
		mutex_enter(&sp->s_lock);
		sp->s_flag |= flag;
		if (flag & SACC)
			sp->s_atime = now;
		if (flag & SUPD)
			sp->s_mtime = now;
		if (flag & SCHG)
			sp->s_ctime = now;
		mutex_exit(&sp->s_lock);
	}
}
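/*
 * smark() peeks at the flags and timestamps without s_lock so the common
 * case (times already current) stays lock-free; a racy peek can at worst
 * take the lock when it wasn't needed.  The idiom in isolation
 * (illustrative only; obj and its fields are hypothetical):
 */
#if 0
	if (obj->stamp != now) {	/* unlocked check: cheap filter */
		mutex_enter(&obj->lock);
		obj->stamp = now;	/* authoritative update under lock */
		mutex_exit(&obj->lock);
	}
#endif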
/*
 * Return the maximum file offset permitted for this device.
 * -1 means unrestricted.  SLOFFSET is associated with D_64BIT.
 *
 * On a 32-bit kernel this will limit:
 *   o	D_64BIT devices to SPEC_MAXOFFSET_T.
 *   o	non-D_64BIT character drivers to a 32-bit offset (MAXOFF_T).
 */
offset_t
spec_maxoffset(struct vnode *vp)
{
	struct snode *sp = VTOS(vp);
	struct snode *csp = VTOS(sp->s_commonvp);

	if (vp->v_stream)
		return ((offset_t)-1);
	else if (csp->s_flag & SANYOFFSET)	/* D_U64BIT */
		return ((offset_t)-1);
#ifdef _ILP32
	if (csp->s_flag & SLOFFSET)		/* D_64BIT */
		return (SPEC_MAXOFFSET_T);
#endif	/* _ILP32 */
	return (MAXOFF_T);
}
/*ARGSUSED*/
static int
snode_constructor(void *buf, void *cdrarg, int kmflags)
{
	struct snode *sp = buf;
	struct vnode *vp;

	vp = sp->s_vnode = vn_alloc(kmflags);
	if (vp == NULL) {
		return (-1);
	}
	vn_setops(vp, spec_getvnodeops());
	vp->v_data = sp;

	mutex_init(&sp->s_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&sp->s_cv, NULL, CV_DEFAULT, NULL);
	return (0);
}
/*ARGSUSED1*/
static void
snode_destructor(void *buf, void *cdrarg)
{
	struct snode *sp = buf;
	struct vnode *vp = STOV(sp);

	mutex_destroy(&sp->s_lock);
	cv_destroy(&sp->s_cv);

	vn_free(vp);
}
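/*
 * The constructor/destructor pair above lets the kmem cache hand back
 * snodes whose locks and embedded vnode are already set up; that state
 * persists across free/alloc cycles.  A hedged userland analogue of the
 * same object-cache pattern (illustrative only; obj_t and the surrounding
 * cache API are hypothetical):
 */
#if 0
static int
obj_constructor(void *buf, void *cdrarg, int kmflags)
{
	obj_t *o = buf;

	/* one-time setup, amortized over many alloc/free cycles */
	pthread_mutex_init(&o->lock, NULL);
	pthread_cond_init(&o->cv, NULL);
	return (0);
}

static void
obj_destructor(void *buf, void *cdrarg)
{
	obj_t *o = buf;

	pthread_cond_destroy(&o->cv);
	pthread_mutex_destroy(&o->lock);
}
#endif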
static const struct vfsops spec_vfsops = {
	.vfs_sync = spec_sync,
};

int
specinit(int fstype, char *name)
{
	int error;
	major_t dev;

	/*
	 * Associate vfs and vnode operations.
	 */
	error = vfs_setfsops(fstype, &spec_vfsops);
	if (error != 0) {
		cmn_err(CE_WARN, "specinit: bad fstype");
		return (error);
	}

	mutex_init(&stable_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spec_syncbusy, NULL, MUTEX_DEFAULT, NULL);

	/*
	 * Create snode cache
	 */
	snode_cache = kmem_cache_create("snode_cache", sizeof (struct snode),
	    0, snode_constructor, snode_destructor, NULL, NULL, NULL, 0);

	/*
	 * Associate vfs operations with spec_vfs
	 */
	VFS_INIT(&spec_vfs, &spec_vfsops, NULL);
	if ((dev = getudev()) == -1)
		dev = 0;
	specdev = makedevice(dev, 0);
	return (0);
}
int
device_close(struct vnode *vp, int flag, struct cred *cr)
{
	struct snode *sp = VTOS(vp);
	enum vtype type = vp->v_type;
	struct vnode *cvp;
	dev_t dev;
	int error;

	dev = sp->s_dev;
	cvp = sp->s_commonvp;

	switch (type) {

	case VCHR:
		if (cvp->v_stream != NULL)
			error = strclose(cvp, flag, cr);
		else
			error = dev_close(dev, flag, OTYP_CHR, cr);
		break;

	case VBLK:
		/*
		 * On last close of a block device we must
		 * invalidate any in-core blocks so that we
		 * can, for example, change floppy disks.
		 */
		(void) spec_putpage(cvp, (offset_t)0,
		    (size_t)0, B_INVAL|B_FORCE, cr, NULL);
		error = dev_close(dev, flag, OTYP_BLK, cr);
		break;

	default:
		panic("device_close: not a device");
		/*NOTREACHED*/
	}

	return (error);
}
vnode_t *
makectty(vnode_t *ovp)
{
	vnode_t *vp;

	if (vp = makespecvp(ovp->v_rdev, VCHR)) {
		struct snode *sp;
		struct snode *csp;
		struct vnode *cvp;

		sp = VTOS(vp);
		cvp = sp->s_commonvp;
		csp = VTOS(cvp);
		mutex_enter(&csp->s_lock);
		csp->s_count++;
		mutex_exit(&csp->s_lock);
	}

	return (vp);
}
void
spec_snode_walk(int (*callback)(struct snode *sp, void *arg), void *arg)
{
	struct snode *sp;
	int i;

	ASSERT(callback);

	mutex_enter(&stable_lock);
	for (i = 0; i < STABLESIZE; i++) {
		for (sp = stable[i]; sp; sp = sp->s_next) {
			if (callback(sp, arg) != DDI_WALK_CONTINUE)
				goto out;
		}
	}
out:
	mutex_exit(&stable_lock);
}
int
spec_is_clone(vnode_t *vp)
{
	struct snode *sp;

	if (vn_matchops(vp, spec_getvnodeops())) {
		sp = VTOS(vp);
		return ((sp->s_flag & SCLONE) ? 1 : 0);
	}

	return (0);
}

int
spec_is_selfclone(vnode_t *vp)
{
	struct snode *sp;

	if (vn_matchops(vp, spec_getvnodeops())) {
		sp = VTOS(vp);
		return ((sp->s_flag & SSELFCLONE) ? 1 : 0);
	}

	return (0);
}
/*
 * We may be invoked with a NULL vp, in which case we fence off
 * all snodes associated with the dip.
 */
int
spec_fence_snode(dev_info_t *dip, struct vnode *vp)
{
	struct snode *sp;
	struct snode *csp;
	int retired;
	int i;
	char *path;

	ASSERT(dip);

	retired = 0;
	mutex_enter(&DEVI(dip)->devi_lock);
	if (DEVI(dip)->devi_flags & DEVI_RETIRED)
		retired = 1;
	mutex_exit(&DEVI(dip)->devi_lock);

	if (!retired)
		return (0);

	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, path);

	if (vp != NULL) {
		ASSERT(vn_matchops(vp, spec_getvnodeops()));
		csp = VTOS(VTOS(vp)->s_commonvp);
		ASSERT(csp);
		mutex_enter(&csp->s_lock);
		csp->s_flag |= SFENCED;
		mutex_exit(&csp->s_lock);
		FENDBG((CE_NOTE, "fenced off snode(%p) for dip: %s",
		    (void *)csp, path));
		kmem_free(path, MAXPATHLEN);
		return (0);
	}

	mutex_enter(&stable_lock);
	for (i = 0; i < STABLESIZE; i++) {
		for (sp = stable[i]; sp != NULL; sp = sp->s_next) {
			ASSERT(sp->s_commonvp);
			csp = VTOS(sp->s_commonvp);
			if (csp->s_dip == dip) {
				/* fence off the common snode */
				mutex_enter(&csp->s_lock);
				csp->s_flag |= SFENCED;
				mutex_exit(&csp->s_lock);

				FENDBG((CE_NOTE, "fenced 1 of N"));
			}
		}
	}
	mutex_exit(&stable_lock);

	FENDBG((CE_NOTE, "fenced off all snodes for dip: %s", path));
	kmem_free(path, MAXPATHLEN);

	return (0);
}
int
spec_unfence_snode(dev_info_t *dip)
{
	struct snode *sp;
	struct snode *csp;
	int i;
	char *path;

	ASSERT(dip);

	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, path);

	mutex_enter(&stable_lock);
	for (i = 0; i < STABLESIZE; i++) {
		for (sp = stable[i]; sp != NULL; sp = sp->s_next) {
			ASSERT(sp->s_commonvp);
			csp = VTOS(sp->s_commonvp);
			ASSERT(csp);
			if (csp->s_dip == dip) {
				/* unfence the common snode */
				mutex_enter(&csp->s_lock);
				csp->s_flag &= ~SFENCED;
				mutex_exit(&csp->s_lock);

				FENDBG((CE_NOTE, "unfenced 1 of N"));
			}
		}
	}
	mutex_exit(&stable_lock);

	FENDBG((CE_NOTE, "unfenced all snodes for dip: %s", path));
	kmem_free(path, MAXPATHLEN);

	return (0);
}
void
spec_size_invalidate(dev_t dev, vtype_t type)
{
	struct snode *csp;

	mutex_enter(&stable_lock);
	if ((csp = sfind(dev, type, NULL)) != NULL) {
		mutex_enter(&csp->s_lock);
		csp->s_flag &= ~SSIZEVALID;
		VN_RELE_ASYNC(STOV(csp), system_taskq);
		mutex_exit(&csp->s_lock);
	}
	mutex_exit(&stable_lock);
}