 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
 * Copyright (c) 2017 by Delphix. All rights reserved.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */
#include <sys/types.h>
#include <sys/thread.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bitmap.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/sysmacros.h>
#include <sys/vnode.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/policy.h>
#include <sys/devpolicy.h>
#include <sys/session.h>
#include <sys/vmsystm.h>
#include <sys/vtrace.h>
#include <sys/pathname.h>
#include <sys/fs/snode.h>
#include <vm/seg_map.h>
#include <vm/seg_dev.h>
#include <vm/seg_vn.h>
#include <sys/fs_subr.h>
#include <sys/esunddi.h>
#include <sys/autoconf.h>
#include <sys/sunndi.h>
#include <sys/contract/device_impl.h>
static int spec_open(struct vnode **, int, struct cred *, caller_context_t *);
static int spec_close(struct vnode *, int, int, offset_t, struct cred *,
	caller_context_t *);
static int spec_read(struct vnode *, struct uio *, int, struct cred *,
	caller_context_t *);
static int spec_write(struct vnode *, struct uio *, int, struct cred *,
	caller_context_t *);
static int spec_ioctl(struct vnode *, int, intptr_t, int, struct cred *, int *,
	caller_context_t *);
static int spec_getattr(struct vnode *, struct vattr *, int, struct cred *,
	caller_context_t *);
static int spec_setattr(struct vnode *, struct vattr *, int, struct cred *,
	caller_context_t *);
static int spec_access(struct vnode *, int, int, struct cred *,
	caller_context_t *);
static int spec_create(struct vnode *, char *, vattr_t *, enum vcexcl, int,
	struct vnode **, struct cred *, int, caller_context_t *, vsecattr_t *);
static int spec_fsync(struct vnode *, int, struct cred *, caller_context_t *);
static void spec_inactive(struct vnode *, struct cred *, caller_context_t *);
static int spec_fid(struct vnode *, struct fid *, caller_context_t *);
static int spec_seek(struct vnode *, offset_t, offset_t *, caller_context_t *);
static int spec_frlock(struct vnode *, int, struct flock64 *, int, offset_t,
	struct flk_callback *, struct cred *, caller_context_t *);
static int spec_realvp(struct vnode *, struct vnode **, caller_context_t *);

static int spec_getpage(struct vnode *, offset_t, size_t, uint_t *, page_t **,
	size_t, struct seg *, caddr_t, enum seg_rw, struct cred *,
	caller_context_t *);
static int spec_putapage(struct vnode *, page_t *, uoff_t *, size_t *, int,
	struct cred *);
static struct buf *spec_startio(struct vnode *, page_t *, uoff_t, size_t,
	int);
static int spec_getapage(struct vnode *, uoff_t, size_t, uint_t *,
	page_t **, size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
static int spec_map(struct vnode *, offset_t, struct as *, caddr_t *, size_t,
	uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
static int spec_addmap(struct vnode *, offset_t, struct as *, caddr_t, size_t,
	uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
static int spec_delmap(struct vnode *, offset_t, struct as *, caddr_t, size_t,
	uint_t, uint_t, uint_t, struct cred *, caller_context_t *);

static int spec_poll(struct vnode *, short, int, short *, struct pollhead **,
	caller_context_t *);
static int spec_dump(struct vnode *, caddr_t, offset_t, offset_t,
	caller_context_t *);
static int spec_pageio(struct vnode *, page_t *, uoff_t, size_t, int,
	cred_t *, caller_context_t *);

static int spec_getsecattr(struct vnode *, vsecattr_t *, int, struct cred *,
	caller_context_t *);
static int spec_setsecattr(struct vnode *, vsecattr_t *, int, struct cred *,
	caller_context_t *);
static int spec_pathconf(struct vnode *, int, ulong_t *, struct cred *,
	caller_context_t *);
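/*
 * Note: the macros below take and drop an open reference on the common
 * snode.  SN_HOLD bumps s_count and SN_RELE decrements it, both under
 * s_lock; SN_RELE also asserts that a vnode with an active stream never
 * loses its last reference here.
 */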
#define	SN_HOLD(csp)	{ \
	mutex_enter(&csp->s_lock); \
	csp->s_count++; \
	mutex_exit(&csp->s_lock); \
}

#define	SN_RELE(csp)	{ \
	mutex_enter(&csp->s_lock); \
	csp->s_count--; \
	ASSERT((csp->s_count > 0) || (csp->s_vnode->v_stream == NULL)); \
	mutex_exit(&csp->s_lock); \
}

#define	S_ISFENCED(sp)	((VTOS((sp)->s_commonvp))->s_flag & SFENCED)
/*
 * *PLEASE NOTE*: If you add new entry points to specfs, do
 * not forget to add support for fencing. A fenced snode
 * is indicated by the SFENCED flag in the common snode.
 * If a snode is fenced, determine if your entry point is
 * a configuration operation (Example: open), a detection
 * operation (Example: getattr), an I/O operation (Example: ioctl())
 * or an unconfiguration operation (Example: close). If it is
 * a configuration or detection operation, fail the operation
 * for a fenced snode with an ENXIO or EIO as appropriate. If
 * it is any other operation, let it through.
 */
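/*
 * As applied in the entry points below: configuration and detection
 * operations such as spec_getattr, spec_setattr, spec_access, spec_create,
 * spec_map, spec_addmap, spec_setsecattr, spec_getsecattr and spec_pathconf
 * check S_ISFENCED() and fail, while spec_close, spec_fsync, spec_delmap,
 * spec_dump and spec_ioctl let fenced snodes through.
 */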
const struct vnodeops spec_vnodeops = {
	.vnop_name = "specfs",
	.vop_open = spec_open,
	.vop_close = spec_close,
	.vop_read = spec_read,
	.vop_write = spec_write,
	.vop_ioctl = spec_ioctl,
	.vop_getattr = spec_getattr,
	.vop_setattr = spec_setattr,
	.vop_access = spec_access,
	.vop_create = spec_create,
	.vop_fsync = spec_fsync,
	.vop_inactive = spec_inactive,
	.vop_fid = spec_fid,
	.vop_seek = spec_seek,
	.vop_pathconf = spec_pathconf,
	.vop_frlock = spec_frlock,
	.vop_realvp = spec_realvp,
	.vop_getpage = spec_getpage,
	.vop_putpage = spec_putpage,
	.vop_map = spec_map,
	.vop_addmap = spec_addmap,
	.vop_delmap = spec_delmap,
	.vop_poll = spec_poll,
	.vop_dump = spec_dump,
	.vop_pageio = spec_pageio,
	.vop_setsecattr = spec_setsecattr,
	.vop_getsecattr = spec_getsecattr,
};
/*
 * Return address of spec_vnodeops
 */
const struct vnodeops *
spec_getvnodeops(void)
{
	return (&spec_vnodeops);
}

extern vnode_t *rconsvp;
/*
 * Acquire the serial lock on the common snode.
 */
#define	LOCK_CSP(csp)			(void) spec_lockcsp(csp, 0, 1, 0)
#define	LOCKHOLD_CSP_SIG(csp)		spec_lockcsp(csp, 1, 1, 1)
#define	SYNCHOLD_CSP_SIG(csp, intr)	spec_lockcsp(csp, intr, 0, 1)
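/*
 * The three wrappers above map onto spec_lockcsp(csp, intr, setlock, hold):
 * LOCK_CSP waits uninterruptibly and returns with SLOCKED set;
 * LOCKHOLD_CSP_SIG additionally takes an open hold (SN_HOLD) and may be
 * interrupted by a signal; SYNCHOLD_CSP_SIG only synchronizes and holds,
 * with interruptibility chosen by the caller.
 */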
/*
 * Synchronize with active SLOCKED snode, optionally checking for a signal and
 * optionally returning with SLOCKED set and SN_HOLD done.  The 'intr'
 * argument determines if the thread is interruptible by a signal while
 * waiting, the function returns INTR if interrupted while there is another
 * thread closing this snode and LOOP if interrupted otherwise.
 * When SUCCESS is returned the 'hold' argument determines if the open
 * count (SN_HOLD) has been incremented and the 'setlock' argument
 * determines if the function returns with SLOCKED set.
 */
static slock_ret_t
spec_lockcsp(struct snode *csp, int intr, int setlock, int hold)
{
	slock_ret_t ret = SUCCESS;

	mutex_enter(&csp->s_lock);
	while (csp->s_flag & SLOCKED) {
		csp->s_flag |= SWANT;
		if (intr) {
			if (!cv_wait_sig(&csp->s_cv, &csp->s_lock)) {
				if (csp->s_flag & SCLOSING)
					ret = INTR;
				else
					ret = LOOP;
				mutex_exit(&csp->s_lock);
				return (ret);		/* interrupted */
			}
		} else {
			cv_wait(&csp->s_cv, &csp->s_lock);
		}
	}
	if (setlock)
		csp->s_flag |= SLOCKED;
	if (hold)
		csp->s_count++;		/* one more open reference : SN_HOLD */
	mutex_exit(&csp->s_lock);
	return (ret);			/* serialized/locked */
}
/*
 * Unlock the serial lock on the common snode
 */
#define	UNLOCK_CSP_LOCK_HELD(csp)		\
	ASSERT(mutex_owned(&csp->s_lock));	\
	if (csp->s_flag & SWANT)		\
		cv_broadcast(&csp->s_cv);	\
	csp->s_flag &= ~(SWANT|SLOCKED);

#define	UNLOCK_CSP(csp)				\
	mutex_enter(&csp->s_lock);		\
	UNLOCK_CSP_LOCK_HELD(csp);		\
	mutex_exit(&csp->s_lock);
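/*
 * Note: UNLOCK_CSP_LOCK_HELD expects s_lock to already be owned (it asserts
 * so); UNLOCK_CSP is the standalone form that takes and drops s_lock itself.
 */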
/*
 * compute/return the size of the device
 */
#define	SPEC_SIZE(csp)	\
	(((csp)->s_flag & SSIZEVALID) ? (csp)->s_size : spec_size(csp))

/*
 * Compute and return the size.  If the size in the common snode is valid then
 * return it.  If not valid then get the size from the driver and set size in
 * the common snode.  If the device has not been attached then we don't ask for
 * an update from the driver- for non-streams SSIZEVALID stays unset until the
 * device is attached.  A stat of a mknod outside /devices (non-devfs) may
 * report UNKNOWN_SIZE because the device may not be attached yet (SDIPSET not
 * established in mknod until open time).  A stat in /devices will report the
 * size correctly.  Specfs should always call SPEC_SIZE instead of referring
 * directly to s_size to initialize/retrieve the size of a device.
 *
 * XXX There is an inconsistency between block and raw - "unknown" is
 * UNKNOWN_SIZE for VBLK and 0 for VCHR(raw).
 */
spec_size(struct snode *csp)
{
	struct vnode	*cvp = STOV(csp);

	ASSERT((csp)->s_commonvp == cvp);	/* must be common node */

	/* return cached value */
	mutex_enter(&csp->s_lock);
	if (csp->s_flag & SSIZEVALID) {
		mutex_exit(&csp->s_lock);
		return (csp->s_size);
	}

	/* fop_getattr of mknod has not had devcnt restriction applied */
		/* return non-cached UNKNOWN_SIZE */
		mutex_exit(&csp->s_lock);
		return ((cvp->v_type == VCHR) ? 0 : UNKNOWN_SIZE);

	/* establish cached zero size for streams */
	if (STREAMSTAB(maj)) {
		csp->s_flag |= SSIZEVALID;
		mutex_exit(&csp->s_lock);

	/*
	 * Return non-cached UNKNOWN_SIZE if not open.
	 *
	 * NB: This check is bogus, calling prop_op(9E) should be gated by
	 * attach, not open. Not having this check however opens up a new
	 * context under which a driver's prop_op(9E) could be called. Calling
	 * prop_op(9E) in this new context has been shown to expose latent
	 * driver bugs (insufficient NULL pointer checks that lead to panic).
	 * We are keeping this open check for now to avoid these panics.
	 */
	if (csp->s_count == 0) {
		mutex_exit(&csp->s_lock);
		return ((cvp->v_type == VCHR) ? 0 : UNKNOWN_SIZE);
	}

	/* Return non-cached UNKNOWN_SIZE if not attached. */
	if (((csp->s_flag & SDIPSET) == 0) || (csp->s_dip == NULL) ||
	    !i_ddi_devi_attached(csp->s_dip)) {
		mutex_exit(&csp->s_lock);
		return ((cvp->v_type == VCHR) ? 0 : UNKNOWN_SIZE);
	}

	/*
	 * Establish cached size obtained from the attached driver.  Since we
	 * know the devinfo node, for efficiency we use cdev_prop_op directly
	 * instead of [cb]dev_[Ss]size.
	 */
	if (cvp->v_type == VCHR) {
		plen = sizeof (size);
		if (cdev_prop_op(dev, devi, PROP_LEN_AND_VAL_BUF,
		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS |
		    DDI_PROP_CONSUMER_TYPED, "Size", (caddr_t)&size,
		    &plen) != DDI_PROP_SUCCESS) {
			plen = sizeof (size32);
			if (cdev_prop_op(dev, devi, PROP_LEN_AND_VAL_BUF,
			    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
			    "size", (caddr_t)&size32, &plen) ==

		plen = sizeof (size);
		if (cdev_prop_op(dev, devi, PROP_LEN_AND_VAL_BUF,
		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS |
		    DDI_PROP_CONSUMER_TYPED, "Nblocks", (caddr_t)&size,
		    &plen) != DDI_PROP_SUCCESS) {
			plen = sizeof (size32);
			if (cdev_prop_op(dev, devi, PROP_LEN_AND_VAL_BUF,
			    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
			    "nblocks", (caddr_t)&size32, &plen) ==

		if (size != UNKNOWN_SIZE) {
			blksize = DEV_BSIZE;		/* default */
			plen = sizeof (blksize);

			/* try to get dev_t specific "blksize" */
			if (cdev_prop_op(dev, devi, PROP_LEN_AND_VAL_BUF,
			    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
			    "blksize", (caddr_t)&blksize, &plen) !=
				/*
				 * Try for dev_info node "device-blksize".
				 * If this fails then blksize will still be
				 * DEV_BSIZE default value.
				 */
				(void) cdev_prop_op(DDI_DEV_T_ANY, devi,
				    PROP_LEN_AND_VAL_BUF,
				    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
				    "device-blksize", (caddr_t)&blksize, &plen);

			/* blksize must be a power of two */
			ASSERT(BIT_ONLYONESET(blksize));
			blkshift = highbit(blksize) - 1;

			/* convert from block size to byte size */
			if (size < (MAXOFFSET_T >> blkshift))
				size = size << blkshift;

	csp->s_flag |= SSIZEVALID;
	mutex_exit(&csp->s_lock);
/*
 * This function deals with vnode substitution in the case of
 */
spec_clone(struct vnode **vpp, dev_t newdev, int vtype, struct stdata *stp)
{
	dev_t		dev = (*vpp)->v_rdev;
	major_t		maj = getmajor(dev);
	major_t		newmaj = getmajor(newdev);
	int		sysclone = (maj == clone_major);
	int		qassociate_used = 0;
	struct snode	*oldsp, *oldcsp;
	struct snode	*newsp, *newcsp;
	struct vnode	*newvp, *newcvp;

	ASSERT(dev != newdev);

	/*
	 * Check for cloning across different drivers.
	 * We only support this under the system provided clone driver
	 */
	if ((maj != newmaj) && !sysclone) {
		    "unsupported clone open maj = %u, newmaj = %u",
	}

	oldcsp = VTOS(oldsp->s_commonvp);

	newvp = makespecvp(newdev, vtype);
	ASSERT(newvp != NULL);
	newcvp = newsp->s_commonvp;
	newcsp = VTOS(newcvp);

	/*
	 * Clones inherit fsid, realvp, and dip.
	 * XXX realvp inherit is not occurring, does fstat of clone work?
	 */
	newsp->s_fsid = oldsp->s_fsid;
		newsp->s_flag |= SCLONE;
		newsp->s_flag |= SSELFCLONE;

	/*
	 * If we cloned to an opened newdev that already has called
	 * spec_assoc_vp_with_devi (SDIPSET set) then the association is
	 * already established.
	 */
	if (!(newcsp->s_flag & SDIPSET)) {
		/*
		 * Establish s_dip association for newdev.
		 *
		 * If we trusted the getinfo(9E) DDI_INFO_DEVT2INSTANCE
		 * implementation of all cloning drivers (SCLONE and SELFCLONE)
		 * we would always use e_ddi_hold_devi_by_dev().  We know that
		 * many drivers have had (still have?) problems with
		 * DDI_INFO_DEVT2INSTANCE, so we try to minimize reliance by
		 * detecting drivers that use QASSOCIATE (by looking down the
		 * stream) and setting their s_dip association to NULL.
		 */
			for (dq = stp->sd_wrq; dq; dq = dq->q_next) {
				if (_RD(dq)->q_flag & _QASSOCIATED) {
				}
			}

		if (dip || qassociate_used) {
			spec_assoc_vp_with_devi(newvp, dip);
		} else {
			/* derive association from newdev */
			dip = e_ddi_hold_devi_by_dev(newdev, 0);
			spec_assoc_vp_with_devi(newvp, dip);
				ddi_release_devi(dip);
		}
	}

	/* deal with stream stuff */
		LOCK_CSP(newcsp);	/* synchronize stream open/close */
		mutex_enter(&newcsp->s_lock);
		newcvp->v_stream = newvp->v_stream = stp;
		stp->sd_vnode = newcvp;
		stp->sd_strtab = STREAMSTAB(newmaj);
		mutex_exit(&newcsp->s_lock);

	/* substitute the vnode */
static int
spec_open(struct vnode **vpp, int flag, struct cred *cr, caller_context_t *cc)
{
	struct vnode *vp, *cvp;
	struct snode *sp, *csp;
	contract_t *ct = NULL;
	int open_returns_eintr;
	slock_ret_t spec_locksp_ret;

	flag &= ~FCREAT;		/* paranoia */

	ASSERT((vp->v_type == VCHR) || (vp->v_type == VBLK));
	if ((vp->v_type != VCHR) && (vp->v_type != VBLK))

	/*
	 * If the VFS_NODEVICES bit was set for the mount,
	 * do not allow opens of special devices.
	 */
	if (sp->s_realvp && (sp->s_realvp->v_vfsp->vfs_flag & VFS_NODEVICES))

	newdev = dev = vp->v_rdev;

	/*
	 * If we are opening a node that has not had spec_assoc_vp_with_devi
	 * called against it (mknod outside /devices or a non-dacf makespecvp
	 * node) then SDIPSET will not be set. In this case we call an
	 * interface which will reconstruct the path and lookup (drive attach)
	 * through devfs (e_ddi_hold_devi_by_dev -> e_ddi_hold_devi_by_path ->
	 * devfs_lookupname). For support of broken drivers that don't call
	 * ddi_create_minor_node for all minor nodes in their instance space,
	 * we call interfaces that operate at the directory/devinfo
	 * (major/instance) level instead of to the leaf/minor node level.
	 * After finding and attaching the dip we associate it with the
	 * common specfs vnode (s_dip), which sets SDIPSET. A DL_DETACH_REQ
	 * to a style-2 stream driver may set s_dip to NULL with SDIPSET set.
	 *
	 * NOTE: Although e_ddi_hold_devi_by_dev takes a dev_t argument, its
	 * implementation operates at the major/instance level since it only
	 * needs to return a dip.
	 */
	cvp = sp->s_commonvp;
	if (!(csp->s_flag & SDIPSET)) {
		/* try to attach, return error if we fail */
		if ((dip = e_ddi_hold_devi_by_dev(dev, 0)) == NULL)

		/* associate dip with the common snode s_dip */
		spec_assoc_vp_with_devi(vp, dip);
		ddi_release_devi(dip);	/* from e_ddi_hold_devi_by_dev */
	}

	/* check if device fenced off */
	/* verify attach/open exclusion guarantee */
	ASSERT((dip == NULL) || i_ddi_devi_attached(dip));

	if ((error = secpolicy_spec_open(cr, vp, flag)) != 0)

	/* Verify existence of open(9E) implementation. */
	if ((maj >= devcnt) ||
	    (devopsp[maj]->devo_cb_ops == NULL) ||
	    (devopsp[maj]->devo_cb_ops->cb_open == NULL))

	/*
	 * split STREAMS vs. non-STREAMS
	 *
	 * If the device is a dual-personality device, then we might want
	 * to allow for a regular OTYP_BLK open. If however it's strictly
	 * a pure STREAMS device, the cb_open entry point will be
	 * nodev() which returns ENXIO. This does make this failure path
	 * somewhat longer, but such attempts to use OTYP_BLK with STREAMS
	 * devices should be exceedingly rare. (Most of the time they will
	 * be due to programmer error.)
	 */
	if ((vp->v_type == VCHR) && (STREAMSTAB(maj)))

	/*
	 * Wait for in progress last close to complete. This guarantees
	 * to the driver writer that we will never be in the drivers
	 * open and close on the same (dev_t, otype) at the same time.
	 * Open count already incremented (SN_HOLD) on non-zero return.
	 * The wait is interruptible by a signal if the driver sets the
	 * D_OPEN_RETURNS_EINTR cb_ops(9S) cb_flag or sets the
	 * ddi-open-returns-eintr(9P) property in its driver.conf.
	 */
	if ((devopsp[maj]->devo_cb_ops->cb_flag & D_OPEN_RETURNS_EINTR) ||
	    (devnamesp[maj].dn_flags & DN_OPEN_RETURNS_EINTR))
		open_returns_eintr = 1;
	else
		open_returns_eintr = 0;
	while ((spec_locksp_ret = SYNCHOLD_CSP_SIG(csp, open_returns_eintr)) !=
		if (spec_locksp_ret == INTR)

	/* non streams open */
	type = (vp->v_type == VBLK ? OTYP_BLK : OTYP_CHR);
	error = dev_open(&newdev, flag, type, cr);
	/* deal with clone case */
	if (error == 0 && dev != newdev) {
		error = spec_clone(vpp, newdev, vp->v_type, NULL);
		/*
		 * bail on clone failure, further processing
		 * results in undefined behaviors.
		 */
		csp = VTOS(sp->s_commonvp);
	}

	/*
	 * create contracts only for userland opens
	 * Successful open and cloning is done at this point.
	 */
	if (error == 0 && !(flag & FKLYR)) {
		spec_type = (STOV(csp)->v_type == VCHR) ? S_IFCHR : S_IFBLK;
		if (contract_device_open(newdev, spec_type, NULL) != 0) {
		}
	}

		sp->s_size = SPEC_SIZE(csp);

		if ((csp->s_flag & SNEEDCLOSE) == 0) {
			int nmaj = getmajor(newdev);
			mutex_enter(&csp->s_lock);
			/* successful open needs a close later */
			csp->s_flag |= SNEEDCLOSE;

			/*
			 * Invalidate possible cached "unknown" size
			 * established by a fop_getattr while open was in
			 * progress, and the driver might fail prop_op(9E).
			 */
			if (((cvp->v_type == VCHR) && (csp->s_size == 0)) ||
			    ((cvp->v_type == VBLK) &&
			    (csp->s_size == UNKNOWN_SIZE)))
				csp->s_flag &= ~SSIZEVALID;

			if (devopsp[nmaj]->devo_cb_ops->cb_flag & D_64BIT)
				csp->s_flag |= SLOFFSET;
			if (devopsp[nmaj]->devo_cb_ops->cb_flag & D_U64BIT)
				csp->s_flag |= SLOFFSET | SANYOFFSET;
			mutex_exit(&csp->s_lock);
		}

	/*
	 * Open failed. If we missed a close operation because
	 * we were trying to get the device open and it is the
	 * last in progress open that is failing then call close.
	 *
	 * NOTE: Only non-streams open has this race condition.
	 */
	mutex_enter(&csp->s_lock);
	csp->s_count--;			/* decrement open count : SN_RELE */
	if ((csp->s_count == 0) &&	/* no outstanding open */
	    (csp->s_mapcnt == 0) &&	/* no mapping */
	    (csp->s_flag & SNEEDCLOSE)) { /* need a close */
		csp->s_flag &= ~(SNEEDCLOSE | SSIZEVALID);

		/* See comment in spec_close() */
		if (csp->s_flag & (SCLONE | SSELFCLONE))
			csp->s_flag &= ~SDIPSET;

		csp->s_flag |= SCLOSING;
		mutex_exit(&csp->s_lock);

		ASSERT(*vpp != NULL);
		(void) device_close(*vpp, flag, cr);

		mutex_enter(&csp->s_lock);
		csp->s_flag &= ~SCLOSING;
		mutex_exit(&csp->s_lock);
	} else {
		mutex_exit(&csp->s_lock);
	}
	/*
	 * Lock common snode to prevent any new clone opens on this
	 * stream while one is in progress. This is necessary since
	 * the stream currently associated with the clone device will
	 * not be part of it after the clone open completes. Unfortunately
	 * we don't know in advance if this is a clone
	 * device so we have to lock all opens.
	 *
	 * If we fail, it's because of an interrupt - EINTR return is an
	 * expected aspect of opening a stream so we don't need to check
	 * D_OPEN_RETURNS_EINTR. Open count already incremented (SN_HOLD)
	 * on non-zero return.
	 */
	if (LOCKHOLD_CSP_SIG(csp) != SUCCESS)

	error = stropen(cvp, &newdev, flag, cr);

	/* deal with the clone case */
	if ((error == 0) && (dev != newdev)) {
		vp->v_stream = cvp->v_stream = NULL;
		error = spec_clone(vpp, newdev, vp->v_type, stp);
		/*
		 * bail on clone failure, further processing
		 * results in undefined behaviors.
		 */
		csp = VTOS(sp->s_commonvp);
	} else if (error == 0) {
	}

	/*
	 * create contracts only for userland opens
	 * Successful open and cloning is done at this point.
	 */
	if (error == 0 && !(flag & FKLYR)) {
		/* STREAM is of type S_IFCHR */
		if (contract_device_open(newdev, S_IFCHR, &ct) != 0) {
			(void) spec_close(vp, flag, 1, 0, cr, cc);
		}
	}

		/* STREAMS devices don't have a size */
		sp->s_size = csp->s_size = 0;

		if (!(stp->sd_flag & STRISTTY) || (flag & FNOCTTY))

		/* try to allocate it as a controlling terminal */
		if (strctty(stp) != EINTR)

		/* strctty() was interrupted by a signal */
			/* we only create contracts for userland opens */
			ASSERT(ttoproc(curthread));
			(void) contract_abandon(ct, ttoproc(curthread), 0);

		(void) spec_close(vp, flag, 1, 0, cr, cc);

	/*
	 * Deal with stropen failure.
	 *
	 * sd_flag in the stream head cannot change since the
	 * common snode is locked before the call to stropen().
	 */
	if ((stp != NULL) && (stp->sd_flag & STREOPENFAIL)) {
		/*
		 * Open failed part way through.
		 */
		mutex_enter(&stp->sd_lock);
		stp->sd_flag &= ~STREOPENFAIL;
		mutex_exit(&stp->sd_lock);

		(void) spec_close(vp, flag, 1, 0, cr, cc);
	}

	/*
	 * Resolution for STREAMS vs. regular character device: If the
	 * STREAMS open(9e) returns ENOSTR, then try an ordinary device
	 */
	if (error == ENOSTR) {
static int
spec_close(struct vnode *vp, int flag, int count, offset_t offset,
    struct cred *cr, caller_context_t *ct)
{
	struct snode *sp, *csp;

	if (!(flag & FKLYR)) {
		/* this only applies to closes of devices from userland */
		cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
		cleanshares(vp, ttoproc(curthread)->p_pid);
	}

	/* we allow close to succeed even if device is fenced off */
	cvp = sp->s_commonvp;

	ASSERT(type == VCHR || type == VBLK);

	/*
	 * Prevent close/close and close/open races by serializing closes
	 * on this common snode. Clone opens are held up until after
	 * we have closed this device so the streams linkage is maintained
	 */
	mutex_enter(&csp->s_lock);

	csp->s_count--;			/* one fewer open reference : SN_RELE */
	sysclone = sp->s_flag & SCLONE;

	/*
	 * Invalidate size on each close.
	 *
	 * XXX We do this on each close because we don't have interfaces that
	 * allow a driver to invalidate the size.  Since clearing this on each
	 * close causes property overhead, we skip /dev/null and /dev/zero to
	 * avoid degrading kenbus performance.
	 */
	if (getmajor(dev) != mm_major)
		csp->s_flag &= ~SSIZEVALID;

	/*
	 * Only call the close routine when the last open reference through
	 * any [s, v]node goes away.  This can be checked by looking at
	 * s_count on the common vnode.
	 */
	if ((csp->s_count == 0) && (csp->s_mapcnt == 0)) {
		/* we don't need a close */
		csp->s_flag &= ~(SNEEDCLOSE | SSIZEVALID);

		/*
		 * A cloning driver may open-clone to the same dev_t that we
		 * are closing before spec_inactive destroys the common snode.
		 * If this occurs the s_dip association needs to be reevaluated.
		 * We clear SDIPSET to force reevaluation in this case.  When
		 * reevaluation occurs (by spec_clone after open), if the
		 * devinfo association has changed then the old association
		 * will be released as the new association is established by
		 * spec_assoc_vp_with_devi().
		 */
		if (csp->s_flag & (SCLONE | SSELFCLONE))
			csp->s_flag &= ~SDIPSET;

		csp->s_flag |= SCLOSING;
		mutex_exit(&csp->s_lock);
		error = device_close(vp, flag, cr);

		/*
		 * Decrement the devops held in clnopen()
		 */
			ddi_rele_driver(getmajor(dev));

		mutex_enter(&csp->s_lock);
		csp->s_flag &= ~SCLOSING;
	}

	UNLOCK_CSP_LOCK_HELD(csp);
	mutex_exit(&csp->s_lock);
static int
spec_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr,
    caller_context_t *ct)
{
	struct snode *sp = VTOS(vp);
	dev_t dev = sp->s_dev;

	ASSERT(vp->v_type == VCHR || vp->v_type == VBLK);

		ASSERT(vp->v_type == VCHR);
		return (strread(vp, uiop, cr));

	if (uiop->uio_resid == 0)

	/*
	 * Plain old character devices that set D_U64BIT can have
	 * unrestricted offsets.
	 */
	maxoff = spec_maxoffset(vp);
	ASSERT(maxoff != -1 || vp->v_type == VCHR);

	if (maxoff != -1 && (uiop->uio_loffset < 0 ||
	    uiop->uio_loffset + uiop->uio_resid > maxoff))

	if (vp->v_type == VCHR) {
		ASSERT(vp->v_stream == NULL);
		return (cdev_read(dev, uiop, cr));
	}

	blkvp = sp->s_commonvp;
	bdevsize = SPEC_SIZE(VTOS(blkvp));

		off = uiop->uio_loffset & (offset_t)MAXBMASK;
		on = (size_t)(uiop->uio_loffset & MAXBOFFSET);
		n = (size_t)MIN(MAXBSIZE - on, uiop->uio_resid);
		diff = bdevsize - uiop->uio_loffset;

			error = vpm_data_copy(blkvp, (uoff_t)(off + on),
			    n, uiop, 1, NULL, 0, S_READ);
			base = segmap_getmapflt(segkmap, blkvp,
			    (uoff_t)(off + on), n, 1, S_READ);

			error = uiomove(base + on, n, UIO_READ, uiop);

			/*
			 * If we read a whole block, we won't need this
			 * buffer again soon.
			 */
			if (n + on == MAXBSIZE)
				flags = SM_DONTNEED | SM_FREE;
				error = vpm_sync_pages(blkvp, off, n, flags);
				error = segmap_release(segkmap, base, flags);
				(void) vpm_sync_pages(blkvp, off, n, 0);
				(void) segmap_release(segkmap, base, 0);
			if (bdevsize == UNKNOWN_SIZE) {
			}
	} while (error == 0 && uiop->uio_resid > 0 && n != 0);
static int
spec_write(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr,
    caller_context_t *ct)
{
	struct snode *sp = VTOS(vp);
	dev_t dev = sp->s_dev;
	struct vnode *blkvp;

	ASSERT(vp->v_type == VCHR || vp->v_type == VBLK);

		ASSERT(vp->v_type == VCHR);
		return (strwrite(vp, uiop, cr));

	/*
	 * Plain old character devices that set D_U64BIT can have
	 * unrestricted offsets.
	 */
	maxoff = spec_maxoffset(vp);
	ASSERT(maxoff != -1 || vp->v_type == VCHR);

	if (maxoff != -1 && (uiop->uio_loffset < 0 ||
	    uiop->uio_loffset + uiop->uio_resid > maxoff))

	if (vp->v_type == VCHR) {
		ASSERT(vp->v_stream == NULL);
		return (cdev_write(dev, uiop, cr));
	}

	if (uiop->uio_resid == 0)

	blkvp = sp->s_commonvp;
	bdevsize = SPEC_SIZE(VTOS(blkvp));

		off = uiop->uio_loffset & (offset_t)MAXBMASK;
		on = (ulong_t)(uiop->uio_loffset & MAXBOFFSET);
		n = (size_t)MIN(MAXBSIZE - on, uiop->uio_resid);
		diff = bdevsize - uiop->uio_loffset;

		/*
		 * Check to see if we can skip reading in the page
		 * and just allocate the memory. We can do this
		 * if we are going to rewrite the entire mapping
		 * or if we are going to write to end of the device
		 * from the beginning of the mapping.
		 */
		if (n == MAXBSIZE || (on == 0 && (off + n) == bdevsize))

		/*
		 * Touch the page and fault it in if it is not in core
		 * before segmap_getmapflt or vpm_data_copy can lock it.
		 * This is to avoid the deadlock if the buffer is mapped
		 * to the same file through mmap which we want to write.
		 */
		uio_prefaultpages((long)n, uiop);

			error = vpm_data_copy(blkvp, (uoff_t)(off + on),
			    n, uiop, !pagecreate, NULL, 0, S_WRITE);
			base = segmap_getmapflt(segkmap, blkvp,
			    (uoff_t)(off + on), n, !pagecreate, S_WRITE);

			/*
			 * segmap_pagecreate() returns 1 if it calls
			 * page_create_va() to allocate any pages.
			 */
				newpage = segmap_pagecreate(segkmap, base + on,

			error = uiomove(base + on, n, UIO_WRITE, uiop);

		if (!vpm_enable && pagecreate &&
		    P2ROUNDUP_TYPED(off + on + n, PAGESIZE, offset_t)) {
			/*
			 * We created pages w/o initializing them completely,
			 * thus we need to zero the part that wasn't set up.
			 * This can happen if we write to the end of the device
			 * or if we had some sort of error during the uiomove.
			 */
			nmoved = (uiop->uio_loffset - (off + on));
			if (nmoved < 0 || nmoved > n) {
				panic("spec_write: nmoved bogus");
			}
			nzero = (long)P2ROUNDUP(on + n, PAGESIZE) -
			if (nzero < 0 || (on + nmoved + nzero > MAXBSIZE)) {
				panic("spec_write: nzero bogus");
			}
			(void) kzero(base + on + nmoved, (size_t)nzero);
		}

		/*
		 * Unlock the pages which have been allocated by
		 * page_create_va() in segmap_pagecreate().
		 */
		if (!vpm_enable && newpage)
			segmap_pageunlock(segkmap, base + on,
			    (size_t)n, S_WRITE);

		/*
		 * Force write back for synchronous write cases.
		 */
		if (ioflag & (FSYNC|FDSYNC))
		else if (n + on == MAXBSIZE || IS_SWAPVP(vp)) {
			/*
			 * Have written a whole block.
			 * Start an asynchronous write and
			 * mark the buffer to indicate that
			 * it won't be needed again soon.
			 * Push swap files here, since it
			 * won't happen anywhere else.
			 */
			flags = SM_WRITE | SM_ASYNC | SM_DONTNEED;
		}
		smark(sp, SUPD|SCHG);
			error = vpm_sync_pages(blkvp, off, n, flags);
			error = segmap_release(segkmap, base, flags);
			(void) vpm_sync_pages(blkvp, off, n, SM_INVAL);
			(void) segmap_release(segkmap, base, SM_INVAL);
	} while (error == 0 && uiop->uio_resid > 0 && n != 0);
static int
spec_ioctl(struct vnode *vp, int cmd, intptr_t arg, int mode, struct cred *cr,
    int *rvalp, caller_context_t *ct)
{
	if (vp->v_type != VCHR)

	/*
	 * allow ioctls() to go through even for fenced snodes, as they
	 * may include unconfiguration operation - for example popping of
	 */
		error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
		error = cdev_ioctl(dev, cmd, arg, mode, cr, rvalp);
static int
spec_getattr(struct vnode *vp, struct vattr *vap, int flags, struct cred *cr,
    caller_context_t *ct)
{
	struct vnode *realvp;

	/* With ATTR_COMM we will not get attributes from realvp */
	if (flags & ATTR_COMM) {
		vp = sp->s_commonvp;
	}

	/* we want stat() to fail with ENXIO if the device is fenced off */

	realvp = sp->s_realvp;

	if (realvp == NULL) {
		static int snode_shift = 0;

		/*
		 * Calculate the amount of bitshift to a snode pointer which
		 * will still keep it unique.  See below.
		 */
		if (snode_shift == 0)
			snode_shift = highbit(sizeof (struct snode));
		ASSERT(snode_shift > 0);

		/*
		 * No real vnode behind this one.  Fill in the fields
		 *
		 * This code should be refined to return only the
		 * attributes asked for instead of all of them.
		 */
		vap->va_type = vp->v_type;
		vap->va_uid = vap->va_gid = 0;
		vap->va_fsid = sp->s_fsid;

		/*
		 * If the va_nodeid is > MAX_USHORT, then i386 stats might
		 * fail.  So we shift down the snode pointer to try and get
		 * the most uniqueness into 16-bits.
		 */
		vap->va_nodeid = ((ino64_t)(uintptr_t)sp >> snode_shift) &
		vap->va_rdev = sp->s_dev;

		/*
		 * va_nblocks is the number of 512 byte blocks used to store
		 * the mknod for the device, not the number of blocks on the
		 * device itself.  This is typically zero since the mknod is
		 * represented directly in the inode itself.
		 */
		vap->va_nblocks = 0;
	} else {
		error = fop_getattr(realvp, vap, flags, cr, ct);
	}

	/* set the size from the snode */
	vap->va_size = SPEC_SIZE(VTOS(sp->s_commonvp));
	vap->va_blksize = MAXBSIZE;

	mutex_enter(&sp->s_lock);
	vap->va_atime.tv_sec = sp->s_atime;
	vap->va_mtime.tv_sec = sp->s_mtime;
	vap->va_ctime.tv_sec = sp->s_ctime;
	mutex_exit(&sp->s_lock);

	vap->va_atime.tv_nsec = 0;
	vap->va_mtime.tv_nsec = 0;
	vap->va_ctime.tv_nsec = 0;
static int
spec_setattr(struct vnode *vp, struct vattr *vap, int flags, struct cred *cr,
    caller_context_t *ct)
{
	struct snode *sp = VTOS(vp);
	struct vnode *realvp;

	/* fail with ENXIO if the device is fenced off */

	if (vp->v_type == VCHR && vp->v_stream && (vap->va_mask & AT_SIZE)) {
		/*
		 * 1135080: O_TRUNC should have no effect on
		 * named pipes and terminal devices.
		 */
		ASSERT(vap->va_mask == AT_SIZE);
	}

	if ((realvp = sp->s_realvp) == NULL)
		error = 0;	/* no real vnode to update */
	else
		error = fop_setattr(realvp, vap, flags, cr, ct);

	/*
	 * If times were changed, update snode.
	 */
	mutex_enter(&sp->s_lock);
	if (vap->va_mask & AT_ATIME)
		sp->s_atime = vap->va_atime.tv_sec;
	if (vap->va_mask & AT_MTIME) {
		sp->s_mtime = vap->va_mtime.tv_sec;
		sp->s_ctime = gethrestime_sec();
	}
	mutex_exit(&sp->s_lock);
static int
spec_access(struct vnode *vp, int mode, int flags, struct cred *cr,
    caller_context_t *ct)
{
	struct vnode *realvp;
	struct snode *sp = VTOS(vp);

	/* fail with ENXIO if the device is fenced off */

	if ((realvp = sp->s_realvp) != NULL)
		return (fop_access(realvp, mode, flags, cr, ct));
	else
		return (0);	/* Allow all access. */
/*
 * This can be called if creat or an open with O_CREAT is done on the root
 * of a lofs mount where the mounted entity is a special file.
 */
static int
spec_create(struct vnode *dvp, char *name, vattr_t *vap, enum vcexcl excl,
    int mode, struct vnode **vpp, struct cred *cr, int flag,
    caller_context_t *ct, vsecattr_t *vsecp)
{
	struct snode *sp = VTOS(dvp);

	/* fail with ENXIO if the device is fenced off */

	ASSERT(dvp && (dvp->v_flag & VROOT) && *name == '\0');
	if (excl == NONEXCL) {
		if (mode && (error = spec_access(dvp, mode, 0, cr, ct)))
	}
/*
 * In order to sync out the snode times without multi-client problems,
 * make sure the times written out are never earlier than the times
 * already set in the vnode.
 */
static int
spec_fsync(struct vnode *vp, int syncflag, struct cred *cr,
    caller_context_t *ct)
{
	struct snode *sp = VTOS(vp);
	struct vnode *realvp;
	struct vattr va, vatmp;

	/* allow syncing even if device is fenced off */

	/* If times didn't change, don't flush anything. */
	mutex_enter(&sp->s_lock);
	if ((sp->s_flag & (SACC|SUPD|SCHG)) == 0 && vp->v_type != VBLK) {
		mutex_exit(&sp->s_lock);
	}
	sp->s_flag &= ~(SACC|SUPD|SCHG);
	mutex_exit(&sp->s_lock);
	cvp = sp->s_commonvp;
	realvp = sp->s_realvp;

	if (vp->v_type == VBLK && cvp != vp && vn_has_cached_data(cvp) &&
	    (cvp->v_flag & VISSWAP) == 0)
		(void) fop_putpage(cvp, (offset_t)0, 0, 0, cr, ct);

	/*
	 * For devices that support it, force write cache to stable storage.
	 * We don't need the lock to check s_flags since we can treat
	 * SNOFLUSH as a hint.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
	    !(sp->s_flag & SNOFLUSH)) {
		struct dk_callback spec_callback;

		spec_callback.dkc_flag = FLUSH_VOLATILE;
		spec_callback.dkc_callback = NULL;

		/* synchronous flush on volatile cache */
		rc = cdev_ioctl(vp->v_rdev, DKIOCFLUSHWRITECACHE,
		    (intptr_t)&spec_callback, FNATIVE|FKIOCTL, cr, &rval);

		if (rc == ENOTSUP || rc == ENOTTY) {
			mutex_enter(&sp->s_lock);
			sp->s_flag |= SNOFLUSH;
			mutex_exit(&sp->s_lock);
		}
	}

	/*
	 * If no real vnode to update, don't flush anything.
	 */

	vatmp.va_mask = AT_ATIME|AT_MTIME;
	if (fop_getattr(realvp, &vatmp, 0, cr, ct) == 0) {
		mutex_enter(&sp->s_lock);
		if (vatmp.va_atime.tv_sec > sp->s_atime)
			va.va_atime = vatmp.va_atime;
		else {
			va.va_atime.tv_sec = sp->s_atime;
			va.va_atime.tv_nsec = 0;
		}
		if (vatmp.va_mtime.tv_sec > sp->s_mtime)
			va.va_mtime = vatmp.va_mtime;
		else {
			va.va_mtime.tv_sec = sp->s_mtime;
			va.va_mtime.tv_nsec = 0;
		}
		mutex_exit(&sp->s_lock);

		va.va_mask = AT_ATIME|AT_MTIME;
		(void) fop_setattr(realvp, &va, 0, cr, ct);
	}
	(void) fop_fsync(realvp, syncflag, cr, ct);
static void
spec_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
{
	struct snode *sp = VTOS(vp);

	/*
	 * If no one has reclaimed the vnode, remove from the
	 */
	if (vp->v_count < 1) {
		panic("spec_inactive: Bad v_count");
	}
	mutex_enter(&stable_lock);

	mutex_enter(&vp->v_lock);
	if (vp->v_count != 0) {
		mutex_exit(&vp->v_lock);
		mutex_exit(&stable_lock);
		return;
	}
	mutex_exit(&vp->v_lock);

	mutex_exit(&stable_lock);

	/* We are the sole owner of sp now */
	cvp = sp->s_commonvp;

	/*
	 * If the snode times changed, then update the times
	 * associated with the "realvp".
	 */
	if ((sp->s_flag & (SACC|SUPD|SCHG)) != 0) {
		struct vattr va, vatmp;

		mutex_enter(&sp->s_lock);
		sp->s_flag &= ~(SACC|SUPD|SCHG);
		mutex_exit(&sp->s_lock);
		vatmp.va_mask = AT_ATIME|AT_MTIME;
		/*
		 * The user may not own the device, but we
		 * want to update the attributes anyway.
		 */
		if (fop_getattr(rvp, &vatmp, 0, kcred, ct) == 0) {
			if (vatmp.va_atime.tv_sec > sp->s_atime)
				va.va_atime = vatmp.va_atime;
			else {
				va.va_atime.tv_sec = sp->s_atime;
				va.va_atime.tv_nsec = 0;
			}
			if (vatmp.va_mtime.tv_sec > sp->s_mtime)
				va.va_mtime = vatmp.va_mtime;
			else {
				va.va_mtime.tv_sec = sp->s_mtime;
				va.va_mtime.tv_nsec = 0;
			}

			va.va_mask = AT_ATIME|AT_MTIME;
			(void) fop_setattr(rvp, &va, 0, kcred, ct);
		}
	}

	ASSERT(!vn_has_cached_data(vp));

	/* if we are sharing another file system's vfs, release it */
	if (vp->v_vfsp && (vp->v_vfsp != &spec_vfs))
		VFS_RELE(vp->v_vfsp);

	/* if we have a realvp, release the realvp */

	/* if we have a common, release the common */
	if (cvp && (cvp != vp)) {
		/*
		 * if this is the last reference to a common vnode, any
		 * associated stream had better have been closed
		 */
			ASSERT(cvp->v_stream == NULL);
	}

	/*
	 * if we have a hold on a devinfo node (established by
	 * spec_assoc_vp_with_devi), release the hold
	 */
		ddi_release_devi(sp->s_dip);

	/*
	 * If we have an associated device policy, release it.
	 */
	if (sp->s_plcy != NULL)

	/*
	 * If all holds on the devinfo node are through specfs/devfs
	 * and we just destroyed the last specfs node associated with the
	 * device, then the devinfo node reference count should now be
	 * zero.  We can't check this because there may be other holds
	 * on the node from non file system sources: ddi_hold_devi_by_instance
	 */
	kmem_cache_free(snode_cache, sp);
static int
spec_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
{
	struct vnode *realvp;
	struct snode *sp = VTOS(vp);

	if ((realvp = sp->s_realvp) != NULL)
		return (fop_fid(realvp, fidp, ct));
static int
spec_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
    caller_context_t *ct)
{
	offset_t maxoff = spec_maxoffset(vp);

	if (maxoff == -1 || *noffp <= maxoff)
static int
spec_frlock(
	struct vnode	*vp,
	int		cmd,
	struct flock64	*bfp,
	int		flag,
	offset_t	offset,
	struct flk_callback *flk_cbp,
	struct cred	*cr,
	caller_context_t *ct)
{
	struct snode *sp = VTOS(vp);

	csp = VTOS(sp->s_commonvp);
	/*
	 * If file is being mapped, disallow frlock.
	 */
	if (csp->s_mapcnt > 0)

	return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
static int
spec_realvp(struct vnode *vp, struct vnode **vpp, caller_context_t *ct)
{
	if ((rvp = VTOS(vp)->s_realvp) != NULL) {
		if (fop_realvp(vp, &rvp, ct) == 0)
	}
/*
 * Return all the pages from [off..off + len] in block
 * or character device.
 */
static int
spec_getpage(
	struct vnode	*vp,
	offset_t	off,
	size_t		len,
	uint_t		*protp,
	page_t		**pl,
	size_t		plsz,
	struct seg	*seg,
	caddr_t		addr,
	enum seg_rw	rw,
	struct cred	*cr,
	caller_context_t *ct)
{
	struct snode *sp = VTOS(vp);

	ASSERT(sp->s_commonvp == vp);

	/*
	 * XXX	Given the above assertion, this might not do
	 *	what is wanted here.
	 */
	if (vp->v_flag & VNOMAP)

	TRACE_4(TR_FAC_SPECFS, TR_SPECFS_GETPAGE,
	    "specfs getpage:vp %p off %llx len %ld snode %p",

	switch (vp->v_type) {
		if (((uoff_t)off + len) > (SPEC_SIZE(sp) + PAGEOFFSET))
			return (EFAULT);	/* beyond EOF */

		err = pvn_getpages(spec_getapage, vp, (uoff_t)off, len,
		    protp, pl, plsz, seg, addr, rw, cr);

		cmn_err(CE_NOTE, "spec_getpage called for character device. "
		    "Check any non-ON consolidation drivers");
		pl[0] = (page_t *)0;

		panic("spec_getpage: bad v_type 0x%x", vp->v_type);
	}
extern int klustsize;	/* set in machdep.c */

int spec_lostpage;	/* number of times we lost original page */

static int
spec_getapage(struct vnode *vp, uoff_t off, size_t len, uint_t *protp,
    page_t **pl, size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw,
    struct cred *cr)
{
	uoff_t io_off1, io_off2;
	size_t adj_klustsize;

	TRACE_3(TR_FAC_SPECFS, TR_SPECFS_GETAPAGE,
	    "specfs getapage:vp %p off %llx snode %p", vp, off, sp);

	size = SPEC_SIZE(VTOS(sp->s_commonvp));

	if (spec_ra && sp->s_nextr == off)

	if (size == UNKNOWN_SIZE) {
		adj_klustsize = PAGESIZE;
	} else {
		adj_klustsize = dora ? klustsize : PAGESIZE;
	}

	if ((pagefound = page_exists(&vp->v_object, off)) == NULL) {
		if (rw == S_CREATE) {
			/*
			 * We're allocating a swap slot and it's
			 * associated page was not found, so allocate
			 */
			if ((pp = page_create_va(&vp->v_object, off,
			    PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
				panic("spec_getapage: page_create");
			}
			sp->s_nextr = off + PAGESIZE;
		} else {
			/*
			 * Need to really do disk I/O to get the page(s).
			 */
			blkoff = (off / adj_klustsize) * adj_klustsize;
			if (size == UNKNOWN_SIZE) {
			} else {
				if (blkoff + adj_klustsize <= size)
					blksz = adj_klustsize;
				else
					blksz =
					    MIN(size - blkoff, adj_klustsize);
			}

			pp = pvn_read_kluster(vp, off, seg, addr, &tmpoff,
			    &io_len1, blkoff, blksz, 0);
		}

		/*
		 * Make sure the page didn't sneak into the
		 * cache while we blocked in pvn_read_kluster.
		 */

		/*
		 * Zero part of page which we are not
		 * going to be reading from disk now.
		 */
		xlen = (uint_t)(io_len1 & PAGEOFFSET);
			pagezero(pp->p_prev, xlen, PAGESIZE - xlen);

		bp = spec_startio(vp, pp, io_off1, io_len1,
		    pl == NULL ? (B_ASYNC | B_READ) : B_READ);
		sp->s_nextr = io_off1 + io_len1;

		if (dora && rw != S_CREATE) {
			off2 = ((off / adj_klustsize) + 1) * adj_klustsize;
			addr2 = addr + (off2 - off);

			/*
			 * If we are past EOF then don't bother trying
			 */
			if (off2 + adj_klustsize <= size)
				blksz = adj_klustsize;
			else
				blksz = MIN(size - off2, adj_klustsize);

			pp2 = pvn_read_kluster(vp, off2, seg, addr2, &tmpoff,
			    &io_len2, off2, blksz, 1);

			/*
			 * Zero part of page which we are not
			 * going to be reading from disk now.
			 */
			xlen = (uint_t)(io_len2 & PAGEOFFSET);
				pagezero(pp2->p_prev, xlen, PAGESIZE - xlen);

			(void) spec_startio(vp, pp2, io_off2, io_len2,
		}

			pvn_read_done(pp, B_ERROR);
	} else {
		se_t se = (rw == S_CREATE ? SE_EXCL : SE_SHARED);
		/*
		 * Page exists in the cache, acquire the appropriate
		 * lock.  If this fails, start all over again.
		 */
		if ((pp = page_lookup(&vp->v_object, off, se)) == NULL) {
		}
		sp->s_nextr = off + PAGESIZE;
	}

		pvn_plist_init(pp, pl, plsz, off, io_len1, rw);
/*
 * Flags are composed of {B_INVAL, B_DIRTY, B_FREE, B_DONTNEED, B_FORCE}.
 * If len == 0, do from off to EOF.
 *
 * The normal cases should be len == 0 & off == 0 (entire vp list),
 * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
 */
int
spec_putpage(
	struct vnode	*vp,
	offset_t	off,
	size_t		len,
	int		flags,
	struct cred	*cr,
	caller_context_t *ct)
{
	struct snode *sp = VTOS(vp);
	size_t io_len = 0;	/* for lint */

	ASSERT(vp->v_count != 0);

	if (vp->v_flag & VNOMAP)

	cvp = sp->s_commonvp;
	size = SPEC_SIZE(VTOS(cvp));

	if (!vn_has_cached_data(vp) || off >= size)

	ASSERT(vp->v_type == VBLK && cvp == vp);
	TRACE_4(TR_FAC_SPECFS, TR_SPECFS_PUTPAGE,
	    "specfs putpage:vp %p off %llx len %ld snode %p",

	if (len == 0) {
		/*
		 * Search the entire vp list for pages >= off.
		 */
		err = pvn_vplist_dirty(vp, off, spec_putapage,
	} else {
		/*
		 * Loop over all offsets in the range [off...off + len]
		 * looking for pages to deal with.  We set limits so
		 * that we kluster to klustsize boundaries.
		 */
		for (io_off = off; io_off < eoff && io_off < size;
			/*
			 * If we are not invalidating, synchronously
			 * freeing or writing pages use the routine
			 * page_lookup_nowait() to prevent reclaiming
			 * them from the free list.
			 */
			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
				pp = page_lookup(&vp->v_object, io_off,
				    (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED);
			} else {
				pp = page_lookup_nowait(&vp->v_object,
				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
			}

			if (pp == NULL || pvn_getdirty(pp, flags) == 0)

			err = spec_putapage(vp, pp, &tmpoff, &io_len,
			/*
			 * "io_off" and "io_len" are returned as
			 * the range of pages we actually wrote.
			 * This allows us to skip ahead more quickly
			 * since several pages may've been dealt
			 * with by this iteration of the loop.
			 */
		}
	}
/*
 * Write out a single page, possibly klustering adjacent
 */
static int
spec_putapage(
	struct vnode	*vp,
	page_t		*pp,
	uoff_t		*offp,		/* return value */
	size_t		*lenp,		/* return value */
	int		flags,
	struct cred	*cr)
{
	struct snode *sp = VTOS(vp);
	size_t adj_klustsize;

	/*
	 * Destroy read ahead value since we are really going to write.
	 */
	size = SPEC_SIZE(VTOS(sp->s_commonvp));

	adj_klustsize = klustsize;

	blkoff = (pp->p_offset / adj_klustsize) * adj_klustsize;

	if (blkoff + adj_klustsize <= size)
		blksz = adj_klustsize;
	else
		blksz = size - blkoff;

	/*
	 * Find a kluster that fits in one contiguous chunk.
	 */
	pp = pvn_write_kluster(vp, pp, &tmpoff, &io_len, blkoff,

	/*
	 * Check for page length rounding problems
	 * XXX - Is this necessary?
	 */
	if (io_off + io_len > size) {
		ASSERT((io_off + io_len) - size < PAGESIZE);
		io_len = size - io_off;
	}

	bp = spec_startio(vp, pp, io_off, io_len, B_WRITE | flags);

	/*
	 * Wait for i/o to complete if the request is not B_ASYNC.
	 */
	if ((flags & B_ASYNC) == 0) {
		pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
	}

	TRACE_4(TR_FAC_SPECFS, TR_SPECFS_PUTAPAGE,
	    "specfs putapage:vp %p offp %p snode %p err %d",
2188 bp
= pageio_setup(pp
, io_len
, vp
, flags
);
2190 bp
->b_edev
= vp
->v_rdev
;
2191 bp
->b_dev
= cmpdev(vp
->v_rdev
);
2192 bp
->b_blkno
= btodt(io_off
);
2193 bp
->b_un
.b_addr
= (caddr_t
)0;
2195 (void) bdev_strategy(bp
);
2198 lwp_stat_update(LWP_STAT_INBLK
, 1);
2200 lwp_stat_update(LWP_STAT_OUBLK
, 1);
static int
spec_poll(
	struct vnode	*vp,
	short		events,
	int		anyyet,
	short		*reventsp,
	struct pollhead **phpp,
	caller_context_t *ct)
{
	if (vp->v_type == VBLK)
		error = fs_poll(vp, events, anyyet, reventsp, phpp, ct);
	else {
		ASSERT(vp->v_type == VCHR);
			ASSERT(vp->v_stream != NULL);
			error = strpoll(vp->v_stream, events, anyyet,
		} else if (devopsp[getmajor(dev)]->devo_cb_ops->cb_chpoll) {
			error = cdev_poll(dev, events, anyyet, reventsp, phpp);
		} else {
			error = fs_poll(vp, events, anyyet, reventsp, phpp, ct);
		}
	}
/*
 * This routine is called through the cdevsw[] table to handle
 * traditional mmap'able devices that support a d_mmap function.
 */
int
spec_segmap(
	dev_t dev,
	off_t off,
	struct as *as,
	caddr_t *addrp,
	off_t len,
	uint_t prot,
	uint_t maxprot,
	uint_t flags,
	struct cred *cred)
{
	struct segdev_crargs dev_a;
	int (*mapfunc)(dev_t dev, off_t off, int prot);

	if ((mapfunc = devopsp[getmajor(dev)]->devo_cb_ops->cb_mmap) == nodev)

	TRACE_4(TR_FAC_SPECFS, TR_SPECFS_SEGMAP,
	    "specfs segmap:dev %x as %p len %lx prot %x",
	    dev, as, len, prot);

	/*
	 * Character devices that support the d_mmap
	 * interface can only be mmap'ed shared.
	 */
	if ((flags & MAP_TYPE) != MAP_SHARED)

	/*
	 * Check to ensure that the entire range is
	 * legal and we are not trying to map in
	 * more than the device will let us.
	 */
	for (i = 0; i < len; i += PAGESIZE) {
		if (cdev_mmap(mapfunc, dev, off + i, maxprot) == -1)
	}

	/* Pick an address w/o worrying about any vac alignment constraints. */
	error = choose_addr(as, addrp, len, off, ADDR_NOVACALIGN, flags);

	dev_a.mapfunc = mapfunc;
	dev_a.prot = (uchar_t)prot;
	dev_a.maxprot = (uchar_t)maxprot;
	dev_a.hat_flags = 0;
	dev_a.devmap_data = NULL;

	error = as_map(as, *addrp, len, segdev_create, &dev_a);
static int
spec_char_map(
	dev_t dev,
	offset_t off,
	struct as *as,
	caddr_t *addrp,
	size_t len,
	uchar_t prot,
	uchar_t maxprot,
	uint_t flags,
	struct cred *cred)
{
	major_t maj = getmajor(dev);
	int (*segmap)(dev_t, off_t, struct as *,
	    caddr_t *, off_t, uint_t, uint_t, uint_t, cred_t *);
	int (*devmap)(dev_t, devmap_cookie_t, offset_t,
	    size_t, size_t *, uint_t);
	int (*mmap)(dev_t dev, off_t off, int prot);

	/*
	 * Character device: let the device driver
	 * pick the appropriate segment driver.
	 *
	 * 4.x compat.: allow 'NULL' cb_segmap => spec_segmap
	 * Kindness: allow 'nulldev' cb_segmap => spec_segmap
	 */
	segmap = devopsp[maj]->devo_cb_ops->cb_segmap;
	if (segmap == NULL || segmap == nulldev || segmap == nodev) {
		mmap = devopsp[maj]->devo_cb_ops->cb_mmap;
		map_flag = devopsp[maj]->devo_cb_ops->cb_flag;

		/*
		 * Use old mmap framework if the driver has both mmap
		 * and devmap entry points.  This is to prevent the
		 * system from calling invalid devmap entry point
		 * for some drivers that might have put garbage in the
		 * devmap entry point.
		 */
		if ((map_flag & D_DEVMAP) || mmap == NULL ||
		    mmap == nulldev || mmap == nodev) {
			devmap = devopsp[maj]->devo_cb_ops->cb_devmap;

			/*
			 * If driver provides devmap entry point in
			 * cb_ops but not xx_segmap(9E), call
			 * devmap_setup with default settings
			 * (NULL) for callback_ops and driver
			 * callback private data
			 */
			if (devmap == nodev || devmap == NULL ||

			error = devmap_setup(dev, off, as, addrp,
			    len, prot, maxprot, flags, cred);
		} else
			segmap = spec_segmap;
	} else
		segmap = cdev_segmap;

	return ((*segmap)(dev, (off_t)off, as, addrp, len, prot,
	    maxprot, flags, cred));
static int
spec_map(
	struct vnode	*vp,
	offset_t	off,
	struct as	*as,
	caddr_t		*addrp,
	size_t		len,
	uchar_t		prot,
	uchar_t		maxprot,
	uint_t		flags,
	struct cred	*cred,
	caller_context_t *ct)
{
	struct snode *sp = VTOS(vp);

	if (vp->v_flag & VNOMAP)

	/* fail map with ENXIO if the device is fenced off */

	/*
	 * If file is locked, fail mapping attempt.
	 */
	if (vn_has_flocks(vp))

	if (vp->v_type == VCHR) {
		return (spec_char_map(vp->v_rdev, off, as, addrp, len, prot,
		    maxprot, flags, cred));
	} else if (vp->v_type == VBLK) {
		struct segvn_crargs vn_a;

		/*
		 * Block device, use segvn mapping to the underlying commonvp
		 */
		if (off > spec_maxoffset(vp))

		cvp = sp->s_commonvp;
		ASSERT(cvp != NULL);

		if (off < 0 || ((offset_t)(off + len) < 0))

		error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);

		vn_a.type = flags & MAP_TYPE;
		vn_a.prot = (uchar_t)prot;
		vn_a.maxprot = (uchar_t)maxprot;
		vn_a.flags = flags & ~MAP_TYPE;
		vn_a.lgrp_mem_policy_flags = 0;

		error = as_map(as, *addrp, len, segvn_create, &vn_a);
	}
static int
spec_addmap(
	struct vnode	*vp,	/* the common vnode */
	offset_t	off,
	struct as	*as,
	caddr_t		addr,
	size_t		len,	/* how many bytes to add */
	uchar_t		prot,
	uchar_t		maxprot,
	uint_t		flags,
	struct cred	*cred,
	caller_context_t *ct)
{
	struct snode *csp = VTOS(vp);

	ASSERT(vp != NULL && VTOS(vp)->s_commonvp == vp);

	/*
	 * XXX	Given the above assertion, this might not
	 *	be a particularly sensible thing to test.
	 */
	if (vp->v_flag & VNOMAP)

	/* fail with EIO if the device is fenced off */
	if (S_ISFENCED(csp))

	npages = btopr(len);

	csp->s_mapcnt += npages;
static int
spec_delmap(
	struct vnode	*vp,	/* the common vnode */
	offset_t	off,
	struct as	*as,
	caddr_t		addr,
	size_t		len,	/* how many bytes to take away */
	uint_t		prot,
	uint_t		maxprot,
	uint_t		flags,
	struct cred	*cred,
	caller_context_t *ct)
{
	struct snode *csp = VTOS(vp);

	/* segdev passes us the common vp */
	ASSERT(vp != NULL && VTOS(vp)->s_commonvp == vp);

	/* allow delmap to succeed even if device fenced off */

	/*
	 * XXX	Given the above assertion, this might not
	 *	be a particularly sensible thing to test.
	 */
	if (vp->v_flag & VNOMAP)

	npages = btopr(len);

	mutex_enter(&csp->s_lock);
	mcnt = (csp->s_mapcnt -= npages);

	/*
	 * Call the close routine when the last reference of any
	 * kind through any [s, v]node goes away.  The s_dip hold
	 * on the devinfo node is released when the vnode is
	 */
	if (csp->s_count == 0) {
		csp->s_flag &= ~(SNEEDCLOSE | SSIZEVALID);

		/* See comment in spec_close() */
		if (csp->s_flag & (SCLONE | SSELFCLONE))
			csp->s_flag &= ~SDIPSET;

		mutex_exit(&csp->s_lock);

		(void) device_close(vp, 0, cred);
	} else {
		mutex_exit(&csp->s_lock);
	}

	mutex_enter(&csp->s_lock);
	UNLOCK_CSP_LOCK_HELD(csp);
	mutex_exit(&csp->s_lock);
static int
spec_dump(struct vnode *vp, caddr_t addr, offset_t bn, offset_t count,
    caller_context_t *ct)
{
	/* allow dump to succeed even if device fenced off */

	ASSERT(vp->v_type == VBLK);
	return (bdev_dump(vp->v_rdev, addr, (daddr_t)bn, (int)count));
}
/*
 * Do i/o on the given page list from/to vp, io_off for io_len.
 * Flags are composed of:
 *	{B_ASYNC, B_INVAL, B_FREE, B_DONTNEED, B_READ, B_WRITE}
 * If B_ASYNC is not set i/o is waited for.
 */
static int
spec_pageio(struct vnode *vp, page_t *pp, uoff_t io_off, size_t io_len,
    int flags, cred_t *cr, caller_context_t *ct)
{
	struct buf *bp = NULL;

	bp = spec_startio(vp, pp, io_off, io_len, flags);

	/*
	 * Wait for i/o to complete if the request is not B_ASYNC.
	 */
	if ((flags & B_ASYNC) == 0) {
/*
 * Set ACL on underlying vnode if one exists, or return ENOSYS otherwise.
 */
static int
spec_setsecattr(struct vnode *vp, vsecattr_t *vsap, int flag, struct cred *cr,
    caller_context_t *ct)
{
	struct vnode *realvp;
	struct snode *sp = VTOS(vp);

	/* fail with ENXIO if the device is fenced off */

	/*
	 * The acl(2) system calls fop_rwlock on the file before setting an
	 * ACL, but since specfs does not serialize reads and writes, this
	 * VOP does not do anything.  However, some backing file systems may
	 * expect the lock to be held before setting an ACL, so it is taken
	 * here privately to avoid serializing specfs reads and writes.
	 */
	if ((realvp = sp->s_realvp) != NULL) {
		(void) fop_rwlock(realvp, V_WRITELOCK_TRUE, ct);
		error = fop_setsecattr(realvp, vsap, flag, cr, ct);
		(void) fop_rwunlock(realvp, V_WRITELOCK_TRUE, ct);
		return (error);
	}

	return (fs_nosys());
/*
 * Get ACL from underlying vnode if one exists, or fabricate it from
 * the permissions returned by spec_getattr() otherwise.
 */
static int
spec_getsecattr(struct vnode *vp, vsecattr_t *vsap, int flag, struct cred *cr,
    caller_context_t *ct)
{
	struct vnode *realvp;
	struct snode *sp = VTOS(vp);

	/* fail with ENXIO if the device is fenced off */

	if ((realvp = sp->s_realvp) != NULL)
		return (fop_getsecattr(realvp, vsap, flag, cr, ct));

	return (fs_fab_acl(vp, vsap, flag, cr, ct));
}
static int
spec_pathconf(struct vnode *vp, int cmd, ulong_t *valp, struct cred *cr,
    caller_context_t *ct)
{
	struct vnode *realvp;
	struct snode *sp = VTOS(vp);

	/* fail with ENXIO if the device is fenced off */

	if ((realvp = sp->s_realvp) != NULL)
		return (fop_pathconf(realvp, cmd, valp, cr, ct));

	return (fs_pathconf(vp, cmd, valp, cr, ct));
}