/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/* Portions Copyright 2007 Jeremy Teo */
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/resource.h>
#include <sys/vnode.h>
#include <sys/taskq.h>
#include <sys/atomic.h>
#include <sys/namei.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
#include <sys/unistd.h>
#include <sys/zfs_dir.h>
#include <sys/zfs_ioctl.h>
#include <sys/fs/zfs.h>
#include <sys/dirent.h>
#include <sys/policy.h>
#include <sys/sunddi.h>
#include <sys/filio.h>
#include <sys/zfs_ctldir.h>
#include <sys/zfs_fuid.h>
#include <sys/zfs_vfsops.h>
#include <sys/zfs_rlock.h>
#include <sys/extdirent.h>
#include <sys/kidmap.h>
#include <sys/sched.h>
#include <sys/extattr.h>
#include <miscfs/genfs/genfs.h>
/*
 * Programming rules.
 *
 * Each vnode op performs some logical unit of work.  To do this, the ZPL must
 * properly lock its in-core state, create a DMU transaction, do the work,
 * record this work in the intent log (ZIL), commit the DMU transaction,
 * and wait for the intent log to commit if it is a synchronous operation.
 * Moreover, the vnode ops must work in both normal and log replay context.
 * The ordering of events is important to avoid deadlocks and references
 * to freed memory.  The example below illustrates the following Big Rules:
 *
 *  (1)	A check must be made in each zfs thread for a mounted file system.
 *	This is done, avoiding races, using ZFS_ENTER(zfsvfs).
 *	A ZFS_EXIT(zfsvfs) is needed before all returns.  Any znodes
 *	must be checked with ZFS_VERIFY_ZP(zp).  Both of these macros
 *	can return EIO from the calling function.
 *
 *  (2)	VN_RELE() should always be the last thing except for zil_commit()
 *	(if necessary) and ZFS_EXIT().  This is for 3 reasons:
 *	First, if it's the last reference, the vnode/znode
 *	can be freed, so the zp may point to freed memory.  Second, the last
 *	reference will call zfs_zinactive(), which may induce a lot of work --
 *	pushing cached pages (which acquires range locks) and syncing out
 *	cached atime changes.  Third, zfs_zinactive() may require a new tx,
 *	which could deadlock the system if you were already holding one.
 *	If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
 *
 *  (3)	All range locks must be grabbed before calling dmu_tx_assign(),
 *	as they can span dmu_tx_assign() calls.
 *
 *  (4)	Always pass zfsvfs->z_assign as the second argument to dmu_tx_assign().
 *	In normal operation, this will be TXG_NOWAIT.  During ZIL replay,
 *	it will be a specific txg.  Either way, dmu_tx_assign() never blocks.
 *	This is critical because we don't want to block while holding locks.
 *	Note, in particular, that if a lock is sometimes acquired before
 *	the tx assigns, and sometimes after (e.g. z_lock), then failing to
 *	use a non-blocking assign can deadlock the system.  The scenario:
 *
 *	Thread A has grabbed a lock before calling dmu_tx_assign().
 *	Thread B is in an already-assigned tx, and blocks for this lock.
 *	Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
 *	forever, because the previous txg can't quiesce until B's tx commits.
 *
 *	If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
 *	then drop all locks, call dmu_tx_wait(), and try again.
 *
 *  (5)	If the operation succeeded, generate the intent log entry for it
 *	before dropping locks.  This ensures that the ordering of events
 *	in the intent log matches the order in which they actually occurred.
 *
 *  (6)	At the end of each vnode op, the DMU tx must always commit,
 *	regardless of whether there were any errors.
 *
 *  (7)	After dropping all locks, invoke zil_commit(zilog, seq, foid)
 *	to ensure that synchronous semantics are provided when necessary.
 *
 * In general, this is how things should be ordered in each vnode op:
 *
 *	ZFS_ENTER(zfsvfs);		// exit if unmounted
 * top:
 *	zfs_dirent_lock(&dl, ...)	// lock directory entry (may VN_HOLD())
 *	rw_enter(...);			// grab any other locks you need
 *	tx = dmu_tx_create(...);	// get DMU tx
 *	dmu_tx_hold_*();		// hold each object you might modify
 *	error = dmu_tx_assign(tx, zfsvfs->z_assign);	// try to assign
 *	if (error) {
 *		rw_exit(...);		// drop locks
 *		zfs_dirent_unlock(...);	// unlock directory entry
 *		VN_RELE(...);		// release held vnodes
 *		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
 *			dmu_tx_wait(tx);
 *			dmu_tx_abort(tx);
 *			goto top;
 *		}
 *		dmu_tx_abort(tx);	// abort DMU tx
 *		ZFS_EXIT(zfsvfs);	// finished in zfs
 *		return (error);		// really out of space
 *	}
 *	error = do_real_work();		// do whatever this VOP does
 *	if (error == 0)
 *		zfs_log_*(...);		// on success, make ZIL entry
 *	dmu_tx_commit(tx);		// commit DMU tx -- error or not
 *	rw_exit(...);			// drop locks
 *	zfs_dirent_unlock(dl, 0);	// unlock directory entry
 *	VN_RELE(...);			// release held vnodes
 *	zil_commit(zilog, seq, foid);	// synchronous when necessary
 *	ZFS_EXIT(zfsvfs);		// finished in zfs
 *	return (error);			// done, report error
 */
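/*
 * A minimal, compilable rendering of the rule (4) retry idiom above
 * (illustrative sketch only, not part of the original code flow; "zp",
 * "zfsvfs", "tx" and "error" are assumed to be in scope):
 */
#if 0
top:
	tx = dmu_tx_create(zfsvfs->z_os);		/* get DMU tx */
	dmu_tx_hold_bonus(tx, zp->z_id);		/* hold what we modify */
	error = dmu_tx_assign(tx, zfsvfs->z_assign);	/* never blocks */
	if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
		dmu_tx_wait(tx);	/* wait with no locks held */
		dmu_tx_abort(tx);
		goto top;		/* retry from scratch */
	}
#endif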
/* ARGSUSED */
static int
zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
{
	znode_t	*zp = VTOZ(*vpp);

	/* Honor ZFS_APPENDONLY: writes must be opened for append */
	if ((flag & FWRITE) && (zp->z_phys->zp_flags & ZFS_APPENDONLY) &&
	    ((flag & FAPPEND) == 0)) {
		return (EPERM);
	}

	if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
	    ZTOV(zp)->v_type == VREG &&
	    !(zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) &&
	    zp->z_phys->zp_size > 0)
		if (fs_vscan(*vpp, cr, 0) != 0)
			return (EACCES);

	/* Keep a count of the synchronous opens in the znode */
	if (flag & (FSYNC | FDSYNC))
		atomic_inc_32(&zp->z_sync_cnt);

	return (0);
}
/* ARGSUSED */
static int
zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
    caller_context_t *ct)
{
	znode_t	*zp = VTOZ(vp);

	dprintf("zfs_close called\n");

	/* Decrement the synchronous opens in the znode */
	if ((flag & (FSYNC | FDSYNC)) && (count == 1))
		atomic_dec_32(&zp->z_sync_cnt);

	/*
	 * Clean up any locks held by this process on the vp.
	 */
	cleanlocks(vp, ddi_get_pid(), 0);
	cleanshares(vp, ddi_get_pid());

	if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
	    ZTOV(zp)->v_type == VREG &&
	    !(zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) &&
	    zp->z_phys->zp_size > 0)
		VERIFY(fs_vscan(vp, cr, 1) == 0);

	return (0);
}
#ifdef PORT_NETBSD
/*
 * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and
 * data (cmd == _FIO_SEEK_DATA).  "off" is an in/out parameter.
 */
static int
zfs_holey(vnode_t *vp, u_long cmd, offset_t *off)
{
	znode_t	*zp = VTOZ(vp);
	uint64_t noff = (uint64_t)*off; /* new offset */
	uint64_t file_sz;
	int error;
	boolean_t hole;

	file_sz = zp->z_phys->zp_size;
	if (noff >= file_sz) {
		return (ENXIO);
	}

	if (cmd == _FIO_SEEK_HOLE)
		hole = B_TRUE;
	else
		hole = B_FALSE;

	error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff);

	/* end of file? */
	if ((error == ESRCH) || (noff > file_sz)) {
		/*
		 * Handle the virtual hole at the end of file.
		 */
		if (hole) {
			*off = file_sz;
			return (0);
		}
		return (ENXIO);
	}

	if (noff < *off)
		return (error);
	*off = noff;
	return (error);
}
#endif /* PORT_NETBSD */
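/*
 * Illustrative (hypothetical) user-level use of the seek ioctls handled
 * below: "off" is in/out, so on success it holds the start of the next
 * hole or data region at or after the given offset, e.g.:
 *
 *	offset_t off = 0;
 *	if (ioctl(fd, _FIO_SEEK_HOLE, &off) == 0)
 *		printf("first hole at %lld\n", (long long)off);
 */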
/* ARGSUSED */
static int
zfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred,
    int *rvalp, caller_context_t *ct)
{
	offset_t off;
	int error;
	zfsvfs_t *zfsvfs;
	znode_t *zp;

	switch (com) {
	case _FIOFFS:
		return (0);

		/*
		 * The following two ioctls are used by bfu.
		 * Faking them out is necessary to avoid bfu errors.
		 */
	case _FIOGDIO:
	case _FIOSDIO:
		return (0);

#ifdef PORT_NETBSD /* XXX NetBSD Do we support holes in files ? */
	case _FIO_SEEK_DATA:
	case _FIO_SEEK_HOLE:
		if (ddi_copyin((void *)data, &off, sizeof (off), flag))
			return (EFAULT);

		zp = VTOZ(vp);
		zfsvfs = zp->z_zfsvfs;
		ZFS_ENTER(zfsvfs);
		ZFS_VERIFY_ZP(zp);

		/* offset parameter is in/out */
		error = zfs_holey(vp, com, &off);
		ZFS_EXIT(zfsvfs);
		if (error)
			return (error);
		if (ddi_copyout(&off, (void *)data, sizeof (off), flag))
			return (EFAULT);
		return (0);
#endif /* PORT_NETBSD */
	}
	return (ENOTTY);
}
#ifdef PORT_NETBSD
/*
 * When a file is memory mapped, we must keep the IO data synchronized
 * between the DMU cache and the memory mapped pages.  What this means:
 *
 * On Write:	If we find a memory mapped page, we write to *both*
 *		the page and the dmu buffer.
 *
 * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
 *	the file is memory mapped.
 */
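/*
 * For example (illustrative numbers only): with 4K pages, a 10K write
 * starting at file offset 1K spans pages 0-2 and is broken into uiomoves
 * of 3K, 4K and 3K -- the first chunk is PAGESIZE minus the 1K offset
 * into the page, as computed by "bytes = MIN(PAGESIZE - off, len)" in
 * the loop below.
 */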
static int
mappedwrite(vnode_t *vp, int nbytes, uio_t *uio, dmu_tx_t *tx)
{
	znode_t	*zp = VTOZ(vp);
	objset_t *os = zp->z_zfsvfs->z_os;
	vm_object_t obj;
	vm_page_t m;
	struct sf_buf *sf;
	int64_t start, off;
	caddr_t va;
	int len = nbytes;
	int error = 0;
	uint64_t dirbytes;

	ASSERT(vp->v_mount != NULL);
	obj = vp->v_object;
	ASSERT(obj != NULL);

	start = uio->uio_loffset;
	off = start & PAGEOFFSET;
	dirbytes = 0;
	VM_OBJECT_LOCK(obj);
	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
		uint64_t bytes = MIN(PAGESIZE - off, len);
		uint64_t woff, fsize;

again:
		if ((m = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
		    vm_page_is_valid(m, (vm_offset_t)off, bytes)) {
			if (vm_page_sleep_if_busy(m, FALSE, "zfsmwb"))
				goto again;
			fsize = obj->un_pager.vnp.vnp_size;
			vm_page_busy(m);
			vm_page_lock_queues();
			vm_page_undirty(m);
			vm_page_unlock_queues();
			VM_OBJECT_UNLOCK(obj);
			if (dirbytes > 0) {
				error = dmu_write_uio(os, zp->z_id, uio,
				    dirbytes, tx);
				dirbytes = 0;
			}
			if (error == 0) {
				sf = sf_buf_alloc(m, SFB_CPUPRIVATE);
				va = (caddr_t)sf_buf_kva(sf);
				woff = uio->uio_loffset - off;
				error = uiomove(va + off, bytes,
				    UIO_WRITE, uio);
				/*
				 * The uiomove() above could have been partially
				 * successful, that's why we call dmu_write()
				 * below unconditionally. The page was marked
				 * non-dirty above and we would lose the changes
				 * without doing so. If the uiomove() failed
				 * entirely, well, we just write what we got
				 * before one more time.
				 */
				dmu_write(os, zp->z_id, woff,
				    MIN(PAGESIZE, fsize - woff), va, tx);
				sf_buf_free(sf);
			}
			VM_OBJECT_LOCK(obj);
			vm_page_wakeup(m);
		} else {
			if (__predict_false(obj->cache != NULL)) {
				vm_page_cache_free(obj, OFF_TO_IDX(start),
				    OFF_TO_IDX(start) + 1);
			}
			dirbytes += bytes;
		}
		len -= bytes;
		off = 0;
		if (error)
			break;
	}
	VM_OBJECT_UNLOCK(obj);
	if (error == 0 && dirbytes > 0)
		error = dmu_write_uio(os, zp->z_id, uio, dirbytes, tx);
	return (error);
}
/*
 * When a file is memory mapped, we must keep the IO data synchronized
 * between the DMU cache and the memory mapped pages.  What this means:
 *
 * On Read:	We "read" preferentially from memory mapped pages,
 *		else we default from the dmu buffer.
 *
 * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
 *	the file is memory mapped.
 */
static int
mappedread(vnode_t *vp, int nbytes, uio_t *uio)
{
	znode_t	*zp = VTOZ(vp);
	objset_t *os = zp->z_zfsvfs->z_os;
	vm_object_t obj;
	vm_page_t m;
	struct sf_buf *sf;
	int64_t start, off;
	caddr_t va;
	int len = nbytes;
	int error = 0;
	uint64_t dirbytes;

	ASSERT(vp->v_mount != NULL);
	obj = vp->v_object;
	ASSERT(obj != NULL);

	start = uio->uio_loffset;
	off = start & PAGEOFFSET;
	dirbytes = 0;
	VM_OBJECT_LOCK(obj);
	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
		uint64_t bytes = MIN(PAGESIZE - off, len);

again:
		if ((m = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
		    vm_page_is_valid(m, (vm_offset_t)off, bytes)) {
			if (vm_page_sleep_if_busy(m, FALSE, "zfsmrb"))
				goto again;
			vm_page_busy(m);
			VM_OBJECT_UNLOCK(obj);
			if (dirbytes > 0) {
				error = dmu_read_uio(os, zp->z_id, uio,
				    dirbytes);
				dirbytes = 0;
			}
			if (error == 0) {
				sf = sf_buf_alloc(m, SFB_CPUPRIVATE);
				va = (caddr_t)sf_buf_kva(sf);
				error = uiomove(va + off, bytes, UIO_READ, uio);
				sf_buf_free(sf);
			}
			VM_OBJECT_LOCK(obj);
			vm_page_wakeup(m);
		} else if (m != NULL && uio->uio_segflg == UIO_NOCOPY) {
			/*
			 * The code below is here to make sendfile(2) work
			 * correctly with ZFS.  As pointed out by ups@,
			 * sendfile(2) should be changed to use VOP_GETPAGES(),
			 * but that pessimizes performance of sendfile/UFS;
			 * that's why this special case is handled in ZFS code.
			 */
			if (vm_page_sleep_if_busy(m, FALSE, "zfsmrb"))
				goto again;
			vm_page_busy(m);
			VM_OBJECT_UNLOCK(obj);
			if (dirbytes > 0) {
				error = dmu_read_uio(os, zp->z_id, uio,
				    dirbytes);
				dirbytes = 0;
			}
			if (error == 0) {
				sf = sf_buf_alloc(m, SFB_CPUPRIVATE);
				va = (caddr_t)sf_buf_kva(sf);
				error = dmu_read(os, zp->z_id, start + off,
				    bytes, (void *)(va + off));
				sf_buf_free(sf);
			}
			VM_OBJECT_LOCK(obj);
			vm_page_wakeup(m);
			if (error == 0)
				uio->uio_resid -= bytes;
		} else {
			dirbytes += bytes;
		}
		len -= bytes;
		off = 0;
		if (error)
			break;
	}
	VM_OBJECT_UNLOCK(obj);
	if (error == 0 && dirbytes > 0)
		error = dmu_read_uio(os, zp->z_id, uio, dirbytes);
	return (error);
}
#endif /* PORT_NETBSD */
offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */
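/*
 * For example: with the default 1 MB chunk, a 3 MB read starting at file
 * offset 1.5 MB is issued as chunks of 0.5 MB, 1 MB, 1 MB and 0.5 MB --
 * P2PHASE(uio_loffset, zfs_read_chunk_size) trims the first chunk so that
 * every subsequent chunk is aligned to a zfs_read_chunk_size boundary.
 */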
/*
 * Read bytes from specified file into supplied buffer.
 *
 *	IN:	vp	- vnode of file to be read from.
 *		uio	- structure supplying read location, range info,
 *			  and return buffer.
 *		ioflag	- SYNC flags; used to provide FRSYNC semantics.
 *		cr	- credentials of caller.
 *		ct	- caller context
 *
 *	OUT:	uio	- updated offset and range, buffer filled.
 *
 *	RETURN:	0 if success
 *		error code if failure
 *
 * Side Effects:
 *	vp - atime updated if byte count > 0
 */
/* ARGSUSED */
static int
zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	objset_t	*os;
	ssize_t		n, nbytes;
	int		error;
	rl_t		*rl;

	dprintf("zfs_read called\n");

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);
	os = zfsvfs->z_os;

	if (zp->z_phys->zp_flags & ZFS_AV_QUARANTINED) {
		ZFS_EXIT(zfsvfs);
		return (EACCES);
	}

	/*
	 * Validate file offset
	 */
	if (uio->uio_loffset < (offset_t)0) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/*
	 * Fasttrack empty reads
	 */
	if (uio->uio_resid == 0) {
		ZFS_EXIT(zfsvfs);
		return (0);
	}

	/*
	 * Check for mandatory locks
	 */
	if (MANDMODE((mode_t)zp->z_phys->zp_mode)) {
		if (error = chklock(vp, FREAD,
		    uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	/*
	 * If we're in FRSYNC mode, sync out this znode before reading it.
	 */
	if (ioflag & FRSYNC)
		zil_commit(zfsvfs->z_log, zp->z_last_itx, zp->z_id);

	/*
	 * Lock the range against changes.
	 */
	rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER);

	/*
	 * If we are reading past end-of-file we can skip
	 * to the end; but we might still need to set atime.
	 */
	if (uio->uio_loffset >= zp->z_phys->zp_size) {
		error = 0;
		goto out;
	}

	ASSERT(uio->uio_loffset < zp->z_phys->zp_size);
	n = MIN(uio->uio_resid, zp->z_phys->zp_size - uio->uio_loffset);

	while (n > 0) {
		nbytes = MIN(n, zfs_read_chunk_size -
		    P2PHASE(uio->uio_loffset, zfs_read_chunk_size));

//		if (vn_has_cached_data(vp))
//			error = mappedread(vp, nbytes, uio);
		error = dmu_read_uio(os, zp->z_id, uio, nbytes);
		if (error) {
			/* convert checksum errors into IO errors */
			if (error == ECKSUM)
				error = EIO;
			break;
		}

		n -= nbytes;
	}

out:
	zfs_range_unlock(rl);

	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
	ZFS_EXIT(zfsvfs);
	return (error);
}
/*
 * Fault in the pages of the first n bytes specified by the uio structure.
 * 1 byte in each page is touched and the uio struct is unmodified.
 * Any error causes this routine to return early, as this is only a
 * best-effort attempt to get the pages resident.  This is a copy of
 * ufs_trans_touch().
 */
static void
zfs_prefault_write(ssize_t n, struct uio *uio)
{
	struct iovec *iov;
	ulong_t cnt, incr;
	caddr_t p;

	if (VMSPACE_IS_KERNEL_P(uio->uio_vmspace))
		return;

	iov = uio->uio_iov;

	while (n) {
		cnt = MIN(iov->iov_len, n);
		if (cnt == 0) {
			/* empty iov entry */
			iov++;
			continue;
		}
		n -= cnt;
		/*
		 * touch each page in this segment.
		 */
		p = iov->iov_base;
		while (cnt) {
			if (fubyte(p) == -1)
				return;
			incr = MIN(cnt, PAGESIZE);
			p += incr;
			cnt -= incr;
		}
		/*
		 * touch the last byte in case it straddles a page.
		 */
		p--;
		if (fubyte(p) == -1)
			return;
		iov++;
	}
}
/*
 * Write the bytes to a file.
 *
 *	IN:	vp	- vnode of file to be written to.
 *		uio	- structure supplying write location, range info,
 *			  and data buffer.
 *		ioflag	- IO_APPEND flag set if in append mode.
 *		cr	- credentials of caller.
 *		ct	- caller context (NFS/CIFS fem monitor only)
 *
 *	OUT:	uio	- updated offset and range.
 *
 *	RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	vp - ctime|mtime updated if byte count > 0
 */
/* ARGSUSED */
static int
zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	rlim64_t	limit = MAXOFFSET_T;
	ssize_t		start_resid = uio->uio_resid;
	ssize_t		tx_bytes;
	uint64_t	end_size;
	dmu_tx_t	*tx;
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	zilog_t		*zilog;
	offset_t	woff;
	ssize_t		n, nbytes;
	rl_t		*rl;
	int		max_blksz = zfsvfs->z_max_blksz;
	uint64_t	pflags;
	int		error;

	dprintf("zfs_write called\n");

	/*
	 * Fasttrack empty write
	 */
	n = start_resid;
	if (n == 0)
		return (0);

	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
		limit = MAXOFFSET_T;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/*
	 * If immutable or not appending then return EPERM
	 */
	pflags = zp->z_phys->zp_flags;
	if ((pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) ||
	    ((pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) &&
	    (uio->uio_loffset < zp->z_phys->zp_size))) {
		ZFS_EXIT(zfsvfs);
		return (EPERM);
	}

	zilog = zfsvfs->z_log;

	/*
	 * Pre-fault the pages to ensure slow (eg NFS) pages
	 * don't hold up txg.
	 */
	zfs_prefault_write(n, uio);

	/*
	 * If in append mode, set the io offset pointer to eof.
	 */
	if (ioflag & IO_APPEND) {
		/*
		 * Range lock for a file append:
		 * The value for the start of range will be determined by
		 * zfs_range_lock() (to guarantee append semantics).
		 * If this write will cause the block size to increase,
		 * zfs_range_lock() will lock the entire file, so we must
		 * later reduce the range after we grow the block size.
		 */
		rl = zfs_range_lock(zp, 0, n, RL_APPEND);
		if (rl->r_len == UINT64_MAX) {
			/* overlocked, zp_size can't change */
			woff = uio->uio_loffset = zp->z_phys->zp_size;
		} else {
			woff = uio->uio_loffset = rl->r_off;
		}
	} else {
		woff = uio->uio_loffset;
		/*
		 * Validate file offset
		 */
		if (woff < 0) {
			ZFS_EXIT(zfsvfs);
			return (EINVAL);
		}

		/*
		 * If we need to grow the block size then zfs_range_lock()
		 * will lock a wider range than we request here.
		 * Later after growing the block size we reduce the range.
		 */
		rl = zfs_range_lock(zp, woff, n, RL_WRITER);
	}

	if (woff >= limit) {
		zfs_range_unlock(rl);
		ZFS_EXIT(zfsvfs);
		return (EFBIG);
	}

	if ((woff + n) > limit || woff > (limit - n))
		n = limit - woff;

	/*
	 * Check for mandatory locks
	 */
	if (MANDMODE((mode_t)zp->z_phys->zp_mode) &&
	    (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) {
		zfs_range_unlock(rl);
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	end_size = MAX(zp->z_phys->zp_size, woff + n);

	/*
	 * Write the file in reasonable size chunks.  Each chunk is written
	 * in a separate transaction; this keeps the intent log records small
	 * and allows us to do more fine-grained space accounting.
	 */
	while (n > 0) {
		/*
		 * Start a transaction.
		 */
		woff = uio->uio_loffset;
		tx = dmu_tx_create(zfsvfs->z_os);
		dmu_tx_hold_bonus(tx, zp->z_id);
		dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz));
		error = dmu_tx_assign(tx, zfsvfs->z_assign);
		if (error) {
			if (error == ERESTART &&
			    zfsvfs->z_assign == TXG_NOWAIT) {
				dmu_tx_wait(tx);
				dmu_tx_abort(tx);
				continue;
			}
			dmu_tx_abort(tx);
			break;
		}

		/*
		 * If zfs_range_lock() over-locked we grow the blocksize
		 * and then reduce the lock range.  This will only happen
		 * on the first iteration since zfs_range_reduce() will
		 * shrink down r_len to the appropriate size.
		 */
		if (rl->r_len == UINT64_MAX) {
			uint64_t new_blksz;

			if (zp->z_blksz > max_blksz) {
				ASSERT(!ISP2(zp->z_blksz));
				new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE);
			} else {
				new_blksz = MIN(end_size, max_blksz);
			}
			zfs_grow_blocksize(zp, new_blksz, tx);
			zfs_range_reduce(rl, woff, n);
		}

		/*
		 * XXX - should we really limit each write to z_max_blksz?
		 * Perhaps we should use SPA_MAXBLOCKSIZE chunks?
		 */
		nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz));

		if (woff + nbytes > zp->z_phys->zp_size)
			uvm_vnp_setsize(vp, woff + nbytes);

		rw_enter(&zp->z_map_lock, RW_READER);

		tx_bytes = uio->uio_resid;
		if (vn_has_cached_data(vp)) {
			rw_exit(&zp->z_map_lock);
//			error = mappedwrite(vp, nbytes, uio, tx);
		} else {
			error = dmu_write_uio(zfsvfs->z_os, zp->z_id,
			    uio, nbytes, tx);
			rw_exit(&zp->z_map_lock);
		}
		tx_bytes -= uio->uio_resid;

		/*
		 * If we made no progress, we're done.  If we made even
		 * partial progress, update the znode and ZIL accordingly.
		 */
		if (tx_bytes == 0) {
			dmu_tx_commit(tx);
			ASSERT(error != 0);
			break;
		}

		/*
		 * Clear Set-UID/Set-GID bits on successful write if not
		 * privileged and at least one of the execute bits is set.
		 *
		 * It would be nice to do this after all writes have
		 * been done, but that would still expose the ISUID/ISGID
		 * to another app after the partial write is committed.
		 *
		 * Note: we don't call zfs_fuid_map_id() here because
		 * user 0 is not an ephemeral uid.
		 */
		mutex_enter(&zp->z_acl_lock);
		if ((zp->z_phys->zp_mode & (S_IXUSR | (S_IXUSR >> 3) |
		    (S_IXUSR >> 6))) != 0 &&
		    (zp->z_phys->zp_mode & (S_ISUID | S_ISGID)) != 0 &&
		    secpolicy_vnode_setid_retain(cr,
		    (zp->z_phys->zp_mode & S_ISUID) != 0 &&
		    zp->z_phys->zp_uid == 0) != 0) {
			zp->z_phys->zp_mode &= ~(S_ISUID | S_ISGID);
		}
		mutex_exit(&zp->z_acl_lock);

		/*
		 * Update time stamp.  NOTE: This marks the bonus buffer as
		 * dirty, so we don't have to do it again for zp_size.
		 */
		zfs_time_stamper(zp, CONTENT_MODIFIED, tx);

		/*
		 * Update the file size (zp_size) if it has changed;
		 * account for possible concurrent updates.
		 */
		while ((end_size = zp->z_phys->zp_size) < uio->uio_loffset)
			(void) atomic_cas_64(&zp->z_phys->zp_size, end_size,
			    uio->uio_loffset);
		zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag);
		dmu_tx_commit(tx);

		if (error != 0)
			break;
		ASSERT(tx_bytes == nbytes);
		n -= nbytes;
	}

	zfs_range_unlock(rl);

	/*
	 * If we're in replay mode, or we made no progress, return error.
	 * Otherwise, it's at least a partial write, so it's successful.
	 */
	if (zfsvfs->z_assign >= TXG_INITIAL || uio->uio_resid == start_resid) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (ioflag & (FSYNC | FDSYNC))
		zil_commit(zilog, zp->z_last_itx, zp->z_id);

	ZFS_EXIT(zfsvfs);
	return (0);
}
static void
zfs_get_done(dmu_buf_t *db, void *vzgd)
{
	zgd_t *zgd = (zgd_t *)vzgd;
	rl_t *rl = zgd->zgd_rl;
	vnode_t *vp = ZTOV(rl->r_zp);
	objset_t *os = rl->r_zp->z_zfsvfs->z_os;

	dmu_buf_rele(db, vzgd);
	zfs_range_unlock(rl);
	/*
	 * Release the vnode asynchronously as we currently have the
	 * txg stopped from syncing.
	 */
	VN_RELE_ASYNC(vp, dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
	zil_add_block(zgd->zgd_zilog, zgd->zgd_bp);
	kmem_free(zgd, sizeof (zgd_t));
}
/*
 * Get data to generate a TX_WRITE intent log record.
 */
int
zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
{
	zfsvfs_t *zfsvfs = arg;
	objset_t *os = zfsvfs->z_os;
	znode_t *zp;
	uint64_t off = lr->lr_offset;
	dmu_buf_t *db;
	rl_t *rl;
	zgd_t *zgd;
	int dlen = lr->lr_length;		/* length of user data */
	int error = 0;

	ASSERT(zio);
	ASSERT(dlen != 0);

	/*
	 * Nothing to do if the file has been removed
	 */
	if (zfs_zget(zfsvfs, lr->lr_foid, &zp) != 0)
		return (ENOENT);
	if (zp->z_unlinked) {
		/*
		 * Release the vnode asynchronously as we currently have the
		 * txg stopped from syncing.
		 */
		VN_RELE_ASYNC(ZTOV(zp),
		    dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
		return (ENOENT);
	}

	/*
	 * Write records come in two flavors: immediate and indirect.
	 * For small writes it's cheaper to store the data with the
	 * log record (immediate); for large writes it's cheaper to
	 * sync the data and get a pointer to it (indirect) so that
	 * we don't have to write the data twice.
	 */
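	/*
	 * (The choice between the two flavors is made earlier, when the
	 * ZIL builds the TX_WRITE record; by the time this callback runs,
	 * "buf" is non-NULL only for the immediate case.)
	 */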
	if (buf != NULL) { /* immediate write */
		rl = zfs_range_lock(zp, off, dlen, RL_READER);
		/* test for truncation needs to be done while range locked */
		if (off >= zp->z_phys->zp_size) {
			error = ENOENT;
			goto out;
		}
		VERIFY(0 == dmu_read(os, lr->lr_foid, off, dlen, buf));
	} else { /* indirect write */
		uint64_t boff; /* block starting offset */

		/*
		 * Have to lock the whole block to ensure when it's
		 * written out and its checksum is being calculated
		 * that no one can change the data.  We need to re-check
		 * blocksize after we get the lock in case it's changed!
		 */
		for (;;) {
			if (ISP2(zp->z_blksz)) {
				boff = P2ALIGN_TYPED(off, zp->z_blksz,
				    uint64_t);
			} else {
				boff = 0;
			}
			dlen = zp->z_blksz;
			rl = zfs_range_lock(zp, boff, dlen, RL_READER);
			if (zp->z_blksz == dlen)
				break;
			zfs_range_unlock(rl);
		}
		/* test for truncation needs to be done while range locked */
		if (off >= zp->z_phys->zp_size) {
			error = ENOENT;
			goto out;
		}
		zgd = (zgd_t *)kmem_alloc(sizeof (zgd_t), KM_SLEEP);
		zgd->zgd_rl = rl;
		zgd->zgd_zilog = zfsvfs->z_log;
		zgd->zgd_bp = &lr->lr_blkptr;
		VERIFY(0 == dmu_buf_hold(os, lr->lr_foid, boff, zgd, &db));
		ASSERT(boff == db->db_offset);
		lr->lr_blkoff = off - boff;
		error = dmu_sync(zio, db, &lr->lr_blkptr,
		    lr->lr_common.lrc_txg, zfs_get_done, zgd);
		ASSERT((error && error != EINPROGRESS) ||
		    lr->lr_length <= zp->z_blksz);
		if (error == 0)
			zil_add_block(zfsvfs->z_log, &lr->lr_blkptr);
		/*
		 * If we get EINPROGRESS, then we need to wait for a
		 * write IO initiated by dmu_sync() to complete before
		 * we can release this dbuf.  We will finish everything
		 * up in the zfs_get_done() callback.
		 */
		if (error == EINPROGRESS)
			return (0);
		dmu_buf_rele(db, zgd);
		kmem_free(zgd, sizeof (zgd_t));
	}
out:
	zfs_range_unlock(rl);
	/*
	 * Release the vnode asynchronously as we currently have the
	 * txg stopped from syncing.
	 */
	VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
	return (error);
}
/*ARGSUSED*/
static int
zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr,
    caller_context_t *ct)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	if (flag & V_ACE_MASK)
		error = zfs_zaccess(zp, mode, flag, B_FALSE, cr);
	else
		error = zfs_zaccess_rwx(zp, mode, flag, cr);

	ZFS_EXIT(zfsvfs);
	return (error);
}
/*
 * Lookup an entry in a directory, or an extended attribute directory.
 * If it exists, return a held vnode reference for it.
 *
 *	IN:	dvp	- vnode of directory to search.
 *		nm	- name of entry to lookup.
 *		pnp	- full pathname to lookup [UNUSED].
 *		flags	- LOOKUP_XATTR set if looking for an attribute.
 *		rdir	- root directory vnode [UNUSED].
 *		cr	- credentials of caller.
 *		ct	- caller context
 *		direntflags - directory lookup flags
 *		realpnp - returned pathname.
 *
 *	OUT:	vpp	- vnode of located entry, NULL if not found.
 *
 *	RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	NA
 */
/* ARGSUSED */
static int
zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp,
    int nameiop, cred_t *cr, int flags)
{
	znode_t *zdp = VTOZ(dvp);
	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
	int error;
	int *direntflags = NULL;
	void *realpnp = NULL;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zdp);

	dprintf("zfs_lookup called %s\n", nm);

	if (flags & LOOKUP_XATTR) {
		/*
		 * If the xattr property is off, refuse the lookup request.
		 */
		if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) {
			ZFS_EXIT(zfsvfs);
			return (EINVAL);
		}

		/*
		 * We don't allow recursive attributes.
		 * Maybe someday we will.
		 */
		if (zdp->z_phys->zp_flags & ZFS_XATTR) {
			ZFS_EXIT(zfsvfs);
			return (EINVAL);
		}

		if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}

		/*
		 * Do we have permission to get into attribute directory?
		 */
		if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0,
		    B_FALSE, cr)) {
			VN_RELE(*vpp);
			*vpp = NULL;
		}

		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (dvp->v_type != VDIR) {
		ZFS_EXIT(zfsvfs);
		return (ENOTDIR);
	}

	/*
	 * Check accessibility of directory.
	 */
	if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Before tediously performing a linear scan of the directory,
	 * check the name cache to see if the directory/name pair
	 * we are looking for is known already.
	 */
	if ((error = cache_lookup(dvp, vpp, cnp)) >= 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (EILSEQ);
	}

	error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp);
	if (error == 0) {
		/*
		 * Convert device special files
		 */
		if (IS_DEVVP(*vpp)) {
			vnode_t	*svp;

			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
			VN_RELE(*vpp);
			if (svp == NULL)
				error = ENOSYS;
			else
				*vpp = svp;
		}
	}

	ZFS_EXIT(zfsvfs);

	/* Translate errors and add SAVENAME when needed. */
	if (cnp->cn_flags & ISLASTCN) {
		switch (nameiop) {
		case CREATE:
		case RENAME:
			if (error == ENOENT) {
				error = EJUSTRETURN;
				cnp->cn_flags |= SAVENAME;
				break;
			}
			/* FALLTHROUGH */
		case DELETE:
			if (error == 0)
				cnp->cn_flags |= SAVENAME;
			break;
		}
	}
	if (error == 0 && (nm[0] != '.' || nm[1] != '\0')) {
		int ltype = 0;

		if (cnp->cn_flags & ISDOTDOT) {
			ltype = VOP_ISLOCKED(dvp);
			VOP_UNLOCK(dvp, 0);
		}
		error = vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
		if (cnp->cn_flags & ISDOTDOT)
			vn_lock(dvp, ltype | LK_RETRY);
		if (error != 0) {
			VN_RELE(*vpp);
			*vpp = NULL;
			return (error);
		}
	}

	/*
	 * Insert name into cache if appropriate.
	 */
	if ((cnp->cn_flags & MAKEENTRY) == 0) {
		return (error);
	}

	switch (error) {
	case 0:
		cache_enter(dvp, *vpp, cnp);
		break;
	case ENOENT:
		if (nameiop != CREATE)
			cache_enter(dvp, *vpp, cnp);
		break;
	}

	return (error);
}
/*
 * Attempt to create a new entry in a directory.  If the entry
 * already exists, truncate the file if permissible, else return
 * an error.  Return the vp of the created or trunc'd file.
 *
 *	IN:	dvp	- vnode of directory to put new file entry in.
 *		name	- name of new file entry.
 *		vap	- attributes of new file.
 *		excl	- flag indicating exclusive or non-exclusive mode.
 *		mode	- mode to open file with.
 *		cr	- credentials of caller.
 *		flag	- large file flag [UNUSED].
 *		ct	- caller context
 *		vsecp	- ACL to be set
 *
 *	OUT:	vpp	- vnode of created or trunc'd entry.
 *
 *	RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	dvp - ctime|mtime updated if new entry created
 *	 vp - ctime|mtime always, atime if new
 */
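/*
 * For example, an open(path, O_CREAT | O_EXCL) arrives here with a
 * non-zero "excl", so an existing entry fails the create instead of
 * being truncated; a plain O_CREAT | O_TRUNC open arrives with
 * excl == 0 and vap->va_size == 0, which takes the zfs_freesp()
 * truncation path below.
 */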
/* ARGSUSED */
static int
zfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode,
    vnode_t **vpp, cred_t *cr)
{
	znode_t		*zp, *dzp = VTOZ(dvp);
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	objset_t	*os;
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	int		error;
	zfs_acl_t	*aclp = NULL;
	zfs_fuid_info_t	*fuidp = NULL;
	int		zflg = 0;
	int		flag = 0;
	vsecattr_t	*vsecp = NULL;

	dprintf("zfs_create called\n");

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */
	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (vsecp || (vap->va_mask & AT_XVATTR) ||
	    IS_EPHEMERAL(crgetuid(cr)) || IS_EPHEMERAL(crgetgid(cr))))
		return (EINVAL);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	os = zfsvfs->z_os;
	zilog = zfsvfs->z_log;

	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (EILSEQ);
	}

	if (vap->va_mask & AT_XVATTR) {
		if ((error = secpolicy_xvattr((xvattr_t *)vap,
		    crgetuid(cr), cr, vap->va_type)) != 0) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}
top:
	*vpp = NULL;

	if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr))
		vap->va_mode &= ~S_ISVTX;

	if (*name == '\0') {
		/*
		 * Null component name refers to the directory itself.
		 */
		VN_HOLD(dvp);
		zp = dzp;
		dl = NULL;
		error = 0;
	} else {
		/* possible VN_HOLD(zp) */

		if (flag & FIGNORECASE)
			zflg |= ZCILOOK;

		error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
		    NULL, NULL);
		if (error) {
			if (strcmp(name, "..") == 0)
				error = EISDIR;
			ZFS_EXIT(zfsvfs);
			if (aclp)
				zfs_acl_free(aclp);
			return (error);
		}
	}
	if (vsecp && aclp == NULL) {
		error = zfs_vsec_2_aclp(zfsvfs, vap->va_type, vsecp, &aclp);
		if (error) {
			if (dl)
				zfs_dirent_unlock(dl, 0);
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	if (zp == NULL) {
		uint64_t txtype;

		/*
		 * Create a new file object and update the directory
		 * to reference it.
		 */
		if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
			goto out;
		}

		/*
		 * We only support the creation of regular files in
		 * extended attribute directories.
		 */
		if ((dzp->z_phys->zp_flags & ZFS_XATTR) &&
		    (vap->va_type != VREG)) {
			error = EINVAL;
			goto out;
		}

		tx = dmu_tx_create(os);
		dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
		if ((aclp && aclp->z_has_fuids) || IS_EPHEMERAL(crgetuid(cr)) ||
		    IS_EPHEMERAL(crgetgid(cr))) {
			if (zfsvfs->z_fuid_obj == 0) {
				dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
				dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
				    FUID_SIZE_ESTIMATE(zfsvfs));
				dmu_tx_hold_zap(tx, MASTER_NODE_OBJ,
				    FALSE, NULL);
			} else {
				dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
				dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
				    FUID_SIZE_ESTIMATE(zfsvfs));
			}
		}
		dmu_tx_hold_bonus(tx, dzp->z_id);
		dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
		if ((dzp->z_phys->zp_flags & ZFS_INHERIT_ACE) || aclp) {
			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
			    0, SPA_MAXBLOCKSIZE);
		}
		error = dmu_tx_assign(tx, zfsvfs->z_assign);
		if (error) {
			zfs_dirent_unlock(dl, 0);
			if (error == ERESTART &&
			    zfsvfs->z_assign == TXG_NOWAIT) {
				dmu_tx_wait(tx);
				dmu_tx_abort(tx);
				goto top;
			}
			dmu_tx_abort(tx);
			ZFS_EXIT(zfsvfs);
			if (aclp)
				zfs_acl_free(aclp);
			return (error);
		}
		zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, aclp, &fuidp);
		(void) zfs_link_create(dl, zp, tx, ZNEW);
		txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
		if (flag & FIGNORECASE)
			txtype |= TX_CI;
		zfs_log_create(zilog, tx, txtype, dzp, zp, name,
		    vsecp, fuidp, vap);
		if (fuidp)
			zfs_fuid_info_free(fuidp);
		dmu_tx_commit(tx);
	} else {
		int aflags = (flag & FAPPEND) ? V_APPEND : 0;

		/*
		 * A directory entry already exists for this name.
		 */
		/*
		 * Can't truncate an existing file if in exclusive mode.
		 */
		if (excl) {
			error = EEXIST;
			goto out;
		}
		/*
		 * Can't open a directory for writing.
		 */
		if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) {
			error = EISDIR;
			goto out;
		}
		/*
		 * Verify requested access to file.
		 */
		if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) {
			goto out;
		}

		mutex_enter(&dzp->z_lock);
		dzp->z_seq++;
		mutex_exit(&dzp->z_lock);

		/*
		 * Truncate regular files if requested.
		 */
		if ((ZTOV(zp)->v_type == VREG) &&
		    (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) {
			/* we can't hold any locks when calling zfs_freesp() */
			zfs_dirent_unlock(dl, 0);
			dl = NULL;
			error = zfs_freesp(zp, 0, 0, mode, TRUE);
			if (error == 0) {
				vnevent_create(ZTOV(zp), NULL);
			}
		}
	}
out:
	if (dl)
		zfs_dirent_unlock(dl, 0);

	if (error) {
		if (zp)
			VN_RELE(ZTOV(zp));
	} else {
		*vpp = ZTOV(zp);
		/*
		 * If vnode is for a device return a specfs vnode instead.
		 */
		if (IS_DEVVP(*vpp)) {
			vnode_t	*svp;

			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
			VN_RELE(*vpp);
			if (svp == NULL)
				error = ENOSYS;
			*vpp = svp;
		}
	}
	if (aclp)
		zfs_acl_free(aclp);

	ZFS_EXIT(zfsvfs);
	return (error);
}
/*
 * Remove an entry from a directory.
 *
 *	IN:	dvp	- vnode of directory to remove entry from.
 *		name	- name of entry to remove.
 *		cr	- credentials of caller.
 *		ct	- caller context
 *		flags	- case flags
 *
 *	RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	dvp - ctime|mtime
 *	 vp - ctime (if nlink > 0)
 */
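/*
 * For example, unlinking one name of a file that has other hard links
 * leaves the file itself intact (nlink > 0), so only its ctime -- plus
 * the directory's ctime|mtime -- is updated, per the rules above.
 */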
/*ARGSUSED*/
static int
zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct,
    int flags)
{
	znode_t		*zp, *dzp = VTOZ(dvp);
	znode_t		*xzp = NULL;
	vnode_t		*vp;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	uint64_t	acl_obj, xattr_obj;
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	boolean_t	may_delete_now, delete_now = FALSE;
	boolean_t	unlinked, toobig = FALSE;
	uint64_t	txtype;
	pathname_t	*realnmp = NULL;
	int		zflg = ZEXISTS;
	int		error;

	dprintf("zfs_remove called\n");

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	if (flags & FIGNORECASE) {
		zflg |= ZCILOOK;
	}
top:
	/*
	 * Attempt to lock directory; fail if entry doesn't exist.
	 */
	if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
	    NULL, realnmp)) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	vp = ZTOV(zp);

	if (error = zfs_zaccess_delete(dzp, zp, cr)) {
		goto out;
	}

	/*
	 * Need to use rmdir for removing directories.
	 */
	if (vp->v_type == VDIR) {
		error = EPERM;
		goto out;
	}

	vnevent_remove(vp, dvp, name, ct);

	if (realnmp)
		dnlc_remove(dvp, realnmp->pn_buf);
	else
		dnlc_remove(dvp, name);

	may_delete_now = FALSE;

	/*
	 * We may delete the znode now, or we may put it in the unlinked set;
	 * it depends on whether we're the last link, and on whether there are
	 * other holds on the vnode.  So we dmu_tx_hold() the right things to
	 * allow for either case.
	 */
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
	dmu_tx_hold_bonus(tx, zp->z_id);
	if (may_delete_now) {
		toobig =
		    zp->z_phys->zp_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT;
		/* if the file is too big, only hold_free a token amount */
		dmu_tx_hold_free(tx, zp->z_id, 0,
		    (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END));
	}

	/* are there any extended attributes? */
	if ((xattr_obj = zp->z_phys->zp_xattr) != 0) {
		/* XXX - do we need this if we are deleting? */
		dmu_tx_hold_bonus(tx, xattr_obj);
	}

	/* are there any additional acls */
	if ((acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj) != 0 &&
	    may_delete_now)
		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);

	/* charge as an update -- would be nice not to charge at all */
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);

	error = dmu_tx_assign(tx, zfsvfs->z_assign);
	if (error) {
		zfs_dirent_unlock(dl, 0);
		VN_RELE(vp);
		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Remove the directory entry.
	 */
	error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked);
	if (error) {
		dmu_tx_commit(tx);
		goto out;
	}

	if (0 && unlinked) {
		KASSERT(0);	/* NetBSD: must not happen now */
		delete_now = may_delete_now && !toobig &&
		    vp->v_count == 1 && !vn_has_cached_data(vp) &&
		    zp->z_phys->zp_xattr == xattr_obj &&
		    zp->z_phys->zp_acl.z_acl_extern_obj == acl_obj;
	}

	if (delete_now) {
		KASSERT(0);	/* NetBSD: must not happen now */
		if (zp->z_phys->zp_xattr) {
			error = zfs_zget(zfsvfs, zp->z_phys->zp_xattr, &xzp);
			ASSERT3U(error, ==, 0);
			ASSERT3U(xzp->z_phys->zp_links, ==, 2);
			dmu_buf_will_dirty(xzp->z_dbuf, tx);
			mutex_enter(&xzp->z_lock);
			xzp->z_unlinked = 1;
			xzp->z_phys->zp_links = 0;
			mutex_exit(&xzp->z_lock);
			zfs_unlinked_add(xzp, tx);
			zp->z_phys->zp_xattr = 0; /* probably unnecessary */
		}
		mutex_enter(&zp->z_lock);
		ASSERT3U(vp->v_count, ==, 0);
		mutex_exit(&zp->z_lock);
		zfs_znode_delete(zp, tx);
	} else if (unlinked) {
		zfs_unlinked_add(zp, tx);
	}

	txtype = TX_REMOVE;
	if (flags & FIGNORECASE)
		txtype |= TX_CI;
	zfs_log_remove(zilog, tx, txtype, dzp, name);

	dmu_tx_commit(tx);
out:
	zfs_dirent_unlock(dl, 0);

	if (!delete_now) {
		VN_RELE(vp);
	} else if (xzp) {
		/* this rele is delayed to prevent nesting transactions */
		VN_RELE(ZTOV(xzp));
	}

	ZFS_EXIT(zfsvfs);
	return (error);
}
/*
 * Create a new directory and insert it into dvp using the name
 * provided.  Return a pointer to the inserted directory.
 *
 *	IN:	dvp	- vnode of directory to add subdir to.
 *		dirname	- name of new directory.
 *		vap	- attributes of new directory.
 *		cr	- credentials of caller.
 *		ct	- caller context
 *		vsecp	- ACL to be set
 *
 *	OUT:	vpp	- vnode of created directory.
 *
 *	RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 *	 vp - ctime|mtime|atime updated
 */
/*ARGSUSED*/
static int
zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr,
    caller_context_t *ct, int flags, vsecattr_t *vsecp)
{
	znode_t		*zp, *dzp = VTOZ(dvp);
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	zfs_dirlock_t	*dl;
	uint64_t	txtype;
	dmu_tx_t	*tx;
	int		error;
	zfs_acl_t	*aclp = NULL;
	zfs_fuid_info_t	*fuidp = NULL;
	int		zf = ZNEW;

	ASSERT(vap->va_type == VDIR);

	/*
	 * If we have an ephemeral id, ACL, or XVATTR then
	 * make sure file system is at proper version
	 */
	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (vsecp || (vap->va_mask & AT_XVATTR) || IS_EPHEMERAL(crgetuid(cr))||
	    IS_EPHEMERAL(crgetgid(cr))))
		return (EINVAL);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	if (dzp->z_phys->zp_flags & ZFS_XATTR) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	if (zfsvfs->z_utf8 && u8_validate(dirname,
	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (EILSEQ);
	}
	if (flags & FIGNORECASE)
		zf |= ZCILOOK;

	if (vap->va_mask & AT_XVATTR)
		if ((error = secpolicy_xvattr((xvattr_t *)vap,
		    crgetuid(cr), cr, vap->va_type)) != 0) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}

	/*
	 * First make sure the new directory doesn't exist.
	 */
top:
	*vpp = NULL;

	if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf,
	    NULL, NULL)) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) {
		zfs_dirent_unlock(dl, 0);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (vsecp && aclp == NULL) {
		error = zfs_vsec_2_aclp(zfsvfs, vap->va_type, vsecp, &aclp);
		if (error) {
			zfs_dirent_unlock(dl, 0);
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}
	/*
	 * Add a new entry to the directory.
	 */
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	if ((aclp && aclp->z_has_fuids) || IS_EPHEMERAL(crgetuid(cr)) ||
	    IS_EPHEMERAL(crgetgid(cr))) {
		if (zfsvfs->z_fuid_obj == 0) {
			dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
			dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
			    FUID_SIZE_ESTIMATE(zfsvfs));
			dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
		} else {
			dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
			dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
			    FUID_SIZE_ESTIMATE(zfsvfs));
		}
	}
	if ((dzp->z_phys->zp_flags & ZFS_INHERIT_ACE) || aclp)
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
		    0, SPA_MAXBLOCKSIZE);
	error = dmu_tx_assign(tx, zfsvfs->z_assign);
	if (error) {
		zfs_dirent_unlock(dl, 0);
		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		if (aclp)
			zfs_acl_free(aclp);
		return (error);
	}

	/*
	 * Create new node.
	 */
	zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, aclp, &fuidp);

	if (aclp)
		zfs_acl_free(aclp);

	/*
	 * Now put new name in parent dir.
	 */
	(void) zfs_link_create(dl, zp, tx, ZNEW);

	*vpp = ZTOV(zp);

	txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap);
	if (flags & FIGNORECASE)
		txtype |= TX_CI;
	zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, fuidp, vap);

	if (fuidp)
		zfs_fuid_info_free(fuidp);
	dmu_tx_commit(tx);

	zfs_dirent_unlock(dl, 0);

	ZFS_EXIT(zfsvfs);
	return (0);
}
/*
 * Remove a directory subdir entry.  If the current working
 * directory is the same as the subdir to be removed, the
 * remove will fail.
 *
 *	IN:	dvp	- vnode of directory to remove from.
 *		name	- name of directory to be removed.
 *		cwd	- vnode of current working directory.
 *		cr	- credentials of caller.
 *		ct	- caller context
 *		flags	- case flags
 *
 *	RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 */
/*ARGSUSED*/
static int
zfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr,
    caller_context_t *ct, int flags)
{
	znode_t		*dzp = VTOZ(dvp);
	znode_t		*zp;
	vnode_t		*vp;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	int		error;
	int		zflg = ZEXISTS;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	if (flags & FIGNORECASE)
		zflg |= ZCILOOK;
top:
	zp = NULL;

	/*
	 * Attempt to lock directory; fail if entry doesn't exist.
	 */
	if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
	    NULL, NULL)) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	vp = ZTOV(zp);

	if (error = zfs_zaccess_delete(dzp, zp, cr)) {
		goto out;
	}

	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}

	if (vp == cwd) {
		error = EINVAL;
		goto out;
	}

	vnevent_rmdir(vp, dvp, name, ct);

	rw_enter(&zp->z_name_lock, RW_WRITER);

	/*
	 * Grab a lock on the parent pointer to make sure we play well
	 * with the treewalk and directory rename code.
	 */
	rw_enter(&zp->z_parent_lock, RW_WRITER);

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
	dmu_tx_hold_bonus(tx, zp->z_id);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	error = dmu_tx_assign(tx, zfsvfs->z_assign);
	if (error) {
		rw_exit(&zp->z_parent_lock);
		rw_exit(&zp->z_name_lock);
		zfs_dirent_unlock(dl, 0);
		VN_RELE(vp);
		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/* Purge cache entries, while still holding locks. */
	cache_purge(dvp);
	cache_purge(vp);

	error = zfs_link_destroy(dl, zp, tx, zflg, NULL);

	if (error == 0) {
		uint64_t txtype = TX_RMDIR;
		if (flags & FIGNORECASE)
			txtype |= TX_CI;
		zfs_log_remove(zilog, tx, txtype, dzp, name);
	}

	dmu_tx_commit(tx);

	rw_exit(&zp->z_parent_lock);
	rw_exit(&zp->z_name_lock);
out:
	zfs_dirent_unlock(dl, 0);

	VN_RELE(vp);

	ZFS_EXIT(zfsvfs);
	return (error);
}
/*
 * Read as many directory entries as will fit into the provided
 * buffer from the given directory cursor position (specified in
 * the uio structure).
 *
 *	IN:	vp	- vnode of directory to read.
 *		uio	- structure supplying read location, range info,
 *			  and return buffer.
 *		cr	- credentials of caller.
 *		ct	- caller context
 *		flags	- case flags
 *
 *	OUT:	uio	- updated offset and range, buffer filled.
 *		eofp	- set to true if end-of-file detected.
 *
 *	RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	vp - atime updated
 *
 * Note that the low 4 bits of the cookie returned by zap are always zero.
 * This allows us to use the low range for "special" directory entries:
 * We use 0 for '.', and 1 for '..'.  If this is the root of the filesystem,
 * we use the offset 2 for the '.zfs' directory.
 */
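/*
 * For example, at the root of a file system with a visible '.zfs':
 * offset 0 returns '.', 1 returns '..', 2 returns '.zfs', and every
 * later offset is a serialized ZAP cursor (always a multiple of 16,
 * since the low 4 bits are zero), so it can be handed back in
 * uio_loffset to resume the walk.
 */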
/* ARGSUSED */
static int
zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp,
    int *ncookies, u_long **cookies)
{
	znode_t		*zp = VTOZ(vp);
	iovec_t		*iovp;
	edirent_t	*eodp;
	dirent64_t	*odp;
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	objset_t	*os;
	caddr_t		outbuf;
	size_t		bufsize;
	zap_cursor_t	zc;
	zap_attribute_t	zap;
	uint_t		bytes_wanted;
	uint64_t	offset; /* must be unsigned; checks for < 1 */
	int		local_eof;
	int		outcount;
	int		error;
	uint8_t		prefetch;
	boolean_t	check_sysattrs;
	uint8_t		type;
	int		ncooks = 0;
	u_long		*cooks = NULL;
	int		flags = 0;

	dprintf("zfs_readdir called\n");

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/*
	 * If we are not given an eof variable,
	 * use a local one.
	 */
	if (eofp == NULL)
		eofp = &local_eof;

	/*
	 * Check for valid iov_len.
	 */
	if (uio->uio_iov->iov_len <= 0) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/*
	 * Quit if directory has been removed (posix)
	 */
	if ((*eofp = zp->z_unlinked) != 0) {
		ZFS_EXIT(zfsvfs);
		return (0);
	}

	error = 0;
	os = zfsvfs->z_os;
	offset = uio->uio_loffset;
	prefetch = zp->z_zn_prefetch;

	/*
	 * Initialize the iterator cursor.
	 */
	if (offset <= 3) {
		/*
		 * Start iteration from the beginning of the directory.
		 */
		zap_cursor_init(&zc, os, zp->z_id);
	} else {
		/*
		 * The offset is a serialized cursor.
		 */
		zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
	}

	/*
	 * Get space to change directory entries into fs independent format.
	 */
	iovp = uio->uio_iov;
	bytes_wanted = iovp->iov_len;
	if (!VMSPACE_IS_KERNEL_P(uio->uio_vmspace) || uio->uio_iovcnt != 1) {
		bufsize = bytes_wanted;
		outbuf = kmem_alloc(bufsize, KM_SLEEP);
		memset(outbuf, 0, bufsize);
		odp = (struct dirent64 *)outbuf;
	} else {
		bufsize = bytes_wanted;
		odp = (struct dirent64 *)iovp->iov_base;
	}
	eodp = (struct edirent *)odp;

	if (ncookies != NULL) {
		/*
		 * Minimum entry size is dirent size and 1 byte for a file name.
		 */
		ncooks = uio->uio_resid / _DIRENT_MINSIZE(odp);
		//    sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1);
		cooks = kmem_alloc(ncooks * sizeof(u_long), KM_SLEEP);
		memset(cooks, 0, ncooks * sizeof(u_long));
		*cookies = cooks;
		*ncookies = ncooks;
	}

	/*
	 * If this VFS supports the system attribute view interface; and
	 * we're looking at an extended attribute directory; and we care
	 * about normalization conflicts on this vfs; then we must check
	 * for normalization conflicts with the sysattr name space.
	 */
	check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) &&
	    (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm &&
	    (flags & V_RDDIR_ENTFLAGS);

	/*
	 * Transform to file-system independent format
	 */
	outcount = 0;
	while (outcount < bytes_wanted) {
		ino64_t objnum;
		ushort_t reclen;
		off64_t *next;

		/*
		 * Special case `.', `..', and `.zfs'.
		 */
		if (offset == 0) {
			(void) strcpy(zap.za_name, ".");
			zap.za_normalization_conflict = 0;
			objnum = zp->z_id;
			type = DT_DIR;
		} else if (offset == 1) {
			(void) strcpy(zap.za_name, "..");
			zap.za_normalization_conflict = 0;
			objnum = zp->z_phys->zp_parent;
			type = DT_DIR;
		} else if (offset == 2 && zfs_show_ctldir(zp)) {
			(void) strcpy(zap.za_name, ZFS_CTLDIR_NAME);
			zap.za_normalization_conflict = 0;
			objnum = ZFSCTL_INO_ROOT;
			type = DT_DIR;
		} else {
			/*
			 * Grab next entry.
			 */
			if (error = zap_cursor_retrieve(&zc, &zap)) {
				if ((*eofp = (error == ENOENT)) != 0)
					break;
				else
					goto update;
			}

			if (zap.za_integer_length != 8 ||
			    zap.za_num_integers != 1) {
				cmn_err(CE_WARN, "zap_readdir: bad directory "
				    "entry, obj = %lld, offset = %lld\n",
				    (u_longlong_t)zp->z_id,
				    (u_longlong_t)offset);
				error = ENXIO;
				goto update;
			}

			objnum = ZFS_DIRENT_OBJ(zap.za_first_integer);
			/*
			 * MacOS X can extract the object type here such as:
			 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer);
			 */
			type = ZFS_DIRENT_TYPE(zap.za_first_integer);

			if (check_sysattrs && !zap.za_normalization_conflict) {
#ifdef TODO
				zap.za_normalization_conflict =
				    xattr_sysattr_casechk(zap.za_name);
#else
				panic("%s:%u: TODO", __func__, __LINE__);
#endif
			}
		}

		if (flags & V_RDDIR_ENTFLAGS)
			reclen = EDIRENT_RECLEN(strlen(zap.za_name));
		else
			reclen = _DIRENT_RECLEN(odp, strlen(zap.za_name));

		/*
		 * Will this entry fit in the buffer?
		 */
		if (outcount + reclen > bufsize) {
			/*
			 * Did we manage to fit anything in the buffer?
			 */
			if (!outcount) {
				error = EINVAL;
				goto update;
			}
			break;
		}
		if (flags & V_RDDIR_ENTFLAGS) {
			/*
			 * Add extended flag entry:
			 */
			eodp->ed_ino = objnum;
			eodp->ed_reclen = reclen;
			/* NOTE: ed_off is the offset for the *next* entry */
			next = &(eodp->ed_off);
			eodp->ed_eflags = zap.za_normalization_conflict ?
			    ED_CASE_CONFLICT : 0;
			(void) strncpy(eodp->ed_name, zap.za_name,
			    EDIRENT_NAMELEN(reclen));
			eodp = (edirent_t *)((intptr_t)eodp + reclen);
		} else {
			/*
			 * Add normal entry:
			 */
			odp->d_ino = objnum;
			odp->d_reclen = reclen;
			odp->d_namlen = strlen(zap.za_name);
			(void) strlcpy(odp->d_name, zap.za_name,
			    odp->d_namlen + 1);
			odp->d_type = type;
			odp = (dirent64_t *)((intptr_t)odp + reclen);
		}
		outcount += reclen;

		KASSERT(outcount <= bufsize);

		/* Prefetch znode */
		if (prefetch)
			dmu_prefetch(os, objnum, 0, 0);

		/*
		 * Move to the next entry, fill in the previous offset.
		 */
		if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
			zap_cursor_advance(&zc);
			offset = zap_cursor_serialize(&zc);
		} else {
			offset += 1;
		}

		if (cooks != NULL) {
			*cooks++ = offset;
			ncooks--;
			KASSERT(ncooks >= 0);
		}
	}
	zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */

	/* Subtract unused cookies */
	if (ncookies != NULL)
		*ncookies -= ncooks;

	if (VMSPACE_IS_KERNEL_P(uio->uio_vmspace) && uio->uio_iovcnt == 1) {
		iovp->iov_base += outcount;
		iovp->iov_len -= outcount;
		uio->uio_resid -= outcount;
	} else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) {
		/*
		 * Reset the pointer.
		 */
		offset = uio->uio_loffset;
	}

update:
	zap_cursor_fini(&zc);
	if (!VMSPACE_IS_KERNEL_P(uio->uio_vmspace) || uio->uio_iovcnt != 1)
		kmem_free(outbuf, bufsize);

	if (error == ENOENT)
		error = 0;

	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);

	uio->uio_loffset = offset;

	ZFS_EXIT(zfsvfs);

	if (error != 0 && cookies != NULL) {
		kmem_free(*cookies, ncooks * sizeof(u_long));
		*cookies = NULL;
		*ncookies = 0;
	}
	return (error);
}
ulong_t zfs_fsync_sync_cnt = 4;

static int
zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
{
	znode_t	*zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error;

	dprintf("zfs_fsync called vp %p -- zfsvfs %p\n", vp, zfsvfs);

	(void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	/*
	 * NetBSD: if the sync is from reclaim or from ioflush,
	 * push dirty atime now.  No need to lock: in the reclaim
	 * case, everything is single threaded and for ioflush this
	 * is a lazy writeback.
	 *
	 * XXXNETBSD: in the ioflush case, we don't want to push anything
	 * to disk immediately.  We just want to queue the update so it
	 * will happen "soon".  Check this is the case otherwise zfs will
	 */
	if (zp->z_atime_dirty && zp->z_unlinked == 0 &&
	    (syncflag & (FSYNC_RECLAIM | FSYNC_LAZY)) != 0) {
		dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);

		dmu_tx_hold_bonus(tx, zp->z_id);
		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error) {
			dmu_tx_abort(tx);
		} else {
			dmu_buf_will_dirty(zp->z_dbuf, tx);
			mutex_enter(&zp->z_lock);
			zp->z_atime_dirty = 0;
			mutex_exit(&zp->z_lock);
			dmu_tx_commit(tx);
		}
	}

	zil_commit(zfsvfs->z_log, zp->z_last_itx, zp->z_id);
	ZFS_EXIT(zfsvfs);
	return (0);
}
/*
 * Get the requested file attributes and place them in the provided
 * vattr structure.
 *
 *	IN:	vp	- vnode of file.
 *		vap	- va_mask identifies requested attributes.
 *			  If AT_XVATTR set, then optional attrs are requested
 *		flags	- ATTR_NOACLCHECK (CIFS server context)
 *		cr	- credentials of caller.
 *		ct	- caller context
 *
 *	OUT:	vap	- attribute values.
 *
 *	RETURN:	0 (always succeeds)
 */
/* ARGSUSED */
static int
zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	znode_t *zp = VTOZ(vp);
	znode_phys_t *pzp;
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int	error = 0;
	uint64_t links;
	uint32_t blksize;
	u_longlong_t nblocks;
	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
	xoptattr_t *xoap = NULL;
	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;

	dprintf("zfs_getattr called\n");

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);
	pzp = zp->z_phys;

	mutex_enter(&zp->z_lock);

	/*
	 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
	 * Also, if we are the owner don't bother, since owner should
	 * always be allowed to read basic attributes of file.
	 */
	if (!(pzp->zp_flags & ZFS_ACL_TRIVIAL) &&
	    (pzp->zp_uid != crgetuid(cr))) {
		if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
		    skipaclchk, cr)) {
			mutex_exit(&zp->z_lock);
			ZFS_EXIT(zfsvfs);
			return (error);
		}
	}

	/*
	 * Return all attributes.  It's cheaper to provide the answer
	 * than to determine whether we were asked the question.
	 */

	vap->va_type = IFTOVT(pzp->zp_mode);
	vap->va_mode = pzp->zp_mode & ~S_IFMT;
	zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);
	vap->va_nodeid = zp->z_id;
	if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp))
		links = pzp->zp_links + 1;
	else
		links = pzp->zp_links;
	vap->va_nlink = MIN(links, UINT32_MAX);	/* nlink_t limit! */
	vap->va_size = pzp->zp_size;
	vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
//	vap->va_fsid = 0;
	vap->va_rdev = zfs_cmpldev(pzp->zp_rdev);
	vap->va_seq = zp->z_seq;
	vap->va_flags = 0;	/* FreeBSD: Reset chflags(2) flags. */

	/*
	 * Add in any requested optional attributes and the create time.
	 * Also set the corresponding bits in the returned attribute bitmap.
	 */
	if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) {
		if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
			xoap->xoa_archive =
			    ((pzp->zp_flags & ZFS_ARCHIVE) != 0);
			XVA_SET_RTN(xvap, XAT_ARCHIVE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
			xoap->xoa_readonly =
			    ((pzp->zp_flags & ZFS_READONLY) != 0);
			XVA_SET_RTN(xvap, XAT_READONLY);
		}

		if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
			xoap->xoa_system =
			    ((pzp->zp_flags & ZFS_SYSTEM) != 0);
			XVA_SET_RTN(xvap, XAT_SYSTEM);
		}

		if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
			xoap->xoa_hidden =
			    ((pzp->zp_flags & ZFS_HIDDEN) != 0);
			XVA_SET_RTN(xvap, XAT_HIDDEN);
		}

		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
			xoap->xoa_nounlink =
			    ((pzp->zp_flags & ZFS_NOUNLINK) != 0);
			XVA_SET_RTN(xvap, XAT_NOUNLINK);
		}

		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
			xoap->xoa_immutable =
			    ((pzp->zp_flags & ZFS_IMMUTABLE) != 0);
			XVA_SET_RTN(xvap, XAT_IMMUTABLE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
			xoap->xoa_appendonly =
			    ((pzp->zp_flags & ZFS_APPENDONLY) != 0);
			XVA_SET_RTN(xvap, XAT_APPENDONLY);
		}

		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
			xoap->xoa_nodump =
			    ((pzp->zp_flags & ZFS_NODUMP) != 0);
			XVA_SET_RTN(xvap, XAT_NODUMP);
		}

		if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
			xoap->xoa_opaque =
			    ((pzp->zp_flags & ZFS_OPAQUE) != 0);
			XVA_SET_RTN(xvap, XAT_OPAQUE);
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
			xoap->xoa_av_quarantined =
			    ((pzp->zp_flags & ZFS_AV_QUARANTINED) != 0);
			XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
			xoap->xoa_av_modified =
			    ((pzp->zp_flags & ZFS_AV_MODIFIED) != 0);
			XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) &&
		    vp->v_type == VREG &&
		    (pzp->zp_flags & ZFS_BONUS_SCANSTAMP)) {
			size_t len;
			dmu_object_info_t doi;

			/*
			 * Only VREG files have anti-virus scanstamps, so we
			 * won't conflict with symlinks in the bonus buffer.
			 */
			dmu_object_info_from_db(zp->z_dbuf, &doi);
			len = sizeof (xoap->xoa_av_scanstamp) +
			    sizeof (znode_phys_t);
			if (len <= doi.doi_bonus_size) {
				/*
				 * pzp points to the start of the
				 * znode_phys_t. pzp + 1 points to the
				 * first byte after the znode_phys_t.
				 */
				(void) memcpy(xoap->xoa_av_scanstamp,
				    pzp + 1,
				    sizeof (xoap->xoa_av_scanstamp));
				XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
			ZFS_TIME_DECODE(&xoap->xoa_createtime, pzp->zp_crtime);
			XVA_SET_RTN(xvap, XAT_CREATETIME);
		}
	}

	ZFS_TIME_DECODE(&vap->va_atime, pzp->zp_atime);
	ZFS_TIME_DECODE(&vap->va_mtime, pzp->zp_mtime);
	ZFS_TIME_DECODE(&vap->va_ctime, pzp->zp_ctime);
	ZFS_TIME_DECODE(&vap->va_birthtime, pzp->zp_crtime);

	mutex_exit(&zp->z_lock);

	dmu_object_size_from_db(zp->z_dbuf, &blksize, &nblocks);
	vap->va_blksize = blksize;
	vap->va_bytes = nblocks << 9;	/* nblocks * 512 */

	if (zp->z_blksz == 0) {
		/*
		 * Block size hasn't been set; suggest maximal I/O transfers.
		 */
		vap->va_blksize = zfsvfs->z_max_blksz;
	}

	ZFS_EXIT(zfsvfs);
	return (0);
}
/*
 * Set the file attributes to the values contained in the
 * vattr structure.
 *
 *	IN:	vp	- vnode of file to be modified.
 *		vap	- new attribute values.
 *			  If AT_XVATTR set, then optional attrs are being set
 *		flags	- ATTR_UTIME set if non-default time values provided.
 *			- ATTR_NOACLCHECK (CIFS context only).
 *		cr	- credentials of caller.
 *		ct	- caller context
 *
 *	RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	vp - ctime updated, mtime updated if size changed.
 */
2584 zfs_setattr(vnode_t
*vp
, vattr_t
*vap
, int flags
, cred_t
*cr
,
2585 caller_context_t
*ct
)
2587 znode_t
*zp
= VTOZ(vp
);
2589 zfsvfs_t
*zfsvfs
= zp
->z_zfsvfs
;
2593 uint_t mask
= vap
->va_mask
;
2598 int need_policy
= FALSE
;
2600 zfs_fuid_info_t
*fuidp
= NULL
;
2601 xvattr_t
*xvap
= (xvattr_t
*)vap
; /* vap may be an xvattr_t * */
2603 zfs_acl_t
*aclp
= NULL
;
2604 boolean_t skipaclchk
= (flags
& ATTR_NOACLCHECK
) ? B_TRUE
: B_FALSE
;
2606 dprintf("zfs_setattr called\n");
2611 if (mask
& AT_NOSET
)
2618 zilog
= zfsvfs
->z_log
;
2621 * Make sure that if we have ephemeral uid/gid or xvattr specified
2622 * that file system is at proper version level
2625 if (zfsvfs
->z_use_fuids
== B_FALSE
&&
2626 (((mask
& AT_UID
) && IS_EPHEMERAL(vap
->va_uid
)) ||
2627 ((mask
& AT_GID
) && IS_EPHEMERAL(vap
->va_gid
)) ||
2628 (mask
& AT_XVATTR
))) {
2633 if (mask
& AT_SIZE
&& vp
->v_type
== VDIR
) {
2638 if (mask
& AT_SIZE
&& vp
->v_type
!= VREG
&& vp
->v_type
!= VFIFO
) {
2644 * If this is an xvattr_t, then get a pointer to the structure of
2645 * optional attributes. If this is NULL, then we have a vattr_t.
2647 xoap
= xva_getxoptattr(xvap
);
2650 * Immutable files can only alter immutable bit and atime
2652 if ((pzp
->zp_flags
& ZFS_IMMUTABLE
) &&
2653 ((mask
& (AT_SIZE
|AT_UID
|AT_GID
|AT_MTIME
|AT_MODE
)) ||
2654 ((mask
& AT_XVATTR
) && XVA_ISSET_REQ(xvap
, XAT_CREATETIME
)))) {
2659 if ((mask
& AT_SIZE
) && (pzp
->zp_flags
& ZFS_READONLY
)) {
2665 * Verify timestamps doesn't overflow 32 bits.
2666 * ZFS can handle large timestamps, but 32bit syscalls can't
2667 * handle times greater than 2039. This check should be removed
2668 * once large timestamps are fully supported.
2670 if (mask
& (AT_ATIME
| AT_MTIME
)) {
2671 if (((mask
& AT_ATIME
) && TIMESPEC_OVERFLOW(&vap
->va_atime
)) ||
2672 ((mask
& AT_MTIME
) && TIMESPEC_OVERFLOW(&vap
->va_mtime
))) {
2681 if (zfsvfs
->z_vfs
->vfs_flag
& VFS_RDONLY
) {
2687 * First validate permissions
2689 if (mask
& AT_SIZE
) {
2690 err
= zfs_zaccess(zp
, ACE_WRITE_DATA
, 0, skipaclchk
, cr
);
2696 * XXX - Note, we are not providing any open
2697 * mode flags here (like FNDELAY), so we may
2698 * block if there are locks present... this
2699 * should be addressed in openat().
2701 /* XXX - would it be OK to generate a log record here? */
2702 err
= zfs_freesp(zp
, vap
->va_size
, 0, 0, FALSE
);
2709 if (mask
& (AT_ATIME
|AT_MTIME
) ||
2710 ((mask
& AT_XVATTR
) && (XVA_ISSET_REQ(xvap
, XAT_HIDDEN
) ||
2711 XVA_ISSET_REQ(xvap
, XAT_READONLY
) ||
2712 XVA_ISSET_REQ(xvap
, XAT_ARCHIVE
) ||
2713 XVA_ISSET_REQ(xvap
, XAT_CREATETIME
) ||
2714 XVA_ISSET_REQ(xvap
, XAT_SYSTEM
))))
2715 need_policy
= zfs_zaccess(zp
, ACE_WRITE_ATTRIBUTES
, 0,
	if (mask & (AT_UID|AT_GID)) {
		int	idmask = (mask & (AT_UID|AT_GID));
		int	take_owner;
		int	take_group;

		/*
		 * NOTE: even if a new mode is being set,
		 * we may clear S_ISUID/S_ISGID bits.
		 */

		if (!(mask & AT_MODE))
			vap->va_mode = pzp->zp_mode;

		/*
		 * Take ownership or chgrp to group we are a member of
		 */

		take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr));
		take_group = (mask & AT_GID) &&
		    zfs_groupmember(zfsvfs, vap->va_gid, cr);

		/*
		 * If both AT_UID and AT_GID are set then take_owner and
		 * take_group must both be set in order to allow taking
		 * ownership.
		 *
		 * Otherwise, send the check through secpolicy_vnode_setattr()
		 */

		if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) ||
		    ((idmask == AT_UID) && take_owner) ||
		    ((idmask == AT_GID) && take_group)) {
			if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
			    skipaclchk, cr) == 0) {
				/*
				 * Remove setuid/setgid for non-privileged users
				 */
				secpolicy_setid_clear(vap, cr);
				trim_mask = (mask & (AT_UID|AT_GID));
			} else {
				need_policy = TRUE;
			}
		} else {
			need_policy = TRUE;
		}
	}
	mutex_enter(&zp->z_lock);
	oldva.va_mode = pzp->zp_mode;
	zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
	if (mask & AT_XVATTR) {
		if ((need_policy == FALSE) &&
		    (XVA_ISSET_REQ(xvap, XAT_APPENDONLY) &&
		    xoap->xoa_appendonly !=
		    ((pzp->zp_flags & ZFS_APPENDONLY) != 0)) ||
		    (XVA_ISSET_REQ(xvap, XAT_NOUNLINK) &&
		    xoap->xoa_nounlink !=
		    ((pzp->zp_flags & ZFS_NOUNLINK) != 0)) ||
		    (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE) &&
		    xoap->xoa_immutable !=
		    ((pzp->zp_flags & ZFS_IMMUTABLE) != 0)) ||
		    (XVA_ISSET_REQ(xvap, XAT_NODUMP) &&
		    xoap->xoa_nodump !=
		    ((pzp->zp_flags & ZFS_NODUMP) != 0)) ||
		    (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED) &&
		    xoap->xoa_av_modified !=
		    ((pzp->zp_flags & ZFS_AV_MODIFIED) != 0)) ||
		    ((XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED) &&
		    ((vp->v_type != VREG && xoap->xoa_av_quarantined) ||
		    xoap->xoa_av_quarantined !=
		    ((pzp->zp_flags & ZFS_AV_QUARANTINED) != 0)))) ||
		    (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) ||
		    (XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
			need_policy = TRUE;
		}
	}

	mutex_exit(&zp->z_lock);
	if (mask & AT_MODE) {
		if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) {
			err = secpolicy_setid_setsticky_clear(vp, vap,
			    &oldva, cr);
			if (err) {
				ZFS_EXIT(zfsvfs);
				return (err);
			}
			trim_mask |= AT_MODE;
		} else {
			need_policy = TRUE;
		}
	}
	if (need_policy) {
		/*
		 * If trim_mask is set then take ownership
		 * has been granted or write_acl is present and user
		 * has the ability to modify mode.  In that case remove
		 * UID|GID and or MODE from mask so that
		 * secpolicy_vnode_setattr() doesn't revoke it.
		 */
		if (trim_mask) {
			saved_mask = vap->va_mask;
			vap->va_mask &= ~trim_mask;
		}
		err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
		    (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
		if (err) {
			ZFS_EXIT(zfsvfs);
			return (err);
		}

		if (trim_mask)
			vap->va_mask |= saved_mask;
	}

	/*
	 * secpolicy_vnode_setattr, or take ownership may have
	 * changed va_mask
	 */
	mask = vap->va_mask;
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_bonus(tx, zp->z_id);
	if (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ||
	    ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid))) {
		if (zfsvfs->z_fuid_obj == 0) {
			dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
			dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
			    FUID_SIZE_ESTIMATE(zfsvfs));
			dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
		} else {
			dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
			dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
			    FUID_SIZE_ESTIMATE(zfsvfs));
		}
	}
	if (mask & AT_MODE) {
		uint64_t pmode = pzp->zp_mode;

		new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);

		if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) {
			dmu_tx_abort(tx);
			ZFS_EXIT(zfsvfs);
			return (err);
		}
		if (pzp->zp_acl.z_acl_extern_obj) {
			/* Are we upgrading ACL from old V0 format to new V1 */
			if (zfsvfs->z_version <= ZPL_VERSION_FUID &&
			    pzp->zp_acl.z_acl_version ==
			    ZFS_ACL_VERSION_INITIAL) {
				dmu_tx_hold_free(tx,
				    pzp->zp_acl.z_acl_extern_obj, 0,
				    DMU_OBJECT_END);
				dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
				    0, aclp->z_acl_bytes);
			} else {
				dmu_tx_hold_write(tx,
				    pzp->zp_acl.z_acl_extern_obj, 0,
				    aclp->z_acl_bytes);
			}
		} else if (aclp->z_acl_bytes > ZFS_ACE_SPACE) {
			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
			    0, aclp->z_acl_bytes);
		}
	}
	if ((mask & (AT_UID | AT_GID)) && pzp->zp_xattr != 0) {
		err = zfs_zget(zp->z_zfsvfs, pzp->zp_xattr, &attrzp);
		if (err) {
			dmu_tx_abort(tx);
			ZFS_EXIT(zfsvfs);
			return (err);
		}
		dmu_tx_hold_bonus(tx, attrzp->z_id);
	}

	err = dmu_tx_assign(tx, zfsvfs->z_assign);
	if (err) {
		if (attrzp)
			VN_RELE(ZTOV(attrzp));
		if (err == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (err);
	}

	dmu_buf_will_dirty(zp->z_dbuf, tx);
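/*
 * A minimal sketch (not part of the build) of the tx-assign retry pattern
 * used above: under TXG_NOWAIT, dmu_tx_assign() never blocks, so on
 * ERESTART the caller waits for the next txg outside the tx, aborts, and
 * restarts the whole operation from its "top" label.  Names here are
 * illustrative only:
 *
 *	top:
 *		tx = dmu_tx_create(zfsvfs->z_os);
 *		dmu_tx_hold_bonus(tx, zp->z_id);
 *		err = dmu_tx_assign(tx, zfsvfs->z_assign);
 *		if (err) {
 *			if (err == ERESTART &&
 *			    zfsvfs->z_assign == TXG_NOWAIT) {
 *				dmu_tx_wait(tx);
 *				dmu_tx_abort(tx);
 *				goto top;
 *			}
 *			dmu_tx_abort(tx);
 *			return (err);
 *		}
 */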
	/*
	 * Set each attribute requested.
	 * We group settings according to the locks they need to acquire.
	 *
	 * Note: you cannot set ctime directly, although it will be
	 * updated as a side-effect of calling this function.
	 */
	mutex_enter(&zp->z_lock);
	if (mask & AT_MODE) {
		mutex_enter(&zp->z_acl_lock);
		zp->z_phys->zp_mode = new_mode;
		err = zfs_aclset_common(zp, aclp, cr, &fuidp, tx);
		ASSERT3U(err, ==, 0);
		mutex_exit(&zp->z_acl_lock);
	}

	if (attrzp)
		mutex_enter(&attrzp->z_lock);
	if (mask & AT_UID) {
		pzp->zp_uid = zfs_fuid_create(zfsvfs,
		    vap->va_uid, cr, ZFS_OWNER, tx, &fuidp);
		if (attrzp) {
			attrzp->z_phys->zp_uid = zfs_fuid_create(zfsvfs,
			    vap->va_uid, cr, ZFS_OWNER, tx, &fuidp);
		}
	}

	if (mask & AT_GID) {
		pzp->zp_gid = zfs_fuid_create(zfsvfs, vap->va_gid,
		    cr, ZFS_GROUP, tx, &fuidp);
		if (attrzp) {
			attrzp->z_phys->zp_gid = zfs_fuid_create(zfsvfs,
			    vap->va_gid, cr, ZFS_GROUP, tx, &fuidp);
		}
	}

	if (attrzp)
		mutex_exit(&attrzp->z_lock);
	if (mask & AT_ATIME)
		ZFS_TIME_ENCODE(&vap->va_atime, pzp->zp_atime);

	if (mask & AT_MTIME)
		ZFS_TIME_ENCODE(&vap->va_mtime, pzp->zp_mtime);

	/* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */
	if (mask & AT_SIZE)
		zfs_time_stamper_locked(zp, CONTENT_MODIFIED, tx);
	else if (mask != 0)
		zfs_time_stamper_locked(zp, STATE_CHANGED, tx);
	/*
	 * Do this after setting timestamps to prevent timestamp
	 * update from toggling bit
	 */
	if (xoap && (mask & AT_XVATTR)) {
		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
			size_t len;
			dmu_object_info_t doi;

			ASSERT(vp->v_type == VREG);

			/* Grow the bonus buffer if necessary. */
			dmu_object_info_from_db(zp->z_dbuf, &doi);
			len = sizeof (xoap->xoa_av_scanstamp) +
			    sizeof (znode_phys_t);
			if (len > doi.doi_bonus_size)
				VERIFY(dmu_set_bonus(zp->z_dbuf, len, tx) == 0);
		}
		zfs_xvattr_set(zp, xvap);
	}
	if (mask != 0)
		zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);

	if (fuidp)
		zfs_fuid_info_free(fuidp);
	mutex_exit(&zp->z_lock);

	if (attrzp)
		VN_RELE(ZTOV(attrzp));

	dmu_tx_commit(tx);

	ZFS_EXIT(zfsvfs);
	return (err);
}
typedef struct zfs_zlock {
	krwlock_t	*zl_rwlock;	/* lock we acquired */
	znode_t		*zl_znode;	/* znode we held */
	struct zfs_zlock *zl_next;	/* next in list */
} zfs_zlock_t;
/*
 * Drop locks and release vnodes that were held by zfs_rename_lock().
 */
static void
zfs_rename_unlock(zfs_zlock_t **zlpp)
{
	zfs_zlock_t *zl;

	while ((zl = *zlpp) != NULL) {
		if (zl->zl_znode != NULL)
			VN_RELE(ZTOV(zl->zl_znode));
		rw_exit(zl->zl_rwlock);
		*zlpp = zl->zl_next;
		kmem_free(zl, sizeof (*zl));
	}
}
/*
 * Search back through the directory tree, using the ".." entries.
 * Lock each directory in the chain to prevent concurrent renames.
 * Fail any attempt to move a directory into one of its own descendants.
 * XXX - z_parent_lock can overlap with map or grow locks
 */
static int
zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp)
{
	zfs_zlock_t	*zl;
	znode_t		*zp = tdzp;
	uint64_t	rootid = zp->z_zfsvfs->z_root;
	uint64_t	*oidp = &zp->z_id;
	krwlock_t	*rwlp = &szp->z_parent_lock;
	krw_t		rw = RW_WRITER;

	/*
	 * First pass write-locks szp and compares to zp->z_id.
	 * Later passes read-lock zp and compare to zp->z_parent.
	 */
	do {
		if (!rw_tryenter(rwlp, rw)) {
			/*
			 * Another thread is renaming in this path.
			 * Note that if we are a WRITER, we don't have any
			 * parent_locks held yet.
			 */
			if (rw == RW_READER && zp->z_id > szp->z_id) {
				/*
				 * Drop our locks and restart
				 */
				zfs_rename_unlock(&zl);
				*zlpp = NULL;
				zp = tdzp;
				oidp = &zp->z_id;
				rwlp = &szp->z_parent_lock;
				rw = RW_WRITER;
				continue;
			} else {
				/*
				 * Wait for other thread to drop its locks
				 */
				rw_enter(rwlp, rw);
			}
		}

		zl = kmem_alloc(sizeof (*zl), KM_SLEEP);
		zl->zl_rwlock = rwlp;
		zl->zl_znode = NULL;
		zl->zl_next = *zlpp;
		*zlpp = zl;

		if (*oidp == szp->z_id)		/* We're a descendant of szp */
			return (EINVAL);

		if (*oidp == rootid)		/* We've hit the top */
			return (0);

		if (rw == RW_READER) {		/* i.e. not the first pass */
			int error = zfs_zget(zp->z_zfsvfs, *oidp, &zp);
			if (error)
				return (error);
			zl->zl_znode = zp;
		}
		oidp = &zp->z_phys->zp_parent;
		rwlp = &zp->z_parent_lock;
		rw = RW_READER;

	} while (zp->z_id != sdzp->z_id);

	return (0);
}
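/*
 * Illustrative walk (a sketch, not build code): for a request like
 * "mv /a/b /a/b/c/d", zfs_rename_lock() starts at the target directory
 * and follows zp_parent ("..") upward, locking each directory it visits:
 *
 *	d -> c -> b		*oidp == szp->z_id, so EINVAL (descendant)
 *	d -> c -> ... -> root	*oidp == rootid, the rename is safe
 *
 * Each visited directory is pushed onto the *zlpp list so that
 * zfs_rename_unlock() can unwind the chain in LIFO order.
 */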
/*
 * Move an entry from the provided source directory to the target
 * directory.  Change the entry name as indicated.
 *
 * IN:	sdvp	- Source directory containing the "old entry".
 *	snm	- Old entry name.
 *	tdvp	- Target directory to contain the "new entry".
 *	tnm	- New entry name.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	flags	- case flags
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	sdvp,tdvp - ctime|mtime updated
 */
/*
 * XXX NetBSD: There is a significant problem with dirent locking during a
 * rename of files within the same directory: zfs_dirent_lock would then be
 * called twice on the same lock, which panics a LOCKDEBUG kernel.  Locking
 * twice is not needed.  The proper solution is to add a new flag to
 * zfs_dirent_lock that disables the rw_enter in it.  Renaming files within
 * the same directory is considered broken on LOCKDEBUG kernels on NetBSD
 * for now.
 */
static int
zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr,
    caller_context_t *ct, int flags)
{
	znode_t		*tdzp, *szp, *tzp;
	znode_t		*sdzp = VTOZ(sdvp);
	zfsvfs_t	*zfsvfs = sdzp->z_zfsvfs;
	zilog_t		*zilog;
	vnode_t		*realvp;
	zfs_dirlock_t	*sdl, *tdl;
	dmu_tx_t	*tx;
	zfs_zlock_t	*zl;
	int		cmp, serr, terr;
	int		error = 0;
	int		zflg = 0;

	dprintf("zfs_rename called\n");

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(sdzp);
	zilog = zfsvfs->z_log;
	/*
	 * Make sure we have the real vp for the target directory.
	 */
	if (VOP_REALVP(tdvp, &realvp, ct) == 0)
		tdvp = realvp;

	if (tdvp->v_vfsp != sdvp->v_vfsp) {
		ZFS_EXIT(zfsvfs);
		return (EXDEV);
	}

	tdzp = VTOZ(tdvp);
	ZFS_VERIFY_ZP(tdzp);
	if (zfsvfs->z_utf8 && u8_validate(tnm,
	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (EILSEQ);
	}

	if (flags & FIGNORECASE)
		zflg |= ZCILOOK;
top:
	szp = NULL;
	tzp = NULL;
	zl = NULL;

	/*
	 * This is to prevent the creation of links into attribute space
	 * by renaming a linked file into/out of an attribute directory.
	 * See the comment in zfs_link() for why this is considered bad.
	 */
	if ((tdzp->z_phys->zp_flags & ZFS_XATTR) !=
	    (sdzp->z_phys->zp_flags & ZFS_XATTR)) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}
	/*
	 * Lock source and target directory entries.  To prevent deadlock,
	 * a lock ordering must be defined.  We lock the directory with
	 * the smallest object id first, or if it's a tie, the one with
	 * the lexically first name.
	 */
	if (sdzp->z_id < tdzp->z_id) {
		cmp = -1;
	} else if (sdzp->z_id > tdzp->z_id) {
		cmp = 1;
	} else {
		/*
		 * First compare the two name arguments without
		 * considering any case folding.
		 */
		int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER);

		cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error);
		ASSERT(error == 0 || !zfsvfs->z_utf8);
		if (cmp == 0) {
			/*
			 * POSIX: "If the old argument and the new argument
			 * both refer to links to the same existing file,
			 * the rename() function shall return successfully
			 * and perform no other action."
			 */
			ZFS_EXIT(zfsvfs);
			return (0);
		}
		/*
		 * If the file system is case-folding, then we may
		 * have some more checking to do.  A case-folding file
		 * system is either supporting mixed case sensitivity
		 * access or is completely case-insensitive.  Note
		 * that the file system is always case preserving.
		 *
		 * In mixed sensitivity mode case sensitive behavior
		 * is the default.  FIGNORECASE must be used to
		 * explicitly request case insensitive behavior.
		 *
		 * If the source and target names provided differ only
		 * by case (e.g., a request to rename 'tim' to 'Tim'),
		 * we will treat this as a special case in the
		 * case-insensitive mode: as long as the source name
		 * is an exact match, we will allow this to proceed as
		 * a name-change request.
		 */
		if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
		    (zfsvfs->z_case == ZFS_CASE_MIXED &&
		    flags & FIGNORECASE)) &&
		    u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST,
		    &error) == 0) {
			/*
			 * case preserving rename request, require exact
			 * name matches
			 */
			zflg |= ZCIEXACT;
			zflg &= ~ZCILOOK;
		}
	}
	if (cmp < 0) {
		serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp,
		    ZEXISTS | zflg, NULL, NULL);
		if ((serr == 0) && (sdzp == tdzp)) {
			/*
			 * If renaming within the one directory we must
			 * be careful not to recursively acquire locks.
			 */
		}
		terr = zfs_dirent_lock(&tdl,
		    tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL);
	} else {
		terr = zfs_dirent_lock(&tdl,
		    tdzp, tnm, &tzp, zflg, NULL, NULL);
		if ((terr == 0) && (sdzp == tdzp)) {
			/*
			 * If renaming within the one directory we must
			 * be careful not to recursively acquire locks.
			 */
		}
		serr = zfs_dirent_lock(&sdl,
		    sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg,
		    NULL, NULL);
	}
	if (serr) {
		/*
		 * Source entry invalid or not there.
		 */
		if (!terr) {
			zfs_dirent_unlock(tdl, 0);
			if (tzp)
				VN_RELE(ZTOV(tzp));
		}
		if (strcmp(snm, ".") == 0 || strcmp(snm, "..") == 0)
			serr = EINVAL;
		ZFS_EXIT(zfsvfs);
		return (serr);
	}
	if (terr) {
		zfs_dirent_unlock(sdl, 0);
		VN_RELE(ZTOV(szp));
		if (strcmp(tnm, "..") == 0)
			terr = EINVAL;
		ZFS_EXIT(zfsvfs);
		return (terr);
	}
	/*
	 * Must have write access at the source to remove the old entry
	 * and write access at the target to create the new entry.
	 * Note that if target and source are the same, this can be
	 * done in a single check.
	 */
	if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr))
		goto out;

	if (ZTOV(szp)->v_type == VDIR) {
		/*
		 * Check to make sure rename is valid.
		 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
		 */
		if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl))
			goto out;
	}
	/*
	 * Does target exist?
	 */
	if (tzp) {
		/*
		 * Source and target must be the same type.
		 */
		if (ZTOV(szp)->v_type == VDIR) {
			if (ZTOV(tzp)->v_type != VDIR) {
				error = ENOTDIR;
				goto out;
			}
		} else {
			if (ZTOV(tzp)->v_type == VDIR) {
				error = EISDIR;
				goto out;
			}
		}
		/*
		 * POSIX dictates that when the source and target
		 * entries refer to the same file object, rename
		 * must do nothing and exit without error.
		 */
		if (szp->z_id == tzp->z_id) {
			error = 0;
			goto out;
		}
	}

	vnevent_rename_src(ZTOV(szp), sdvp, snm, ct);
	if (tzp)
		vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct);

	/*
	 * notify the target directory if it is not the same
	 * as source directory.
	 */
	if (tdvp != sdvp)
		vnevent_rename_dest_dir(tdvp, ct);
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_bonus(tx, szp->z_id);	/* nlink changes */
	dmu_tx_hold_bonus(tx, sdzp->z_id);	/* nlink changes */
	dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
	if (sdzp != tdzp)
		dmu_tx_hold_bonus(tx, tdzp->z_id);	/* nlink changes */
	if (tzp)
		dmu_tx_hold_bonus(tx, tzp->z_id);	/* parent changes */
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	error = dmu_tx_assign(tx, zfsvfs->z_assign);
	if (error) {
		if (zl != NULL)
			zfs_rename_unlock(&zl);
		zfs_dirent_unlock(sdl, zflg);
		zfs_dirent_unlock(tdl, 0);
		VN_RELE(ZTOV(szp));
		if (tzp)
			VN_RELE(ZTOV(tzp));
		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	if (tzp)	/* Attempt to remove the existing target */
		error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL);

	if (error == 0) {
		error = zfs_link_create(tdl, szp, tx, ZRENAMING);
		if (error == 0) {
			szp->z_phys->zp_flags |= ZFS_AV_MODIFIED;

			error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL);
			ASSERT(error == 0);

			zfs_log_rename(zilog, tx,
			    TX_RENAME | (flags & FIGNORECASE ? TX_CI : 0),
			    sdzp, sdl->dl_name, tdzp, tdl->dl_name, szp);

			/* Update path information for the target vnode */
			vn_renamepath(tdvp, ZTOV(szp), tnm, strlen(tnm));
		}
	}
	/* Purge cache entries, while still holding locks. */
	cache_purge(sdvp);
	cache_purge(tdvp);

	dmu_tx_commit(tx);
out:
	if (zl != NULL)
		zfs_rename_unlock(&zl);

	zfs_dirent_unlock(sdl, zflg);
	zfs_dirent_unlock(tdl, 0);

	VN_RELE(ZTOV(szp));
	if (tzp)
		VN_RELE(ZTOV(tzp));

	ZFS_EXIT(zfsvfs);

	return (error);
}
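/*
 * A sketch of the deadlock-avoidance rule used above (illustrative only):
 * two concurrent renames with swapped source/target directories would
 * deadlock if each locked its own source first.  Ordering by object id
 * makes both threads lock the same directory first:
 *
 *	if (sdzp->z_id < tdzp->z_id)
 *		lock sdzp's entry before tdzp's;
 *	else if (sdzp->z_id > tdzp->z_id)
 *		lock tdzp's entry before sdzp's;
 *	else
 *		break the tie on the lexically smaller name;
 */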
/*
 * Insert the indicated symbolic reference entry into the directory.
 *
 * IN:	dvp	- Directory to contain new symbolic link.
 *	link	- Name for new symlink entry.
 *	vap	- Attributes of new entry.
 *	target	- Target path of new symlink.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *	flags	- case flags
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 */
static int
zfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link,
    cred_t *cr)
{
	znode_t		*zp, *dzp = VTOZ(dvp);
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	int		len = strlen(link);
	int		error;
	int		zflg = ZNEW;
	int		flags = 0;
	zfs_fuid_info_t	*fuidp = NULL;

	ASSERT(vap->va_type == VLNK);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (EILSEQ);
	}
	if (flags & FIGNORECASE)
		zflg |= ZCILOOK;

	if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (len > MAXPATHLEN) {
		ZFS_EXIT(zfsvfs);
		return (ENAMETOOLONG);
	}
top:
	/*
	 * Attempt to lock directory; fail if entry already exists.
	 */
	error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL);
	if (error) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
	dmu_tx_hold_bonus(tx, dzp->z_id);
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
	if (dzp->z_phys->zp_flags & ZFS_INHERIT_ACE)
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, SPA_MAXBLOCKSIZE);
	if (IS_EPHEMERAL(crgetuid(cr)) || IS_EPHEMERAL(crgetgid(cr))) {
		if (zfsvfs->z_fuid_obj == 0) {
			dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
			dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
			    FUID_SIZE_ESTIMATE(zfsvfs));
			dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
		} else {
			dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
			dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
			    FUID_SIZE_ESTIMATE(zfsvfs));
		}
	}
	error = dmu_tx_assign(tx, zfsvfs->z_assign);
	if (error) {
		zfs_dirent_unlock(dl, 0);
		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	dmu_buf_will_dirty(dzp->z_dbuf, tx);

	/*
	 * Create a new object for the symlink.
	 * Put the link content into bonus buffer if it will fit;
	 * otherwise, store it just like any other file data.
	 */
	if (sizeof (znode_phys_t) + len <= dmu_bonus_max()) {
		zfs_mknode(dzp, vap, tx, cr, 0, &zp, len, NULL, &fuidp);
		if (len != 0)
			bcopy(link, zp->z_phys + 1, len);
	} else {
		dmu_buf_t *dbp;

		zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, NULL, &fuidp);
		/*
		 * Nothing can access the znode yet so no locking needed
		 * for growing the znode's blocksize.
		 */
		zfs_grow_blocksize(zp, len, tx);

		VERIFY(0 == dmu_buf_hold(zfsvfs->z_os,
		    zp->z_id, 0, FTAG, &dbp));
		dmu_buf_will_dirty(dbp, tx);

		ASSERT3U(len, <=, dbp->db_size);
		bcopy(link, dbp->db_data, len);
		dmu_buf_rele(dbp, FTAG);
	}
	zp->z_phys->zp_size = len;
	/*
	 * Insert the new object into the directory.
	 */
	(void) zfs_link_create(dl, zp, tx, ZNEW);
	if (error == 0) {
		uint64_t txtype = TX_SYMLINK;
		if (flags & FIGNORECASE)
			txtype |= TX_CI;
		zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
	}
	if (fuidp)
		zfs_fuid_info_free(fuidp);

	dmu_tx_commit(tx);

	zfs_dirent_unlock(dl, 0);

	*vpp = ZTOV(zp);

	ZFS_EXIT(zfsvfs);
	return (error);
}
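/*
 * Sketch of the symlink storage decision above (illustrative only):
 * a short target fits in the znode's bonus buffer, a long one gets a
 * regular data block:
 *
 *	if (sizeof (znode_phys_t) + len <= dmu_bonus_max())
 *		store the target in-bonus, right after the znode_phys_t;
 *	else
 *		grow the blocksize and write it as ordinary file data;
 *
 * zfs_readlink() below mirrors this test to decide where to read from.
 */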
/*
 * Return, in the buffer contained in the provided uio structure,
 * the symbolic path referred to by vp.
 *
 * IN:	vp	- vnode of symbolic link.
 *	uio	- structure to contain the link path.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *
 * OUT:	uio	- structure to contain the link path.
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	vp - atime updated
 */
static int
zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	size_t		bufsz;
	int		error;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);

	bufsz = (size_t)zp->z_phys->zp_size;
	if (bufsz + sizeof (znode_phys_t) <= zp->z_dbuf->db_size) {
		error = uiomove(zp->z_phys + 1,
		    MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio);
	} else {
		dmu_buf_t *dbp;

		error = dmu_buf_hold(zfsvfs->z_os, zp->z_id, 0, FTAG, &dbp);
		if (error) {
			ZFS_EXIT(zfsvfs);
			return (error);
		}
		error = uiomove(dbp->db_data,
		    MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio);
		dmu_buf_rele(dbp, FTAG);
	}

	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
	ZFS_EXIT(zfsvfs);
	return (error);
}
/*
 * Insert a new entry into directory tdvp referencing svp.
 *
 * IN:	tdvp	- Directory to contain new entry.
 *	svp	- vnode of new entry.
 *	name	- name of new entry.
 *	cr	- credentials of caller.
 *	ct	- caller context
 *
 * RETURN:	0 if success
 *		error code if failure
 *
 * Timestamps:
 *	tdvp - ctime|mtime updated
 *	 svp - ctime updated
 */
static int
zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr,
    caller_context_t *ct, int flags)
{
	znode_t		*dzp = VTOZ(tdvp);
	znode_t		*tzp, *szp;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	vnode_t		*realvp;
	int		error;
	int		zf = ZNEW;
	uid_t		owner;

	ASSERT(tdvp->v_type == VDIR);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	if (VOP_REALVP(svp, &realvp, ct) == 0)
		svp = realvp;

	if (svp->v_vfsp != tdvp->v_vfsp) {
		ZFS_EXIT(zfsvfs);
		return (EXDEV);
	}

	szp = VTOZ(svp);
	ZFS_VERIFY_ZP(szp);

	if (zfsvfs->z_utf8 && u8_validate(name,
	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (EILSEQ);
	}
	if (flags & FIGNORECASE)
		zf |= ZCILOOK;

	/*
	 * We do not support links between attributes and non-attributes
	 * because of the potential security risk of creating links
	 * into "normal" file space in order to circumvent restrictions
	 * imposed in attribute space.
	 */
	if ((szp->z_phys->zp_flags & ZFS_XATTR) !=
	    (dzp->z_phys->zp_flags & ZFS_XATTR)) {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/*
	 * POSIX dictates that we return EPERM here.
	 * Better choices include ENOTSUP or EISDIR.
	 */
	if (svp->v_type == VDIR) {
		ZFS_EXIT(zfsvfs);
		return (EPERM);
	}

	owner = zfs_fuid_map_id(zfsvfs, szp->z_phys->zp_uid, cr, ZFS_OWNER);
	if (owner != crgetuid(cr) &&
	    secpolicy_basic_link(cr) != 0) {
		ZFS_EXIT(zfsvfs);
		return (EPERM);
	}

	if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}
top:
	/*
	 * Attempt to lock directory; fail if entry already exists.
	 */
	error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL);
	if (error) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_bonus(tx, szp->z_id);
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
	error = dmu_tx_assign(tx, zfsvfs->z_assign);
	if (error) {
		zfs_dirent_unlock(dl, 0);
		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	error = zfs_link_create(dl, szp, tx, 0);
	if (error == 0) {
		uint64_t txtype = TX_LINK;
		if (flags & FIGNORECASE)
			txtype |= TX_CI;
		zfs_log_link(zilog, tx, txtype, dzp, szp, name);
	}

	dmu_tx_commit(tx);

	zfs_dirent_unlock(dl, 0);

	if (error == 0)
		vnevent_link(svp, ct);

	ZFS_EXIT(zfsvfs);
	return (error);
}
/* CTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid)); */
/* CTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid)); */
static int
zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
{
	znode_t		*zp = VTOZ(vp);
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	uint32_t	gen;
	uint64_t	object = zp->z_id;
	zfid_short_t	*zfid;
	int		size, i;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(zp);
	gen = (uint32_t)zp->z_gen;

	size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN;
	fidp->fid_len = size;

	zfid = (zfid_short_t *)fidp;

	zfid->zf_len = size;

	for (i = 0; i < sizeof (zfid->zf_object); i++)
		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));

	/* Must have a non-zero generation number to distinguish from .zfs */
	if (gen == 0)
		gen = 1;
	for (i = 0; i < sizeof (zfid->zf_gen); i++)
		zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));

	if (size == LONG_FID_LEN) {
		uint64_t	objsetid = dmu_objset_id(zfsvfs->z_os);
		zfid_long_t	*zlfid;

		zlfid = (zfid_long_t *)fidp;

		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
			zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));

		/* XXX - this should be the generation number for the objset */
		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
			zlfid->zf_setgen[i] = 0;
	}

	ZFS_EXIT(zfsvfs);
	return (0);
}
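/*
 * Illustrative decoder (a sketch, not build code) for the little-endian
 * byte arrays produced above; a consumer such as an NFS server would
 * recover the object number like this:
 *
 *	uint64_t object = 0;
 *	for (i = 0; i < sizeof (zfid->zf_object); i++)
 *		object |= (uint64_t)zfid->zf_object[i] << (8 * i);
 */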
static int
zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
    caller_context_t *ct)
{
	znode_t		*zp, *xzp;
	zfsvfs_t	*zfsvfs;
	zfs_dirlock_t	*dl;
	int		error;

	switch (cmd) {
	case _PC_FILESIZEBITS:
		*valp = 64;
		return (0);

	case _PC_XATTR_EXISTS:
		zp = VTOZ(vp);
		zfsvfs = zp->z_zfsvfs;
		ZFS_ENTER(zfsvfs);
		ZFS_VERIFY_ZP(zp);
		*valp = 0;
		error = zfs_dirent_lock(&dl, zp, "", &xzp,
		    ZXATTR | ZEXISTS | ZSHARED, NULL, NULL);
		if (error == 0) {
			zfs_dirent_unlock(dl, 0);
			if (!zfs_dirempty(xzp))
				*valp = 1;
			VN_RELE(ZTOV(xzp));
		} else if (error == ENOENT) {
			/*
			 * If there aren't extended attributes, it's the
			 * same as having zero of them.
			 */
			error = 0;
		}
		ZFS_EXIT(zfsvfs);
		return (error);

	case _PC_ACL_EXTENDED:
		*valp = 0;	/* TODO */
		return (0);

	case _PC_MIN_HOLE_SIZE:
		*valp = (int)SPA_MINBLOCKSIZE;
		return (0);

	default:
		return (EOPNOTSUPP);
	}
}
static int
zfs_netbsd_open(struct vop_open_args *ap)
{
	vnode_t	*vp = ap->a_vp;
	znode_t	*zp = VTOZ(vp);
	int	error;

	error = zfs_open(&vp, ap->a_mode, ap->a_cred, NULL);

	return (error);
}

static int
zfs_netbsd_close(struct vop_close_args *ap)
{

	return (zfs_close(ap->a_vp, ap->a_fflag, 0, 0, ap->a_cred, NULL));
}
*ap
)
3915 return (zfs_ioctl(ap
->a_vp
, ap
->a_command
, (intptr_t)ap
->a_data
,
3916 ap
->a_fflag
, ap
->a_cred
, NULL
, NULL
));
3921 zfs_netbsd_read(struct vop_read_args
*ap
)
3924 return (zfs_read(ap
->a_vp
, ap
->a_uio
, ap
->a_ioflag
, ap
->a_cred
, NULL
));
3928 zfs_netbsd_write(struct vop_write_args
*ap
)
3931 return (zfs_write(ap
->a_vp
, ap
->a_uio
, ap
->a_ioflag
, ap
->a_cred
, NULL
));
static int
zfs_netbsd_access(struct vop_access_args *ap)
{

	/*
	 * ZFS itself only knows about VREAD, VWRITE and VEXEC; the rest
	 * we have to handle by calling vaccess().
	 */
	if ((ap->a_mode & ~(VREAD|VWRITE|VEXEC)) != 0) {
		vnode_t *vp = ap->a_vp;
		znode_t *zp = VTOZ(vp);
		znode_phys_t *zphys = zp->z_phys;

		return (vaccess(vp->v_type, zphys->zp_mode, zphys->zp_uid,
		    zphys->zp_gid, ap->a_mode, ap->a_cred));
	}

	return (zfs_access(ap->a_vp, ap->a_mode, 0, ap->a_cred, NULL));
}
static int
zfs_netbsd_lookup(struct vop_lookup_args *ap)
{
	struct componentname *cnp = ap->a_cnp;
	char nm[NAME_MAX + 1];
	int err;

	ASSERT(cnp->cn_namelen < sizeof(nm));
	strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm)));

	err = zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop,
	    cnp->cn_cred, 0);

	return (err);
}
static int
zfs_netbsd_create(struct vop_create_args *ap)
{
	struct componentname *cnp = ap->a_cnp;
	vattr_t *vap = ap->a_vap;
	int mode;

	ASSERT(cnp->cn_flags & SAVENAME);

	vattr_init_mask(vap);
	mode = vap->va_mode & ALLPERMS;

	return (zfs_create(ap->a_dvp, (char *)cnp->cn_nameptr, vap, !EXCL, mode,
	    ap->a_vpp, cnp->cn_cred));
}
static int
zfs_netbsd_remove(struct vop_remove_args *ap)
{

	ASSERT(ap->a_cnp->cn_flags & SAVENAME);

	return (zfs_remove(ap->a_dvp, (char *)ap->a_cnp->cn_nameptr,
	    ap->a_cnp->cn_cred, NULL, 0));
}
static int
zfs_netbsd_mkdir(struct vop_mkdir_args *ap)
{
	vattr_t *vap = ap->a_vap;

	ASSERT(ap->a_cnp->cn_flags & SAVENAME);

	vattr_init_mask(vap);

	return (zfs_mkdir(ap->a_dvp, (char *)ap->a_cnp->cn_nameptr, vap, ap->a_vpp,
	    ap->a_cnp->cn_cred, NULL, 0, NULL));
}
static int
zfs_netbsd_rmdir(struct vop_rmdir_args *ap)
{
	struct componentname *cnp = ap->a_cnp;

	ASSERT(cnp->cn_flags & SAVENAME);

	return (zfs_rmdir(ap->a_dvp, (char *)cnp->cn_nameptr, NULL, cnp->cn_cred, NULL, 0));
}
static int
zfs_netbsd_readdir(struct vop_readdir_args *ap)
{

	return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag,
	    ap->a_ncookies, (u_long **)ap->a_cookies));
}

static int
zfs_netbsd_fsync(struct vop_fsync_args *ap)
{

	return (zfs_fsync(ap->a_vp, ap->a_flags, ap->a_cred, NULL));
}
static int
zfs_netbsd_getattr(struct vop_getattr_args *ap)
{
	vattr_t *vap = ap->a_vap;
	xvattr_t xvap;
	u_long fflags = 0;
	int error;

	xva_init(&xvap);
	xvap.xva_vattr = *vap;
	xvap.xva_vattr.va_mask |= AT_XVATTR;

	/* Convert chflags into ZFS-type flags. */
	/* XXX: what about SF_SETTABLE? */
	XVA_SET_REQ(&xvap, XAT_IMMUTABLE);
	XVA_SET_REQ(&xvap, XAT_APPENDONLY);
	XVA_SET_REQ(&xvap, XAT_NOUNLINK);
	XVA_SET_REQ(&xvap, XAT_NODUMP);
	error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL);
	if (error != 0)
		return (error);

	/* Convert ZFS xattr into chflags. */
#define	FLAG_CHECK(fflag, xflag, xfield)	do {			\
	if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0)		\
		fflags |= (fflag);					\
} while (0)
	FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE,
	    xvap.xva_xoptattrs.xoa_immutable);
	FLAG_CHECK(SF_APPEND, XAT_APPENDONLY,
	    xvap.xva_xoptattrs.xoa_appendonly);
	FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK,
	    xvap.xva_xoptattrs.xoa_nounlink);
	FLAG_CHECK(UF_NODUMP, XAT_NODUMP,
	    xvap.xva_xoptattrs.xoa_nodump);
#undef	FLAG_CHECK
	*vap = xvap.xva_vattr;
	vap->va_flags = fflags;
	return (0);
}
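/*
 * Example (illustrative only): a file with the ZFS immutable attribute
 * set comes back from zfs_getattr() with xoa_immutable != 0, so the
 * FLAG_CHECK expansion above yields va_flags |= SF_IMMUTABLE, which is
 * what chflags(2)/stat(2) consumers expect on NetBSD.
 */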
static int
zfs_netbsd_setattr(struct vop_setattr_args *ap)
{
	vnode_t	*vp = ap->a_vp;
	vattr_t	*vap = ap->a_vap;
	cred_t	*cred = ap->a_cred;
	xvattr_t xvap;
	u_long fflags;
	uint64_t zflags;
	int error;

	vattr_init_mask(vap);
	vap->va_mask &= ~AT_NOSET;

	xva_init(&xvap);
	xvap.xva_vattr = *vap;

	zflags = VTOZ(vp)->z_phys->zp_flags;

	if (vap->va_flags != VNOVAL) {
		fflags = vap->va_flags;
		if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_NODUMP)) != 0)
			return (EOPNOTSUPP);
		/*
		 * Callers may only modify the file flags on objects they
		 * have VADMIN rights for.
		 */
		if ((error = VOP_ACCESS(vp, VWRITE, cred)) != 0)
			return (error);
		/*
		 * Unprivileged processes are not permitted to unset system
		 * flags, or modify flags if any system flags are set.
		 * Privileged non-jail processes may not modify system flags
		 * if securelevel > 0 and any existing system flags are set.
		 * Privileged jail processes behave like privileged non-jail
		 * processes if the security.jail.chflags_allowed sysctl is
		 * non-zero; otherwise, they behave like unprivileged
		 * processes.
		 */
		if (kauth_authorize_system(cred, KAUTH_SYSTEM_CHSYSFLAGS, 0,
		    NULL, NULL, NULL) != 0) {
			if (zflags &
			    (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) {
				return (EPERM);
			}
			if (fflags &
			    (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) {
				return (EPERM);
			}
		}

#define	FLAG_CHANGE(fflag, zflag, xflag, xfield)	do {		\
	if (((fflags & (fflag)) && !(zflags & (zflag))) ||		\
	    ((zflags & (zflag)) && !(fflags & (fflag)))) {		\
		XVA_SET_REQ(&xvap, (xflag));				\
		(xfield) = ((fflags & (fflag)) != 0);			\
	}								\
} while (0)
		/* Convert chflags into ZFS-type flags. */
		/* XXX: what about SF_SETTABLE? */
		FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE,
		    xvap.xva_xoptattrs.xoa_immutable);
		FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY,
		    xvap.xva_xoptattrs.xoa_appendonly);
		FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK,
		    xvap.xva_xoptattrs.xoa_nounlink);
		FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP,
		    xvap.xva_xoptattrs.xoa_nodump);
#undef	FLAG_CHANGE
	}
	return (zfs_setattr(vp, (vattr_t *)&xvap, 0, cred, NULL));
}
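/*
 * Sketch of what FLAG_CHANGE computes (illustrative only): it requests an
 * xvattr update only where the caller's fflags and the on-disk zflags
 * disagree, i.e. the symmetric difference:
 *
 *	want = (fflags & SF_IMMUTABLE) != 0;
 *	have = (zflags & ZFS_IMMUTABLE) != 0;
 *	if (want != have) {
 *		XVA_SET_REQ(&xvap, XAT_IMMUTABLE);
 *		xvap.xva_xoptattrs.xoa_immutable = want;
 *	}
 */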
static int
zfs_netbsd_rename(ap)
	struct vop_rename_args  /* {
		struct vnode *a_fdvp;
		struct vnode *a_fvp;
		struct componentname *a_fcnp;
		struct vnode *a_tdvp;
		struct vnode *a_tvp;
		struct componentname *a_tcnp;
	} */ *ap;
{
	vnode_t	*fdvp = ap->a_fdvp;
	vnode_t	*fvp = ap->a_fvp;
	vnode_t	*tdvp = ap->a_tdvp;
	vnode_t	*tvp = ap->a_tvp;
	int error;

	ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART));
	ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART));

	error = zfs_rename(fdvp, (char *)ap->a_fcnp->cn_nameptr, tdvp,
	    (char *)ap->a_tcnp->cn_nameptr, ap->a_fcnp->cn_cred, NULL, 0);

	/* Release the vnodes the VOP protocol handed to us. */
	VN_RELE(fdvp);
	VN_RELE(fvp);
	VN_RELE(tdvp);
	if (tvp != NULL)
		VN_RELE(tvp);

	return (error);
}
static int
zfs_netbsd_symlink(struct vop_symlink_args *ap)
{
	struct componentname *cnp = ap->a_cnp;
	vattr_t *vap = ap->a_vap;

	ASSERT(cnp->cn_flags & SAVENAME);

	vap->va_type = VLNK;	/* NetBSD: the syscall only sets va_mode. */
	vattr_init_mask(vap);

	return (zfs_symlink(ap->a_dvp, ap->a_vpp, (char *)cnp->cn_nameptr, vap,
	    ap->a_target, cnp->cn_cred));
}

static int
zfs_netbsd_readlink(struct vop_readlink_args *ap)
{

	return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL));
}
static int
zfs_netbsd_link(struct vop_link_args *ap)
{
	struct componentname *cnp = ap->a_cnp;

	ASSERT(cnp->cn_flags & SAVENAME);

	return (zfs_link(ap->a_dvp, ap->a_vp, (char *)cnp->cn_nameptr, cnp->cn_cred, NULL, 0));
}
static int
zfs_netbsd_inactive(struct vop_inactive_args *ap)
{
	vnode_t	*vp = ap->a_vp;
	znode_t	*zp = VTOZ(vp);

	/*
	 * NetBSD: nothing to do here, other than indicate if the
	 * vnode should be reclaimed.  No need to lock, if we race
	 * vrele() will call us again.
	 */
	*ap->a_recycle = (zp->z_unlinked != 0);

	VOP_UNLOCK(vp, 0);

	return (0);
}
/*
 * Destroy znode from taskq thread without ZFS_OBJ_MUTEX held.
 */
static void
zfs_reclaim_deferred(void *arg, int pending)
{
	znode_t *zp = arg;
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	uint64_t z_id = zp->z_id;

	/*
	 * Don't allow a zfs_zget() while we're trying to release this znode.
	 */
	ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id);

	zfs_zinactive(zp);

	/* No need to call ZFS_OBJ_HOLD_EXIT; zfs_zinactive did that for us. */
}
static int
zfs_netbsd_reclaim(struct vop_reclaim_args *ap)
{
	vnode_t	*vp = ap->a_vp;
	znode_t	*zp = VTOZ(vp);
	zfsvfs_t *zfsvfs;
	int locked;

	locked = 0;

	KASSERT(!vn_has_cached_data(vp));

	zfsvfs = zp->z_zfsvfs;

	mutex_enter(&zp->z_lock);

	// dprintf("destroying znode %p -- vnode %p -- zp->z_buf = %p\n", zp, ZTOV(zp), zp->z_dbuf);
	// rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
	genfs_node_destroy(vp);

	if (zp->z_dbuf == NULL) {
		/*
		 * The fs has been unmounted, or we did a
		 * suspend/resume and this file no longer exists.
		 */
		rw_exit(&zfsvfs->z_teardown_inactive_lock);
		mutex_exit(&zp->z_lock);
		vp->v_data = NULL;
		return (0);
	}
	mutex_exit(&zp->z_lock);

	mutex_enter(&zp->z_lock);
	if (!zp->z_unlinked) {
		/*
		 * XXX Hack: because ZFS_OBJ_MUTEX is held we can't call
		 * zfs_zinactive now; defer zfs_zinactive to another thread
		 * that doesn't hold this mutex.
		 */
		locked = MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)) ? 2 :
		    ZFS_OBJ_HOLD_TRYENTER(zfsvfs, zp->z_id);
		if (locked == 0) {
			/*
			 * Lock can't be obtained due to deadlock possibility,
			 * so defer znode destruction.
			 */
			taskq_dispatch(system_taskq, zfs_reclaim_deferred, zp, 0);
		} else {
			zfs_znode_dmu_fini(zp);
			/*
			 * Our LWP is holding the ZFS_OBJ_HELD mutex, but it
			 * was locked before zfs_zinactive was called,
			 * therefore we can't release it here.
			 */
			if (locked == 1)
				ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id);
		}
	}
	mutex_exit(&zp->z_lock);

	vp->v_data = NULL;	/* v_data must be NULL for a cleaned vnode. */
	return (0);
}
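/*
 * Reclaim decision, sketched (illustrative only):
 *
 *	ZFS_OBJ_MUTEX already held	-> finish inline, don't exit the mutex
 *	tryenter succeeds		-> finish inline, exit the mutex
 *	tryenter fails			-> taskq_dispatch() the work to
 *					   zfs_reclaim_deferred()
 *
 * The deferral exists only to avoid self-deadlock on ZFS_OBJ_MUTEX.
 */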
static int
zfs_netbsd_fid(struct vop_fid_args *ap)
{

	return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL));
}
static int
zfs_netbsd_pathconf(struct vop_pathconf_args *ap)
{
	ulong_t val;
	int error;

	error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->l_cred, NULL);
	if (error == 0)
		*ap->a_retval = val;
	else if (error == EOPNOTSUPP) {
		switch (ap->a_name) {
		case _PC_NAME_MAX:
			*ap->a_retval = NAME_MAX;
			error = 0;
			break;
		case _PC_PATH_MAX:
			*ap->a_retval = PATH_MAX;
			error = 0;
			break;
		case _PC_LINK_MAX:
			*ap->a_retval = LINK_MAX;
			error = 0;
			break;
		case _PC_MAX_CANON:
			*ap->a_retval = MAX_CANON;
			error = 0;
			break;
		case _PC_MAX_INPUT:
			*ap->a_retval = MAX_INPUT;
			error = 0;
			break;
		case _PC_PIPE_BUF:
			*ap->a_retval = PIPE_BUF;
			error = 0;
			break;
		case _PC_CHOWN_RESTRICTED:
			*ap->a_retval = 1;
			error = 0;
			break;
		case _PC_VDISABLE:
			*ap->a_retval = _POSIX_VDISABLE;
			error = 0;
			break;
		default:
			break;
		}
	}
	return (error);
}
static int
zfs_netbsd_lock(struct vop_lock_args *ap)
{
	struct vnode *vp = ap->a_vp;
	int flags = ap->a_flags;

	if ((flags & LK_INTERLOCK) != 0) {
		mutex_exit(&vp->v_interlock);
	}

	return (0);
}

static int
zfs_netbsd_unlock(void *v)
{

	return (0);
}
4392 struct vnode *vp = ((struct vop_getpages_args *)v)->a_vp;
4393 voff_t offset = ((struct vop_getpages_args *)v)->a_offset;
4394 struct vm_page **m = ((struct vop_getpages_args *)v)->a_m;
4395 int *count = ((struct vop_getpages_args *)v)->a_count;
4396 int centeridx = ((struct vop_getpages_args *)v)->a_centeridx;
4397 vm_prot_t access_type = ((struct vop_getpages_args *)v)->a_access_type;
4398 int advice = ((struct vop_getpages_args *)v)->a_advice;
4399 int flags = ((struct vop_getpages_args *)v)->a_flags;
4405 KASSERT(!vn_has_cached_data(vp));
4406 mutex_exit(&vp->v_interlock);
static int
zfs_netbsd_putpages(void *v)
{
	struct vnode *vp = ((struct vop_putpages_args *)v)->a_vp;
	voff_t offlo = ((struct vop_putpages_args *)v)->a_offlo;
	voff_t offhi = ((struct vop_putpages_args *)v)->a_offhi;
	int flags = ((struct vop_putpages_args *)v)->a_flags;
	znode_t *zp = VTOZ(vp);
	int error;

	dprintf("putpages entry %p -- zfsvfs %p\n", vp, zp->z_zfsvfs);
	error = genfs_putpages(v);
	dprintf("putpages exit %p -- zfsvfs %p\n", vp, zp->z_zfsvfs);

	return (error);
}
#define	zfs_netbsd_seek		genfs_seek
#define	zfs_netbsd_mmap		genfs_mmap
#define	zfs_netbsd_getpages	genfs_compat_getpages
//#define	zfs_netbsd_putpages	genfs_putpages
#define	zfs_netbsd_islocked	genfs_islocked
int (**zfs_vnodeop_p)(void *);
const struct vnodeopv_entry_desc zfs_vnodeop_entries[] = {
	{ &vop_default_desc,	vn_default_error },
	{ &vop_lookup_desc,	zfs_netbsd_lookup },
	{ &vop_create_desc,	zfs_netbsd_create },
	{ &vop_open_desc,	zfs_netbsd_open },
	{ &vop_close_desc,	zfs_netbsd_close },
	{ &vop_access_desc,	zfs_netbsd_access },
	{ &vop_getattr_desc,	zfs_netbsd_getattr },
	{ &vop_setattr_desc,	zfs_netbsd_setattr },
	{ &vop_read_desc,	zfs_netbsd_read },
	{ &vop_write_desc,	zfs_netbsd_write },
	{ &vop_ioctl_desc,	zfs_netbsd_ioctl },
	{ &vop_fsync_desc,	zfs_netbsd_fsync },
	{ &vop_remove_desc,	zfs_netbsd_remove },
	{ &vop_link_desc,	zfs_netbsd_link },
	{ &vop_lock_desc,	zfs_netbsd_lock },
	{ &vop_unlock_desc,	zfs_netbsd_unlock },
	{ &vop_rename_desc,	zfs_netbsd_rename },
	{ &vop_mkdir_desc,	zfs_netbsd_mkdir },
	{ &vop_rmdir_desc,	zfs_netbsd_rmdir },
	{ &vop_symlink_desc,	zfs_netbsd_symlink },
	{ &vop_readdir_desc,	zfs_netbsd_readdir },
	{ &vop_readlink_desc,	zfs_netbsd_readlink },
	{ &vop_inactive_desc,	zfs_netbsd_inactive },
	{ &vop_reclaim_desc,	zfs_netbsd_reclaim },
	{ &vop_pathconf_desc,	zfs_netbsd_pathconf },
	{ &vop_seek_desc,	zfs_netbsd_seek },
	{ &vop_getpages_desc,	zfs_netbsd_getpages },
	{ &vop_putpages_desc,	zfs_netbsd_putpages },
	{ &vop_mmap_desc,	zfs_netbsd_mmap },
	{ &vop_islocked_desc,	zfs_netbsd_islocked },
	{ &vop_advlock_desc,	zfs_netbsd_advlock },
	{ &vop_fcntl_desc,	zfs_netbsd_fcntl },
	{ &vop_bmap_desc,	zfs_netbsd_bmap },
	{ &vop_strategy_desc,	zfs_netbsd_strategy },
	{ &vop_print_desc,	zfs_netbsd_print },
	{ &vop_bwrite_desc,	zfs_netbsd_bwrite },
	{ NULL, NULL }
};

const struct vnodeopv_desc zfs_vnodeop_opv_desc =
	{ &zfs_vnodeop_p, zfs_vnodeop_entries };
struct vop_vector zfs_vnodeops;
struct vop_vector zfs_fifoops;

struct vop_vector zfs_vnodeops = {
	.vop_default =		&default_vnodeops,
	.vop_inactive =		zfs_netbsd_inactive,
	.vop_reclaim =		zfs_netbsd_reclaim,
	.vop_access =		zfs_netbsd_access,
	.vop_lookup =		zfs_netbsd_lookup,
	.vop_getattr =		zfs_netbsd_getattr,
	.vop_setattr =		zfs_netbsd_setattr,
	.vop_create =		zfs_netbsd_create,
	.vop_mknod =		zfs_netbsd_create,
	.vop_mkdir =		zfs_netbsd_mkdir,
	.vop_readdir =		zfs_netbsd_readdir,
	.vop_fsync =		zfs_netbsd_fsync,
	.vop_open =		zfs_netbsd_open,
	.vop_close =		zfs_netbsd_close,
	.vop_rmdir =		zfs_netbsd_rmdir,
	.vop_ioctl =		zfs_netbsd_ioctl,
	.vop_link =		zfs_netbsd_link,
	.vop_lock =		zfs_netbsd_lock,
	.vop_unlock =		zfs_netbsd_unlock,
	.vop_symlink =		zfs_netbsd_symlink,
	.vop_readlink =		zfs_netbsd_readlink,
	.vop_read =		zfs_netbsd_read,
	.vop_write =		zfs_netbsd_write,
	.vop_remove =		zfs_netbsd_remove,
	.vop_rename =		zfs_netbsd_rename,
	.vop_pathconf =		zfs_netbsd_pathconf,
	.vop_bmap =		VOP_EOPNOTSUPP,
	.vop_fid =		zfs_netbsd_fid,
	.vop_getextattr =	zfs_getextattr,
	.vop_deleteextattr =	zfs_deleteextattr,
	.vop_setextattr =	zfs_setextattr,
	.vop_listextattr =	zfs_listextattr,
	.vop_getacl =		zfs_netbsd_getacl,
	.vop_setacl =		zfs_netbsd_setacl,
	.vop_aclcheck =		zfs_netbsd_aclcheck,
};

struct vop_vector zfs_fifoops = {
	.vop_default =		&fifo_specops,
	.vop_fsync =		VOP_PANIC,
	.vop_access =		zfs_netbsd_access,
	.vop_getattr =		zfs_netbsd_getattr,
	.vop_inactive =		zfs_netbsd_inactive,
	.vop_read =		VOP_PANIC,
	.vop_reclaim =		zfs_netbsd_reclaim,
	.vop_setattr =		zfs_netbsd_setattr,
	.vop_write =		VOP_PANIC,
	.vop_fid =		zfs_netbsd_fid,
	.vop_getacl =		zfs_netbsd_getacl,
	.vop_setacl =		zfs_netbsd_setacl,
	.vop_aclcheck =		zfs_netbsd_aclcheck,
};