FreeBSD: Lock vnode in zfs_ioctl()
module/os/freebsd/zfs/zfs_vnops_os.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
 * Copyright (c) 2014 Integros [integros.com]
 * Copyright 2017 Nexenta Systems, Inc.
 */

/* Portions Copyright 2007 Jeremy Teo */
/* Portions Copyright 2010 Robert Milkowski */
#include <sys/param.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/resource.h>
#include <security/mac/mac_framework.h>
#include <sys/vfs.h>
#include <sys/endian.h>
#include <sys/vm.h>
#include <sys/vnode.h>
#include <sys/smr.h>
#include <sys/dirent.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/kmem.h>
#include <sys/taskq.h>
#include <sys/uio.h>
#include <sys/atomic.h>
#include <sys/namei.h>
#include <sys/mman.h>
#include <sys/cmn_err.h>
#include <sys/kdb.h>
#include <sys/sysproto.h>
#include <sys/errno.h>
#include <sys/unistd.h>
#include <sys/zfs_dir.h>
#include <sys/zfs_ioctl.h>
#include <sys/fs/zfs.h>
#include <sys/dmu.h>
#include <sys/dmu_objset.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/dbuf.h>
#include <sys/zap.h>
#include <sys/sa.h>
#include <sys/policy.h>
#include <sys/sunddi.h>
#include <sys/filio.h>
#include <sys/sid.h>
#include <sys/zfs_ctldir.h>
#include <sys/zfs_fuid.h>
#include <sys/zfs_quota.h>
#include <sys/zfs_sa.h>
#include <sys/zfs_rlock.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/sched.h>
#include <sys/acl.h>
#include <sys/vmmeter.h>
#include <vm/vm_param.h>
#include <sys/zil.h>
#include <sys/zfs_vnops.h>
#include <sys/module.h>
#include <sys/sysent.h>
#include <sys/dmu_impl.h>
#include <sys/brt.h>
#include <sys/zfeature.h>

#include <vm/vm_object.h>

#include <sys/extattr.h>
#include <sys/priv.h>

#ifndef VN_OPEN_INVFS
#define	VN_OPEN_INVFS	0x0
#endif

VFS_SMR_DECLARE;

#ifdef DEBUG_VFS_LOCKS
#define	VNCHECKREF(vp)	  \
	VNASSERT((vp)->v_holdcnt > 0 && (vp)->v_usecount > 0, vp,	\
	    ("%s: wrong ref counts", __func__));
#else
#define	VNCHECKREF(vp)
#endif

#if __FreeBSD_version >= 1400045
typedef uint64_t cookie_t;
#else
typedef ulong_t cookie_t;
#endif
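
/*
 * Directory-offset cookie type returned to the VFS by zfs_readdir() below;
 * FreeBSD 1400045 and newer use 64-bit cookies.
 */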

/*
 * Programming rules.
 *
 * Each vnode op performs some logical unit of work.  To do this, the ZPL must
 * properly lock its in-core state, create a DMU transaction, do the work,
 * record this work in the intent log (ZIL), commit the DMU transaction,
 * and wait for the intent log to commit if it is a synchronous operation.
 * Moreover, the vnode ops must work in both normal and log replay context.
 * The ordering of events is important to avoid deadlocks and references
 * to freed memory.  The example below illustrates the following Big Rules:
 *
 *  (1)	A check must be made in each zfs thread for a mounted file system.
 *	This is done avoiding races using zfs_enter(zfsvfs).
 *	A zfs_exit(zfsvfs) is needed before all returns.  Any znodes
 *	must be checked with zfs_verify_zp(zp).  Both of these macros
 *	can return EIO from the calling function.
 *
 *  (2)	VN_RELE() should always be the last thing except for zil_commit()
 *	(if necessary) and zfs_exit().  This is for 3 reasons:
 *	First, if it's the last reference, the vnode/znode
 *	can be freed, so the zp may point to freed memory.  Second, the last
 *	reference will call zfs_zinactive(), which may induce a lot of work --
 *	pushing cached pages (which acquires range locks) and syncing out
 *	cached atime changes.  Third, zfs_zinactive() may require a new tx,
 *	which could deadlock the system if you were already holding one.
 *	If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
 *
 *  (3)	All range locks must be grabbed before calling dmu_tx_assign(),
 *	as they can span dmu_tx_assign() calls.
 *
 *  (4)	If ZPL locks are held, pass TXG_NOWAIT as the second argument to
 *	dmu_tx_assign().  This is critical because we don't want to block
 *	while holding locks.
 *
 *	If no ZPL locks are held (aside from zfs_enter()), use TXG_WAIT.  This
 *	reduces lock contention and CPU usage when we must wait (note that if
 *	throughput is constrained by the storage, nearly every transaction
 *	must wait).
 *
 *	Note, in particular, that if a lock is sometimes acquired before
 *	the tx assigns, and sometimes after (e.g. z_lock), then failing
 *	to use a non-blocking assign can deadlock the system.  The scenario:
 *
 *	Thread A has grabbed a lock before calling dmu_tx_assign().
 *	Thread B is in an already-assigned tx, and blocks for this lock.
 *	Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
 *	forever, because the previous txg can't quiesce until B's tx commits.
 *
 *	If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
 *	then drop all locks, call dmu_tx_wait(), and try again.  On subsequent
 *	calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT,
 *	to indicate that this operation has already called dmu_tx_wait().
 *	This will ensure that we don't retry forever, waiting a short bit
 *	each time.
 *
 *  (5)	If the operation succeeded, generate the intent log entry for it
 *	before dropping locks.  This ensures that the ordering of events
 *	in the intent log matches the order in which they actually occurred.
 *	During ZIL replay the zfs_log_* functions will update the sequence
 *	number to indicate the zil transaction has replayed.
 *
 *  (6)	At the end of each vnode op, the DMU tx must always commit,
 *	regardless of whether there were any errors.
 *
 *  (7)	After dropping all locks, invoke zil_commit(zilog, foid)
 *	to ensure that synchronous semantics are provided when necessary.
 *
 * In general, this is how things should be ordered in each vnode op:
 *
 *	zfs_enter(zfsvfs);		// exit if unmounted
 * top:
 *	zfs_dirent_lookup(&dl, ...)	// lock directory entry (may VN_HOLD())
 *	rw_enter(...);			// grab any other locks you need
 *	tx = dmu_tx_create(...);	// get DMU tx
 *	dmu_tx_hold_*();		// hold each object you might modify
 *	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 *	if (error) {
 *		rw_exit(...);		// drop locks
 *		zfs_dirent_unlock(dl);	// unlock directory entry
 *		VN_RELE(...);		// release held vnodes
 *		if (error == ERESTART) {
 *			waited = B_TRUE;
 *			dmu_tx_wait(tx);
 *			dmu_tx_abort(tx);
 *			goto top;
 *		}
 *		dmu_tx_abort(tx);	// abort DMU tx
 *		zfs_exit(zfsvfs);	// finished in zfs
 *		return (error);		// really out of space
 *	}
 *	error = do_real_work();		// do whatever this VOP does
 *	if (error == 0)
 *		zfs_log_*(...);		// on success, make ZIL entry
 *	dmu_tx_commit(tx);		// commit DMU tx -- error or not
 *	rw_exit(...);			// drop locks
 *	zfs_dirent_unlock(dl);		// unlock directory entry
 *	VN_RELE(...);			// release held vnodes
 *	zil_commit(zilog, foid);	// synchronous when necessary
 *	zfs_exit(zfsvfs);		// finished in zfs
 *	return (error);			// done, report error
 */
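
/*
 * A minimal sketch (not lifted verbatim from any one function below) of the
 * simpler TXG_WAIT form of the pattern above, as used by ops such as
 * zfs_remove_() and zfs_rmdir_() in this file:
 *
 *	tx = dmu_tx_create(zfsvfs->z_os);
 *	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);	// hold what we modify
 *	error = dmu_tx_assign(tx, TXG_WAIT);
 *	if (error) {
 *		dmu_tx_abort(tx);
 *		zfs_exit(zfsvfs, FTAG);
 *		return (error);
 *	}
 *	// ... do the work, zfs_log_*() on success ...
 *	dmu_tx_commit(tx);
 */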

static int
zfs_open(vnode_t **vpp, int flag, cred_t *cr)
{
	(void) cr;
	znode_t	*zp = VTOZ(*vpp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error;

	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
		return (error);

	if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
	    ((flag & FAPPEND) == 0)) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EPERM));
	}

	/*
	 * Keep a count of the synchronous opens in the znode.  On first
	 * synchronous open we must convert all previous async transactions
	 * into sync to keep correct ordering.
	 */
	if (flag & O_SYNC) {
		if (atomic_inc_32_nv(&zp->z_sync_cnt) == 1)
			zil_async_to_sync(zfsvfs->z_log, zp->z_id);
	}

	zfs_exit(zfsvfs, FTAG);
	return (0);
}

static int
zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
{
	(void) offset, (void) cr;
	znode_t	*zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error;

	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
		return (error);

	/* Decrement the synchronous opens in the znode */
	if ((flag & O_SYNC) && (count == 1))
		atomic_dec_32(&zp->z_sync_cnt);

	zfs_exit(zfsvfs, FTAG);
	return (0);
}

static int
zfs_ioctl(vnode_t *vp, ulong_t com, intptr_t data, int flag, cred_t *cred,
    int *rvalp)
{
	(void) flag, (void) cred, (void) rvalp;
	loff_t off;
	int error;

	switch (com) {
	case _FIOFFS:
	{
		return (0);

		/*
		 * The following two ioctls are used by bfu.  Faking out,
		 * necessary to avoid bfu errors.
		 */
	}
	case _FIOGDIO:
	case _FIOSDIO:
	{
		return (0);
	}

	case F_SEEK_DATA:
	case F_SEEK_HOLE:
	{
		off = *(offset_t *)data;
		error = vn_lock(vp, LK_SHARED);
		if (error)
			return (error);
		/* offset parameter is in/out */
		error = zfs_holey(VTOZ(vp), com, &off);
		VOP_UNLOCK(vp);
		if (error)
			return (error);
		*(offset_t *)data = off;
		return (0);
	}
	}

	return (SET_ERROR(ENOTTY));
}
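
/*
 * Note on F_SEEK_DATA/F_SEEK_HOLE above: the vnode is not locked on entry to
 * zfs_ioctl(), so it is locked shared around zfs_holey() and unlocked before
 * returning.  Userland typically reaches this path via lseek(2); an
 * illustrative (hypothetical) call sequence:
 *
 *	off_t data = lseek(fd, 0, SEEK_DATA);		// first data at/after 0
 *	off_t hole = lseek(fd, data, SEEK_HOLE);	// next hole after that
 */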
309 static vm_page_t
310 page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
312 vm_object_t obj;
313 vm_page_t pp;
314 int64_t end;
	/*
	 * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE
	 * aligned boundaries, if the range is not aligned.  As a result a
	 * DEV_BSIZE subrange with partially dirty data may get marked as clean.
	 * It may happen that all DEV_BSIZE subranges are marked clean and thus
	 * the whole page would be considered clean despite having some
	 * dirty data.
	 * For this reason we should shrink the range to DEV_BSIZE aligned
	 * boundaries before calling vm_page_clear_dirty.
	 */
326 end = rounddown2(off + nbytes, DEV_BSIZE);
327 off = roundup2(off, DEV_BSIZE);
328 nbytes = end - off;
330 obj = vp->v_object;
331 vm_page_grab_valid_unlocked(&pp, obj, OFF_TO_IDX(start),
332 VM_ALLOC_NOCREAT | VM_ALLOC_SBUSY | VM_ALLOC_NORMAL |
333 VM_ALLOC_IGN_SBUSY);
334 if (pp != NULL) {
335 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
336 vm_object_pip_add(obj, 1);
337 pmap_remove_write(pp);
338 if (nbytes != 0)
339 vm_page_clear_dirty(pp, off, nbytes);
341 return (pp);
344 static void
345 page_unbusy(vm_page_t pp)
348 vm_page_sunbusy(pp);
349 vm_object_pip_wakeup(pp->object);
352 static vm_page_t
353 page_hold(vnode_t *vp, int64_t start)
355 vm_object_t obj;
356 vm_page_t m;
358 obj = vp->v_object;
359 vm_page_grab_valid_unlocked(&m, obj, OFF_TO_IDX(start),
360 VM_ALLOC_NOCREAT | VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY |
361 VM_ALLOC_NOBUSY);
362 return (m);
365 static void
366 page_unhold(vm_page_t pp)
368 vm_page_unwire(pp, PQ_ACTIVE);
372 * When a file is memory mapped, we must keep the IO data synchronized
373 * between the DMU cache and the memory mapped pages. What this means:
375 * On Write: If we find a memory mapped page, we write to *both*
376 * the page and the dmu buffer.
378 void
379 update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
381 vm_object_t obj;
382 struct sf_buf *sf;
383 vnode_t *vp = ZTOV(zp);
384 caddr_t va;
385 int off;
387 ASSERT3P(vp->v_mount, !=, NULL);
388 obj = vp->v_object;
389 ASSERT3P(obj, !=, NULL);
391 off = start & PAGEOFFSET;
392 vm_object_pip_add(obj, 1);
393 for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
394 vm_page_t pp;
395 int nbytes = imin(PAGESIZE - off, len);
397 if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
398 va = zfs_map_page(pp, &sf);
399 (void) dmu_read(os, zp->z_id, start + off, nbytes,
400 va + off, DMU_READ_PREFETCH);
401 zfs_unmap_page(sf);
402 page_unbusy(pp);
404 len -= nbytes;
405 off = 0;
407 vm_object_pip_wakeup(obj);
411 * Read with UIO_NOCOPY flag means that sendfile(2) requests
412 * ZFS to populate a range of page cache pages with data.
414 * NOTE: this function could be optimized to pre-allocate
415 * all pages in advance, drain exclusive busy on all of them,
416 * map them into contiguous KVA region and populate them
417 * in one single dmu_read() call.
420 mappedread_sf(znode_t *zp, int nbytes, zfs_uio_t *uio)
422 vnode_t *vp = ZTOV(zp);
423 objset_t *os = zp->z_zfsvfs->z_os;
424 struct sf_buf *sf;
425 vm_object_t obj;
426 vm_page_t pp;
427 int64_t start;
428 caddr_t va;
429 int len = nbytes;
430 int error = 0;
432 ASSERT3U(zfs_uio_segflg(uio), ==, UIO_NOCOPY);
433 ASSERT3P(vp->v_mount, !=, NULL);
434 obj = vp->v_object;
435 ASSERT3P(obj, !=, NULL);
436 ASSERT0(zfs_uio_offset(uio) & PAGEOFFSET);
438 for (start = zfs_uio_offset(uio); len > 0; start += PAGESIZE) {
439 int bytes = MIN(PAGESIZE, len);
441 pp = vm_page_grab_unlocked(obj, OFF_TO_IDX(start),
442 VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY);
443 if (vm_page_none_valid(pp)) {
444 va = zfs_map_page(pp, &sf);
445 error = dmu_read(os, zp->z_id, start, bytes, va,
446 DMU_READ_PREFETCH);
447 if (bytes != PAGESIZE && error == 0)
448 memset(va + bytes, 0, PAGESIZE - bytes);
449 zfs_unmap_page(sf);
450 if (error == 0) {
451 vm_page_valid(pp);
452 vm_page_activate(pp);
453 vm_page_sunbusy(pp);
454 } else {
455 zfs_vmobject_wlock(obj);
456 if (!vm_page_wired(pp) && pp->valid == 0 &&
457 vm_page_busy_tryupgrade(pp))
458 vm_page_free(pp);
459 else {
460 vm_page_deactivate_noreuse(pp);
461 vm_page_sunbusy(pp);
463 zfs_vmobject_wunlock(obj);
465 } else {
466 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
467 vm_page_sunbusy(pp);
469 if (error)
470 break;
471 zfs_uio_advance(uio, bytes);
472 len -= bytes;
474 return (error);
478 * When a file is memory mapped, we must keep the IO data synchronized
479 * between the DMU cache and the memory mapped pages. What this means:
481 * On Read: We "read" preferentially from memory mapped pages,
482 * else we default from the dmu buffer.
484 * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
485 * the file is memory mapped.
488 mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
490 vnode_t *vp = ZTOV(zp);
491 vm_object_t obj;
492 int64_t start;
493 int len = nbytes;
494 int off;
495 int error = 0;
497 ASSERT3P(vp->v_mount, !=, NULL);
498 obj = vp->v_object;
499 ASSERT3P(obj, !=, NULL);
501 start = zfs_uio_offset(uio);
502 off = start & PAGEOFFSET;
503 for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
504 vm_page_t pp;
505 uint64_t bytes = MIN(PAGESIZE - off, len);
507 if ((pp = page_hold(vp, start))) {
508 struct sf_buf *sf;
509 caddr_t va;
511 va = zfs_map_page(pp, &sf);
512 error = vn_io_fault_uiomove(va + off, bytes,
513 GET_UIO_STRUCT(uio));
514 zfs_unmap_page(sf);
515 page_unhold(pp);
516 } else {
517 error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
518 uio, bytes);
520 len -= bytes;
521 off = 0;
522 if (error)
523 break;
525 return (error);
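
/*
 * Convenience wrapper that writes a kernel buffer to a znode via
 * vn_rdwr(UIO_WRITE, ..., IO_SYNC, ...).  If presid is NULL, a short write
 * is reported as EIO; otherwise the residual byte count is returned in
 * *presid.
 */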
529 zfs_write_simple(znode_t *zp, const void *data, size_t len,
530 loff_t pos, size_t *presid)
532 int error = 0;
533 ssize_t resid;
535 error = vn_rdwr(UIO_WRITE, ZTOV(zp), __DECONST(void *, data), len, pos,
536 UIO_SYSSPACE, IO_SYNC, kcred, NOCRED, &resid, curthread);
538 if (error) {
539 return (SET_ERROR(error));
540 } else if (presid == NULL) {
541 if (resid != 0) {
542 error = SET_ERROR(EIO);
544 } else {
545 *presid = resid;
547 return (error);
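
/*
 * Release a znode's vnode reference asynchronously on the pool's zrele
 * taskq, so that a possible final release (see rule (2) in the Big Rules
 * comment above) does not run in the current context.
 */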
550 void
551 zfs_zrele_async(znode_t *zp)
553 vnode_t *vp = ZTOV(zp);
554 objset_t *os = ITOZSB(vp)->z_os;
556 VN_RELE_ASYNC(vp, dsl_pool_zrele_taskq(dmu_objset_pool(os)));
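
/*
 * Callback for vn_vget_ino_gen(), used by the ".." case in zfs_lookup_lock()
 * below: lock the vnode passed in arg and drop the reference if locking
 * fails.
 */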
559 static int
560 zfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
562 int error;
564 *vpp = arg;
565 error = vn_lock(*vpp, lkflags);
566 if (error != 0)
567 vrele(*vpp);
568 return (error);
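
/*
 * Lock the vnode produced by a lookup, handling the "." and ".." special
 * cases relative to the directory vnode dvp (normally already locked); see
 * the comments inside for the dot-dot relock caveats.
 */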
571 static int
572 zfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags)
574 znode_t *zdp = VTOZ(dvp);
575 zfsvfs_t *zfsvfs __unused = zdp->z_zfsvfs;
576 int error;
577 int ltype;
579 if (zfsvfs->z_replay == B_FALSE)
580 ASSERT_VOP_LOCKED(dvp, __func__);
582 if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
583 ASSERT3P(dvp, ==, vp);
584 vref(dvp);
585 ltype = lkflags & LK_TYPE_MASK;
586 if (ltype != VOP_ISLOCKED(dvp)) {
587 if (ltype == LK_EXCLUSIVE)
588 vn_lock(dvp, LK_UPGRADE | LK_RETRY);
589 else /* if (ltype == LK_SHARED) */
590 vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);
593 * Relock for the "." case could leave us with
594 * reclaimed vnode.
596 if (VN_IS_DOOMED(dvp)) {
597 vrele(dvp);
598 return (SET_ERROR(ENOENT));
601 return (0);
602 } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
604 * Note that in this case, dvp is the child vnode, and we
605 * are looking up the parent vnode - exactly reverse from
606 * normal operation. Unlocking dvp requires some rather
607 * tricky unlock/relock dance to prevent mp from being freed;
608 * use vn_vget_ino_gen() which takes care of all that.
610 * XXX Note that there is a time window when both vnodes are
611 * unlocked. It is possible, although highly unlikely, that
612 * during that window the parent-child relationship between
613 * the vnodes may change, for example, get reversed.
614 * In that case we would have a wrong lock order for the vnodes.
615 * All other filesystems seem to ignore this problem, so we
616 * do the same here.
617 * A potential solution could be implemented as follows:
618 * - using LK_NOWAIT when locking the second vnode and retrying
619 * if necessary
620 * - checking that the parent-child relationship still holds
621 * after locking both vnodes and retrying if it doesn't
623 error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp);
624 return (error);
625 } else {
626 error = vn_lock(vp, lkflags);
627 if (error != 0)
628 vrele(vp);
629 return (error);
634 * Lookup an entry in a directory, or an extended attribute directory.
635 * If it exists, return a held vnode reference for it.
637 * IN: dvp - vnode of directory to search.
638 * nm - name of entry to lookup.
639 * pnp - full pathname to lookup [UNUSED].
640 * flags - LOOKUP_XATTR set if looking for an attribute.
641 * rdir - root directory vnode [UNUSED].
642 * cr - credentials of caller.
643 * ct - caller context
645 * OUT: vpp - vnode of located entry, NULL if not found.
647 * RETURN: 0 on success, error code on failure.
649 * Timestamps:
650 * NA
652 static int
653 zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
654 struct componentname *cnp, int nameiop, cred_t *cr, int flags,
655 boolean_t cached)
657 znode_t *zdp = VTOZ(dvp);
658 znode_t *zp;
659 zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
660 seqc_t dvp_seqc;
661 int error = 0;
664 * Fast path lookup, however we must skip DNLC lookup
665 * for case folding or normalizing lookups because the
666 * DNLC code only stores the passed in name. This means
667 * creating 'a' and removing 'A' on a case insensitive
668 * file system would work, but DNLC still thinks 'a'
669 * exists and won't let you create it again on the next
670 * pass through fast path.
672 if (!(flags & LOOKUP_XATTR)) {
673 if (dvp->v_type != VDIR) {
674 return (SET_ERROR(ENOTDIR));
675 } else if (zdp->z_sa_hdl == NULL) {
676 return (SET_ERROR(EIO));
680 DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp,
681 const char *, nm);
683 if ((error = zfs_enter_verify_zp(zfsvfs, zdp, FTAG)) != 0)
684 return (error);
686 dvp_seqc = vn_seqc_read_notmodify(dvp);
688 *vpp = NULL;
690 if (flags & LOOKUP_XATTR) {
692 * If the xattr property is off, refuse the lookup request.
694 if (!(zfsvfs->z_flags & ZSB_XATTR)) {
695 zfs_exit(zfsvfs, FTAG);
696 return (SET_ERROR(EOPNOTSUPP));
		/*
		 * We don't allow recursive attributes.
		 * Maybe someday we will.
		 */
703 if (zdp->z_pflags & ZFS_XATTR) {
704 zfs_exit(zfsvfs, FTAG);
705 return (SET_ERROR(EINVAL));
708 if ((error = zfs_get_xattrdir(VTOZ(dvp), &zp, cr, flags))) {
709 zfs_exit(zfsvfs, FTAG);
710 return (error);
712 *vpp = ZTOV(zp);
715 * Do we have permission to get into attribute directory?
717 error = zfs_zaccess(zp, ACE_EXECUTE, 0, B_FALSE, cr, NULL);
718 if (error) {
719 vrele(ZTOV(zp));
722 zfs_exit(zfsvfs, FTAG);
723 return (error);
727 * Check accessibility of directory if we're not coming in via
728 * VOP_CACHEDLOOKUP.
730 if (!cached) {
731 #ifdef NOEXECCHECK
732 if ((cnp->cn_flags & NOEXECCHECK) != 0) {
733 cnp->cn_flags &= ~NOEXECCHECK;
734 } else
735 #endif
736 if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr,
737 NULL))) {
738 zfs_exit(zfsvfs, FTAG);
739 return (error);
743 if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
744 NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
745 zfs_exit(zfsvfs, FTAG);
746 return (SET_ERROR(EILSEQ));
751 * First handle the special cases.
753 if ((cnp->cn_flags & ISDOTDOT) != 0) {
755 * If we are a snapshot mounted under .zfs, return
756 * the vp for the snapshot directory.
758 if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) {
759 struct componentname cn;
760 vnode_t *zfsctl_vp;
761 int ltype;
763 zfs_exit(zfsvfs, FTAG);
764 ltype = VOP_ISLOCKED(dvp);
765 VOP_UNLOCK(dvp);
766 error = zfsctl_root(zfsvfs->z_parent, LK_SHARED,
767 &zfsctl_vp);
768 if (error == 0) {
769 cn.cn_nameptr = "snapshot";
770 cn.cn_namelen = strlen(cn.cn_nameptr);
771 cn.cn_nameiop = cnp->cn_nameiop;
772 cn.cn_flags = cnp->cn_flags & ~ISDOTDOT;
773 cn.cn_lkflags = cnp->cn_lkflags;
774 error = VOP_LOOKUP(zfsctl_vp, vpp, &cn);
775 vput(zfsctl_vp);
777 vn_lock(dvp, ltype | LK_RETRY);
778 return (error);
781 if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) {
782 zfs_exit(zfsvfs, FTAG);
783 if (zfsvfs->z_show_ctldir == ZFS_SNAPDIR_DISABLED)
784 return (SET_ERROR(ENOENT));
785 if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP)
786 return (SET_ERROR(ENOTSUP));
787 error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp);
788 return (error);
	/*
	 * The loop retries the lookup if the parent-child relationship
	 * changes during the dot-dot locking complexities.
	 */
795 for (;;) {
796 uint64_t parent;
798 error = zfs_dirlook(zdp, nm, &zp);
799 if (error == 0)
800 *vpp = ZTOV(zp);
802 zfs_exit(zfsvfs, FTAG);
803 if (error != 0)
804 break;
806 error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags);
807 if (error != 0) {
809 * If we've got a locking error, then the vnode
810 * got reclaimed because of a force unmount.
811 * We never enter doomed vnodes into the name cache.
813 *vpp = NULL;
814 return (error);
817 if ((cnp->cn_flags & ISDOTDOT) == 0)
818 break;
820 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) {
821 vput(ZTOV(zp));
822 *vpp = NULL;
823 return (error);
825 if (zdp->z_sa_hdl == NULL) {
826 error = SET_ERROR(EIO);
827 } else {
828 error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
829 &parent, sizeof (parent));
831 if (error != 0) {
832 zfs_exit(zfsvfs, FTAG);
833 vput(ZTOV(zp));
834 break;
836 if (zp->z_id == parent) {
837 zfs_exit(zfsvfs, FTAG);
838 break;
840 vput(ZTOV(zp));
843 if (error != 0)
844 *vpp = NULL;
846 /* Translate errors and add SAVENAME when needed. */
847 if (cnp->cn_flags & ISLASTCN) {
848 switch (nameiop) {
849 case CREATE:
850 case RENAME:
851 if (error == ENOENT) {
852 error = EJUSTRETURN;
853 #if __FreeBSD_version < 1400068
854 cnp->cn_flags |= SAVENAME;
855 #endif
856 break;
858 zfs_fallthrough;
859 case DELETE:
860 #if __FreeBSD_version < 1400068
861 if (error == 0)
862 cnp->cn_flags |= SAVENAME;
863 #endif
864 break;
868 if ((cnp->cn_flags & ISDOTDOT) != 0) {
870 * FIXME: zfs_lookup_lock relocks vnodes and does nothing to
871 * handle races. In particular different callers may end up
872 * with different vnodes and will try to add conflicting
873 * entries to the namecache.
875 * While finding different result may be acceptable in face
876 * of concurrent modification, adding conflicting entries
877 * trips over an assert in the namecache.
879 * Ultimately let an entry through once everything settles.
881 if (!vn_seqc_consistent(dvp, dvp_seqc)) {
882 cnp->cn_flags &= ~MAKEENTRY;
886 /* Insert name into cache (as non-existent) if appropriate. */
887 if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
888 error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0)
889 cache_enter(dvp, NULL, cnp);
891 /* Insert name into cache if appropriate. */
892 if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
893 error == 0 && (cnp->cn_flags & MAKEENTRY)) {
894 if (!(cnp->cn_flags & ISLASTCN) ||
895 (nameiop != DELETE && nameiop != RENAME)) {
896 cache_enter(dvp, *vpp, cnp);
900 return (error);
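
/*
 * Check a component name against the dataset's name-length limit:
 * ZAP_MAXNAMELEN unless the longname feature (z_longname) is enabled, in
 * which case ZAP_MAXNAMELEN_NEW applies.
 */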
903 static inline bool
904 is_nametoolong(zfsvfs_t *zfsvfs, const char *name)
906 size_t dlen = strlen(name);
907 return ((!zfsvfs->z_longname && dlen >= ZAP_MAXNAMELEN) ||
908 dlen >= ZAP_MAXNAMELEN_NEW);
912 * Attempt to create a new entry in a directory. If the entry
913 * already exists, truncate the file if permissible, else return
914 * an error. Return the vp of the created or trunc'd file.
916 * IN: dvp - vnode of directory to put new file entry in.
917 * name - name of new file entry.
918 * vap - attributes of new file.
919 * excl - flag indicating exclusive or non-exclusive mode.
920 * mode - mode to open file with.
921 * cr - credentials of caller.
922 * flag - large file flag [UNUSED].
923 * ct - caller context
924 * vsecp - ACL to be set
925 * mnt_ns - Unused on FreeBSD
927 * OUT: vpp - vnode of created or trunc'd entry.
929 * RETURN: 0 on success, error code on failure.
931 * Timestamps:
932 * dvp - ctime|mtime updated if new entry created
933 * vp - ctime|mtime always, atime if new
936 zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode,
937 znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp, zidmap_t *mnt_ns)
939 (void) excl, (void) mode, (void) flag;
940 znode_t *zp;
941 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
942 zilog_t *zilog;
943 objset_t *os;
944 dmu_tx_t *tx;
945 int error;
946 uid_t uid = crgetuid(cr);
947 gid_t gid = crgetgid(cr);
948 uint64_t projid = ZFS_DEFAULT_PROJID;
949 zfs_acl_ids_t acl_ids;
950 boolean_t fuid_dirtied;
951 uint64_t txtype;
952 #ifdef DEBUG_VFS_LOCKS
953 vnode_t *dvp = ZTOV(dzp);
954 #endif
956 if (is_nametoolong(zfsvfs, name))
957 return (SET_ERROR(ENAMETOOLONG));
960 * If we have an ephemeral id, ACL, or XVATTR then
961 * make sure file system is at proper version
963 if (zfsvfs->z_use_fuids == B_FALSE &&
964 (vsecp || (vap->va_mask & AT_XVATTR) ||
965 IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
966 return (SET_ERROR(EINVAL));
968 if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
969 return (error);
970 os = zfsvfs->z_os;
971 zilog = zfsvfs->z_log;
973 if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
974 NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
975 zfs_exit(zfsvfs, FTAG);
976 return (SET_ERROR(EILSEQ));
979 if (vap->va_mask & AT_XVATTR) {
980 if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
981 crgetuid(cr), cr, vap->va_type)) != 0) {
982 zfs_exit(zfsvfs, FTAG);
983 return (error);
987 *zpp = NULL;
989 if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr))
990 vap->va_mode &= ~S_ISVTX;
992 error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
993 if (error) {
994 zfs_exit(zfsvfs, FTAG);
995 return (error);
997 ASSERT3P(zp, ==, NULL);
1000 * Create a new file object and update the directory
1001 * to reference it.
1003 if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns))) {
1004 goto out;
1008 * We only support the creation of regular files in
1009 * extended attribute directories.
1012 if ((dzp->z_pflags & ZFS_XATTR) &&
1013 (vap->va_type != VREG)) {
1014 error = SET_ERROR(EINVAL);
1015 goto out;
1018 if ((error = zfs_acl_ids_create(dzp, 0, vap,
1019 cr, vsecp, &acl_ids, NULL)) != 0)
1020 goto out;
1022 if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode))
1023 projid = zfs_inherit_projid(dzp);
1024 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) {
1025 zfs_acl_ids_free(&acl_ids);
1026 error = SET_ERROR(EDQUOT);
1027 goto out;
1030 getnewvnode_reserve();
1032 tx = dmu_tx_create(os);
1034 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
1035 ZFS_SA_BASE_ATTR_SIZE);
1037 fuid_dirtied = zfsvfs->z_fuid_dirty;
1038 if (fuid_dirtied)
1039 zfs_fuid_txhold(zfsvfs, tx);
1040 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
1041 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
1042 if (!zfsvfs->z_use_sa &&
1043 acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1044 dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
1045 0, acl_ids.z_aclp->z_acl_bytes);
1047 error = dmu_tx_assign(tx, TXG_WAIT);
1048 if (error) {
1049 zfs_acl_ids_free(&acl_ids);
1050 dmu_tx_abort(tx);
1051 getnewvnode_drop_reserve();
1052 zfs_exit(zfsvfs, FTAG);
1053 return (error);
1055 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
1057 error = zfs_link_create(dzp, name, zp, tx, ZNEW);
1058 if (error != 0) {
1060 * Since, we failed to add the directory entry for it,
1061 * delete the newly created dnode.
1063 zfs_znode_delete(zp, tx);
1064 VOP_UNLOCK(ZTOV(zp));
1065 zrele(zp);
1066 zfs_acl_ids_free(&acl_ids);
1067 dmu_tx_commit(tx);
1068 getnewvnode_drop_reserve();
1069 goto out;
1072 if (fuid_dirtied)
1073 zfs_fuid_sync(zfsvfs, tx);
1075 txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
1076 zfs_log_create(zilog, tx, txtype, dzp, zp, name,
1077 vsecp, acl_ids.z_fuidp, vap);
1078 zfs_acl_ids_free(&acl_ids);
1079 dmu_tx_commit(tx);
1081 getnewvnode_drop_reserve();
1083 out:
1084 VNCHECKREF(dvp);
1085 if (error == 0) {
1086 *zpp = zp;
1089 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1090 zil_commit(zilog, 0);
1092 zfs_exit(zfsvfs, FTAG);
1093 return (error);
1097 * Remove an entry from a directory.
1099 * IN: dvp - vnode of directory to remove entry from.
1100 * name - name of entry to remove.
1101 * cr - credentials of caller.
1102 * ct - caller context
1103 * flags - case flags
1105 * RETURN: 0 on success, error code on failure.
1107 * Timestamps:
1108 * dvp - ctime|mtime
1109 * vp - ctime (if nlink > 0)
1111 static int
1112 zfs_remove_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
1114 znode_t *dzp = VTOZ(dvp);
1115 znode_t *zp;
1116 znode_t *xzp;
1117 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1118 zilog_t *zilog;
1119 uint64_t xattr_obj;
1120 uint64_t obj = 0;
1121 dmu_tx_t *tx;
1122 boolean_t unlinked;
1123 uint64_t txtype;
1124 int error;
1127 if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
1128 return (error);
1129 zp = VTOZ(vp);
1130 if ((error = zfs_verify_zp(zp)) != 0) {
1131 zfs_exit(zfsvfs, FTAG);
1132 return (error);
1134 zilog = zfsvfs->z_log;
1136 xattr_obj = 0;
1137 xzp = NULL;
1139 if ((error = zfs_zaccess_delete(dzp, zp, cr, NULL))) {
1140 goto out;
1144 * Need to use rmdir for removing directories.
1146 if (vp->v_type == VDIR) {
1147 error = SET_ERROR(EPERM);
1148 goto out;
1151 vnevent_remove(vp, dvp, name, ct);
1153 obj = zp->z_id;
1155 /* are there any extended attributes? */
1156 error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
1157 &xattr_obj, sizeof (xattr_obj));
1158 if (error == 0 && xattr_obj) {
1159 error = zfs_zget(zfsvfs, xattr_obj, &xzp);
1160 ASSERT0(error);
1164 * We may delete the znode now, or we may put it in the unlinked set;
1165 * it depends on whether we're the last link, and on whether there are
1166 * other holds on the vnode. So we dmu_tx_hold() the right things to
1167 * allow for either case.
1169 tx = dmu_tx_create(zfsvfs->z_os);
1170 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
1171 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1172 zfs_sa_upgrade_txholds(tx, zp);
1173 zfs_sa_upgrade_txholds(tx, dzp);
1175 if (xzp) {
1176 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
1177 dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
1180 /* charge as an update -- would be nice not to charge at all */
1181 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
1184 * Mark this transaction as typically resulting in a net free of space
1186 dmu_tx_mark_netfree(tx);
1188 error = dmu_tx_assign(tx, TXG_WAIT);
1189 if (error) {
1190 dmu_tx_abort(tx);
1191 zfs_exit(zfsvfs, FTAG);
1192 return (error);
1196 * Remove the directory entry.
1198 error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked);
1200 if (error) {
1201 dmu_tx_commit(tx);
1202 goto out;
1205 if (unlinked) {
1206 zfs_unlinked_add(zp, tx);
1207 vp->v_vflag |= VV_NOSYNC;
1209 /* XXX check changes to linux vnops */
1210 txtype = TX_REMOVE;
1211 zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked);
1213 dmu_tx_commit(tx);
1214 out:
1216 if (xzp)
1217 vrele(ZTOV(xzp));
1219 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1220 zil_commit(zilog, 0);
1223 zfs_exit(zfsvfs, FTAG);
1224 return (error);
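
/*
 * Helper for the wrappers below (e.g. zfs_remove() and zfs_rmdir()): build a
 * componentname for 'name' and resolve it, going through the name cache via
 * vfs_cache_lookup() when enabled, otherwise calling zfs_lookup() directly.
 */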
1228 static int
1229 zfs_lookup_internal(znode_t *dzp, const char *name, vnode_t **vpp,
1230 struct componentname *cnp, int nameiop)
1232 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1233 int error;
1235 cnp->cn_nameptr = __DECONST(char *, name);
1236 cnp->cn_namelen = strlen(name);
1237 cnp->cn_nameiop = nameiop;
1238 cnp->cn_flags = ISLASTCN;
1239 #if __FreeBSD_version < 1400068
1240 cnp->cn_flags |= SAVENAME;
1241 #endif
1242 cnp->cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
1243 cnp->cn_cred = kcred;
1244 #if __FreeBSD_version < 1400037
1245 cnp->cn_thread = curthread;
1246 #endif
1248 if (zfsvfs->z_use_namecache && !zfsvfs->z_replay) {
1249 struct vop_lookup_args a;
1251 a.a_gen.a_desc = &vop_lookup_desc;
1252 a.a_dvp = ZTOV(dzp);
1253 a.a_vpp = vpp;
1254 a.a_cnp = cnp;
1255 error = vfs_cache_lookup(&a);
1256 } else {
1257 error = zfs_lookup(ZTOV(dzp), name, vpp, cnp, nameiop, kcred, 0,
1258 B_FALSE);
1260 #ifdef ZFS_DEBUG
1261 if (error) {
1262 printf("got error %d on name %s on op %d\n", error, name,
1263 nameiop);
1264 kdb_backtrace();
1266 #endif
1267 return (error);
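
/*
 * Look up 'name' in dzp and remove it; a thin wrapper around
 * zfs_lookup_internal() and zfs_remove_().
 */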
1271 zfs_remove(znode_t *dzp, const char *name, cred_t *cr, int flags)
1273 vnode_t *vp;
1274 int error;
1275 struct componentname cn;
1277 if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
1278 return (error);
1280 error = zfs_remove_(ZTOV(dzp), vp, name, cr);
1281 vput(vp);
1282 return (error);
1285 * Create a new directory and insert it into dvp using the name
1286 * provided. Return a pointer to the inserted directory.
1288 * IN: dvp - vnode of directory to add subdir to.
1289 * dirname - name of new directory.
1290 * vap - attributes of new directory.
1291 * cr - credentials of caller.
1292 * ct - caller context
1293 * flags - case flags
1294 * vsecp - ACL to be set
1295 * mnt_ns - Unused on FreeBSD
1297 * OUT: vpp - vnode of created directory.
1299 * RETURN: 0 on success, error code on failure.
1301 * Timestamps:
1302 * dvp - ctime|mtime updated
1303 * vp - ctime|mtime|atime updated
1306 zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp,
1307 cred_t *cr, int flags, vsecattr_t *vsecp, zidmap_t *mnt_ns)
1309 (void) flags, (void) vsecp;
1310 znode_t *zp;
1311 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1312 zilog_t *zilog;
1313 uint64_t txtype;
1314 dmu_tx_t *tx;
1315 int error;
1316 uid_t uid = crgetuid(cr);
1317 gid_t gid = crgetgid(cr);
1318 zfs_acl_ids_t acl_ids;
1319 boolean_t fuid_dirtied;
1321 ASSERT3U(vap->va_type, ==, VDIR);
1323 if (is_nametoolong(zfsvfs, dirname))
1324 return (SET_ERROR(ENAMETOOLONG));
1327 * If we have an ephemeral id, ACL, or XVATTR then
1328 * make sure file system is at proper version
1330 if (zfsvfs->z_use_fuids == B_FALSE &&
1331 ((vap->va_mask & AT_XVATTR) ||
1332 IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
1333 return (SET_ERROR(EINVAL));
1335 if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
1336 return (error);
1337 zilog = zfsvfs->z_log;
1339 if (dzp->z_pflags & ZFS_XATTR) {
1340 zfs_exit(zfsvfs, FTAG);
1341 return (SET_ERROR(EINVAL));
1344 if (zfsvfs->z_utf8 && u8_validate(dirname,
1345 strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
1346 zfs_exit(zfsvfs, FTAG);
1347 return (SET_ERROR(EILSEQ));
1350 if (vap->va_mask & AT_XVATTR) {
1351 if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
1352 crgetuid(cr), cr, vap->va_type)) != 0) {
1353 zfs_exit(zfsvfs, FTAG);
1354 return (error);
1358 if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
1359 NULL, &acl_ids, NULL)) != 0) {
1360 zfs_exit(zfsvfs, FTAG);
1361 return (error);
1365 * First make sure the new directory doesn't exist.
1367 * Existence is checked first to make sure we don't return
1368 * EACCES instead of EEXIST which can cause some applications
1369 * to fail.
1371 *zpp = NULL;
1373 if ((error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW))) {
1374 zfs_acl_ids_free(&acl_ids);
1375 zfs_exit(zfsvfs, FTAG);
1376 return (error);
1378 ASSERT3P(zp, ==, NULL);
1380 if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr,
1381 mnt_ns))) {
1382 zfs_acl_ids_free(&acl_ids);
1383 zfs_exit(zfsvfs, FTAG);
1384 return (error);
1387 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) {
1388 zfs_acl_ids_free(&acl_ids);
1389 zfs_exit(zfsvfs, FTAG);
1390 return (SET_ERROR(EDQUOT));
1394 * Add a new entry to the directory.
1396 getnewvnode_reserve();
1397 tx = dmu_tx_create(zfsvfs->z_os);
1398 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
1399 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
1400 fuid_dirtied = zfsvfs->z_fuid_dirty;
1401 if (fuid_dirtied)
1402 zfs_fuid_txhold(zfsvfs, tx);
1403 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1404 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
1405 acl_ids.z_aclp->z_acl_bytes);
1408 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
1409 ZFS_SA_BASE_ATTR_SIZE);
1411 error = dmu_tx_assign(tx, TXG_WAIT);
1412 if (error) {
1413 zfs_acl_ids_free(&acl_ids);
1414 dmu_tx_abort(tx);
1415 getnewvnode_drop_reserve();
1416 zfs_exit(zfsvfs, FTAG);
1417 return (error);
1421 * Create new node.
1423 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
1426 * Now put new name in parent dir.
1428 error = zfs_link_create(dzp, dirname, zp, tx, ZNEW);
1429 if (error != 0) {
1430 zfs_znode_delete(zp, tx);
1431 VOP_UNLOCK(ZTOV(zp));
1432 zrele(zp);
1433 goto out;
1436 if (fuid_dirtied)
1437 zfs_fuid_sync(zfsvfs, tx);
1439 *zpp = zp;
1441 txtype = zfs_log_create_txtype(Z_DIR, NULL, vap);
1442 zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL,
1443 acl_ids.z_fuidp, vap);
1445 out:
1446 zfs_acl_ids_free(&acl_ids);
1448 dmu_tx_commit(tx);
1450 getnewvnode_drop_reserve();
1452 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1453 zil_commit(zilog, 0);
1455 zfs_exit(zfsvfs, FTAG);
1456 return (error);
1460 * Remove a directory subdir entry. If the current working
1461 * directory is the same as the subdir to be removed, the
1462 * remove will fail.
1464 * IN: dvp - vnode of directory to remove from.
1465 * name - name of directory to be removed.
1466 * cwd - vnode of current working directory.
1467 * cr - credentials of caller.
1468 * ct - caller context
1469 * flags - case flags
1471 * RETURN: 0 on success, error code on failure.
1473 * Timestamps:
1474 * dvp - ctime|mtime updated
1476 static int
1477 zfs_rmdir_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
1479 znode_t *dzp = VTOZ(dvp);
1480 znode_t *zp = VTOZ(vp);
1481 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1482 zilog_t *zilog;
1483 dmu_tx_t *tx;
1484 int error;
1486 if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
1487 return (error);
1488 if ((error = zfs_verify_zp(zp)) != 0) {
1489 zfs_exit(zfsvfs, FTAG);
1490 return (error);
1492 zilog = zfsvfs->z_log;
1495 if ((error = zfs_zaccess_delete(dzp, zp, cr, NULL))) {
1496 goto out;
1499 if (vp->v_type != VDIR) {
1500 error = SET_ERROR(ENOTDIR);
1501 goto out;
1504 vnevent_rmdir(vp, dvp, name, ct);
1506 tx = dmu_tx_create(zfsvfs->z_os);
1507 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
1508 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1509 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
1510 zfs_sa_upgrade_txholds(tx, zp);
1511 zfs_sa_upgrade_txholds(tx, dzp);
1512 dmu_tx_mark_netfree(tx);
1513 error = dmu_tx_assign(tx, TXG_WAIT);
1514 if (error) {
1515 dmu_tx_abort(tx);
1516 zfs_exit(zfsvfs, FTAG);
1517 return (error);
1520 error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL);
1522 if (error == 0) {
1523 uint64_t txtype = TX_RMDIR;
1524 zfs_log_remove(zilog, tx, txtype, dzp, name,
1525 ZFS_NO_OBJECT, B_FALSE);
1528 dmu_tx_commit(tx);
1530 if (zfsvfs->z_use_namecache)
1531 cache_vop_rmdir(dvp, vp);
1532 out:
1533 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1534 zil_commit(zilog, 0);
1536 zfs_exit(zfsvfs, FTAG);
1537 return (error);
1541 zfs_rmdir(znode_t *dzp, const char *name, znode_t *cwd, cred_t *cr, int flags)
1543 struct componentname cn;
1544 vnode_t *vp;
1545 int error;
1547 if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
1548 return (error);
1550 error = zfs_rmdir_(ZTOV(dzp), vp, name, cr);
1551 vput(vp);
1552 return (error);
1556 * Read as many directory entries as will fit into the provided
1557 * buffer from the given directory cursor position (specified in
1558 * the uio structure).
1560 * IN: vp - vnode of directory to read.
1561 * uio - structure supplying read location, range info,
1562 * and return buffer.
1563 * cr - credentials of caller.
1564 * ct - caller context
1566 * OUT: uio - updated offset and range, buffer filled.
1567 * eofp - set to true if end-of-file detected.
1568 * ncookies- number of entries in cookies
1569 * cookies - offsets to directory entries
1571 * RETURN: 0 on success, error code on failure.
1573 * Timestamps:
1574 * vp - atime updated
 *
 * Note that the low 4 bits of the cookie returned by zap are always zero.
 * This allows us to use the low range for "special" directory entries:
 * We use 0 for '.', and 1 for '..'.  If this is the root of the filesystem,
 * we use the offset 2 for the '.zfs' directory.
 */
1581 static int
1582 zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
1583 int *ncookies, cookie_t **cookies)
1585 znode_t *zp = VTOZ(vp);
1586 iovec_t *iovp;
1587 dirent64_t *odp;
1588 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1589 objset_t *os;
1590 caddr_t outbuf;
1591 size_t bufsize;
1592 zap_cursor_t zc;
1593 zap_attribute_t *zap;
1594 uint_t bytes_wanted;
1595 uint64_t offset; /* must be unsigned; checks for < 1 */
1596 uint64_t parent;
1597 int local_eof;
1598 int outcount;
1599 int error;
1600 uint8_t prefetch;
1601 uint8_t type;
1602 int ncooks;
1603 cookie_t *cooks = NULL;
1605 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
1606 return (error);
1608 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
1609 &parent, sizeof (parent))) != 0) {
1610 zfs_exit(zfsvfs, FTAG);
1611 return (error);
1615 * If we are not given an eof variable,
1616 * use a local one.
1618 if (eofp == NULL)
1619 eofp = &local_eof;
1622 * Check for valid iov_len.
1624 if (GET_UIO_STRUCT(uio)->uio_iov->iov_len <= 0) {
1625 zfs_exit(zfsvfs, FTAG);
1626 return (SET_ERROR(EINVAL));
1630 * Quit if directory has been removed (posix)
1632 if ((*eofp = zp->z_unlinked) != 0) {
1633 zfs_exit(zfsvfs, FTAG);
1634 return (0);
1637 error = 0;
1638 os = zfsvfs->z_os;
1639 offset = zfs_uio_offset(uio);
1640 prefetch = zp->z_zn_prefetch;
1641 zap = zap_attribute_long_alloc();
1644 * Initialize the iterator cursor.
1646 if (offset <= 3) {
1648 * Start iteration from the beginning of the directory.
1650 zap_cursor_init(&zc, os, zp->z_id);
1651 } else {
1653 * The offset is a serialized cursor.
1655 zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
1659 * Get space to change directory entries into fs independent format.
1661 iovp = GET_UIO_STRUCT(uio)->uio_iov;
1662 bytes_wanted = iovp->iov_len;
1663 if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1) {
1664 bufsize = bytes_wanted;
1665 outbuf = kmem_alloc(bufsize, KM_SLEEP);
1666 odp = (struct dirent64 *)outbuf;
1667 } else {
1668 bufsize = bytes_wanted;
1669 outbuf = NULL;
1670 odp = (struct dirent64 *)iovp->iov_base;
1673 if (ncookies != NULL) {
1675 * Minimum entry size is dirent size and 1 byte for a file name.
1677 ncooks = zfs_uio_resid(uio) / (sizeof (struct dirent) -
1678 sizeof (((struct dirent *)NULL)->d_name) + 1);
1679 cooks = malloc(ncooks * sizeof (*cooks), M_TEMP, M_WAITOK);
1680 *cookies = cooks;
1681 *ncookies = ncooks;
1685 * Transform to file-system independent format
1687 outcount = 0;
1688 while (outcount < bytes_wanted) {
1689 ino64_t objnum;
1690 ushort_t reclen;
1691 off64_t *next = NULL;
1694 * Special case `.', `..', and `.zfs'.
1696 if (offset == 0) {
1697 (void) strcpy(zap->za_name, ".");
1698 zap->za_normalization_conflict = 0;
1699 objnum = zp->z_id;
1700 type = DT_DIR;
1701 } else if (offset == 1) {
1702 (void) strcpy(zap->za_name, "..");
1703 zap->za_normalization_conflict = 0;
1704 objnum = parent;
1705 type = DT_DIR;
1706 } else if (offset == 2 && zfs_show_ctldir(zp)) {
1707 (void) strcpy(zap->za_name, ZFS_CTLDIR_NAME);
1708 zap->za_normalization_conflict = 0;
1709 objnum = ZFSCTL_INO_ROOT;
1710 type = DT_DIR;
1711 } else {
1713 * Grab next entry.
1715 if ((error = zap_cursor_retrieve(&zc, zap))) {
1716 if ((*eofp = (error == ENOENT)) != 0)
1717 break;
1718 else
1719 goto update;
1722 if (zap->za_integer_length != 8 ||
1723 zap->za_num_integers != 1) {
1724 cmn_err(CE_WARN, "zap_readdir: bad directory "
1725 "entry, obj = %lld, offset = %lld\n",
1726 (u_longlong_t)zp->z_id,
1727 (u_longlong_t)offset);
1728 error = SET_ERROR(ENXIO);
1729 goto update;
1732 objnum = ZFS_DIRENT_OBJ(zap->za_first_integer);
1734 * MacOS X can extract the object type here such as:
1735 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer);
1737 type = ZFS_DIRENT_TYPE(zap->za_first_integer);
1740 reclen = DIRENT64_RECLEN(strlen(zap->za_name));
1743 * Will this entry fit in the buffer?
1745 if (outcount + reclen > bufsize) {
1747 * Did we manage to fit anything in the buffer?
1749 if (!outcount) {
1750 error = SET_ERROR(EINVAL);
1751 goto update;
1753 break;
1756 * Add normal entry:
1758 odp->d_ino = objnum;
1759 odp->d_reclen = reclen;
1760 odp->d_namlen = strlen(zap->za_name);
1761 /* NOTE: d_off is the offset for the *next* entry. */
1762 next = &odp->d_off;
1763 strlcpy(odp->d_name, zap->za_name, odp->d_namlen + 1);
1764 odp->d_type = type;
1765 dirent_terminate(odp);
1766 odp = (dirent64_t *)((intptr_t)odp + reclen);
1768 outcount += reclen;
1770 ASSERT3S(outcount, <=, bufsize);
1772 if (prefetch)
1773 dmu_prefetch_dnode(os, objnum, ZIO_PRIORITY_SYNC_READ);
1776 * Move to the next entry, fill in the previous offset.
1778 if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
1779 zap_cursor_advance(&zc);
1780 offset = zap_cursor_serialize(&zc);
1781 } else {
1782 offset += 1;
1785 /* Fill the offset right after advancing the cursor. */
1786 if (next != NULL)
1787 *next = offset;
1788 if (cooks != NULL) {
1789 *cooks++ = offset;
1790 ncooks--;
1791 KASSERT(ncooks >= 0, ("ncookies=%d", ncooks));
1794 zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */
1796 /* Subtract unused cookies */
1797 if (ncookies != NULL)
1798 *ncookies -= ncooks;
1800 if (zfs_uio_segflg(uio) == UIO_SYSSPACE && zfs_uio_iovcnt(uio) == 1) {
1801 iovp->iov_base += outcount;
1802 iovp->iov_len -= outcount;
1803 zfs_uio_resid(uio) -= outcount;
1804 } else if ((error =
1805 zfs_uiomove(outbuf, (long)outcount, UIO_READ, uio))) {
1807 * Reset the pointer.
1809 offset = zfs_uio_offset(uio);
1812 update:
1813 zap_cursor_fini(&zc);
1814 zap_attribute_free(zap);
1815 if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1)
1816 kmem_free(outbuf, bufsize);
1818 if (error == ENOENT)
1819 error = 0;
1821 ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
1823 zfs_uio_setoffset(uio, offset);
1824 zfs_exit(zfsvfs, FTAG);
1825 if (error != 0 && cookies != NULL) {
1826 free(*cookies, M_TEMP);
1827 *cookies = NULL;
1828 *ncookies = 0;
1830 return (error);
1834 * Get the requested file attributes and place them in the provided
1835 * vattr structure.
1837 * IN: vp - vnode of file.
1838 * vap - va_mask identifies requested attributes.
1839 * If AT_XVATTR set, then optional attrs are requested
1840 * flags - ATTR_NOACLCHECK (CIFS server context)
1841 * cr - credentials of caller.
1843 * OUT: vap - attribute values.
 * RETURN:	0 on success, error code on failure.
1847 static int
1848 zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
1850 znode_t *zp = VTOZ(vp);
1851 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1852 int error = 0;
1853 uint32_t blksize;
1854 u_longlong_t nblocks;
1855 uint64_t mtime[2], ctime[2], crtime[2], rdev;
1856 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */
1857 xoptattr_t *xoap = NULL;
1858 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
1859 sa_bulk_attr_t bulk[4];
1860 int count = 0;
1862 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
1863 return (error);
1865 zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);
1867 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
1868 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
1869 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16);
1870 if (vp->v_type == VBLK || vp->v_type == VCHR)
1871 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL,
1872 &rdev, 8);
1874 if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) {
1875 zfs_exit(zfsvfs, FTAG);
1876 return (error);
1880 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
1881 * Also, if we are the owner don't bother, since owner should
1882 * always be allowed to read basic attributes of file.
1884 if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) &&
1885 (vap->va_uid != crgetuid(cr))) {
1886 if ((error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
1887 skipaclchk, cr, NULL))) {
1888 zfs_exit(zfsvfs, FTAG);
1889 return (error);
1894 * Return all attributes. It's cheaper to provide the answer
1895 * than to determine whether we were asked the question.
1898 vap->va_type = IFTOVT(zp->z_mode);
1899 vap->va_mode = zp->z_mode & ~S_IFMT;
1900 vn_fsid(vp, vap);
1901 vap->va_nodeid = zp->z_id;
1902 vap->va_nlink = zp->z_links;
1903 if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp) &&
1904 zp->z_links < ZFS_LINK_MAX)
1905 vap->va_nlink++;
1906 vap->va_size = zp->z_size;
1907 if (vp->v_type == VBLK || vp->v_type == VCHR)
1908 vap->va_rdev = zfs_cmpldev(rdev);
1909 else
1910 vap->va_rdev = 0;
1911 vap->va_gen = zp->z_gen;
1912 vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */
1913 vap->va_filerev = zp->z_seq;
1916 * Add in any requested optional attributes and the create time.
1917 * Also set the corresponding bits in the returned attribute bitmap.
1919 if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) {
1920 if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
1921 xoap->xoa_archive =
1922 ((zp->z_pflags & ZFS_ARCHIVE) != 0);
1923 XVA_SET_RTN(xvap, XAT_ARCHIVE);
1926 if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
1927 xoap->xoa_readonly =
1928 ((zp->z_pflags & ZFS_READONLY) != 0);
1929 XVA_SET_RTN(xvap, XAT_READONLY);
1932 if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
1933 xoap->xoa_system =
1934 ((zp->z_pflags & ZFS_SYSTEM) != 0);
1935 XVA_SET_RTN(xvap, XAT_SYSTEM);
1938 if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
1939 xoap->xoa_hidden =
1940 ((zp->z_pflags & ZFS_HIDDEN) != 0);
1941 XVA_SET_RTN(xvap, XAT_HIDDEN);
1944 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
1945 xoap->xoa_nounlink =
1946 ((zp->z_pflags & ZFS_NOUNLINK) != 0);
1947 XVA_SET_RTN(xvap, XAT_NOUNLINK);
1950 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
1951 xoap->xoa_immutable =
1952 ((zp->z_pflags & ZFS_IMMUTABLE) != 0);
1953 XVA_SET_RTN(xvap, XAT_IMMUTABLE);
1956 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
1957 xoap->xoa_appendonly =
1958 ((zp->z_pflags & ZFS_APPENDONLY) != 0);
1959 XVA_SET_RTN(xvap, XAT_APPENDONLY);
1962 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
1963 xoap->xoa_nodump =
1964 ((zp->z_pflags & ZFS_NODUMP) != 0);
1965 XVA_SET_RTN(xvap, XAT_NODUMP);
1968 if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
1969 xoap->xoa_opaque =
1970 ((zp->z_pflags & ZFS_OPAQUE) != 0);
1971 XVA_SET_RTN(xvap, XAT_OPAQUE);
1974 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
1975 xoap->xoa_av_quarantined =
1976 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0);
1977 XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
1980 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
1981 xoap->xoa_av_modified =
1982 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0);
1983 XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
1986 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) &&
1987 vp->v_type == VREG) {
1988 zfs_sa_get_scanstamp(zp, xvap);
1991 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
1992 xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0);
1993 XVA_SET_RTN(xvap, XAT_REPARSE);
1995 if (XVA_ISSET_REQ(xvap, XAT_GEN)) {
1996 xoap->xoa_generation = zp->z_gen;
1997 XVA_SET_RTN(xvap, XAT_GEN);
2000 if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
2001 xoap->xoa_offline =
2002 ((zp->z_pflags & ZFS_OFFLINE) != 0);
2003 XVA_SET_RTN(xvap, XAT_OFFLINE);
2006 if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
2007 xoap->xoa_sparse =
2008 ((zp->z_pflags & ZFS_SPARSE) != 0);
2009 XVA_SET_RTN(xvap, XAT_SPARSE);
2012 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
2013 xoap->xoa_projinherit =
2014 ((zp->z_pflags & ZFS_PROJINHERIT) != 0);
2015 XVA_SET_RTN(xvap, XAT_PROJINHERIT);
2018 if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
2019 xoap->xoa_projid = zp->z_projid;
2020 XVA_SET_RTN(xvap, XAT_PROJID);
2024 ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime);
2025 ZFS_TIME_DECODE(&vap->va_mtime, mtime);
2026 ZFS_TIME_DECODE(&vap->va_ctime, ctime);
2027 ZFS_TIME_DECODE(&vap->va_birthtime, crtime);
2030 sa_object_size(zp->z_sa_hdl, &blksize, &nblocks);
2031 vap->va_blksize = blksize;
2032 vap->va_bytes = nblocks << 9; /* nblocks * 512 */
2034 if (zp->z_blksz == 0) {
2036 * Block size hasn't been set; suggest maximal I/O transfers.
2038 vap->va_blksize = zfsvfs->z_max_blksz;
2041 zfs_exit(zfsvfs, FTAG);
2042 return (0);
2046 * Set the file attributes to the values contained in the
2047 * vattr structure.
2049 * IN: zp - znode of file to be modified.
2050 * vap - new attribute values.
2051 * If AT_XVATTR set, then optional attrs are being set
2052 * flags - ATTR_UTIME set if non-default time values provided.
2053 * - ATTR_NOACLCHECK (CIFS context only).
2054 * cr - credentials of caller.
2055 * mnt_ns - Unused on FreeBSD
2057 * RETURN: 0 on success, error code on failure.
2059 * Timestamps:
2060 * vp - ctime updated, mtime updated if size changed.
2063 zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr, zidmap_t *mnt_ns)
2065 vnode_t *vp = ZTOV(zp);
2066 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
2067 objset_t *os;
2068 zilog_t *zilog;
2069 dmu_tx_t *tx;
2070 vattr_t oldva;
2071 xvattr_t tmpxvattr;
2072 uint_t mask = vap->va_mask;
2073 uint_t saved_mask = 0;
2074 uint64_t saved_mode;
2075 int trim_mask = 0;
2076 uint64_t new_mode;
2077 uint64_t new_uid, new_gid;
2078 uint64_t xattr_obj;
2079 uint64_t mtime[2], ctime[2];
2080 uint64_t projid = ZFS_INVALID_PROJID;
2081 znode_t *attrzp;
2082 int need_policy = FALSE;
2083 int err, err2;
2084 zfs_fuid_info_t *fuidp = NULL;
2085 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */
2086 xoptattr_t *xoap;
2087 zfs_acl_t *aclp;
2088 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
2089 boolean_t fuid_dirtied = B_FALSE;
2090 sa_bulk_attr_t bulk[7], xattr_bulk[7];
2091 int count = 0, xattr_count = 0;
2093 if (mask == 0)
2094 return (0);
2096 if (mask & AT_NOSET)
2097 return (SET_ERROR(EINVAL));
2099 if ((err = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
2100 return (err);
2102 os = zfsvfs->z_os;
2103 zilog = zfsvfs->z_log;
2106 * Make sure that if we have ephemeral uid/gid or xvattr specified
2107 * that file system is at proper version level
2110 if (zfsvfs->z_use_fuids == B_FALSE &&
2111 (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ||
2112 ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) ||
2113 (mask & AT_XVATTR))) {
2114 zfs_exit(zfsvfs, FTAG);
2115 return (SET_ERROR(EINVAL));
2118 if (mask & AT_SIZE && vp->v_type == VDIR) {
2119 zfs_exit(zfsvfs, FTAG);
2120 return (SET_ERROR(EISDIR));
2123 if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) {
2124 zfs_exit(zfsvfs, FTAG);
2125 return (SET_ERROR(EINVAL));
2129 * If this is an xvattr_t, then get a pointer to the structure of
2130 * optional attributes. If this is NULL, then we have a vattr_t.
2132 xoap = xva_getxoptattr(xvap);
2134 xva_init(&tmpxvattr);
2137 * Immutable files can only alter immutable bit and atime
2139 if ((zp->z_pflags & ZFS_IMMUTABLE) &&
2140 ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) ||
2141 ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
2142 zfs_exit(zfsvfs, FTAG);
2143 return (SET_ERROR(EPERM));
2147 * Note: ZFS_READONLY is handled in zfs_zaccess_common.
2151 * Verify that the timestamps don't overflow 32 bits.
2152 * ZFS can handle large timestamps, but 32-bit syscalls can't
2153 * handle times beyond 2038. This check should be removed
2154 * once large timestamps are fully supported.
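/*
 * TIMESPEC_OVERFLOW() is assumed to flag tv_sec values that do not
 * fit in a signed 32-bit integer (see sys/time.h).
 */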
2156 if (mask & (AT_ATIME | AT_MTIME)) {
2157 if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) ||
2158 ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) {
2159 zfs_exit(zfsvfs, FTAG);
2160 return (SET_ERROR(EOVERFLOW));
2163 if (xoap != NULL && (mask & AT_XVATTR)) {
2164 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME) &&
2165 TIMESPEC_OVERFLOW(&vap->va_birthtime)) {
2166 zfs_exit(zfsvfs, FTAG);
2167 return (SET_ERROR(EOVERFLOW));
2170 if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
2171 if (!dmu_objset_projectquota_enabled(os) ||
2172 (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode))) {
2173 zfs_exit(zfsvfs, FTAG);
2174 return (SET_ERROR(EOPNOTSUPP));
2177 projid = xoap->xoa_projid;
2178 if (unlikely(projid == ZFS_INVALID_PROJID)) {
2179 zfs_exit(zfsvfs, FTAG);
2180 return (SET_ERROR(EINVAL));
2183 if (projid == zp->z_projid && zp->z_pflags & ZFS_PROJID)
2184 projid = ZFS_INVALID_PROJID;
2185 else
2186 need_policy = TRUE;
2189 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT) &&
2190 (xoap->xoa_projinherit !=
2191 ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) &&
2192 (!dmu_objset_projectquota_enabled(os) ||
2193 (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode)))) {
2194 zfs_exit(zfsvfs, FTAG);
2195 return (SET_ERROR(EOPNOTSUPP));
2199 attrzp = NULL;
2200 aclp = NULL;
2202 if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
2203 zfs_exit(zfsvfs, FTAG);
2204 return (SET_ERROR(EROFS));
2208 * First validate permissions
2211 if (mask & AT_SIZE) {
2213 * XXX - Note, we are not providing any open
2214 * mode flags here (like FNDELAY), so we may
2215 * block if there are locks present... this
2216 * should be addressed in openat().
2218 /* XXX - would it be OK to generate a log record here? */
2219 err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
2220 if (err) {
2221 zfs_exit(zfsvfs, FTAG);
2222 return (err);
2226 if (mask & (AT_ATIME|AT_MTIME) ||
2227 ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
2228 XVA_ISSET_REQ(xvap, XAT_READONLY) ||
2229 XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
2230 XVA_ISSET_REQ(xvap, XAT_OFFLINE) ||
2231 XVA_ISSET_REQ(xvap, XAT_SPARSE) ||
2232 XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
2233 XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
2234 need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
2235 skipaclchk, cr, mnt_ns);
2238 if (mask & (AT_UID|AT_GID)) {
2239 int idmask = (mask & (AT_UID|AT_GID));
2240 int take_owner;
2241 int take_group;
2244 * NOTE: even if a new mode is being set,
2245 * we may clear S_ISUID/S_ISGID bits.
2248 if (!(mask & AT_MODE))
2249 vap->va_mode = zp->z_mode;
2252 * Take ownership, or chgrp to a group we are a member of
2255 take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr));
2256 take_group = (mask & AT_GID) &&
2257 zfs_groupmember(zfsvfs, vap->va_gid, cr);
2260 * If both AT_UID and AT_GID are set then take_owner and
2261 * take_group must both be set in order to allow taking
2262 * ownership.
2264 * Otherwise, send the check through secpolicy_vnode_setattr()
2268 if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) ||
2269 ((idmask == AT_UID) && take_owner) ||
2270 ((idmask == AT_GID) && take_group)) {
2271 if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
2272 skipaclchk, cr, mnt_ns) == 0) {
2274 * Remove setuid/setgid for non-privileged users
2276 secpolicy_setid_clear(vap, vp, cr);
2277 trim_mask = (mask & (AT_UID|AT_GID));
2278 } else {
2279 need_policy = TRUE;
2281 } else {
2282 need_policy = TRUE;
2286 oldva.va_mode = zp->z_mode;
2287 zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
2288 if (mask & AT_XVATTR) {
2290 * Update the xvattr mask to include only those attributes
2291 * that are actually changing.
2293 * The bits will be restored prior to actually setting
2294 * the attributes so the caller thinks they were set.
2296 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
2297 if (xoap->xoa_appendonly !=
2298 ((zp->z_pflags & ZFS_APPENDONLY) != 0)) {
2299 need_policy = TRUE;
2300 } else {
2301 XVA_CLR_REQ(xvap, XAT_APPENDONLY);
2302 XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY);
2306 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
2307 if (xoap->xoa_projinherit !=
2308 ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) {
2309 need_policy = TRUE;
2310 } else {
2311 XVA_CLR_REQ(xvap, XAT_PROJINHERIT);
2312 XVA_SET_REQ(&tmpxvattr, XAT_PROJINHERIT);
2316 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
2317 if (xoap->xoa_nounlink !=
2318 ((zp->z_pflags & ZFS_NOUNLINK) != 0)) {
2319 need_policy = TRUE;
2320 } else {
2321 XVA_CLR_REQ(xvap, XAT_NOUNLINK);
2322 XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK);
2326 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
2327 if (xoap->xoa_immutable !=
2328 ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) {
2329 need_policy = TRUE;
2330 } else {
2331 XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
2332 XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE);
2336 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
2337 if (xoap->xoa_nodump !=
2338 ((zp->z_pflags & ZFS_NODUMP) != 0)) {
2339 need_policy = TRUE;
2340 } else {
2341 XVA_CLR_REQ(xvap, XAT_NODUMP);
2342 XVA_SET_REQ(&tmpxvattr, XAT_NODUMP);
2346 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
2347 if (xoap->xoa_av_modified !=
2348 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) {
2349 need_policy = TRUE;
2350 } else {
2351 XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
2352 XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED);
2356 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
2357 if ((vp->v_type != VREG &&
2358 xoap->xoa_av_quarantined) ||
2359 xoap->xoa_av_quarantined !=
2360 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) {
2361 need_policy = TRUE;
2362 } else {
2363 XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
2364 XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED);
2368 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
2369 zfs_exit(zfsvfs, FTAG);
2370 return (SET_ERROR(EPERM));
2373 if (need_policy == FALSE &&
2374 (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) ||
2375 XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
2376 need_policy = TRUE;
2380 if (mask & AT_MODE) {
2381 if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr,
2382 mnt_ns) == 0) {
2383 err = secpolicy_setid_setsticky_clear(vp, vap,
2384 &oldva, cr);
2385 if (err) {
2386 zfs_exit(zfsvfs, FTAG);
2387 return (err);
2389 trim_mask |= AT_MODE;
2390 } else {
2391 need_policy = TRUE;
2395 if (need_policy) {
2397 * If trim_mask is set then take ownership
2398 * has been granted or write_acl is present and user
2399 * has the ability to modify mode. In that case remove
2400 * UID|GID and or MODE from mask so that
2401 * secpolicy_vnode_setattr() doesn't revoke it.
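/*
 * Example: an owner chowning their own file has take_owner granted
 * above, so AT_UID lands in trim_mask; it is cleared from va_mask
 * here so secpolicy_vnode_setattr() does not reject it, and restored
 * below so the SA update still applies.
 */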
2404 if (trim_mask) {
2405 saved_mask = vap->va_mask;
2406 vap->va_mask &= ~trim_mask;
2407 if (trim_mask & AT_MODE) {
2409 * Save the mode, as secpolicy_vnode_setattr()
2410 * will overwrite it with oldva.va_mode.
2412 saved_mode = vap->va_mode;
2415 err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
2416 (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
2417 if (err) {
2418 zfs_exit(zfsvfs, FTAG);
2419 return (err);
2422 if (trim_mask) {
2423 vap->va_mask |= saved_mask;
2424 if (trim_mask & AT_MODE) {
2426 * Recover the mode after
2427 * secpolicy_vnode_setattr().
2429 vap->va_mode = saved_mode;
2435 * secpolicy_vnode_setattr() or take-ownership may have
2436 * changed va_mask.
2438 mask = vap->va_mask;
2440 if ((mask & (AT_UID | AT_GID)) || projid != ZFS_INVALID_PROJID) {
2441 err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
2442 &xattr_obj, sizeof (xattr_obj));
2444 if (err == 0 && xattr_obj) {
2445 err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp);
2446 if (err == 0) {
2447 err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE);
2448 if (err != 0)
2449 vrele(ZTOV(attrzp));
2451 if (err)
2452 goto out2;
2454 if (mask & AT_UID) {
2455 new_uid = zfs_fuid_create(zfsvfs,
2456 (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
2457 if (new_uid != zp->z_uid &&
2458 zfs_id_overquota(zfsvfs, DMU_USERUSED_OBJECT,
2459 new_uid)) {
2460 if (attrzp)
2461 vput(ZTOV(attrzp));
2462 err = SET_ERROR(EDQUOT);
2463 goto out2;
2467 if (mask & AT_GID) {
2468 new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
2469 cr, ZFS_GROUP, &fuidp);
2470 if (new_gid != zp->z_gid &&
2471 zfs_id_overquota(zfsvfs, DMU_GROUPUSED_OBJECT,
2472 new_gid)) {
2473 if (attrzp)
2474 vput(ZTOV(attrzp));
2475 err = SET_ERROR(EDQUOT);
2476 goto out2;
2480 if (projid != ZFS_INVALID_PROJID &&
2481 zfs_id_overquota(zfsvfs, DMU_PROJECTUSED_OBJECT, projid)) {
2482 if (attrzp)
2483 vput(ZTOV(attrzp));
2484 err = SET_ERROR(EDQUOT);
2485 goto out2;
2488 tx = dmu_tx_create(os);
2490 if (mask & AT_MODE) {
2491 uint64_t pmode = zp->z_mode;
2492 uint64_t acl_obj;
2493 new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);
2495 if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED &&
2496 !(zp->z_pflags & ZFS_ACL_TRIVIAL)) {
2497 err = SET_ERROR(EPERM);
2498 goto out;
2501 if ((err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)))
2502 goto out;
2504 if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) {
2506 * Are we upgrading ACL from old V0 format
2507 * to V1 format?
2509 if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
2510 zfs_znode_acl_version(zp) ==
2511 ZFS_ACL_VERSION_INITIAL) {
2512 dmu_tx_hold_free(tx, acl_obj, 0,
2513 DMU_OBJECT_END);
2514 dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
2515 0, aclp->z_acl_bytes);
2516 } else {
2517 dmu_tx_hold_write(tx, acl_obj, 0,
2518 aclp->z_acl_bytes);
2520 } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
2521 dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
2522 0, aclp->z_acl_bytes);
2524 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
2525 } else {
2526 if (((mask & AT_XVATTR) &&
2527 XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) ||
2528 (projid != ZFS_INVALID_PROJID &&
2529 !(zp->z_pflags & ZFS_PROJID)))
2530 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
2531 else
2532 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
2535 if (attrzp) {
2536 dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE);
2539 fuid_dirtied = zfsvfs->z_fuid_dirty;
2540 if (fuid_dirtied)
2541 zfs_fuid_txhold(zfsvfs, tx);
2543 zfs_sa_upgrade_txholds(tx, zp);
2545 err = dmu_tx_assign(tx, TXG_WAIT);
2546 if (err)
2547 goto out;
2549 count = 0;
2551 * Set each attribute requested.
2552 * We group settings according to the locks they need to acquire.
2554 * Note: you cannot set ctime directly, although it will be
2555 * updated as a side-effect of calling this function.
2558 if (projid != ZFS_INVALID_PROJID && !(zp->z_pflags & ZFS_PROJID)) {
2560 * For the existed object that is upgraded from old system,
2561 * its on-disk layout has no slot for the project ID attribute.
2562 * But quota accounting logic needs to access related slots by
2563 * offset directly. So we need to adjust old objects' layout
2564 * to make the project ID to some unified and fixed offset.
2566 if (attrzp)
2567 err = sa_add_projid(attrzp->z_sa_hdl, tx, projid);
2568 if (err == 0)
2569 err = sa_add_projid(zp->z_sa_hdl, tx, projid);
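/*
 * sa_add_projid() presumably returns EEXIST when the layout already
 * carries a project ID slot; that case is treated as success below.
 */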
2571 if (unlikely(err == EEXIST))
2572 err = 0;
2573 else if (err != 0)
2574 goto out;
2575 else
2576 projid = ZFS_INVALID_PROJID;
2579 if (mask & (AT_UID|AT_GID|AT_MODE))
2580 mutex_enter(&zp->z_acl_lock);
2582 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
2583 &zp->z_pflags, sizeof (zp->z_pflags));
2585 if (attrzp) {
2586 if (mask & (AT_UID|AT_GID|AT_MODE))
2587 mutex_enter(&attrzp->z_acl_lock);
2588 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2589 SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,
2590 sizeof (attrzp->z_pflags));
2591 if (projid != ZFS_INVALID_PROJID) {
2592 attrzp->z_projid = projid;
2593 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2594 SA_ZPL_PROJID(zfsvfs), NULL, &attrzp->z_projid,
2595 sizeof (attrzp->z_projid));
2599 if (mask & (AT_UID|AT_GID)) {
2601 if (mask & AT_UID) {
2602 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
2603 &new_uid, sizeof (new_uid));
2604 zp->z_uid = new_uid;
2605 if (attrzp) {
2606 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2607 SA_ZPL_UID(zfsvfs), NULL, &new_uid,
2608 sizeof (new_uid));
2609 attrzp->z_uid = new_uid;
2613 if (mask & AT_GID) {
2614 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs),
2615 NULL, &new_gid, sizeof (new_gid));
2616 zp->z_gid = new_gid;
2617 if (attrzp) {
2618 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2619 SA_ZPL_GID(zfsvfs), NULL, &new_gid,
2620 sizeof (new_gid));
2621 attrzp->z_gid = new_gid;
2624 if (!(mask & AT_MODE)) {
2625 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs),
2626 NULL, &new_mode, sizeof (new_mode));
2627 new_mode = zp->z_mode;
2629 err = zfs_acl_chown_setattr(zp);
2630 ASSERT0(err);
2631 if (attrzp) {
2632 vn_seqc_write_begin(ZTOV(attrzp));
2633 err = zfs_acl_chown_setattr(attrzp);
2634 vn_seqc_write_end(ZTOV(attrzp));
2635 ASSERT0(err);
2639 if (mask & AT_MODE) {
2640 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
2641 &new_mode, sizeof (new_mode));
2642 zp->z_mode = new_mode;
2643 ASSERT3P(aclp, !=, NULL);
2644 err = zfs_aclset_common(zp, aclp, cr, tx);
2645 ASSERT0(err);
2646 if (zp->z_acl_cached)
2647 zfs_acl_free(zp->z_acl_cached);
2648 zp->z_acl_cached = aclp;
2649 aclp = NULL;
2653 if (mask & AT_ATIME) {
2654 ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime);
2655 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
2656 &zp->z_atime, sizeof (zp->z_atime));
2659 if (mask & AT_MTIME) {
2660 ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
2661 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
2662 mtime, sizeof (mtime));
2665 if (projid != ZFS_INVALID_PROJID) {
2666 zp->z_projid = projid;
2667 SA_ADD_BULK_ATTR(bulk, count,
2668 SA_ZPL_PROJID(zfsvfs), NULL, &zp->z_projid,
2669 sizeof (zp->z_projid));
2672 /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */
2673 if (mask & AT_SIZE && !(mask & AT_MTIME)) {
2674 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
2675 NULL, mtime, sizeof (mtime));
2676 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
2677 &ctime, sizeof (ctime));
2678 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
2679 } else if (mask != 0) {
2680 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
2681 &ctime, sizeof (ctime));
2682 zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime);
2683 if (attrzp) {
2684 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2685 SA_ZPL_CTIME(zfsvfs), NULL,
2686 &ctime, sizeof (ctime));
2687 zfs_tstamp_update_setup(attrzp, STATE_CHANGED,
2688 mtime, ctime);
2693 * Do this after setting the timestamps to prevent the timestamp
2694 * update from toggling the bit.
2697 if (xoap && (mask & AT_XVATTR)) {
2699 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME))
2700 xoap->xoa_createtime = vap->va_birthtime;
2702 * Restore the trimmed-off mask bits
2703 * so that the return masks can be set for the caller.
2706 if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) {
2707 XVA_SET_REQ(xvap, XAT_APPENDONLY);
2709 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) {
2710 XVA_SET_REQ(xvap, XAT_NOUNLINK);
2712 if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) {
2713 XVA_SET_REQ(xvap, XAT_IMMUTABLE);
2715 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) {
2716 XVA_SET_REQ(xvap, XAT_NODUMP);
2718 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) {
2719 XVA_SET_REQ(xvap, XAT_AV_MODIFIED);
2721 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) {
2722 XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
2724 if (XVA_ISSET_REQ(&tmpxvattr, XAT_PROJINHERIT)) {
2725 XVA_SET_REQ(xvap, XAT_PROJINHERIT);
2728 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
2729 ASSERT3S(vp->v_type, ==, VREG);
2731 zfs_xvattr_set(zp, xvap, tx);
2734 if (fuid_dirtied)
2735 zfs_fuid_sync(zfsvfs, tx);
2737 if (mask != 0)
2738 zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);
2740 if (mask & (AT_UID|AT_GID|AT_MODE))
2741 mutex_exit(&zp->z_acl_lock);
2743 if (attrzp) {
2744 if (mask & (AT_UID|AT_GID|AT_MODE))
2745 mutex_exit(&attrzp->z_acl_lock);
2747 out:
2748 if (err == 0 && attrzp) {
2749 err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk,
2750 xattr_count, tx);
2751 ASSERT0(err2);
2754 if (attrzp)
2755 vput(ZTOV(attrzp));
2757 if (aclp)
2758 zfs_acl_free(aclp);
2760 if (fuidp) {
2761 zfs_fuid_info_free(fuidp);
2762 fuidp = NULL;
2765 if (err) {
2766 dmu_tx_abort(tx);
2767 } else {
2768 err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
2769 dmu_tx_commit(tx);
2772 out2:
2773 if (os->os_sync == ZFS_SYNC_ALWAYS)
2774 zil_commit(zilog, 0);
2776 zfs_exit(zfsvfs, FTAG);
2777 return (err);
2781 * Look up the directory entries corresponding to the source and target
2782 * directory/name pairs.
2784 static int
2785 zfs_rename_relock_lookup(znode_t *sdzp, const struct componentname *scnp,
2786 znode_t **szpp, znode_t *tdzp, const struct componentname *tcnp,
2787 znode_t **tzpp)
2789 zfsvfs_t *zfsvfs;
2790 znode_t *szp, *tzp;
2791 int error;
2794 * Before using sdzp and tdzp we must ensure that they are live.
2795 * As a porting legacy from illumos we have two things to worry
2796 * about. One is typical for FreeBSD: that the vnode has not been
2797 * reclaimed (doomed). The other is that the znode is live.
2798 * The current code can invalidate the znode without acquiring the
2799 * corresponding vnode lock if the object represented by the znode
2800 * and vnode is no longer valid after a rollback or receive operation.
2801 * z_teardown_lock hidden behind zfs_enter and zfs_exit is the lock
2802 * that protects the znodes from the invalidation.
2804 zfsvfs = sdzp->z_zfsvfs;
2805 ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs);
2806 if ((error = zfs_enter_verify_zp(zfsvfs, sdzp, FTAG)) != 0)
2807 return (error);
2808 if ((error = zfs_verify_zp(tdzp)) != 0) {
2809 zfs_exit(zfsvfs, FTAG);
2810 return (error);
2814 * Re-resolve svp to be certain it still exists and fetch the
2815 * correct vnode.
2817 error = zfs_dirent_lookup(sdzp, scnp->cn_nameptr, &szp, ZEXISTS);
2818 if (error != 0) {
2819 /* Source entry invalid or not there. */
2820 if ((scnp->cn_flags & ISDOTDOT) != 0 ||
2821 (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.'))
2822 error = SET_ERROR(EINVAL);
2823 goto out;
2825 *szpp = szp;
2828 * Re-resolve tvp, if it disappeared we just carry on.
2830 error = zfs_dirent_lookup(tdzp, tcnp->cn_nameptr, &tzp, 0);
2831 if (error != 0) {
2832 vrele(ZTOV(szp));
2833 if ((tcnp->cn_flags & ISDOTDOT) != 0)
2834 error = SET_ERROR(EINVAL);
2835 goto out;
2837 *tzpp = tzp;
2838 out:
2839 zfs_exit(zfsvfs, FTAG);
2840 return (error);
2844 * We acquire all but the sdvp lock using non-blocking acquisitions. If we
2845 * fail to acquire any lock in the path we will drop all held locks,
2846 * acquire the new lock in a blocking fashion, and then release it and
2847 * restart the rename. This acquire/release step ensures that we do not
2848 * spin on a lock waiting for release. On error release all vnode locks
2849 * and decrement references the way tmpfs_rename() would do.
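/*
 * Example: if tdvp is already locked by another thread, the LK_NOWAIT
 * attempt below returns EBUSY; we then drop sdvp, take tdvp with a
 * blocking vn_lock(), immediately release it, and jump back to
 * "relock" to retry the whole sequence.
 */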
2851 static int
2852 zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp,
2853 struct vnode *tdvp, struct vnode **tvpp,
2854 const struct componentname *scnp, const struct componentname *tcnp)
2856 struct vnode *nvp, *svp, *tvp;
2857 znode_t *sdzp, *tdzp, *szp, *tzp;
2858 int error;
2860 VOP_UNLOCK(tdvp);
2861 if (*tvpp != NULL && *tvpp != tdvp)
2862 VOP_UNLOCK(*tvpp);
2864 relock:
2865 error = vn_lock(sdvp, LK_EXCLUSIVE);
2866 if (error)
2867 goto out;
2868 error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT);
2869 if (error != 0) {
2870 VOP_UNLOCK(sdvp);
2871 if (error != EBUSY)
2872 goto out;
2873 error = vn_lock(tdvp, LK_EXCLUSIVE);
2874 if (error)
2875 goto out;
2876 VOP_UNLOCK(tdvp);
2877 goto relock;
2879 tdzp = VTOZ(tdvp);
2880 sdzp = VTOZ(sdvp);
2882 error = zfs_rename_relock_lookup(sdzp, scnp, &szp, tdzp, tcnp, &tzp);
2883 if (error != 0) {
2884 VOP_UNLOCK(sdvp);
2885 VOP_UNLOCK(tdvp);
2886 goto out;
2888 svp = ZTOV(szp);
2889 tvp = tzp != NULL ? ZTOV(tzp) : NULL;
2892 * Now try to acquire the locks on svp and tvp.
2894 nvp = svp;
2895 error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
2896 if (error != 0) {
2897 VOP_UNLOCK(sdvp);
2898 VOP_UNLOCK(tdvp);
2899 if (tvp != NULL)
2900 vrele(tvp);
2901 if (error != EBUSY) {
2902 vrele(nvp);
2903 goto out;
2905 error = vn_lock(nvp, LK_EXCLUSIVE);
2906 if (error != 0) {
2907 vrele(nvp);
2908 goto out;
2910 VOP_UNLOCK(nvp);
2912 * Concurrent rename race.
2913 * XXX ?
2915 if (nvp == tdvp) {
2916 vrele(nvp);
2917 error = SET_ERROR(EINVAL);
2918 goto out;
2920 vrele(*svpp);
2921 *svpp = nvp;
2922 goto relock;
2924 vrele(*svpp);
2925 *svpp = nvp;
2927 if (*tvpp != NULL)
2928 vrele(*tvpp);
2929 *tvpp = NULL;
2930 if (tvp != NULL) {
2931 nvp = tvp;
2932 error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
2933 if (error != 0) {
2934 VOP_UNLOCK(sdvp);
2935 VOP_UNLOCK(tdvp);
2936 VOP_UNLOCK(*svpp);
2937 if (error != EBUSY) {
2938 vrele(nvp);
2939 goto out;
2941 error = vn_lock(nvp, LK_EXCLUSIVE);
2942 if (error != 0) {
2943 vrele(nvp);
2944 goto out;
2946 vput(nvp);
2947 goto relock;
2949 *tvpp = nvp;
2952 return (0);
2954 out:
2955 return (error);
2959 * Note that we must use VRELE_ASYNC in this function as it walks
2960 * up the directory tree and vrele may need to acquire an exclusive
2961 * lock if a last reference to a vnode is dropped.
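/*
 * Example: renaming /usr/a/b into /usr/a/b/c/d is rejected because
 * walking the parent chain up from the target directory reaches szp.
 */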
2963 static int
2964 zfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp)
2966 zfsvfs_t *zfsvfs;
2967 znode_t *zp, *zp1;
2968 uint64_t parent;
2969 int error;
2971 zfsvfs = tdzp->z_zfsvfs;
2972 if (tdzp == szp)
2973 return (SET_ERROR(EINVAL));
2974 if (tdzp == sdzp)
2975 return (0);
2976 if (tdzp->z_id == zfsvfs->z_root)
2977 return (0);
2978 zp = tdzp;
2979 for (;;) {
2980 ASSERT(!zp->z_unlinked);
2981 if ((error = sa_lookup(zp->z_sa_hdl,
2982 SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
2983 break;
2985 if (parent == szp->z_id) {
2986 error = SET_ERROR(EINVAL);
2987 break;
2989 if (parent == zfsvfs->z_root)
2990 break;
2991 if (parent == sdzp->z_id)
2992 break;
2994 error = zfs_zget(zfsvfs, parent, &zp1);
2995 if (error != 0)
2996 break;
2998 if (zp != tdzp)
2999 VN_RELE_ASYNC(ZTOV(zp),
3000 dsl_pool_zrele_taskq(
3001 dmu_objset_pool(zfsvfs->z_os)));
3002 zp = zp1;
3005 if (error == ENOTDIR)
3006 panic("checkpath: .. not a directory\n");
3007 if (zp != tdzp)
3008 VN_RELE_ASYNC(ZTOV(zp),
3009 dsl_pool_zrele_taskq(dmu_objset_pool(zfsvfs->z_os)));
3010 return (error);
3013 static int
3014 zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
3015 vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
3016 cred_t *cr);
3019 * Move an entry from the provided source directory to the target
3020 * directory. Change the entry name as indicated.
3022 * IN: sdvp - Source directory containing the "old entry".
3023 * scnp - Old entry name.
3024 * tdvp - Target directory to contain the "new entry".
3025 * tcnp - New entry name.
3026 * cr - credentials of caller.
3027 * INOUT: svpp - Source file
3028 * tvpp - Target file, may point to NULL initially
3030 * RETURN: 0 on success, error code on failure.
3032 * Timestamps:
3033 * sdvp,tdvp - ctime|mtime updated
3035 static int
3036 zfs_do_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
3037 vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
3038 cred_t *cr)
3040 int error;
3042 ASSERT_VOP_ELOCKED(tdvp, __func__);
3043 if (*tvpp != NULL)
3044 ASSERT_VOP_ELOCKED(*tvpp, __func__);
3046 /* Reject renames across filesystems. */
3047 if ((*svpp)->v_mount != tdvp->v_mount ||
3048 ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) {
3049 error = SET_ERROR(EXDEV);
3050 goto out;
3053 if (zfsctl_is_node(tdvp)) {
3054 error = SET_ERROR(EXDEV);
3055 goto out;
3059 * Lock all four vnodes to ensure safety and semantics of renaming.
3061 error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp);
3062 if (error != 0) {
3063 /* no vnodes are locked in the case of error here */
3064 return (error);
3067 error = zfs_do_rename_impl(sdvp, svpp, scnp, tdvp, tvpp, tcnp, cr);
3068 VOP_UNLOCK(sdvp);
3069 VOP_UNLOCK(*svpp);
3070 out:
3071 if (*tvpp != NULL)
3072 VOP_UNLOCK(*tvpp);
3073 if (tdvp != *tvpp)
3074 VOP_UNLOCK(tdvp);
3076 return (error);
3079 static int
3080 zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
3081 vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
3082 cred_t *cr)
3084 dmu_tx_t *tx;
3085 zfsvfs_t *zfsvfs;
3086 zilog_t *zilog;
3087 znode_t *tdzp, *sdzp, *tzp, *szp;
3088 const char *snm = scnp->cn_nameptr;
3089 const char *tnm = tcnp->cn_nameptr;
3090 int error;
3092 tdzp = VTOZ(tdvp);
3093 sdzp = VTOZ(sdvp);
3094 zfsvfs = tdzp->z_zfsvfs;
3096 if ((error = zfs_enter_verify_zp(zfsvfs, tdzp, FTAG)) != 0)
3097 return (error);
3098 if ((error = zfs_verify_zp(sdzp)) != 0) {
3099 zfs_exit(zfsvfs, FTAG);
3100 return (error);
3102 zilog = zfsvfs->z_log;
3104 if (zfsvfs->z_utf8 && u8_validate(tnm,
3105 strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3106 error = SET_ERROR(EILSEQ);
3107 goto out;
3110 /* If source and target are the same file, there is nothing to do. */
3111 if ((*svpp) == (*tvpp)) {
3112 error = 0;
3113 goto out;
3116 if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) ||
3117 ((*tvpp) != NULL && (*tvpp)->v_type == VDIR &&
3118 (*tvpp)->v_mountedhere != NULL)) {
3119 error = SET_ERROR(EXDEV);
3120 goto out;
3123 szp = VTOZ(*svpp);
3124 if ((error = zfs_verify_zp(szp)) != 0) {
3125 zfs_exit(zfsvfs, FTAG);
3126 return (error);
3128 tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp);
3129 if (tzp != NULL) {
3130 if ((error = zfs_verify_zp(tzp)) != 0) {
3131 zfs_exit(zfsvfs, FTAG);
3132 return (error);
3137 * This is to prevent the creation of links into attribute space
3138 * by renaming a linked file into or out of an attribute directory.
3139 * See the comment in zfs_link() for why this is considered bad.
3141 if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
3142 error = SET_ERROR(EINVAL);
3143 goto out;
3147 * If project inheritance is in use, i.e. the directory has
3148 * ZFS_PROJINHERIT set, then its descendant directories will inherit
3149 * not only the project ID, but also the ZFS_PROJINHERIT flag. In
3150 * that case, we only allow renames into our tree when the project
3151 * IDs are the same.
3153 if (tdzp->z_pflags & ZFS_PROJINHERIT &&
3154 tdzp->z_projid != szp->z_projid) {
3155 error = SET_ERROR(EXDEV);
3156 goto out;
3160 * Must have write access at the source to remove the old entry
3161 * and write access at the target to create the new entry.
3162 * Note that if target and source are the same, this can be
3163 * done in a single check.
3165 if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr, NULL)))
3166 goto out;
3168 if ((*svpp)->v_type == VDIR) {
3170 * Avoid ".", "..", and aliases of "." for obvious reasons.
3172 if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') ||
3173 sdzp == szp ||
3174 (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
3175 error = EINVAL;
3176 goto out;
3180 * Check to make sure rename is valid.
3181 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
3183 if ((error = zfs_rename_check(szp, sdzp, tdzp)))
3184 goto out;
3188 * Does target exist?
3190 if (tzp) {
3192 * Source and target must be the same type.
3194 if ((*svpp)->v_type == VDIR) {
3195 if ((*tvpp)->v_type != VDIR) {
3196 error = SET_ERROR(ENOTDIR);
3197 goto out;
3198 } else {
3199 cache_purge(tdvp);
3200 if (sdvp != tdvp)
3201 cache_purge(sdvp);
3203 } else {
3204 if ((*tvpp)->v_type == VDIR) {
3205 error = SET_ERROR(EISDIR);
3206 goto out;
3211 vn_seqc_write_begin(*svpp);
3212 vn_seqc_write_begin(sdvp);
3213 if (*tvpp != NULL)
3214 vn_seqc_write_begin(*tvpp);
3215 if (tdvp != *tvpp)
3216 vn_seqc_write_begin(tdvp);
3218 vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct);
3219 if (tzp)
3220 vnevent_rename_dest(*tvpp, tdvp, tnm, ct);
3223 * notify the target directory if it is not the same
3224 * as source directory.
3226 if (tdvp != sdvp) {
3227 vnevent_rename_dest_dir(tdvp, ct);
3230 tx = dmu_tx_create(zfsvfs->z_os);
3231 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
3232 dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
3233 dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
3234 dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
3235 if (sdzp != tdzp) {
3236 dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
3237 zfs_sa_upgrade_txholds(tx, tdzp);
3239 if (tzp) {
3240 dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
3241 zfs_sa_upgrade_txholds(tx, tzp);
3244 zfs_sa_upgrade_txholds(tx, szp);
3245 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
3246 error = dmu_tx_assign(tx, TXG_WAIT);
3247 if (error) {
3248 dmu_tx_abort(tx);
3249 goto out_seq;
3252 if (tzp) /* Attempt to remove the existing target */
3253 error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL);
3255 if (error == 0) {
3256 error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING);
3257 if (error == 0) {
3258 szp->z_pflags |= ZFS_AV_MODIFIED;
3260 error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
3261 (void *)&szp->z_pflags, sizeof (uint64_t), tx);
3262 ASSERT0(error);
3264 error = zfs_link_destroy(sdzp, snm, szp, tx, ZRENAMING,
3265 NULL);
3266 if (error == 0) {
3267 zfs_log_rename(zilog, tx, TX_RENAME, sdzp,
3268 snm, tdzp, tnm, szp);
3269 } else {
3271 * At this point, we have successfully created
3272 * the target name, but have failed to remove
3273 * the source name. Since the create was done
3274 * with the ZRENAMING flag, there are
3275 * complications; for one, the link count is
3276 * wrong. The easiest way to deal with this
3277 * is to remove the newly created target, and
3278 * return the original error. This must
3279 * succeed; fortunately, it is very unlikely to
3280 * fail, since we just created it.
3282 VERIFY0(zfs_link_destroy(tdzp, tnm, szp, tx,
3283 ZRENAMING, NULL));
3286 if (error == 0) {
3287 cache_vop_rename(sdvp, *svpp, tdvp, *tvpp, scnp, tcnp);
3291 dmu_tx_commit(tx);
3293 out_seq:
3294 vn_seqc_write_end(*svpp);
3295 vn_seqc_write_end(sdvp);
3296 if (*tvpp != NULL)
3297 vn_seqc_write_end(*tvpp);
3298 if (tdvp != *tvpp)
3299 vn_seqc_write_end(tdvp);
3301 out:
3302 if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3303 zil_commit(zilog, 0);
3304 zfs_exit(zfsvfs, FTAG);
3306 return (error);
3310 zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname,
3311 cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap, zidmap_t *mnt_ns)
3313 struct componentname scn, tcn;
3314 vnode_t *sdvp, *tdvp;
3315 vnode_t *svp, *tvp;
3316 int error;
3317 svp = tvp = NULL;
3319 if (is_nametoolong(tdzp->z_zfsvfs, tname))
3320 return (SET_ERROR(ENAMETOOLONG));
3322 if (rflags != 0 || wo_vap != NULL)
3323 return (SET_ERROR(EINVAL));
3325 sdvp = ZTOV(sdzp);
3326 tdvp = ZTOV(tdzp);
3327 error = zfs_lookup_internal(sdzp, sname, &svp, &scn, DELETE);
3328 if (sdzp->z_zfsvfs->z_replay == B_FALSE)
3329 VOP_UNLOCK(sdvp);
3330 if (error != 0)
3331 goto fail;
3332 VOP_UNLOCK(svp);
3334 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
3335 error = zfs_lookup_internal(tdzp, tname, &tvp, &tcn, RENAME);
3336 if (error == EJUSTRETURN)
3337 tvp = NULL;
3338 else if (error != 0) {
3339 VOP_UNLOCK(tdvp);
3340 goto fail;
3343 error = zfs_do_rename(sdvp, &svp, &scn, tdvp, &tvp, &tcn, cr);
3344 fail:
3345 if (svp != NULL)
3346 vrele(svp);
3347 if (tvp != NULL)
3348 vrele(tvp);
3350 return (error);
3354 * Insert the indicated symbolic reference entry into the directory.
3356 * IN: dvp - Directory to contain new symbolic link.
3357 * link - Name for new symlink entry.
3358 * vap - Attributes of new entry.
3359 * cr - credentials of caller.
3360 * ct - caller context
3361 * flags - case flags
3362 * mnt_ns - Unused on FreeBSD
3364 * RETURN: 0 on success, error code on failure.
3366 * Timestamps:
3367 * dvp - ctime|mtime updated
3370 zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
3371 const char *link, znode_t **zpp, cred_t *cr, int flags, zidmap_t *mnt_ns)
3373 (void) flags;
3374 znode_t *zp;
3375 dmu_tx_t *tx;
3376 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
3377 zilog_t *zilog;
3378 uint64_t len = strlen(link);
3379 int error;
3380 zfs_acl_ids_t acl_ids;
3381 boolean_t fuid_dirtied;
3382 uint64_t txtype = TX_SYMLINK;
3384 ASSERT3S(vap->va_type, ==, VLNK);
3386 if (is_nametoolong(zfsvfs, name))
3387 return (SET_ERROR(ENAMETOOLONG));
3389 if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
3390 return (error);
3391 zilog = zfsvfs->z_log;
3393 if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
3394 NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3395 zfs_exit(zfsvfs, FTAG);
3396 return (SET_ERROR(EILSEQ));
3399 if (len > MAXPATHLEN) {
3400 zfs_exit(zfsvfs, FTAG);
3401 return (SET_ERROR(ENAMETOOLONG));
3404 if ((error = zfs_acl_ids_create(dzp, 0,
3405 vap, cr, NULL, &acl_ids, NULL)) != 0) {
3406 zfs_exit(zfsvfs, FTAG);
3407 return (error);
3411 * Attempt to lock directory; fail if entry already exists.
3413 error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
3414 if (error) {
3415 zfs_acl_ids_free(&acl_ids);
3416 zfs_exit(zfsvfs, FTAG);
3417 return (error);
3420 if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns))) {
3421 zfs_acl_ids_free(&acl_ids);
3422 zfs_exit(zfsvfs, FTAG);
3423 return (error);
3426 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids,
3427 0 /* projid */)) {
3428 zfs_acl_ids_free(&acl_ids);
3429 zfs_exit(zfsvfs, FTAG);
3430 return (SET_ERROR(EDQUOT));
3433 getnewvnode_reserve();
3434 tx = dmu_tx_create(zfsvfs->z_os);
3435 fuid_dirtied = zfsvfs->z_fuid_dirty;
3436 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
3437 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
3438 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
3439 ZFS_SA_BASE_ATTR_SIZE + len);
3440 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
3441 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
3442 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
3443 acl_ids.z_aclp->z_acl_bytes);
3445 if (fuid_dirtied)
3446 zfs_fuid_txhold(zfsvfs, tx);
3447 error = dmu_tx_assign(tx, TXG_WAIT);
3448 if (error) {
3449 zfs_acl_ids_free(&acl_ids);
3450 dmu_tx_abort(tx);
3451 getnewvnode_drop_reserve();
3452 zfs_exit(zfsvfs, FTAG);
3453 return (error);
3457 * Create a new object for the symlink.
3458 * For version 4 ZPL datasets the symlink will be stored as an SA attribute.
3460 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
3462 if (fuid_dirtied)
3463 zfs_fuid_sync(zfsvfs, tx);
3465 if (zp->z_is_sa)
3466 error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
3467 __DECONST(void *, link), len, tx);
3468 else
3469 zfs_sa_symlink(zp, __DECONST(char *, link), len, tx);
3471 zp->z_size = len;
3472 (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
3473 &zp->z_size, sizeof (zp->z_size), tx);
3475 * Insert the new object into the directory.
3477 error = zfs_link_create(dzp, name, zp, tx, ZNEW);
3478 if (error != 0) {
3479 zfs_znode_delete(zp, tx);
3480 VOP_UNLOCK(ZTOV(zp));
3481 zrele(zp);
3482 } else {
3483 zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
3486 zfs_acl_ids_free(&acl_ids);
3488 dmu_tx_commit(tx);
3490 getnewvnode_drop_reserve();
3492 if (error == 0) {
3493 *zpp = zp;
3495 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3496 zil_commit(zilog, 0);
3499 zfs_exit(zfsvfs, FTAG);
3500 return (error);
3504 * Return, in the buffer contained in the provided uio structure,
3505 * the symbolic path referred to by vp.
3507 * IN: vp - vnode of symbolic link.
3508 * uio - structure to contain the link path.
3509 * cr - credentials of caller.
3510 * ct - caller context
3512 * OUT: uio - structure containing the link path.
3514 * RETURN: 0 on success, error code on failure.
3516 * Timestamps:
3517 * vp - atime updated
3519 static int
3520 zfs_readlink(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, caller_context_t *ct)
3522 (void) cr, (void) ct;
3523 znode_t *zp = VTOZ(vp);
3524 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3525 int error;
3527 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
3528 return (error);
3530 if (zp->z_is_sa)
3531 error = sa_lookup_uio(zp->z_sa_hdl,
3532 SA_ZPL_SYMLINK(zfsvfs), uio);
3533 else
3534 error = zfs_sa_readlink(zp, uio);
3536 ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
3538 zfs_exit(zfsvfs, FTAG);
3539 return (error);
3543 * Insert a new entry into directory tdvp referencing svp.
3545 * IN: tdvp - Directory to contain new entry.
3546 * svp - vnode of new entry.
3547 * name - name of new entry.
3548 * cr - credentials of caller.
3550 * RETURN: 0 on success, error code on failure.
3552 * Timestamps:
3553 * tdvp - ctime|mtime updated
3554 * svp - ctime updated
3557 zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr,
3558 int flags)
3560 (void) flags;
3561 znode_t *tzp;
3562 zfsvfs_t *zfsvfs = tdzp->z_zfsvfs;
3563 zilog_t *zilog;
3564 dmu_tx_t *tx;
3565 int error;
3566 uint64_t parent;
3567 uid_t owner;
3569 ASSERT3S(ZTOV(tdzp)->v_type, ==, VDIR);
3571 if (is_nametoolong(zfsvfs, name))
3572 return (SET_ERROR(ENAMETOOLONG));
3574 if ((error = zfs_enter_verify_zp(zfsvfs, tdzp, FTAG)) != 0)
3575 return (error);
3576 zilog = zfsvfs->z_log;
3579 * POSIX dictates that we return EPERM here.
3580 * Better choices include ENOTSUP or EISDIR.
3582 if (ZTOV(szp)->v_type == VDIR) {
3583 zfs_exit(zfsvfs, FTAG);
3584 return (SET_ERROR(EPERM));
3587 if ((error = zfs_verify_zp(szp)) != 0) {
3588 zfs_exit(zfsvfs, FTAG);
3589 return (error);
3593 * If project inheritance is in use, i.e. the directory has
3594 * ZFS_PROJINHERIT set, then its descendant directories will inherit
3595 * not only the project ID, but also the ZFS_PROJINHERIT flag. In
3596 * that case, we only allow hard link creation in our tree when the
3597 * project IDs are the same.
3599 if (tdzp->z_pflags & ZFS_PROJINHERIT &&
3600 tdzp->z_projid != szp->z_projid) {
3601 zfs_exit(zfsvfs, FTAG);
3602 return (SET_ERROR(EXDEV));
3605 if (szp->z_pflags & (ZFS_APPENDONLY |
3606 ZFS_IMMUTABLE | ZFS_READONLY)) {
3607 zfs_exit(zfsvfs, FTAG);
3608 return (SET_ERROR(EPERM));
3611 /* Prevent links to .zfs/shares files */
3613 if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
3614 &parent, sizeof (uint64_t))) != 0) {
3615 zfs_exit(zfsvfs, FTAG);
3616 return (error);
3618 if (parent == zfsvfs->z_shares_dir) {
3619 zfs_exit(zfsvfs, FTAG);
3620 return (SET_ERROR(EPERM));
3623 if (zfsvfs->z_utf8 && u8_validate(name,
3624 strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3625 zfs_exit(zfsvfs, FTAG);
3626 return (SET_ERROR(EILSEQ));
3630 * We do not support links between attributes and non-attributes
3631 * because of the potential security risk of creating links
3632 * into "normal" file space in order to circumvent restrictions
3633 * imposed in attribute space.
3635 if ((szp->z_pflags & ZFS_XATTR) != (tdzp->z_pflags & ZFS_XATTR)) {
3636 zfs_exit(zfsvfs, FTAG);
3637 return (SET_ERROR(EINVAL));
3641 owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER);
3642 if (owner != crgetuid(cr) && secpolicy_basic_link(ZTOV(szp), cr) != 0) {
3643 zfs_exit(zfsvfs, FTAG);
3644 return (SET_ERROR(EPERM));
3647 if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr, NULL))) {
3648 zfs_exit(zfsvfs, FTAG);
3649 return (error);
3653 * Attempt to lock directory; fail if entry already exists.
3655 error = zfs_dirent_lookup(tdzp, name, &tzp, ZNEW);
3656 if (error) {
3657 zfs_exit(zfsvfs, FTAG);
3658 return (error);
3661 tx = dmu_tx_create(zfsvfs->z_os);
3662 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
3663 dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, name);
3664 zfs_sa_upgrade_txholds(tx, szp);
3665 zfs_sa_upgrade_txholds(tx, tdzp);
3666 error = dmu_tx_assign(tx, TXG_WAIT);
3667 if (error) {
3668 dmu_tx_abort(tx);
3669 zfs_exit(zfsvfs, FTAG);
3670 return (error);
3673 error = zfs_link_create(tdzp, name, szp, tx, 0);
3675 if (error == 0) {
3676 uint64_t txtype = TX_LINK;
3677 zfs_log_link(zilog, tx, txtype, tdzp, szp, name);
3680 dmu_tx_commit(tx);
3682 if (error == 0) {
3683 vnevent_link(ZTOV(szp), ct);
3686 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3687 zil_commit(zilog, 0);
3689 zfs_exit(zfsvfs, FTAG);
3690 return (error);
3694 * Free or allocate space in a file. Currently, this function only
3695 * supports the `F_FREESP' command. However, this command is somewhat
3696 * misnamed, as its functionality includes the ability to allocate as
3697 * well as free space.
3699 * IN: ip - inode of file to free data in.
3700 * cmd - action to take (only F_FREESP supported).
3701 * bfp - section of file to free/alloc.
3702 * flag - current file open mode flags.
3703 * offset - current file offset.
3704 * cr - credentials of caller.
3706 * RETURN: 0 on success, error code on failure.
3708 * Timestamps:
3709 * ip - ctime|mtime updated
3712 zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
3713 offset_t offset, cred_t *cr)
3715 (void) offset;
3716 zfsvfs_t *zfsvfs = ZTOZSB(zp);
3717 uint64_t off, len;
3718 int error;
3720 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
3721 return (error);
3723 if (cmd != F_FREESP) {
3724 zfs_exit(zfsvfs, FTAG);
3725 return (SET_ERROR(EINVAL));
3729 * Callers might not be able to detect properly that we are read-only,
3730 * so check it explicitly here.
3732 if (zfs_is_readonly(zfsvfs)) {
3733 zfs_exit(zfsvfs, FTAG);
3734 return (SET_ERROR(EROFS));
3737 if (bfp->l_len < 0) {
3738 zfs_exit(zfsvfs, FTAG);
3739 return (SET_ERROR(EINVAL));
3743 * Permissions aren't checked on Solaris because on this OS
3744 * zfs_space() can only be called with an opened file handle.
3745 * On Linux we can get here through truncate_range() which
3746 * operates directly on inodes, so we need to check access rights.
3748 if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr, NULL))) {
3749 zfs_exit(zfsvfs, FTAG);
3750 return (error);
3753 off = bfp->l_start;
3754 len = bfp->l_len; /* 0 means from off to end of file */
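/*
 * A zero length means truncate (or extend) the file at l_start; a
 * non-zero length frees the byte range starting at l_start (see
 * zfs_freesp()).
 */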
3756 error = zfs_freesp(zp, off, len, flag, TRUE);
3758 zfs_exit(zfsvfs, FTAG);
3759 return (error);
3762 static void
3763 zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
3765 (void) cr, (void) ct;
3766 znode_t *zp = VTOZ(vp);
3767 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3768 int error;
3770 ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
3771 if (zp->z_sa_hdl == NULL) {
3773 * The fs has been unmounted, or we did a
3774 * suspend/resume and this file no longer exists.
3776 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
3777 vrecycle(vp);
3778 return;
3781 if (zp->z_unlinked) {
3783 * Fast path to recycle a vnode of a removed file.
3785 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
3786 vrecycle(vp);
3787 return;
3790 if (zp->z_atime_dirty && zp->z_unlinked == 0) {
3791 dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
3793 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
3794 zfs_sa_upgrade_txholds(tx, zp);
3795 error = dmu_tx_assign(tx, TXG_WAIT);
3796 if (error) {
3797 dmu_tx_abort(tx);
3798 } else {
3799 (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
3800 (void *)&zp->z_atime, sizeof (zp->z_atime), tx);
3801 zp->z_atime_dirty = 0;
3802 dmu_tx_commit(tx);
3805 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
3809 _Static_assert(sizeof (struct zfid_short) <= sizeof (struct fid),
3810 "struct zfid_short bigger than struct fid");
3811 _Static_assert(sizeof (struct zfid_long) <= sizeof (struct fid),
3812 "struct zfid_long bigger than struct fid");
3814 static int
3815 zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
3817 (void) ct;
3818 znode_t *zp = VTOZ(vp);
3819 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3820 uint32_t gen;
3821 uint64_t gen64;
3822 uint64_t object = zp->z_id;
3823 zfid_short_t *zfid;
3824 int size, i, error;
3826 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
3827 return (error);
3829 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
3830 &gen64, sizeof (uint64_t))) != 0) {
3831 zfs_exit(zfsvfs, FTAG);
3832 return (error);
3835 gen = (uint32_t)gen64;
3837 size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN;
3838 fidp->fid_len = size;
3840 zfid = (zfid_short_t *)fidp;
3842 zfid->zf_len = size;
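/*
 * FID layout (see zfid_short_t/zfid_long_t in sys/zfs_znode.h): a
 * 6-byte object number plus a 4-byte generation, and, for datasets
 * other than the parent filesystem, a 6-byte objset id plus a 2-byte
 * objset generation, all encoded byte-by-byte little-endian below.
 */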
3844 for (i = 0; i < sizeof (zfid->zf_object); i++)
3845 zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
3847 /* Must have a non-zero generation number to distinguish from .zfs */
3848 if (gen == 0)
3849 gen = 1;
3850 for (i = 0; i < sizeof (zfid->zf_gen); i++)
3851 zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
3853 if (size == LONG_FID_LEN) {
3854 uint64_t objsetid = dmu_objset_id(zfsvfs->z_os);
3855 zfid_long_t *zlfid;
3857 zlfid = (zfid_long_t *)fidp;
3859 for (i = 0; i < sizeof (zlfid->zf_setid); i++)
3860 zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));
3862 /* XXX - this should be the generation number for the objset */
3863 for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
3864 zlfid->zf_setgen[i] = 0;
3867 zfs_exit(zfsvfs, FTAG);
3868 return (0);
3871 static int
3872 zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
3873 caller_context_t *ct)
3875 znode_t *zp;
3876 zfsvfs_t *zfsvfs;
3877 int error;
3879 switch (cmd) {
3880 case _PC_LINK_MAX:
3881 *valp = MIN(LONG_MAX, ZFS_LINK_MAX);
3882 return (0);
3884 case _PC_FILESIZEBITS:
3885 *valp = 64;
3886 return (0);
3887 case _PC_MIN_HOLE_SIZE:
3888 *valp = (int)SPA_MINBLOCKSIZE;
3889 return (0);
3890 case _PC_ACL_EXTENDED:
3891 #if 0 /* POSIX ACLs are not implemented for ZFS on FreeBSD yet. */
3892 zp = VTOZ(vp);
3893 zfsvfs = zp->z_zfsvfs;
3894 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
3895 return (error);
3896 *valp = zfsvfs->z_acl_type == ZFS_ACLTYPE_POSIX ? 1 : 0;
3897 zfs_exit(zfsvfs, FTAG);
3898 #else
3899 *valp = 0;
3900 #endif
3901 return (0);
3903 case _PC_ACL_NFS4:
3904 zp = VTOZ(vp);
3905 zfsvfs = zp->z_zfsvfs;
3906 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
3907 return (error);
3908 *valp = zfsvfs->z_acl_type == ZFS_ACLTYPE_NFSV4 ? 1 : 0;
3909 zfs_exit(zfsvfs, FTAG);
3910 return (0);
3912 case _PC_ACL_PATH_MAX:
3913 *valp = ACL_MAX_ENTRIES;
3914 return (0);
3916 default:
3917 return (EOPNOTSUPP);
3921 static int
3922 zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
3923 int *rahead)
3925 znode_t *zp = VTOZ(vp);
3926 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3927 zfs_locked_range_t *lr;
3928 vm_object_t object;
3929 off_t start, end, obj_size;
3930 uint_t blksz;
3931 int pgsin_b, pgsin_a;
3932 int error;
3934 if (zfs_enter_verify_zp(zfsvfs, zp, FTAG) != 0)
3935 return (zfs_vm_pagerret_error);
3937 object = ma[0]->object;
3938 start = IDX_TO_OFF(ma[0]->pindex);
3939 end = IDX_TO_OFF(ma[count - 1]->pindex + 1);
3942 * Lock a range covering all required and optional pages.
3943 * Note that we need to handle the case of the block size growing.
3945 for (;;) {
3946 uint64_t len;
3948 blksz = zp->z_blksz;
3949 len = roundup(end, blksz) - rounddown(start, blksz);
3951 lr = zfs_rangelock_tryenter(&zp->z_rangelock,
3952 rounddown(start, blksz), len, RL_READER);
3953 if (lr == NULL) {
3955 * Avoid a deadlock with update_pages(). We need to
3956 * hold the range lock when copying from the DMU, so
3957 * give up the busy lock to allow update_pages() to
3958 * proceed. We might need to allocate new pages, which
3959 * isn't quite right since this allocation isn't subject
3960 * to the page fault handler's OOM logic, but this is
3961 * the best we can do for now.
3963 for (int i = 0; i < count; i++) {
3964 ASSERT(vm_page_none_valid(ma[i]));
3965 vm_page_xunbusy(ma[i]);
3968 lr = zfs_rangelock_enter(&zp->z_rangelock,
3969 rounddown(start, blksz), len, RL_READER);
3971 zfs_vmobject_wlock(object);
3972 (void) vm_page_grab_pages(object, OFF_TO_IDX(start),
3973 VM_ALLOC_NORMAL | VM_ALLOC_WAITOK | VM_ALLOC_ZERO,
3974 ma, count);
3975 zfs_vmobject_wunlock(object);
3977 if (blksz == zp->z_blksz)
3978 break;
3979 zfs_rangelock_exit(lr);
3982 zfs_vmobject_wlock(object);
3983 obj_size = object->un_pager.vnp.vnp_size;
3984 zfs_vmobject_wunlock(object);
3985 if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) {
3986 zfs_rangelock_exit(lr);
3987 zfs_exit(zfsvfs, FTAG);
3988 return (zfs_vm_pagerret_bad);
3991 pgsin_b = 0;
3992 if (rbehind != NULL) {
3993 pgsin_b = OFF_TO_IDX(start - rounddown(start, blksz));
3994 pgsin_b = MIN(*rbehind, pgsin_b);
3997 pgsin_a = 0;
3998 if (rahead != NULL) {
3999 pgsin_a = OFF_TO_IDX(roundup(end, blksz) - end);
4000 if (end + IDX_TO_OFF(pgsin_a) >= obj_size)
4001 pgsin_a = OFF_TO_IDX(round_page(obj_size) - end);
4002 pgsin_a = MIN(*rahead, pgsin_a);
4006 * NB: we need to pass the exact byte size of the data that we expect
4007 * to read after accounting for the file size. This is required because
4008 * ZFS will panic if we request DMU to read beyond the end of the last
4009 * allocated block.
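/*
 * E.g. with obj_size = 10000 and end = 12288 (three 4 KiB pages),
 * only MIN(end, obj_size) - (end - PAGE_SIZE) = 1808 bytes are
 * requested from the DMU for the final page.
 */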
4011 for (int i = 0; i < count; i++) {
4012 int dummypgsin, count1, j, last_size;
4014 if (vm_page_any_valid(ma[i])) {
4015 ASSERT(vm_page_all_valid(ma[i]));
4016 continue;
4018 for (j = i + 1; j < count; j++) {
4019 if (vm_page_any_valid(ma[j])) {
4020 ASSERT(vm_page_all_valid(ma[j]));
4021 break;
4024 count1 = j - i;
4025 dummypgsin = 0;
4026 last_size = j == count ?
4027 MIN(end, obj_size) - (end - PAGE_SIZE) : PAGE_SIZE;
4028 error = dmu_read_pages(zfsvfs->z_os, zp->z_id, &ma[i], count1,
4029 i == 0 ? &pgsin_b : &dummypgsin,
4030 j == count ? &pgsin_a : &dummypgsin,
4031 last_size);
4032 if (error != 0)
4033 break;
4034 i += count1 - 1;
4037 zfs_rangelock_exit(lr);
4038 ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
4040 dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, count*PAGE_SIZE);
4042 zfs_exit(zfsvfs, FTAG);
4044 if (error != 0)
4045 return (zfs_vm_pagerret_error);
4047 VM_CNT_INC(v_vnodein);
4048 VM_CNT_ADD(v_vnodepgsin, count + pgsin_b + pgsin_a);
4049 if (rbehind != NULL)
4050 *rbehind = pgsin_b;
4051 if (rahead != NULL)
4052 *rahead = pgsin_a;
4053 return (zfs_vm_pagerret_ok);
4056 #ifndef _SYS_SYSPROTO_H_
4057 struct vop_getpages_args {
4058 struct vnode *a_vp;
4059 vm_page_t *a_m;
4060 int a_count;
4061 int *a_rbehind;
4062 int *a_rahead;
4064 #endif
4066 static int
4067 zfs_freebsd_getpages(struct vop_getpages_args *ap)
4070 return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind,
4071 ap->a_rahead));
4074 static int
4075 zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
4076 int *rtvals)
4078 znode_t *zp = VTOZ(vp);
4079 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4080 zfs_locked_range_t *lr;
4081 dmu_tx_t *tx;
4082 struct sf_buf *sf;
4083 vm_object_t object;
4084 vm_page_t m;
4085 caddr_t va;
4086 size_t tocopy;
4087 size_t lo_len;
4088 vm_ooffset_t lo_off;
4089 vm_ooffset_t off;
4090 uint_t blksz;
4091 int ncount;
4092 int pcount;
4093 int err;
4094 int i;
4096 object = vp->v_object;
4097 KASSERT(ma[0]->object == object, ("mismatching object"));
4098 KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length"));
4100 pcount = btoc(len);
4101 ncount = pcount;
4102 for (i = 0; i < pcount; i++)
4103 rtvals[i] = zfs_vm_pagerret_error;
4105 if (zfs_enter_verify_zp(zfsvfs, zp, FTAG) != 0)
4106 return (zfs_vm_pagerret_error);
4108 off = IDX_TO_OFF(ma[0]->pindex);
4109 blksz = zp->z_blksz;
4110 lo_off = rounddown(off, blksz);
4111 lo_len = roundup(len + (off - lo_off), blksz);
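/*
 * E.g. with a 128 KiB block size, off = 130 KiB and len = 8 KiB give
 * lo_off = 128 KiB and lo_len = 128 KiB, so the range lock covers the
 * whole block being dirtied.
 */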
4112 lr = zfs_rangelock_enter(&zp->z_rangelock, lo_off, lo_len, RL_WRITER);
4114 zfs_vmobject_wlock(object);
4115 if (len + off > object->un_pager.vnp.vnp_size) {
4116 if (object->un_pager.vnp.vnp_size > off) {
4117 int pgoff;
4119 len = object->un_pager.vnp.vnp_size - off;
4120 ncount = btoc(len);
4121 if ((pgoff = (int)len & PAGE_MASK) != 0) {
4123 * If the object is locked and the following
4124 * conditions hold, then the page's dirty
4125 * field cannot be concurrently changed by a
4126 * pmap operation.
4128 m = ma[ncount - 1];
4129 vm_page_assert_sbusied(m);
4130 KASSERT(!pmap_page_is_write_mapped(m),
4131 ("zfs_putpages: page %p is not read-only",
4132 m));
4133 vm_page_clear_dirty(m, pgoff, PAGE_SIZE -
4134 pgoff);
4136 } else {
4137 len = 0;
4138 ncount = 0;
4140 if (ncount < pcount) {
4141 for (i = ncount; i < pcount; i++) {
4142 rtvals[i] = zfs_vm_pagerret_bad;
4146 zfs_vmobject_wunlock(object);
4148 boolean_t commit = (flags & (zfs_vm_pagerput_sync |
4149 zfs_vm_pagerput_inval)) != 0 ||
4150 zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS;
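/*
 * When "commit" is set, zil_commit() is called at "out:" so the
 * page-out is on stable storage before returning; this happens for
 * synchronous or invalidating pager requests and for sync=always
 * datasets.
 */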
4152 if (ncount == 0)
4153 goto out;
4155 if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT, zp->z_uid) ||
4156 zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT, zp->z_gid) ||
4157 (zp->z_projid != ZFS_DEFAULT_PROJID &&
4158 zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT,
4159 zp->z_projid))) {
4160 goto out;
4163 tx = dmu_tx_create(zfsvfs->z_os);
4164 dmu_tx_hold_write(tx, zp->z_id, off, len);
4166 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
4167 zfs_sa_upgrade_txholds(tx, zp);
4168 err = dmu_tx_assign(tx, TXG_WAIT);
4169 if (err != 0) {
4170 dmu_tx_abort(tx);
4171 goto out;
4174 if (zp->z_blksz < PAGE_SIZE) {
4175 for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) {
4176 tocopy = len > PAGE_SIZE ? PAGE_SIZE : len;
4177 va = zfs_map_page(ma[i], &sf);
4178 dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx);
4179 zfs_unmap_page(sf);
4181 } else {
4182 err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx);
4185 if (err == 0) {
4186 uint64_t mtime[2], ctime[2];
4187 sa_bulk_attr_t bulk[3];
4188 int count = 0;
4190 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
4191 &mtime, 16);
4192 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
4193 &ctime, 16);
4194 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
4195 &zp->z_pflags, 8);
4196 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
4197 err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
4198 ASSERT0(err);
4200 * XXX we should be passing a callback to undirty
4201 * but that would make the locking messier
4203 zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off,
4204 len, commit, B_FALSE, NULL, NULL);
4206 zfs_vmobject_wlock(object);
4207 for (i = 0; i < ncount; i++) {
4208 rtvals[i] = zfs_vm_pagerret_ok;
4209 vm_page_undirty(ma[i]);
4211 zfs_vmobject_wunlock(object);
4212 VM_CNT_INC(v_vnodeout);
4213 VM_CNT_ADD(v_vnodepgsout, ncount);
4215 dmu_tx_commit(tx);
4217 out:
4218 zfs_rangelock_exit(lr);
4219 if (commit)
4220 zil_commit(zfsvfs->z_log, zp->z_id);
4222 dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, len);
4224 zfs_exit(zfsvfs, FTAG);
4225 return (rtvals[0]);
4228 #ifndef _SYS_SYSPROTO_H_
4229 struct vop_putpages_args {
4230 struct vnode *a_vp;
4231 vm_page_t *a_m;
4232 int a_count;
4233 int a_sync;
4234 int *a_rtvals;
4236 #endif
4238 static int
4239 zfs_freebsd_putpages(struct vop_putpages_args *ap)
4242 return (zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync,
4243 ap->a_rtvals));
4246 #ifndef _SYS_SYSPROTO_H_
4247 struct vop_bmap_args {
4248 struct vnode *a_vp;
4249 daddr_t a_bn;
4250 struct bufobj **a_bop;
4251 daddr_t *a_bnp;
4252 int *a_runp;
4253 int *a_runb;
4255 #endif
4257 static int
4258 zfs_freebsd_bmap(struct vop_bmap_args *ap)
4261 if (ap->a_bop != NULL)
4262 *ap->a_bop = &ap->a_vp->v_bufobj;
4263 if (ap->a_bnp != NULL)
4264 *ap->a_bnp = ap->a_bn;
4265 if (ap->a_runp != NULL)
4266 *ap->a_runp = 0;
4267 if (ap->a_runb != NULL)
4268 *ap->a_runb = 0;
4270 return (0);
4273 #ifndef _SYS_SYSPROTO_H_
4274 struct vop_open_args {
4275 struct vnode *a_vp;
4276 int a_mode;
4277 struct ucred *a_cred;
4278 struct thread *a_td;
4280 #endif
4282 static int
4283 zfs_freebsd_open(struct vop_open_args *ap)
4285 vnode_t *vp = ap->a_vp;
4286 znode_t *zp = VTOZ(vp);
4287 int error;
4289 error = zfs_open(&vp, ap->a_mode, ap->a_cred);
4290 if (error == 0)
4291 vnode_create_vobject(vp, zp->z_size, ap->a_td);
4292 return (error);
4295 #ifndef _SYS_SYSPROTO_H_
4296 struct vop_close_args {
4297 struct vnode *a_vp;
4298 int a_fflag;
4299 struct ucred *a_cred;
4300 struct thread *a_td;
4302 #endif
4304 static int
4305 zfs_freebsd_close(struct vop_close_args *ap)
4308 return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred));
4311 #ifndef _SYS_SYSPROTO_H_
4312 struct vop_ioctl_args {
4313 struct vnode *a_vp;
4314 ulong_t a_command;
4315 caddr_t a_data;
4316 int a_fflag;
4317 struct ucred *cred;
4318 struct thread *td;
4320 #endif
4322 static int
4323 zfs_freebsd_ioctl(struct vop_ioctl_args *ap)
4326 return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data,
4327 ap->a_fflag, ap->a_cred, NULL));
4330 static int
4331 ioflags(int ioflags)
4333 int flags = 0;
4335 if (ioflags & IO_APPEND)
4336 flags |= O_APPEND;
4337 if (ioflags & IO_NDELAY)
4338 flags |= O_NONBLOCK;
4339 if (ioflags & IO_DIRECT)
4340 flags |= O_DIRECT;
4341 if (ioflags & IO_SYNC)
4342 flags |= O_SYNC;
4344 return (flags);
4347 #ifndef _SYS_SYSPROTO_H_
4348 struct vop_read_args {
4349 struct vnode *a_vp;
4350 struct uio *a_uio;
4351 int a_ioflag;
4352 struct ucred *a_cred;
4354 #endif
4356 static int
4357 zfs_freebsd_read(struct vop_read_args *ap)
4359 zfs_uio_t uio;
4360 int error = 0;
4361 zfs_uio_init(&uio, ap->a_uio);
4362 error = zfs_read(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag),
4363 ap->a_cred);
4365 * XXX We occasionally get an EFAULT for Direct I/O reads on
4366 * FreeBSD 13. This still needs to be resolved. The EFAULT comes
4367 * from:
4368 * zfs_uio_get__dio_pages_alloc() ->
4369 * zfs_uio_get_dio_pages_impl() ->
4370 * zfs_uio_iov_step() ->
4371 * zfs_uio_get_user_pages().
4372 * We return EFAULT from zfs_uio_iov_step(). When a Direct I/O
4373 * read fails to map in the user pages (returning EFAULT) the
4374 * Direct I/O request is broken up into two separate I/O requests
4375 * and issued separately using Direct I/O.
4377 #ifdef ZFS_DEBUG
4378 if (error == EFAULT && uio.uio_extflg & UIO_DIRECT) {
4379 #if 0
4380 printf("%s(%d): Direct I/O read returning EFAULT "
4381 "uio = %p, zfs_uio_offset(uio) = %lu "
4382 "zfs_uio_resid(uio) = %lu\n",
4383 __FUNCTION__, __LINE__, &uio, zfs_uio_offset(&uio),
4384 zfs_uio_resid(&uio));
4385 #endif
4388 #endif
4389 return (error);
4392 #ifndef _SYS_SYSPROTO_H_
4393 struct vop_write_args {
4394 struct vnode *a_vp;
4395 struct uio *a_uio;
4396 int a_ioflag;
4397 struct ucred *a_cred;
4399 #endif
4401 static int
4402 zfs_freebsd_write(struct vop_write_args *ap)
4404 zfs_uio_t uio;
4405 zfs_uio_init(&uio, ap->a_uio);
4406 return (zfs_write(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag),
4407 ap->a_cred));
4411 * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
4412 * the comment above cache_fplookup for details.
4414 static int
4415 zfs_freebsd_fplookup_vexec(struct vop_fplookup_vexec_args *v)
4417 vnode_t *vp;
4418 znode_t *zp;
4419 uint64_t pflags;
4421 vp = v->a_vp;
4422 zp = VTOZ_SMR(vp);
4423 if (__predict_false(zp == NULL))
4424 return (EAGAIN);
4425 pflags = atomic_load_64(&zp->z_pflags);
4426 if (pflags & ZFS_AV_QUARANTINED)
4427 return (EAGAIN);
4428 if (pflags & ZFS_XATTR)
4429 return (EAGAIN);
4430 if ((pflags & ZFS_NO_EXECS_DENIED) == 0)
4431 return (EAGAIN);
4432 return (0);
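/*
 * Lockless (SMR-protected) symlink resolution for the fast-path lookup:
 * succeed only if the znode is still valid and a copy of the link target is
 * cached in z_cached_symlink; otherwise return EAGAIN so the VFS falls back
 * to the locked lookup path.
 */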
4435 static int
4436 zfs_freebsd_fplookup_symlink(struct vop_fplookup_symlink_args *v)
4438 vnode_t *vp;
4439 znode_t *zp;
4440 char *target;
4442 vp = v->a_vp;
4443 zp = VTOZ_SMR(vp);
4444 if (__predict_false(zp == NULL)) {
4445 return (EAGAIN);
4448 target = atomic_load_consume_ptr(&zp->z_cached_symlink);
4449 if (target == NULL) {
4450 return (EAGAIN);
4452 return (cache_symlink_resolve(v->a_fpl, target, strlen(target)));
4455 #ifndef _SYS_SYSPROTO_H_
4456 struct vop_access_args {
4457 struct vnode *a_vp;
4458 accmode_t a_accmode;
4459 struct ucred *a_cred;
4460 struct thread *a_td;
4462 #endif
4464 static int
4465 zfs_freebsd_access(struct vop_access_args *ap)
4467 vnode_t *vp = ap->a_vp;
4468 znode_t *zp = VTOZ(vp);
4469 accmode_t accmode;
4470 int error = 0;
4473 if (ap->a_accmode == VEXEC) {
4474 if (zfs_fastaccesschk_execute(zp, ap->a_cred) == 0)
4475 return (0);
4479 * ZFS itself only knows about VREAD, VWRITE, VEXEC and VAPPEND.
4481 accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND);
4482 if (accmode != 0)
4483 error = zfs_access(zp, accmode, 0, ap->a_cred);
4486 * VADMIN has to be handled by vaccess().
4488 if (error == 0) {
4489 accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND);
4490 if (accmode != 0) {
4491 error = vaccess(vp->v_type, zp->z_mode, zp->z_uid,
4492 zp->z_gid, accmode, ap->a_cred);
4497 * For VEXEC, ensure that at least one execute bit is set for
4498 * non-directories.
4500 if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR &&
4501 (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) {
4502 error = EACCES;
4505 return (error);
4508 #ifndef _SYS_SYSPROTO_H_
4509 struct vop_lookup_args {
4510 struct vnode *a_dvp;
4511 struct vnode **a_vpp;
4512 struct componentname *a_cnp;
4514 #endif
4516 static int
4517 zfs_freebsd_lookup(struct vop_lookup_args *ap, boolean_t cached)
4519 struct componentname *cnp = ap->a_cnp;
4520 char nm[NAME_MAX + 1];
4522 ASSERT3U(cnp->cn_namelen, <, sizeof (nm));
4523 strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof (nm)));
4525 return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop,
4526 cnp->cn_cred, 0, cached));
4529 static int
4530 zfs_freebsd_cachedlookup(struct vop_cachedlookup_args *ap)
4533 return (zfs_freebsd_lookup((struct vop_lookup_args *)ap, B_TRUE));
4536 #ifndef _SYS_SYSPROTO_H_
4537 struct vop_lookup_args {
4538 struct vnode *a_dvp;
4539 struct vnode **a_vpp;
4540 struct componentname *a_cnp;
4542 #endif
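/*
 * VOP_LOOKUP entry point: go through the FreeBSD name cache when the dataset
 * has the name cache enabled (z_use_namecache); otherwise call straight into
 * zfs_lookup() via zfs_freebsd_lookup().
 */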
4544 static int
4545 zfs_cache_lookup(struct vop_lookup_args *ap)
4547 zfsvfs_t *zfsvfs;
4549 zfsvfs = ap->a_dvp->v_mount->mnt_data;
4550 if (zfsvfs->z_use_namecache)
4551 return (vfs_cache_lookup(ap));
4552 else
4553 return (zfs_freebsd_lookup(ap, B_FALSE));
4556 #ifndef _SYS_SYSPROTO_H_
4557 struct vop_create_args {
4558 struct vnode *a_dvp;
4559 struct vnode **a_vpp;
4560 struct componentname *a_cnp;
4561 struct vattr *a_vap;
4563 #endif
4565 static int
4566 zfs_freebsd_create(struct vop_create_args *ap)
4568 zfsvfs_t *zfsvfs;
4569 struct componentname *cnp = ap->a_cnp;
4570 vattr_t *vap = ap->a_vap;
4571 znode_t *zp = NULL;
4572 int rc, mode;
4574 #if __FreeBSD_version < 1400068
4575 ASSERT(cnp->cn_flags & SAVENAME);
4576 #endif
4578 vattr_init_mask(vap);
4579 mode = vap->va_mode & ALLPERMS;
4580 zfsvfs = ap->a_dvp->v_mount->mnt_data;
4581 *ap->a_vpp = NULL;
4583 rc = zfs_create(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap, 0, mode,
4584 &zp, cnp->cn_cred, 0 /* flag */, NULL /* vsecattr */, NULL);
4585 if (rc == 0)
4586 *ap->a_vpp = ZTOV(zp);
4587 if (zfsvfs->z_use_namecache &&
4588 rc == 0 && (cnp->cn_flags & MAKEENTRY) != 0)
4589 cache_enter(ap->a_dvp, *ap->a_vpp, cnp);
4591 return (rc);
4594 #ifndef _SYS_SYSPROTO_H_
4595 struct vop_remove_args {
4596 struct vnode *a_dvp;
4597 struct vnode *a_vp;
4598 struct componentname *a_cnp;
4600 #endif
4602 static int
4603 zfs_freebsd_remove(struct vop_remove_args *ap)
4606 #if __FreeBSD_version < 1400068
4607 ASSERT(ap->a_cnp->cn_flags & SAVENAME);
4608 #endif
4610 return (zfs_remove_(ap->a_dvp, ap->a_vp, ap->a_cnp->cn_nameptr,
4611 ap->a_cnp->cn_cred));
4614 #ifndef _SYS_SYSPROTO_H_
4615 struct vop_mkdir_args {
4616 struct vnode *a_dvp;
4617 struct vnode **a_vpp;
4618 struct componentname *a_cnp;
4619 struct vattr *a_vap;
4621 #endif
4623 static int
4624 zfs_freebsd_mkdir(struct vop_mkdir_args *ap)
4626 vattr_t *vap = ap->a_vap;
4627 znode_t *zp = NULL;
4628 int rc;
4630 #if __FreeBSD_version < 1400068
4631 ASSERT(ap->a_cnp->cn_flags & SAVENAME);
4632 #endif
4634 vattr_init_mask(vap);
4635 *ap->a_vpp = NULL;
4637 rc = zfs_mkdir(VTOZ(ap->a_dvp), ap->a_cnp->cn_nameptr, vap, &zp,
4638 ap->a_cnp->cn_cred, 0, NULL, NULL);
4640 if (rc == 0)
4641 *ap->a_vpp = ZTOV(zp);
4642 return (rc);
4645 #ifndef _SYS_SYSPROTO_H_
4646 struct vop_rmdir_args {
4647 struct vnode *a_dvp;
4648 struct vnode *a_vp;
4649 struct componentname *a_cnp;
4651 #endif
4653 static int
4654 zfs_freebsd_rmdir(struct vop_rmdir_args *ap)
4656 struct componentname *cnp = ap->a_cnp;
4658 #if __FreeBSD_version < 1400068
4659 ASSERT(cnp->cn_flags & SAVENAME);
4660 #endif
4662 return (zfs_rmdir_(ap->a_dvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred));
4665 #ifndef _SYS_SYSPROTO_H_
4666 struct vop_readdir_args {
4667 struct vnode *a_vp;
4668 struct uio *a_uio;
4669 struct ucred *a_cred;
4670 int *a_eofflag;
4671 int *a_ncookies;
4672 cookie_t **a_cookies;
4674 #endif
4676 static int
4677 zfs_freebsd_readdir(struct vop_readdir_args *ap)
4679 zfs_uio_t uio;
4680 zfs_uio_init(&uio, ap->a_uio);
4681 return (zfs_readdir(ap->a_vp, &uio, ap->a_cred, ap->a_eofflag,
4682 ap->a_ncookies, ap->a_cookies));
4685 #ifndef _SYS_SYSPROTO_H_
4686 struct vop_fsync_args {
4687 struct vnode *a_vp;
4688 int a_waitfor;
4689 struct thread *a_td;
4691 #endif
4693 static int
4694 zfs_freebsd_fsync(struct vop_fsync_args *ap)
4697 return (zfs_fsync(VTOZ(ap->a_vp), 0, ap->a_td->td_ucred));
4700 #ifndef _SYS_SYSPROTO_H_
4701 struct vop_getattr_args {
4702 struct vnode *a_vp;
4703 struct vattr *a_vap;
4704 struct ucred *a_cred;
4706 #endif
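/*
 * Request the ZFS extended attributes (xvattr) together with the regular
 * attributes and fold the xoptattr bits back into BSD file flags in
 * va_flags, e.g. xoa_immutable -> SF_IMMUTABLE, xoa_nodump -> UF_NODUMP,
 * xoa_readonly -> UF_READONLY.
 */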
4708 static int
4709 zfs_freebsd_getattr(struct vop_getattr_args *ap)
4711 vattr_t *vap = ap->a_vap;
4712 xvattr_t xvap;
4713 ulong_t fflags = 0;
4714 int error;
4716 xva_init(&xvap);
4717 xvap.xva_vattr = *vap;
4718 xvap.xva_vattr.va_mask |= AT_XVATTR;
4720 /* Convert chflags into ZFS-type flags. */
4721 /* XXX: what about SF_SETTABLE? */
4722 XVA_SET_REQ(&xvap, XAT_IMMUTABLE);
4723 XVA_SET_REQ(&xvap, XAT_APPENDONLY);
4724 XVA_SET_REQ(&xvap, XAT_NOUNLINK);
4725 XVA_SET_REQ(&xvap, XAT_NODUMP);
4726 XVA_SET_REQ(&xvap, XAT_READONLY);
4727 XVA_SET_REQ(&xvap, XAT_ARCHIVE);
4728 XVA_SET_REQ(&xvap, XAT_SYSTEM);
4729 XVA_SET_REQ(&xvap, XAT_HIDDEN);
4730 XVA_SET_REQ(&xvap, XAT_REPARSE);
4731 XVA_SET_REQ(&xvap, XAT_OFFLINE);
4732 XVA_SET_REQ(&xvap, XAT_SPARSE);
4734 error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred);
4735 if (error != 0)
4736 return (error);
4738 /* Convert ZFS xattr into chflags. */
4739 #define FLAG_CHECK(fflag, xflag, xfield) do { \
4740 if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \
4741 fflags |= (fflag); \
4742 } while (0)
4743 FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE,
4744 xvap.xva_xoptattrs.xoa_immutable);
4745 FLAG_CHECK(SF_APPEND, XAT_APPENDONLY,
4746 xvap.xva_xoptattrs.xoa_appendonly);
4747 FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK,
4748 xvap.xva_xoptattrs.xoa_nounlink);
4749 FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE,
4750 xvap.xva_xoptattrs.xoa_archive);
4751 FLAG_CHECK(UF_NODUMP, XAT_NODUMP,
4752 xvap.xva_xoptattrs.xoa_nodump);
4753 FLAG_CHECK(UF_READONLY, XAT_READONLY,
4754 xvap.xva_xoptattrs.xoa_readonly);
4755 FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM,
4756 xvap.xva_xoptattrs.xoa_system);
4757 FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN,
4758 xvap.xva_xoptattrs.xoa_hidden);
4759 FLAG_CHECK(UF_REPARSE, XAT_REPARSE,
4760 xvap.xva_xoptattrs.xoa_reparse);
4761 FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE,
4762 xvap.xva_xoptattrs.xoa_offline);
4763 FLAG_CHECK(UF_SPARSE, XAT_SPARSE,
4764 xvap.xva_xoptattrs.xoa_sparse);
4766 #undef FLAG_CHECK
4767 *vap = xvap.xva_vattr;
4768 vap->va_flags = fflags;
4769 return (0);
4772 #ifndef _SYS_SYSPROTO_H_
4773 struct vop_setattr_args {
4774 struct vnode *a_vp;
4775 struct vattr *a_vap;
4776 struct ucred *a_cred;
4778 #endif
4780 static int
4781 zfs_freebsd_setattr(struct vop_setattr_args *ap)
4783 vnode_t *vp = ap->a_vp;
4784 vattr_t *vap = ap->a_vap;
4785 cred_t *cred = ap->a_cred;
4786 xvattr_t xvap;
4787 ulong_t fflags;
4788 uint64_t zflags;
4790 vattr_init_mask(vap);
4791 vap->va_mask &= ~AT_NOSET;
4793 xva_init(&xvap);
4794 xvap.xva_vattr = *vap;
4796 zflags = VTOZ(vp)->z_pflags;
4798 if (vap->va_flags != VNOVAL) {
4799 zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs;
4800 int error;
4802 if (zfsvfs->z_use_fuids == B_FALSE)
4803 return (EOPNOTSUPP);
4805 fflags = vap->va_flags;
4807 * XXX KDM
4808 * We need to figure out whether it makes sense to allow
4809 * UF_REPARSE through, since we don't really have other
4810 * facilities to handle reparse points and zfs_setattr()
4811 * doesn't currently allow setting that attribute anyway.
4813 if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE|
4814 UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE|
4815 UF_OFFLINE|UF_SPARSE)) != 0)
4816 return (EOPNOTSUPP);
4818 * Unprivileged processes are not permitted to unset system
4819 * flags, or modify flags if any system flags are set.
4820 * Privileged non-jail processes may not modify system flags
4821 * if securelevel > 0 and any existing system flags are set.
4822 * Privileged jail processes behave like privileged non-jail
4823 * processes if the PR_ALLOW_CHFLAGS permission bit is set;
4824 * otherwise, they behave like unprivileged processes.
4826 if (secpolicy_fs_owner(vp->v_mount, cred) == 0 ||
4827 priv_check_cred(cred, PRIV_VFS_SYSFLAGS) == 0) {
4828 if (zflags &
4829 (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) {
4830 error = securelevel_gt(cred, 0);
4831 if (error != 0)
4832 return (error);
4834 } else {
4836 * Callers may only modify the file flags on
4837 * objects they have VADMIN rights for.
4839 if ((error = VOP_ACCESS(vp, VADMIN, cred,
4840 curthread)) != 0)
4841 return (error);
4842 if (zflags &
4843 (ZFS_IMMUTABLE | ZFS_APPENDONLY |
4844 ZFS_NOUNLINK)) {
4845 return (EPERM);
4847 if (fflags &
4848 (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) {
4849 return (EPERM);
4853 #define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \
4854 if (((fflags & (fflag)) && !(zflags & (zflag))) || \
4855 ((zflags & (zflag)) && !(fflags & (fflag)))) { \
4856 XVA_SET_REQ(&xvap, (xflag)); \
4857 (xfield) = ((fflags & (fflag)) != 0); \
4859 } while (0)
4860 /* Convert chflags into ZFS-type flags. */
4861 /* XXX: what about SF_SETTABLE? */
4862 FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE,
4863 xvap.xva_xoptattrs.xoa_immutable);
4864 FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY,
4865 xvap.xva_xoptattrs.xoa_appendonly);
4866 FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK,
4867 xvap.xva_xoptattrs.xoa_nounlink);
4868 FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE,
4869 xvap.xva_xoptattrs.xoa_archive);
4870 FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP,
4871 xvap.xva_xoptattrs.xoa_nodump);
4872 FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY,
4873 xvap.xva_xoptattrs.xoa_readonly);
4874 FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM,
4875 xvap.xva_xoptattrs.xoa_system);
4876 FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN,
4877 xvap.xva_xoptattrs.xoa_hidden);
4878 FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE,
4879 xvap.xva_xoptattrs.xoa_reparse);
4880 FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE,
4881 xvap.xva_xoptattrs.xoa_offline);
4882 FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE,
4883 xvap.xva_xoptattrs.xoa_sparse);
4884 #undef FLAG_CHANGE
4886 if (vap->va_birthtime.tv_sec != VNOVAL) {
4887 xvap.xva_vattr.va_mask |= AT_XVATTR;
4888 XVA_SET_REQ(&xvap, XAT_CREATETIME);
4890 return (zfs_setattr(VTOZ(vp), (vattr_t *)&xvap, 0, cred, NULL));
4893 #ifndef _SYS_SYSPROTO_H_
4894 struct vop_rename_args {
4895 struct vnode *a_fdvp;
4896 struct vnode *a_fvp;
4897 struct componentname *a_fcnp;
4898 struct vnode *a_tdvp;
4899 struct vnode *a_tvp;
4900 struct componentname *a_tcnp;
4902 #endif
4904 static int
4905 zfs_freebsd_rename(struct vop_rename_args *ap)
4907 vnode_t *fdvp = ap->a_fdvp;
4908 vnode_t *fvp = ap->a_fvp;
4909 vnode_t *tdvp = ap->a_tdvp;
4910 vnode_t *tvp = ap->a_tvp;
4911 int error;
4913 #if __FreeBSD_version < 1400068
4914 ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART));
4915 ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART));
4916 #endif
4918 error = zfs_do_rename(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp,
4919 ap->a_tcnp, ap->a_fcnp->cn_cred);
4921 vrele(fdvp);
4922 vrele(fvp);
4923 vrele(tdvp);
4924 if (tvp != NULL)
4925 vrele(tvp);
4927 return (error);
4930 #ifndef _SYS_SYSPROTO_H_
4931 struct vop_symlink_args {
4932 struct vnode *a_dvp;
4933 struct vnode **a_vpp;
4934 struct componentname *a_cnp;
4935 struct vattr *a_vap;
4936 char *a_target;
4938 #endif
4940 static int
4941 zfs_freebsd_symlink(struct vop_symlink_args *ap)
4943 struct componentname *cnp = ap->a_cnp;
4944 vattr_t *vap = ap->a_vap;
4945 znode_t *zp = NULL;
4946 char *symlink;
4947 size_t symlink_len;
4948 int rc;
4950 #if __FreeBSD_version < 1400068
4951 ASSERT(cnp->cn_flags & SAVENAME);
4952 #endif
4954 vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. */
4955 vattr_init_mask(vap);
4956 *ap->a_vpp = NULL;
4958 rc = zfs_symlink(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap,
4959 ap->a_target, &zp, cnp->cn_cred, 0 /* flags */, NULL);
4960 if (rc == 0) {
4961 *ap->a_vpp = ZTOV(zp);
4962 ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
4963 MPASS(zp->z_cached_symlink == NULL);
4964 symlink_len = strlen(ap->a_target);
4965 symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
4966 if (symlink != NULL) {
4967 memcpy(symlink, ap->a_target, symlink_len);
4968 symlink[symlink_len] = '\0';
4969 atomic_store_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
4970 (uintptr_t)symlink);
4973 return (rc);
4976 #ifndef _SYS_SYSPROTO_H_
4977 struct vop_readlink_args {
4978 struct vnode *a_vp;
4979 struct uio *a_uio;
4980 struct ucred *a_cred;
4982 #endif
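/*
 * Read the symlink target.  If the read went into a single kernel-space
 * buffer, opportunistically cache the resolved target in z_cached_symlink
 * for later fast-path lookups; the atomic cmpset guards against racing with
 * another caller that cached it first.
 */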
4984 static int
4985 zfs_freebsd_readlink(struct vop_readlink_args *ap)
4987 zfs_uio_t uio;
4988 int error;
4989 znode_t *zp = VTOZ(ap->a_vp);
4990 char *symlink, *base;
4991 size_t symlink_len;
4992 bool trycache;
4994 zfs_uio_init(&uio, ap->a_uio);
4995 trycache = false;
4996 if (zfs_uio_segflg(&uio) == UIO_SYSSPACE &&
4997 zfs_uio_iovcnt(&uio) == 1) {
4998 base = zfs_uio_iovbase(&uio, 0);
4999 symlink_len = zfs_uio_iovlen(&uio, 0);
5000 trycache = true;
5002 error = zfs_readlink(ap->a_vp, &uio, ap->a_cred, NULL);
5003 if (atomic_load_ptr(&zp->z_cached_symlink) != NULL ||
5004 error != 0 || !trycache) {
5005 return (error);
5007 symlink_len -= zfs_uio_resid(&uio);
5008 symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
5009 if (symlink != NULL) {
5010 memcpy(symlink, base, symlink_len);
5011 symlink[symlink_len] = '\0';
5012 if (!atomic_cmpset_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
5013 (uintptr_t)NULL, (uintptr_t)symlink)) {
5014 cache_symlink_free(symlink, symlink_len + 1);
5017 return (error);
5020 #ifndef _SYS_SYSPROTO_H_
5021 struct vop_link_args {
5022 struct vnode *a_tdvp;
5023 struct vnode *a_vp;
5024 struct componentname *a_cnp;
5026 #endif
5028 static int
5029 zfs_freebsd_link(struct vop_link_args *ap)
5031 struct componentname *cnp = ap->a_cnp;
5032 vnode_t *vp = ap->a_vp;
5033 vnode_t *tdvp = ap->a_tdvp;
5035 if (tdvp->v_mount != vp->v_mount)
5036 return (EXDEV);
5038 #if __FreeBSD_version < 1400068
5039 ASSERT(cnp->cn_flags & SAVENAME);
5040 #endif
5042 return (zfs_link(VTOZ(tdvp), VTOZ(vp),
5043 cnp->cn_nameptr, cnp->cn_cred, 0));
5046 #ifndef _SYS_SYSPROTO_H_
5047 struct vop_inactive_args {
5048 struct vnode *a_vp;
5049 struct thread *a_td;
5051 #endif
5053 static int
5054 zfs_freebsd_inactive(struct vop_inactive_args *ap)
5056 vnode_t *vp = ap->a_vp;
5058 zfs_inactive(vp, curthread->td_ucred, NULL);
5059 return (0);
5062 #ifndef _SYS_SYSPROTO_H_
5063 struct vop_need_inactive_args {
5064 struct vnode *a_vp;
5065 struct thread *a_td;
5067 #endif
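/*
 * Decide, without sleeping, whether VOP_INACTIVE needs to run: dirty pages,
 * a failed try-enter on the teardown lock, a missing SA handle, an unlinked
 * znode or dirty atime all force inactive processing.
 */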
5069 static int
5070 zfs_freebsd_need_inactive(struct vop_need_inactive_args *ap)
5072 vnode_t *vp = ap->a_vp;
5073 znode_t *zp = VTOZ(vp);
5074 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
5075 int need;
5077 if (vn_need_pageq_flush(vp))
5078 return (1);
5080 if (!ZFS_TEARDOWN_INACTIVE_TRY_ENTER_READ(zfsvfs))
5081 return (1);
5082 need = (zp->z_sa_hdl == NULL || zp->z_unlinked || zp->z_atime_dirty);
5083 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
5085 return (need);
5088 #ifndef _SYS_SYSPROTO_H_
5089 struct vop_reclaim_args {
5090 struct vnode *a_vp;
5091 struct thread *a_td;
5093 #endif
5095 static int
5096 zfs_freebsd_reclaim(struct vop_reclaim_args *ap)
5098 vnode_t *vp = ap->a_vp;
5099 znode_t *zp = VTOZ(vp);
5100 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
5102 ASSERT3P(zp, !=, NULL);
5105 * z_teardown_inactive_lock protects from a race with
5106 * zfs_znode_dmu_fini in zfsvfs_teardown during
5107 * force unmount.
5109 ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
5110 if (zp->z_sa_hdl == NULL)
5111 zfs_znode_free(zp);
5112 else
5113 zfs_zinactive(zp);
5114 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
5116 vp->v_data = NULL;
5117 return (0);
5120 #ifndef _SYS_SYSPROTO_H_
5121 struct vop_fid_args {
5122 struct vnode *a_vp;
5123 struct fid *a_fid;
5125 #endif
5127 static int
5128 zfs_freebsd_fid(struct vop_fid_args *ap)
5131 return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL));
5135 #ifndef _SYS_SYSPROTO_H_
5136 struct vop_pathconf_args {
5137 struct vnode *a_vp;
5138 int a_name;
5139 register_t *a_retval;
5140 } *ap;
5141 #endif
5143 static int
5144 zfs_freebsd_pathconf(struct vop_pathconf_args *ap)
5146 ulong_t val;
5147 int error;
5149 error = zfs_pathconf(ap->a_vp, ap->a_name, &val,
5150 curthread->td_ucred, NULL);
5151 if (error == 0) {
5152 *ap->a_retval = val;
5153 return (error);
5155 if (error != EOPNOTSUPP)
5156 return (error);
5158 switch (ap->a_name) {
5159 case _PC_NAME_MAX:
5160 *ap->a_retval = NAME_MAX;
5161 return (0);
5162 #if __FreeBSD_version >= 1400032
5163 case _PC_DEALLOC_PRESENT:
5164 *ap->a_retval = 1;
5165 return (0);
5166 #endif
5167 case _PC_PIPE_BUF:
5168 if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) {
5169 *ap->a_retval = PIPE_BUF;
5170 return (0);
5172 return (EINVAL);
5173 default:
5174 return (vop_stdpathconf(ap));
5178 static int zfs_xattr_compat = 1;
5180 static int
5181 zfs_check_attrname(const char *name)
5183 /* We don't allow the '/' character in attribute names. */
5184 if (strchr(name, '/') != NULL)
5185 return (SET_ERROR(EINVAL));
5186 /* We don't allow attribute names that start with a namespace prefix. */
5187 if (ZFS_XA_NS_PREFIX_FORBIDDEN(name))
5188 return (SET_ERROR(EINVAL));
5189 return (0);
5193 * FreeBSD's extended attribute namespace determines the file name prefix
5194 * used for the ZFS extended attribute name:
5196 * NAMESPACE    XATTR_COMPAT    PREFIX
5197 * system       *               freebsd:system:
5198 * user         1               (none; can be used to access ZFS
5199 *                              fsattr(5) attributes created on Solaris)
5200 * user         0               user.
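/*
 * For example (illustrative), a user attribute "foo" is stored under the
 * plain name "foo" with xattr_compat enabled, under "user.foo" with it
 * disabled, and a system-namespace attribute is always stored as
 * "freebsd:system:foo".
 */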
5202 static int
5203 zfs_create_attrname(int attrnamespace, const char *name, char *attrname,
5204 size_t size, boolean_t compat)
5206 const char *namespace, *prefix, *suffix;
5208 memset(attrname, 0, size);
5210 switch (attrnamespace) {
5211 case EXTATTR_NAMESPACE_USER:
5212 if (compat) {
5214 * This is the default namespace by which we can access
5215 * all attributes created on Solaris.
5217 prefix = namespace = suffix = "";
5218 } else {
5220 * This is compatible with the user namespace encoding
5221 * on Linux prior to xattr_compat, but nothing
5222 * else.
5224 prefix = "";
5225 namespace = "user";
5226 suffix = ".";
5228 break;
5229 case EXTATTR_NAMESPACE_SYSTEM:
5230 prefix = "freebsd:";
5231 namespace = EXTATTR_NAMESPACE_SYSTEM_STRING;
5232 suffix = ":";
5233 break;
5234 case EXTATTR_NAMESPACE_EMPTY:
5235 default:
5236 return (SET_ERROR(EINVAL));
5238 if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix,
5239 name) >= size) {
5240 return (SET_ERROR(ENAMETOOLONG));
5242 return (0);
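/*
 * Make sure the SA xattr nvlist is loaded into z_xattr_cached.  The caller
 * holds z_xattr_lock at least as reader; if only a read lock is held it is
 * upgraded (or dropped and re-taken as writer) so zfs_sa_get_xattr() can
 * populate the cache, then downgraded again before returning.
 */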
5245 static int
5246 zfs_ensure_xattr_cached(znode_t *zp)
5248 int error = 0;
5250 ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
5252 if (zp->z_xattr_cached != NULL)
5253 return (0);
5255 if (rw_write_held(&zp->z_xattr_lock))
5256 return (zfs_sa_get_xattr(zp));
5258 if (!rw_tryupgrade(&zp->z_xattr_lock)) {
5259 rw_exit(&zp->z_xattr_lock);
5260 rw_enter(&zp->z_xattr_lock, RW_WRITER);
5262 if (zp->z_xattr_cached == NULL)
5263 error = zfs_sa_get_xattr(zp);
5264 rw_downgrade(&zp->z_xattr_lock);
5265 return (error);
5268 #ifndef _SYS_SYSPROTO_H_
5269 struct vop_getextattr {
5270 IN struct vnode *a_vp;
5271 IN int a_attrnamespace;
5272 IN const char *a_name;
5273 INOUT struct uio *a_uio;
5274 OUT size_t *a_size;
5275 IN struct ucred *a_cred;
5276 IN struct thread *a_td;
5278 #endif
5280 static int
5281 zfs_getextattr_dir(struct vop_getextattr_args *ap, const char *attrname)
5283 struct thread *td = ap->a_td;
5284 struct nameidata nd;
5285 struct vattr va;
5286 vnode_t *xvp = NULL, *vp;
5287 int error, flags;
5289 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5290 LOOKUP_XATTR, B_FALSE);
5291 if (error != 0)
5292 return (error);
5294 flags = FREAD;
5295 #if __FreeBSD_version < 1400043
5296 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname,
5297 xvp, td);
5298 #else
5299 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp);
5300 #endif
5301 error = vn_open_cred(&nd, &flags, 0, VN_OPEN_INVFS, ap->a_cred, NULL);
5302 if (error != 0)
5303 return (SET_ERROR(error));
5304 vp = nd.ni_vp;
5305 NDFREE_PNBUF(&nd);
5307 if (ap->a_size != NULL) {
5308 error = VOP_GETATTR(vp, &va, ap->a_cred);
5309 if (error == 0)
5310 *ap->a_size = (size_t)va.va_size;
5311 } else if (ap->a_uio != NULL)
5312 error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred);
5314 VOP_UNLOCK(vp);
5315 vn_close(vp, flags, ap->a_cred, td);
5316 return (error);
5319 static int
5320 zfs_getextattr_sa(struct vop_getextattr_args *ap, const char *attrname)
5322 znode_t *zp = VTOZ(ap->a_vp);
5323 uchar_t *nv_value;
5324 uint_t nv_size;
5325 int error;
5327 error = zfs_ensure_xattr_cached(zp);
5328 if (error != 0)
5329 return (error);
5331 ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
5332 ASSERT3P(zp->z_xattr_cached, !=, NULL);
5334 error = nvlist_lookup_byte_array(zp->z_xattr_cached, attrname,
5335 &nv_value, &nv_size);
5336 if (error != 0)
5337 return (SET_ERROR(error));
5339 if (ap->a_size != NULL)
5340 *ap->a_size = nv_size;
5341 else if (ap->a_uio != NULL)
5342 error = uiomove(nv_value, nv_size, ap->a_uio);
5343 if (error != 0)
5344 return (SET_ERROR(error));
5346 return (0);
5349 static int
5350 zfs_getextattr_impl(struct vop_getextattr_args *ap, boolean_t compat)
5352 znode_t *zp = VTOZ(ap->a_vp);
5353 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5354 char attrname[EXTATTR_MAXNAMELEN+1];
5355 int error;
5357 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5358 sizeof (attrname), compat);
5359 if (error != 0)
5360 return (error);
5362 error = ENOENT;
5363 if (zfsvfs->z_use_sa && zp->z_is_sa)
5364 error = zfs_getextattr_sa(ap, attrname);
5365 if (error == ENOENT)
5366 error = zfs_getextattr_dir(ap, attrname);
5367 return (error);
5371 * Vnode operation to retrieve a named extended attribute.
5373 static int
5374 zfs_getextattr(struct vop_getextattr_args *ap)
5376 znode_t *zp = VTOZ(ap->a_vp);
5377 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5378 int error;
5381 * If the xattr property is off, refuse the request.
5383 if (!(zfsvfs->z_flags & ZSB_XATTR))
5384 return (SET_ERROR(EOPNOTSUPP));
5386 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5387 ap->a_cred, ap->a_td, VREAD);
5388 if (error != 0)
5389 return (SET_ERROR(error));
5391 error = zfs_check_attrname(ap->a_name);
5392 if (error != 0)
5393 return (error);
5395 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
5396 return (error);
5397 error = ENOENT;
5398 rw_enter(&zp->z_xattr_lock, RW_READER);
5400 error = zfs_getextattr_impl(ap, zfs_xattr_compat);
5401 if ((error == ENOENT || error == ENOATTR) &&
5402 ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
5404 * Fall back to the alternate namespace format if we failed to
5405 * find a user xattr.
5407 error = zfs_getextattr_impl(ap, !zfs_xattr_compat);
5410 rw_exit(&zp->z_xattr_lock);
5411 zfs_exit(zfsvfs, FTAG);
5412 if (error == ENOENT)
5413 error = SET_ERROR(ENOATTR);
5414 return (error);
5417 #ifndef _SYS_SYSPROTO_H_
5418 struct vop_deleteextattr {
5419 IN struct vnode *a_vp;
5420 IN int a_attrnamespace;
5421 IN const char *a_name;
5422 IN struct ucred *a_cred;
5423 IN struct thread *a_td;
5425 #endif
5427 static int
5428 zfs_deleteextattr_dir(struct vop_deleteextattr_args *ap, const char *attrname)
5430 struct nameidata nd;
5431 vnode_t *xvp = NULL, *vp;
5432 int error;
5434 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5435 LOOKUP_XATTR, B_FALSE);
5436 if (error != 0)
5437 return (error);
5439 #if __FreeBSD_version < 1400043
5440 NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF,
5441 UIO_SYSSPACE, attrname, xvp, ap->a_td);
5442 #else
5443 NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF,
5444 UIO_SYSSPACE, attrname, xvp);
5445 #endif
5446 error = namei(&nd);
5447 if (error != 0)
5448 return (SET_ERROR(error));
5450 vp = nd.ni_vp;
5451 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
5452 NDFREE_PNBUF(&nd);
5454 vput(nd.ni_dvp);
5455 if (vp == nd.ni_dvp)
5456 vrele(vp);
5457 else
5458 vput(vp);
5460 return (error);
5463 static int
5464 zfs_deleteextattr_sa(struct vop_deleteextattr_args *ap, const char *attrname)
5466 znode_t *zp = VTOZ(ap->a_vp);
5467 nvlist_t *nvl;
5468 int error;
5470 error = zfs_ensure_xattr_cached(zp);
5471 if (error != 0)
5472 return (error);
5474 ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
5475 ASSERT3P(zp->z_xattr_cached, !=, NULL);
5477 nvl = zp->z_xattr_cached;
5478 error = nvlist_remove(nvl, attrname, DATA_TYPE_BYTE_ARRAY);
5479 if (error != 0)
5480 error = SET_ERROR(error);
5481 else
5482 error = zfs_sa_set_xattr(zp, attrname, NULL, 0);
5483 if (error != 0) {
5484 zp->z_xattr_cached = NULL;
5485 nvlist_free(nvl);
5487 return (error);
5490 static int
5491 zfs_deleteextattr_impl(struct vop_deleteextattr_args *ap, boolean_t compat)
5493 znode_t *zp = VTOZ(ap->a_vp);
5494 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5495 char attrname[EXTATTR_MAXNAMELEN+1];
5496 int error;
5498 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5499 sizeof (attrname), compat);
5500 if (error != 0)
5501 return (error);
5503 error = ENOENT;
5504 if (zfsvfs->z_use_sa && zp->z_is_sa)
5505 error = zfs_deleteextattr_sa(ap, attrname);
5506 if (error == ENOENT)
5507 error = zfs_deleteextattr_dir(ap, attrname);
5508 return (error);
5512 * Vnode operation to remove a named attribute.
5514 static int
5515 zfs_deleteextattr(struct vop_deleteextattr_args *ap)
5517 znode_t *zp = VTOZ(ap->a_vp);
5518 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5519 int error;
5522 * If the xattr property is off, refuse the request.
5524 if (!(zfsvfs->z_flags & ZSB_XATTR))
5525 return (SET_ERROR(EOPNOTSUPP));
5527 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5528 ap->a_cred, ap->a_td, VWRITE);
5529 if (error != 0)
5530 return (SET_ERROR(error));
5532 error = zfs_check_attrname(ap->a_name);
5533 if (error != 0)
5534 return (error);
5536 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
5537 return (error);
5538 rw_enter(&zp->z_xattr_lock, RW_WRITER);
5540 error = zfs_deleteextattr_impl(ap, zfs_xattr_compat);
5541 if ((error == ENOENT || error == ENOATTR) &&
5542 ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
5544 * Fall back to the alternate namespace format if we failed to
5545 * find a user xattr.
5547 error = zfs_deleteextattr_impl(ap, !zfs_xattr_compat);
5550 rw_exit(&zp->z_xattr_lock);
5551 zfs_exit(zfsvfs, FTAG);
5552 if (error == ENOENT)
5553 error = SET_ERROR(ENOATTR);
5554 return (error);
5557 #ifndef _SYS_SYSPROTO_H_
5558 struct vop_setextattr {
5559 IN struct vnode *a_vp;
5560 IN int a_attrnamespace;
5561 IN const char *a_name;
5562 INOUT struct uio *a_uio;
5563 IN struct ucred *a_cred;
5564 IN struct thread *a_td;
5566 #endif
5568 static int
5569 zfs_setextattr_dir(struct vop_setextattr_args *ap, const char *attrname)
5571 struct thread *td = ap->a_td;
5572 struct nameidata nd;
5573 struct vattr va;
5574 vnode_t *xvp = NULL, *vp;
5575 int error, flags;
5577 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5578 LOOKUP_XATTR | CREATE_XATTR_DIR, B_FALSE);
5579 if (error != 0)
5580 return (error);
5582 flags = FFLAGS(O_WRONLY | O_CREAT);
5583 #if __FreeBSD_version < 1400043
5584 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp, td);
5585 #else
5586 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp);
5587 #endif
5588 error = vn_open_cred(&nd, &flags, 0600, VN_OPEN_INVFS, ap->a_cred,
5589 NULL);
5590 if (error != 0)
5591 return (SET_ERROR(error));
5592 vp = nd.ni_vp;
5593 NDFREE_PNBUF(&nd);
5595 VATTR_NULL(&va);
5596 va.va_size = 0;
5597 error = VOP_SETATTR(vp, &va, ap->a_cred);
5598 if (error == 0)
5599 VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred);
5601 VOP_UNLOCK(vp);
5602 vn_close(vp, flags, ap->a_cred, td);
5603 return (error);
5606 static int
5607 zfs_setextattr_sa(struct vop_setextattr_args *ap, const char *attrname)
5609 znode_t *zp = VTOZ(ap->a_vp);
5610 nvlist_t *nvl;
5611 size_t sa_size;
5612 int error;
5614 error = zfs_ensure_xattr_cached(zp);
5615 if (error != 0)
5616 return (error);
5618 ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
5619 ASSERT3P(zp->z_xattr_cached, !=, NULL);
5621 nvl = zp->z_xattr_cached;
5622 size_t entry_size = ap->a_uio->uio_resid;
5623 if (entry_size > DXATTR_MAX_ENTRY_SIZE)
5624 return (SET_ERROR(EFBIG));
5625 error = nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);
5626 if (error != 0)
5627 return (SET_ERROR(error));
5628 if (sa_size > DXATTR_MAX_SA_SIZE)
5629 return (SET_ERROR(EFBIG));
5630 uchar_t *buf = kmem_alloc(entry_size, KM_SLEEP);
5631 error = uiomove(buf, entry_size, ap->a_uio);
5632 if (error != 0) {
5633 error = SET_ERROR(error);
5634 } else {
5635 error = nvlist_add_byte_array(nvl, attrname, buf, entry_size);
5636 if (error != 0)
5637 error = SET_ERROR(error);
5639 if (error == 0)
5640 error = zfs_sa_set_xattr(zp, attrname, buf, entry_size);
5641 kmem_free(buf, entry_size);
5642 if (error != 0) {
5643 zp->z_xattr_cached = NULL;
5644 nvlist_free(nvl);
5646 return (error);
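/*
 * Store the attribute, preferring the SA (System Attribute) based format
 * when the dataset allows it.  Whichever format is written, any stale copy
 * in the other format is removed, and for user attributes the copy under
 * the alternate compat name is cleared as well.
 */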
5649 static int
5650 zfs_setextattr_impl(struct vop_setextattr_args *ap, boolean_t compat)
5652 znode_t *zp = VTOZ(ap->a_vp);
5653 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5654 char attrname[EXTATTR_MAXNAMELEN+1];
5655 int error;
5657 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5658 sizeof (attrname), compat);
5659 if (error != 0)
5660 return (error);
5662 struct vop_deleteextattr_args vda = {
5663 .a_vp = ap->a_vp,
5664 .a_attrnamespace = ap->a_attrnamespace,
5665 .a_name = ap->a_name,
5666 .a_cred = ap->a_cred,
5667 .a_td = ap->a_td,
5669 error = ENOENT;
5670 if (zfsvfs->z_use_sa && zp->z_is_sa && zfsvfs->z_xattr_sa) {
5671 error = zfs_setextattr_sa(ap, attrname);
5672 if (error == 0) {
5674 * Successfully stored in the SA; clear the copy in the
5675 * xattr dir if present.
5677 zfs_deleteextattr_dir(&vda, attrname);
5680 if (error != 0) {
5681 error = zfs_setextattr_dir(ap, attrname);
5682 if (error == 0 && zp->z_is_sa) {
5684 * Successfully stored in the xattr dir; clear the copy
5685 * in the SA if present.
5687 zfs_deleteextattr_sa(&vda, attrname);
5690 if (error == 0 && ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
5692 * Also clear all versions of the alternate compat name.
5694 zfs_deleteextattr_impl(&vda, !compat);
5696 return (error);
5700 * Vnode operation to set a named attribute.
5702 static int
5703 zfs_setextattr(struct vop_setextattr_args *ap)
5705 znode_t *zp = VTOZ(ap->a_vp);
5706 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5707 int error;
5710 * If the xattr property is off, refuse the request.
5712 if (!(zfsvfs->z_flags & ZSB_XATTR))
5713 return (SET_ERROR(EOPNOTSUPP));
5715 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5716 ap->a_cred, ap->a_td, VWRITE);
5717 if (error != 0)
5718 return (SET_ERROR(error));
5720 error = zfs_check_attrname(ap->a_name);
5721 if (error != 0)
5722 return (error);
5724 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
5725 return (error);
5726 rw_enter(&zp->z_xattr_lock, RW_WRITER);
5728 error = zfs_setextattr_impl(ap, zfs_xattr_compat);
5730 rw_exit(&zp->z_xattr_lock);
5731 zfs_exit(zfsvfs, FTAG);
5732 return (error);
5735 #ifndef _SYS_SYSPROTO_H_
5736 struct vop_listextattr {
5737 IN struct vnode *a_vp;
5738 IN int a_attrnamespace;
5739 INOUT struct uio *a_uio;
5740 OUT size_t *a_size;
5741 IN struct ucred *a_cred;
5742 IN struct thread *a_td;
5744 #endif
5746 static int
5747 zfs_listextattr_dir(struct vop_listextattr_args *ap, const char *attrprefix)
5749 struct thread *td = ap->a_td;
5750 struct nameidata nd;
5751 uint8_t dirbuf[sizeof (struct dirent)];
5752 struct iovec aiov;
5753 struct uio auio;
5754 vnode_t *xvp = NULL, *vp;
5755 int error, eof;
5757 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5758 LOOKUP_XATTR, B_FALSE);
5759 if (error != 0) {
5761 * ENOATTR means that the EA directory does not yet exist,
5762 * i.e. there are no extended attributes there.
5764 if (error == ENOATTR)
5765 error = 0;
5766 return (error);
5769 #if __FreeBSD_version < 1400043
5770 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED,
5771 UIO_SYSSPACE, ".", xvp, td);
5772 #else
5773 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED,
5774 UIO_SYSSPACE, ".", xvp);
5775 #endif
5776 error = namei(&nd);
5777 if (error != 0)
5778 return (SET_ERROR(error));
5779 vp = nd.ni_vp;
5780 NDFREE_PNBUF(&nd);
5782 auio.uio_iov = &aiov;
5783 auio.uio_iovcnt = 1;
5784 auio.uio_segflg = UIO_SYSSPACE;
5785 auio.uio_td = td;
5786 auio.uio_rw = UIO_READ;
5787 auio.uio_offset = 0;
5789 size_t plen = strlen(attrprefix);
5791 do {
5792 aiov.iov_base = (void *)dirbuf;
5793 aiov.iov_len = sizeof (dirbuf);
5794 auio.uio_resid = sizeof (dirbuf);
5795 error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL);
5796 if (error != 0)
5797 break;
5798 int done = sizeof (dirbuf) - auio.uio_resid;
5799 for (int pos = 0; pos < done; ) {
5800 struct dirent *dp = (struct dirent *)(dirbuf + pos);
5801 pos += dp->d_reclen;
5803 * XXX: Temporarily we also accept DT_UNKNOWN, as this
5804 * is what we get when the attribute was created on Solaris.
5806 if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN)
5807 continue;
5808 else if (plen == 0 &&
5809 ZFS_XA_NS_PREFIX_FORBIDDEN(dp->d_name))
5810 continue;
5811 else if (strncmp(dp->d_name, attrprefix, plen) != 0)
5812 continue;
5813 uint8_t nlen = dp->d_namlen - plen;
5814 if (ap->a_size != NULL) {
5815 *ap->a_size += 1 + nlen;
5816 } else if (ap->a_uio != NULL) {
5818 * Format of extattr name entry is one byte for
5819 * length and the rest for name.
5821 error = uiomove(&nlen, 1, ap->a_uio);
5822 if (error == 0) {
5823 char *namep = dp->d_name + plen;
5824 error = uiomove(namep, nlen, ap->a_uio);
5826 if (error != 0) {
5827 error = SET_ERROR(error);
5828 break;
5832 } while (!eof && error == 0);
5834 vput(vp);
5835 return (error);
5838 static int
5839 zfs_listextattr_sa(struct vop_listextattr_args *ap, const char *attrprefix)
5841 znode_t *zp = VTOZ(ap->a_vp);
5842 int error;
5844 error = zfs_ensure_xattr_cached(zp);
5845 if (error != 0)
5846 return (error);
5848 ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
5849 ASSERT3P(zp->z_xattr_cached, !=, NULL);
5851 size_t plen = strlen(attrprefix);
5852 nvpair_t *nvp = NULL;
5853 while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) {
5854 ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY);
5856 const char *name = nvpair_name(nvp);
5857 if (plen == 0 && ZFS_XA_NS_PREFIX_FORBIDDEN(name))
5858 continue;
5859 else if (strncmp(name, attrprefix, plen) != 0)
5860 continue;
5861 uint8_t nlen = strlen(name) - plen;
5862 if (ap->a_size != NULL) {
5863 *ap->a_size += 1 + nlen;
5864 } else if (ap->a_uio != NULL) {
5866 * Format of extattr name entry is one byte for
5867 * length and the rest for name.
5869 error = uiomove(&nlen, 1, ap->a_uio);
5870 if (error == 0) {
5871 char *namep = __DECONST(char *, name) + plen;
5872 error = uiomove(namep, nlen, ap->a_uio);
5874 if (error != 0) {
5875 error = SET_ERROR(error);
5876 break;
5881 return (error);
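/*
 * Both listers above emit entries in the FreeBSD extattr list format: a
 * single length byte followed by the attribute name with the namespace
 * prefix stripped.  For example (illustrative), attributes "foo" and
 * "barbaz" would be returned as "\3foo\6barbaz".
 */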
5884 static int
5885 zfs_listextattr_impl(struct vop_listextattr_args *ap, boolean_t compat)
5887 znode_t *zp = VTOZ(ap->a_vp);
5888 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5889 char attrprefix[16];
5890 int error;
5892 error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix,
5893 sizeof (attrprefix), compat);
5894 if (error != 0)
5895 return (error);
5897 if (zfsvfs->z_use_sa && zp->z_is_sa)
5898 error = zfs_listextattr_sa(ap, attrprefix);
5899 if (error == 0)
5900 error = zfs_listextattr_dir(ap, attrprefix);
5901 return (error);
5905 * Vnode operation to retrieve extended attributes on a vnode.
5907 static int
5908 zfs_listextattr(struct vop_listextattr_args *ap)
5910 znode_t *zp = VTOZ(ap->a_vp);
5911 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5912 int error;
5914 if (ap->a_size != NULL)
5915 *ap->a_size = 0;
5918 * If the xattr property is off, refuse the request.
5920 if (!(zfsvfs->z_flags & ZSB_XATTR))
5921 return (SET_ERROR(EOPNOTSUPP));
5923 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5924 ap->a_cred, ap->a_td, VREAD);
5925 if (error != 0)
5926 return (SET_ERROR(error));
5928 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
5929 return (error);
5930 rw_enter(&zp->z_xattr_lock, RW_READER);
5932 error = zfs_listextattr_impl(ap, zfs_xattr_compat);
5933 if (error == 0 && ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
5934 /* Also list user xattrs with the alternate format. */
5935 error = zfs_listextattr_impl(ap, !zfs_xattr_compat);
5938 rw_exit(&zp->z_xattr_lock);
5939 zfs_exit(zfsvfs, FTAG);
5940 return (error);
5943 #ifndef _SYS_SYSPROTO_H_
5944 struct vop_getacl_args {
5945 struct vnode *vp;
5946 acl_type_t type;
5947 struct acl *aclp;
5948 struct ucred *cred;
5949 struct thread *td;
5951 #endif
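/*
 * Only NFSv4 ACLs are supported; the ZFS ACEs returned by zfs_getsecattr()
 * are converted to a struct acl with acl_from_aces().
 */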
5953 static int
5954 zfs_freebsd_getacl(struct vop_getacl_args *ap)
5956 int error;
5957 vsecattr_t vsecattr;
5959 if (ap->a_type != ACL_TYPE_NFS4)
5960 return (EINVAL);
5962 vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT;
5963 if ((error = zfs_getsecattr(VTOZ(ap->a_vp),
5964 &vsecattr, 0, ap->a_cred)))
5965 return (error);
5967 error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp,
5968 vsecattr.vsa_aclcnt);
5969 if (vsecattr.vsa_aclentp != NULL)
5970 kmem_free(vsecattr.vsa_aclentp, vsecattr.vsa_aclentsz);
5972 return (error);
5975 #ifndef _SYS_SYSPROTO_H_
5976 struct vop_setacl_args {
5977 struct vnode *vp;
5978 acl_type_t type;
5979 struct acl *aclp;
5980 struct ucred *cred;
5981 struct thread *td;
5983 #endif
5985 static int
5986 zfs_freebsd_setacl(struct vop_setacl_args *ap)
5988 int error;
5989 vsecattr_t vsecattr;
5990 int aclbsize; /* size of acl list in bytes */
5991 aclent_t *aaclp;
5993 if (ap->a_type != ACL_TYPE_NFS4)
5994 return (EINVAL);
5996 if (ap->a_aclp == NULL)
5997 return (EINVAL);
5999 if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES)
6000 return (EINVAL);
6003 * With NFSv4 ACLs, chmod(2) may need to add additional entries,
6004 * splitting every entry into two and appending "canonical six"
6005 * entries at the end. Don't allow setting an ACL that would
6006 * cause chmod(2) to run out of ACL entries.
6008 if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES)
6009 return (ENOSPC);
6011 error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR);
6012 if (error != 0)
6013 return (error);
6015 vsecattr.vsa_mask = VSA_ACE;
6016 aclbsize = ap->a_aclp->acl_cnt * sizeof (ace_t);
6017 vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP);
6018 aaclp = vsecattr.vsa_aclentp;
6019 vsecattr.vsa_aclentsz = aclbsize;
6021 aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp);
6022 error = zfs_setsecattr(VTOZ(ap->a_vp), &vsecattr, 0, ap->a_cred);
6023 kmem_free(aaclp, aclbsize);
6025 return (error);
6028 #ifndef _SYS_SYSPROTO_H_
6029 struct vop_aclcheck_args {
6030 struct vnode *vp;
6031 acl_type_t type;
6032 struct acl *aclp;
6033 struct ucred *cred;
6034 struct thread *td;
6036 #endif
6038 static int
6039 zfs_freebsd_aclcheck(struct vop_aclcheck_args *ap)
6042 return (EOPNOTSUPP);
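/*
 * Reverse lookup used for pathname reconstruction.  For ordinary vnodes the
 * parent and name are fetched directly from ZFS; for the root of a snapshot
 * mounted under .zfs the request is forwarded to the covered vnode,
 * temporarily dropping this vnode's lock.
 */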
6045 static int
6046 zfs_vptocnp(struct vop_vptocnp_args *ap)
6048 vnode_t *covered_vp;
6049 vnode_t *vp = ap->a_vp;
6050 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
6051 znode_t *zp = VTOZ(vp);
6052 int ltype;
6053 int error;
6055 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
6056 return (error);
6059 * If we are a snapshot mounted under .zfs, run the operation
6060 * on the covered vnode.
6062 if (zp->z_id != zfsvfs->z_root || zfsvfs->z_parent == zfsvfs) {
6063 char name[MAXNAMLEN + 1];
6064 znode_t *dzp;
6065 size_t len;
6067 error = zfs_znode_parent_and_name(zp, &dzp, name,
6068 sizeof (name));
6069 if (error == 0) {
6070 len = strlen(name);
6071 if (*ap->a_buflen < len)
6072 error = SET_ERROR(ENOMEM);
6074 if (error == 0) {
6075 *ap->a_buflen -= len;
6076 memcpy(ap->a_buf + *ap->a_buflen, name, len);
6077 *ap->a_vpp = ZTOV(dzp);
6079 zfs_exit(zfsvfs, FTAG);
6080 return (error);
6082 zfs_exit(zfsvfs, FTAG);
6084 covered_vp = vp->v_mount->mnt_vnodecovered;
6085 enum vgetstate vs = vget_prep(covered_vp);
6086 ltype = VOP_ISLOCKED(vp);
6087 VOP_UNLOCK(vp);
6088 error = vget_finish(covered_vp, LK_SHARED, vs);
6089 if (error == 0) {
6090 error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_buf,
6091 ap->a_buflen);
6092 vput(covered_vp);
6094 vn_lock(vp, ltype | LK_RETRY);
6095 if (VN_IS_DOOMED(vp))
6096 error = SET_ERROR(ENOENT);
6097 return (error);
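/*
 * VOP_DEALLOCATE (guarded by the __FreeBSD_version check below): punch a
 * hole in the file with zfs_freesp(), clamping the range to the current
 * file size, and commit to the ZIL when the dataset or the request asks
 * for synchronous semantics.
 */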
6100 #if __FreeBSD_version >= 1400032
6101 static int
6102 zfs_deallocate(struct vop_deallocate_args *ap)
6104 znode_t *zp = VTOZ(ap->a_vp);
6105 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
6106 zilog_t *zilog;
6107 off_t off, len, file_sz;
6108 int error;
6110 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
6111 return (error);
6114 * Callers might not be able to detect properly that we are read-only,
6115 * so check it explicitly here.
6117 if (zfs_is_readonly(zfsvfs)) {
6118 zfs_exit(zfsvfs, FTAG);
6119 return (SET_ERROR(EROFS));
6122 zilog = zfsvfs->z_log;
6123 off = *ap->a_offset;
6124 len = *ap->a_len;
6125 file_sz = zp->z_size;
6126 if (off + len > file_sz)
6127 len = file_sz - off;
6128 /* Fast path for out-of-range request. */
6129 if (len <= 0) {
6130 *ap->a_len = 0;
6131 zfs_exit(zfsvfs, FTAG);
6132 return (0);
6135 error = zfs_freesp(zp, off, len, O_RDWR, TRUE);
6136 if (error == 0) {
6137 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS ||
6138 (ap->a_ioflag & IO_SYNC) != 0)
6139 zil_commit(zilog, zp->z_id);
6140 *ap->a_offset = off + len;
6141 *ap->a_len = 0;
6144 zfs_exit(zfsvfs, FTAG);
6145 return (error);
6147 #endif
6149 #ifndef _SYS_SYSPROTO_H_
6150 struct vop_copy_file_range_args {
6151 struct vnode *a_invp;
6152 off_t *a_inoffp;
6153 struct vnode *a_outvp;
6154 off_t *a_outoffp;
6155 size_t *a_lenp;
6156 unsigned int a_flags;
6157 struct ucred *a_incred;
6158 struct ucred *a_outcred;
6159 struct thread *a_fsizetd;
6161 #endif
6163 * TODO: FreeBSD will only call the file system-specific copy_file_range() if
6164 * both files reside under the same mountpoint. In the case of ZFS we want to
6165 * be called even if the files are in different datasets (but on the same
6166 * pool; we need to check that ourselves).
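/*
 * Attempt block cloning via zfs_clone_range(): the vnodes are locked (with
 * vn_lock_pair() when they are distinct), and the destination pool must
 * have SPA_FEATURE_BLOCK_CLONING enabled.  When cloning is disabled or the
 * clone attempt returns EXDEV, EAGAIN, EINVAL or EOPNOTSUPP, the request
 * falls back to vn_generic_copy_file_range().
 */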
6168 static int
6169 zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap)
6171 zfsvfs_t *outzfsvfs;
6172 struct vnode *invp = ap->a_invp;
6173 struct vnode *outvp = ap->a_outvp;
6174 struct mount *mp;
6175 int error;
6176 uint64_t len = *ap->a_lenp;
6178 if (!zfs_bclone_enabled) {
6179 mp = NULL;
6180 goto bad_write_fallback;
6184 * TODO: If offset/length is not aligned to recordsize, use
6185 * vn_generic_copy_file_range() on this fragment.
6186 * It would be better to do this after we lock the vnodes, but then we
6187 * need something other than vn_generic_copy_file_range().
6190 vn_start_write(outvp, &mp, V_WAIT);
6191 if (__predict_true(mp == outvp->v_mount)) {
6192 outzfsvfs = (zfsvfs_t *)mp->mnt_data;
6193 if (!spa_feature_is_enabled(dmu_objset_spa(outzfsvfs->z_os),
6194 SPA_FEATURE_BLOCK_CLONING)) {
6195 goto bad_write_fallback;
6198 if (invp == outvp) {
6199 if (vn_lock(outvp, LK_EXCLUSIVE) != 0) {
6200 goto bad_write_fallback;
6202 } else {
6203 #if (__FreeBSD_version >= 1302506 && __FreeBSD_version < 1400000) || \
6204 __FreeBSD_version >= 1400086
6205 vn_lock_pair(invp, false, LK_EXCLUSIVE, outvp, false,
6206 LK_EXCLUSIVE);
6207 #else
6208 vn_lock_pair(invp, false, outvp, false);
6209 #endif
6210 if (VN_IS_DOOMED(invp) || VN_IS_DOOMED(outvp)) {
6211 goto bad_locked_fallback;
6215 #ifdef MAC
6216 error = mac_vnode_check_write(curthread->td_ucred, ap->a_outcred,
6217 outvp);
6218 if (error != 0)
6219 goto out_locked;
6220 #endif
6222 error = zfs_clone_range(VTOZ(invp), ap->a_inoffp, VTOZ(outvp),
6223 ap->a_outoffp, &len, ap->a_outcred);
6224 if (error == EXDEV || error == EAGAIN || error == EINVAL ||
6225 error == EOPNOTSUPP)
6226 goto bad_locked_fallback;
6227 *ap->a_lenp = (size_t)len;
6228 #ifdef MAC
6229 out_locked:
6230 #endif
6231 if (invp != outvp)
6232 VOP_UNLOCK(invp);
6233 VOP_UNLOCK(outvp);
6234 if (mp != NULL)
6235 vn_finished_write(mp);
6236 return (error);
6238 bad_locked_fallback:
6239 if (invp != outvp)
6240 VOP_UNLOCK(invp);
6241 VOP_UNLOCK(outvp);
6242 bad_write_fallback:
6243 if (mp != NULL)
6244 vn_finished_write(mp);
6245 error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp,
6246 ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags,
6247 ap->a_incred, ap->a_outcred, ap->a_fsizetd);
6248 return (error);
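/*
 * Three vnode operation vectors are exported below: zfs_vnodeops for regular
 * files and directories, zfs_fifoops for FIFOs (layered over fifo_specops),
 * and zfs_shareops for the special hidden share files under .zfs.
 */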
6251 struct vop_vector zfs_vnodeops;
6252 struct vop_vector zfs_fifoops;
6253 struct vop_vector zfs_shareops;
6255 struct vop_vector zfs_vnodeops = {
6256 .vop_default = &default_vnodeops,
6257 .vop_inactive = zfs_freebsd_inactive,
6258 .vop_need_inactive = zfs_freebsd_need_inactive,
6259 .vop_reclaim = zfs_freebsd_reclaim,
6260 .vop_fplookup_vexec = zfs_freebsd_fplookup_vexec,
6261 .vop_fplookup_symlink = zfs_freebsd_fplookup_symlink,
6262 .vop_access = zfs_freebsd_access,
6263 .vop_allocate = VOP_EINVAL,
6264 #if __FreeBSD_version >= 1400032
6265 .vop_deallocate = zfs_deallocate,
6266 #endif
6267 .vop_lookup = zfs_cache_lookup,
6268 .vop_cachedlookup = zfs_freebsd_cachedlookup,
6269 .vop_getattr = zfs_freebsd_getattr,
6270 .vop_setattr = zfs_freebsd_setattr,
6271 .vop_create = zfs_freebsd_create,
6272 .vop_mknod = (vop_mknod_t *)zfs_freebsd_create,
6273 .vop_mkdir = zfs_freebsd_mkdir,
6274 .vop_readdir = zfs_freebsd_readdir,
6275 .vop_fsync = zfs_freebsd_fsync,
6276 .vop_open = zfs_freebsd_open,
6277 .vop_close = zfs_freebsd_close,
6278 .vop_rmdir = zfs_freebsd_rmdir,
6279 .vop_ioctl = zfs_freebsd_ioctl,
6280 .vop_link = zfs_freebsd_link,
6281 .vop_symlink = zfs_freebsd_symlink,
6282 .vop_readlink = zfs_freebsd_readlink,
6283 .vop_read = zfs_freebsd_read,
6284 .vop_write = zfs_freebsd_write,
6285 .vop_remove = zfs_freebsd_remove,
6286 .vop_rename = zfs_freebsd_rename,
6287 .vop_pathconf = zfs_freebsd_pathconf,
6288 .vop_bmap = zfs_freebsd_bmap,
6289 .vop_fid = zfs_freebsd_fid,
6290 .vop_getextattr = zfs_getextattr,
6291 .vop_deleteextattr = zfs_deleteextattr,
6292 .vop_setextattr = zfs_setextattr,
6293 .vop_listextattr = zfs_listextattr,
6294 .vop_getacl = zfs_freebsd_getacl,
6295 .vop_setacl = zfs_freebsd_setacl,
6296 .vop_aclcheck = zfs_freebsd_aclcheck,
6297 .vop_getpages = zfs_freebsd_getpages,
6298 .vop_putpages = zfs_freebsd_putpages,
6299 .vop_vptocnp = zfs_vptocnp,
6300 .vop_lock1 = vop_lock,
6301 .vop_unlock = vop_unlock,
6302 .vop_islocked = vop_islocked,
6303 #if __FreeBSD_version >= 1400043
6304 .vop_add_writecount = vop_stdadd_writecount_nomsync,
6305 #endif
6306 .vop_copy_file_range = zfs_freebsd_copy_file_range,
6308 VFS_VOP_VECTOR_REGISTER(zfs_vnodeops);
6310 struct vop_vector zfs_fifoops = {
6311 .vop_default = &fifo_specops,
6312 .vop_fsync = zfs_freebsd_fsync,
6313 .vop_fplookup_vexec = zfs_freebsd_fplookup_vexec,
6314 .vop_fplookup_symlink = zfs_freebsd_fplookup_symlink,
6315 .vop_access = zfs_freebsd_access,
6316 .vop_getattr = zfs_freebsd_getattr,
6317 .vop_inactive = zfs_freebsd_inactive,
6318 .vop_read = VOP_PANIC,
6319 .vop_reclaim = zfs_freebsd_reclaim,
6320 .vop_setattr = zfs_freebsd_setattr,
6321 .vop_write = VOP_PANIC,
6322 .vop_pathconf = zfs_freebsd_pathconf,
6323 .vop_fid = zfs_freebsd_fid,
6324 .vop_getacl = zfs_freebsd_getacl,
6325 .vop_setacl = zfs_freebsd_setacl,
6326 .vop_aclcheck = zfs_freebsd_aclcheck,
6327 #if __FreeBSD_version >= 1400043
6328 .vop_add_writecount = vop_stdadd_writecount_nomsync,
6329 #endif
6331 VFS_VOP_VECTOR_REGISTER(zfs_fifoops);
6334 * special share hidden files vnode operations template
6336 struct vop_vector zfs_shareops = {
6337 .vop_default = &default_vnodeops,
6338 .vop_fplookup_vexec = VOP_EAGAIN,
6339 .vop_fplookup_symlink = VOP_EAGAIN,
6340 .vop_access = zfs_freebsd_access,
6341 .vop_inactive = zfs_freebsd_inactive,
6342 .vop_reclaim = zfs_freebsd_reclaim,
6343 .vop_fid = zfs_freebsd_fid,
6344 .vop_pathconf = zfs_freebsd_pathconf,
6345 #if __FreeBSD_version >= 1400043
6346 .vop_add_writecount = vop_stdadd_writecount_nomsync,
6347 #endif
6349 VFS_VOP_VECTOR_REGISTER(zfs_shareops);
6351 ZFS_MODULE_PARAM(zfs, zfs_, xattr_compat, INT, ZMOD_RW,
6352 "Use legacy ZFS xattr naming for writing new user namespace xattrs");