FreeBSD: Fix RLIMIT_FSIZE handling for block cloning
[zfs.git] / module / os / freebsd / zfs / zfs_vnops_os.c
blob283f56963170841bdcec2dd96e9976aa55befa20
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
25 * Copyright (c) 2014 Integros [integros.com]
26 * Copyright 2017 Nexenta Systems, Inc.
29 /* Portions Copyright 2007 Jeremy Teo */
30 /* Portions Copyright 2010 Robert Milkowski */
32 #include <sys/param.h>
33 #include <sys/time.h>
34 #include <sys/systm.h>
35 #include <sys/sysmacros.h>
36 #include <sys/resource.h>
37 #include <security/mac/mac_framework.h>
38 #include <sys/vfs.h>
39 #include <sys/endian.h>
40 #include <sys/vm.h>
41 #include <sys/vnode.h>
42 #include <sys/smr.h>
43 #include <sys/dirent.h>
44 #include <sys/file.h>
45 #include <sys/stat.h>
46 #include <sys/kmem.h>
47 #include <sys/taskq.h>
48 #include <sys/uio.h>
49 #include <sys/atomic.h>
50 #include <sys/namei.h>
51 #include <sys/mman.h>
52 #include <sys/cmn_err.h>
53 #include <sys/kdb.h>
54 #include <sys/sysproto.h>
55 #include <sys/errno.h>
56 #include <sys/unistd.h>
57 #include <sys/zfs_dir.h>
58 #include <sys/zfs_ioctl.h>
59 #include <sys/fs/zfs.h>
60 #include <sys/dmu.h>
61 #include <sys/dmu_objset.h>
62 #include <sys/spa.h>
63 #include <sys/txg.h>
64 #include <sys/dbuf.h>
65 #include <sys/zap.h>
66 #include <sys/sa.h>
67 #include <sys/policy.h>
68 #include <sys/sunddi.h>
69 #include <sys/filio.h>
70 #include <sys/sid.h>
71 #include <sys/zfs_ctldir.h>
72 #include <sys/zfs_fuid.h>
73 #include <sys/zfs_quota.h>
74 #include <sys/zfs_sa.h>
75 #include <sys/zfs_rlock.h>
76 #include <sys/bio.h>
77 #include <sys/buf.h>
78 #include <sys/sched.h>
79 #include <sys/acl.h>
80 #include <sys/vmmeter.h>
81 #include <vm/vm_param.h>
82 #include <sys/zil.h>
83 #include <sys/zfs_vnops.h>
84 #include <sys/module.h>
85 #include <sys/sysent.h>
86 #include <sys/dmu_impl.h>
87 #include <sys/brt.h>
88 #include <sys/zfeature.h>
90 #include <vm/vm_object.h>
92 #include <sys/extattr.h>
93 #include <sys/priv.h>
95 #ifndef VN_OPEN_INVFS
96 #define VN_OPEN_INVFS 0x0
97 #endif
99 VFS_SMR_DECLARE;
101 #ifdef DEBUG_VFS_LOCKS
102 #define VNCHECKREF(vp) \
103 VNASSERT((vp)->v_holdcnt > 0 && (vp)->v_usecount > 0, vp, \
104 ("%s: wrong ref counts", __func__));
105 #else
106 #define VNCHECKREF(vp)
107 #endif
109 #if __FreeBSD_version >= 1400045
110 typedef uint64_t cookie_t;
111 #else
112 typedef ulong_t cookie_t;
113 #endif
/*
 * Programming rules.
 *
 * Each vnode op performs some logical unit of work.  To do this, the ZPL must
 * properly lock its in-core state, create a DMU transaction, do the work,
 * record this work in the intent log (ZIL), commit the DMU transaction,
 * and wait for the intent log to commit if it is a synchronous operation.
 * Moreover, the vnode ops must work in both normal and log replay context.
 * The ordering of events is important to avoid deadlocks and references
 * to freed memory.  The example below illustrates the following Big Rules:
 *
 *  (1)	A check must be made in each zfs thread for a mounted file system.
 *	This is done, avoiding races, using zfs_enter(zfsvfs).
 *	A zfs_exit(zfsvfs) is needed before all returns.  Any znodes
 *	must be checked with zfs_verify_zp(zp).  Both of these macros
 *	can return EIO from the calling function.
 *
 *  (2)	VN_RELE() should always be the last thing except for zil_commit()
 *	(if necessary) and zfs_exit().  This is for 3 reasons:
 *	First, if it's the last reference, the vnode/znode
 *	can be freed, so the zp may point to freed memory.  Second, the last
 *	reference will call zfs_zinactive(), which may induce a lot of work --
 *	pushing cached pages (which acquires range locks) and syncing out
 *	cached atime changes.  Third, zfs_zinactive() may require a new tx,
 *	which could deadlock the system if you were already holding one.
 *	If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
 *
 *  (3)	All range locks must be grabbed before calling dmu_tx_assign(),
 *	as they can span dmu_tx_assign() calls.
 *
 *  (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to
 *      dmu_tx_assign().  This is critical because we don't want to block
 *      while holding locks.
 *
 *	If no ZPL locks are held (aside from zfs_enter()), use TXG_WAIT.  This
 *	reduces lock contention and CPU usage when we must wait (note that if
 *	throughput is constrained by the storage, nearly every transaction
 *	must wait).
 *
 *      Note, in particular, that if a lock is sometimes acquired before
 *      the tx assigns, and sometimes after (e.g. z_lock), then failing
 *      to use a non-blocking assign can deadlock the system.  The scenario:
 *
 *	Thread A has grabbed a lock before calling dmu_tx_assign().
 *	Thread B is in an already-assigned tx, and blocks for this lock.
 *	Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
 *	forever, because the previous txg can't quiesce until B's tx commits.
 *
 *	If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
 *	then drop all locks, call dmu_tx_wait(), and try again.  On subsequent
 *	calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT,
 *	to indicate that this operation has already called dmu_tx_wait().
 *	This will ensure that we don't retry forever, waiting a short bit
 *	each time.
 *
 *  (5)	If the operation succeeded, generate the intent log entry for it
 *	before dropping locks.  This ensures that the ordering of events
 *	in the intent log matches the order in which they actually occurred.
 *	During ZIL replay the zfs_log_* functions will update the sequence
 *	number to indicate the zil transaction has replayed.
 *
 *  (6)	At the end of each vnode op, the DMU tx must always commit,
 *	regardless of whether there were any errors.
 *
 *  (7)	After dropping all locks, invoke zil_commit(zilog, foid)
 *	to ensure that synchronous semantics are provided when necessary.
 *
 * In general, this is how things should be ordered in each vnode op:
 *
 *	zfs_enter(zfsvfs);		// exit if unmounted
 * top:
 *	zfs_dirent_lookup(&dl, ...)	// lock directory entry (may VN_HOLD())
 *	rw_enter(...);			// grab any other locks you need
 *	tx = dmu_tx_create(...);	// get DMU tx
 *	dmu_tx_hold_*();		// hold each object you might modify
 *	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 *	if (error) {
 *		rw_exit(...);		// drop locks
 *		zfs_dirent_unlock(dl);	// unlock directory entry
 *		VN_RELE(...);		// release held vnodes
 *		if (error == ERESTART) {
 *			waited = B_TRUE;
 *			dmu_tx_wait(tx);
 *			dmu_tx_abort(tx);
 *			goto top;
 *		}
 *		dmu_tx_abort(tx);	// abort DMU tx
 *		zfs_exit(zfsvfs);	// finished in zfs
 *		return (error);		// really out of space
 *	}
 *	error = do_real_work();		// do whatever this VOP does
 *	if (error == 0)
 *		zfs_log_*(...);		// on success, make ZIL entry
 *	dmu_tx_commit(tx);		// commit DMU tx -- error or not
 *	rw_exit(...);			// drop locks
 *	zfs_dirent_unlock(dl);		// unlock directory entry
 *	VN_RELE(...);			// release held vnodes
 *	zil_commit(zilog, foid);	// synchronous when necessary
 *	zfs_exit(zfsvfs);		// finished in zfs
 *	return (error);			// done, report error
 */
216 static int
217 zfs_open(vnode_t **vpp, int flag, cred_t *cr)
219 (void) cr;
220 znode_t *zp = VTOZ(*vpp);
221 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
222 int error;
224 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
225 return (error);
227 if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
228 ((flag & FAPPEND) == 0)) {
229 zfs_exit(zfsvfs, FTAG);
230 return (SET_ERROR(EPERM));
234 * Keep a count of the synchronous opens in the znode. On first
235 * synchronous open we must convert all previous async transactions
236 * into sync to keep correct ordering.
238 if (flag & O_SYNC) {
239 if (atomic_inc_32_nv(&zp->z_sync_cnt) == 1)
240 zil_async_to_sync(zfsvfs->z_log, zp->z_id);
243 zfs_exit(zfsvfs, FTAG);
244 return (0);
247 static int
248 zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
250 (void) offset, (void) cr;
251 znode_t *zp = VTOZ(vp);
252 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
253 int error;
255 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
256 return (error);
258 /* Decrement the synchronous opens in the znode */
259 if ((flag & O_SYNC) && (count == 1))
260 atomic_dec_32(&zp->z_sync_cnt);
262 zfs_exit(zfsvfs, FTAG);
263 return (0);
266 static int
267 zfs_ioctl(vnode_t *vp, ulong_t com, intptr_t data, int flag, cred_t *cred,
268 int *rvalp)
270 (void) flag, (void) cred, (void) rvalp;
271 loff_t off;
272 int error;
274 switch (com) {
275 case _FIOFFS:
277 return (0);
280 * The following two ioctls are used by bfu. Faking out,
281 * necessary to avoid bfu errors.
284 case _FIOGDIO:
285 case _FIOSDIO:
287 return (0);
290 case F_SEEK_DATA:
291 case F_SEEK_HOLE:
293 off = *(offset_t *)data;
294 /* offset parameter is in/out */
295 error = zfs_holey(VTOZ(vp), com, &off);
296 if (error)
297 return (error);
298 *(offset_t *)data = off;
299 return (0);
302 return (SET_ERROR(ENOTTY));
305 static vm_page_t
306 page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
308 vm_object_t obj;
309 vm_page_t pp;
310 int64_t end;
313 * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE
314 * aligned boundaries, if the range is not aligned. As a result a
315 * DEV_BSIZE subrange with partially dirty data may get marked as clean.
316 * It may happen that all DEV_BSIZE subranges are marked clean and thus
317 * the whole page would be considered clean despite have some
318 * dirty data.
319 * For this reason we should shrink the range to DEV_BSIZE aligned
320 * boundaries before calling vm_page_clear_dirty.
322 end = rounddown2(off + nbytes, DEV_BSIZE);
323 off = roundup2(off, DEV_BSIZE);
324 nbytes = end - off;
326 obj = vp->v_object;
327 vm_page_grab_valid_unlocked(&pp, obj, OFF_TO_IDX(start),
328 VM_ALLOC_NOCREAT | VM_ALLOC_SBUSY | VM_ALLOC_NORMAL |
329 VM_ALLOC_IGN_SBUSY);
330 if (pp != NULL) {
331 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
332 vm_object_pip_add(obj, 1);
333 pmap_remove_write(pp);
334 if (nbytes != 0)
335 vm_page_clear_dirty(pp, off, nbytes);
337 return (pp);
340 static void
341 page_unbusy(vm_page_t pp)
344 vm_page_sunbusy(pp);
345 vm_object_pip_wakeup(pp->object);
348 static vm_page_t
349 page_hold(vnode_t *vp, int64_t start)
351 vm_object_t obj;
352 vm_page_t m;
354 obj = vp->v_object;
355 vm_page_grab_valid_unlocked(&m, obj, OFF_TO_IDX(start),
356 VM_ALLOC_NOCREAT | VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY |
357 VM_ALLOC_NOBUSY);
358 return (m);
361 static void
362 page_unhold(vm_page_t pp)
364 vm_page_unwire(pp, PQ_ACTIVE);
368 * When a file is memory mapped, we must keep the IO data synchronized
369 * between the DMU cache and the memory mapped pages. What this means:
371 * On Write: If we find a memory mapped page, we write to *both*
372 * the page and the dmu buffer.
374 void
375 update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
377 vm_object_t obj;
378 struct sf_buf *sf;
379 vnode_t *vp = ZTOV(zp);
380 caddr_t va;
381 int off;
383 ASSERT3P(vp->v_mount, !=, NULL);
384 obj = vp->v_object;
385 ASSERT3P(obj, !=, NULL);
387 off = start & PAGEOFFSET;
388 vm_object_pip_add(obj, 1);
389 for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
390 vm_page_t pp;
391 int nbytes = imin(PAGESIZE - off, len);
393 if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
394 va = zfs_map_page(pp, &sf);
395 (void) dmu_read(os, zp->z_id, start + off, nbytes,
396 va + off, DMU_READ_PREFETCH);
397 zfs_unmap_page(sf);
398 page_unbusy(pp);
400 len -= nbytes;
401 off = 0;
403 vm_object_pip_wakeup(obj);
407 * Read with UIO_NOCOPY flag means that sendfile(2) requests
408 * ZFS to populate a range of page cache pages with data.
410 * NOTE: this function could be optimized to pre-allocate
411 * all pages in advance, drain exclusive busy on all of them,
412 * map them into contiguous KVA region and populate them
413 * in one single dmu_read() call.
416 mappedread_sf(znode_t *zp, int nbytes, zfs_uio_t *uio)
418 vnode_t *vp = ZTOV(zp);
419 objset_t *os = zp->z_zfsvfs->z_os;
420 struct sf_buf *sf;
421 vm_object_t obj;
422 vm_page_t pp;
423 int64_t start;
424 caddr_t va;
425 int len = nbytes;
426 int error = 0;
428 ASSERT3U(zfs_uio_segflg(uio), ==, UIO_NOCOPY);
429 ASSERT3P(vp->v_mount, !=, NULL);
430 obj = vp->v_object;
431 ASSERT3P(obj, !=, NULL);
432 ASSERT0(zfs_uio_offset(uio) & PAGEOFFSET);
434 for (start = zfs_uio_offset(uio); len > 0; start += PAGESIZE) {
435 int bytes = MIN(PAGESIZE, len);
437 pp = vm_page_grab_unlocked(obj, OFF_TO_IDX(start),
438 VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY);
439 if (vm_page_none_valid(pp)) {
440 va = zfs_map_page(pp, &sf);
441 error = dmu_read(os, zp->z_id, start, bytes, va,
442 DMU_READ_PREFETCH);
443 if (bytes != PAGESIZE && error == 0)
444 memset(va + bytes, 0, PAGESIZE - bytes);
445 zfs_unmap_page(sf);
446 if (error == 0) {
447 vm_page_valid(pp);
448 vm_page_activate(pp);
449 vm_page_sunbusy(pp);
450 } else {
451 zfs_vmobject_wlock(obj);
452 if (!vm_page_wired(pp) && pp->valid == 0 &&
453 vm_page_busy_tryupgrade(pp))
454 vm_page_free(pp);
455 else
456 vm_page_sunbusy(pp);
457 zfs_vmobject_wunlock(obj);
459 } else {
460 ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
461 vm_page_sunbusy(pp);
463 if (error)
464 break;
465 zfs_uio_advance(uio, bytes);
466 len -= bytes;
468 return (error);
472 * When a file is memory mapped, we must keep the IO data synchronized
473 * between the DMU cache and the memory mapped pages. What this means:
475 * On Read: We "read" preferentially from memory mapped pages,
476 * else we default from the dmu buffer.
478 * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
479 * the file is memory mapped.
482 mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
484 vnode_t *vp = ZTOV(zp);
485 vm_object_t obj;
486 int64_t start;
487 int len = nbytes;
488 int off;
489 int error = 0;
491 ASSERT3P(vp->v_mount, !=, NULL);
492 obj = vp->v_object;
493 ASSERT3P(obj, !=, NULL);
495 start = zfs_uio_offset(uio);
496 off = start & PAGEOFFSET;
497 for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
498 vm_page_t pp;
499 uint64_t bytes = MIN(PAGESIZE - off, len);
501 if ((pp = page_hold(vp, start))) {
502 struct sf_buf *sf;
503 caddr_t va;
505 va = zfs_map_page(pp, &sf);
506 error = vn_io_fault_uiomove(va + off, bytes,
507 GET_UIO_STRUCT(uio));
508 zfs_unmap_page(sf);
509 page_unhold(pp);
510 } else {
511 error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
512 uio, bytes);
514 len -= bytes;
515 off = 0;
516 if (error)
517 break;
519 return (error);
523 zfs_write_simple(znode_t *zp, const void *data, size_t len,
524 loff_t pos, size_t *presid)
526 int error = 0;
527 ssize_t resid;
529 error = vn_rdwr(UIO_WRITE, ZTOV(zp), __DECONST(void *, data), len, pos,
530 UIO_SYSSPACE, IO_SYNC, kcred, NOCRED, &resid, curthread);
532 if (error) {
533 return (SET_ERROR(error));
534 } else if (presid == NULL) {
535 if (resid != 0) {
536 error = SET_ERROR(EIO);
538 } else {
539 *presid = resid;
541 return (error);
544 void
545 zfs_zrele_async(znode_t *zp)
547 vnode_t *vp = ZTOV(zp);
548 objset_t *os = ITOZSB(vp)->z_os;
550 VN_RELE_ASYNC(vp, dsl_pool_zrele_taskq(dmu_objset_pool(os)));
/*
 * Callback passed to vn_vget_ino_gen() by zfs_lookup_lock() for "..".
 * `arg` is the already-referenced target vnode; it is stored in *vpp and
 * locked with `lkflags`.  If locking fails the reference is dropped.
 *
 *	RETURN:	0 on success, or the error from vn_lock().
 */
static int
zfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
{
	int rc;

	*vpp = arg;
	rc = vn_lock(*vpp, lkflags);
	if (rc == 0)
		return (0);

	vrele(*vpp);
	return (rc);
}
565 static int
566 zfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags)
568 znode_t *zdp = VTOZ(dvp);
569 zfsvfs_t *zfsvfs __unused = zdp->z_zfsvfs;
570 int error;
571 int ltype;
573 if (zfsvfs->z_replay == B_FALSE)
574 ASSERT_VOP_LOCKED(dvp, __func__);
576 if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
577 ASSERT3P(dvp, ==, vp);
578 vref(dvp);
579 ltype = lkflags & LK_TYPE_MASK;
580 if (ltype != VOP_ISLOCKED(dvp)) {
581 if (ltype == LK_EXCLUSIVE)
582 vn_lock(dvp, LK_UPGRADE | LK_RETRY);
583 else /* if (ltype == LK_SHARED) */
584 vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);
587 * Relock for the "." case could leave us with
588 * reclaimed vnode.
590 if (VN_IS_DOOMED(dvp)) {
591 vrele(dvp);
592 return (SET_ERROR(ENOENT));
595 return (0);
596 } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
598 * Note that in this case, dvp is the child vnode, and we
599 * are looking up the parent vnode - exactly reverse from
600 * normal operation. Unlocking dvp requires some rather
601 * tricky unlock/relock dance to prevent mp from being freed;
602 * use vn_vget_ino_gen() which takes care of all that.
604 * XXX Note that there is a time window when both vnodes are
605 * unlocked. It is possible, although highly unlikely, that
606 * during that window the parent-child relationship between
607 * the vnodes may change, for example, get reversed.
608 * In that case we would have a wrong lock order for the vnodes.
609 * All other filesystems seem to ignore this problem, so we
610 * do the same here.
611 * A potential solution could be implemented as follows:
612 * - using LK_NOWAIT when locking the second vnode and retrying
613 * if necessary
614 * - checking that the parent-child relationship still holds
615 * after locking both vnodes and retrying if it doesn't
617 error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp);
618 return (error);
619 } else {
620 error = vn_lock(vp, lkflags);
621 if (error != 0)
622 vrele(vp);
623 return (error);
628 * Lookup an entry in a directory, or an extended attribute directory.
629 * If it exists, return a held vnode reference for it.
631 * IN: dvp - vnode of directory to search.
632 * nm - name of entry to lookup.
633 * pnp - full pathname to lookup [UNUSED].
634 * flags - LOOKUP_XATTR set if looking for an attribute.
635 * rdir - root directory vnode [UNUSED].
636 * cr - credentials of caller.
637 * ct - caller context
639 * OUT: vpp - vnode of located entry, NULL if not found.
641 * RETURN: 0 on success, error code on failure.
643 * Timestamps:
644 * NA
646 static int
647 zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
648 struct componentname *cnp, int nameiop, cred_t *cr, int flags,
649 boolean_t cached)
651 znode_t *zdp = VTOZ(dvp);
652 znode_t *zp;
653 zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
654 seqc_t dvp_seqc;
655 int error = 0;
658 * Fast path lookup, however we must skip DNLC lookup
659 * for case folding or normalizing lookups because the
660 * DNLC code only stores the passed in name. This means
661 * creating 'a' and removing 'A' on a case insensitive
662 * file system would work, but DNLC still thinks 'a'
663 * exists and won't let you create it again on the next
664 * pass through fast path.
666 if (!(flags & LOOKUP_XATTR)) {
667 if (dvp->v_type != VDIR) {
668 return (SET_ERROR(ENOTDIR));
669 } else if (zdp->z_sa_hdl == NULL) {
670 return (SET_ERROR(EIO));
674 DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp,
675 const char *, nm);
677 if ((error = zfs_enter_verify_zp(zfsvfs, zdp, FTAG)) != 0)
678 return (error);
680 dvp_seqc = vn_seqc_read_notmodify(dvp);
682 *vpp = NULL;
684 if (flags & LOOKUP_XATTR) {
686 * If the xattr property is off, refuse the lookup request.
688 if (!(zfsvfs->z_flags & ZSB_XATTR)) {
689 zfs_exit(zfsvfs, FTAG);
690 return (SET_ERROR(EOPNOTSUPP));
694 * We don't allow recursive attributes..
695 * Maybe someday we will.
697 if (zdp->z_pflags & ZFS_XATTR) {
698 zfs_exit(zfsvfs, FTAG);
699 return (SET_ERROR(EINVAL));
702 if ((error = zfs_get_xattrdir(VTOZ(dvp), &zp, cr, flags))) {
703 zfs_exit(zfsvfs, FTAG);
704 return (error);
706 *vpp = ZTOV(zp);
709 * Do we have permission to get into attribute directory?
711 error = zfs_zaccess(zp, ACE_EXECUTE, 0, B_FALSE, cr, NULL);
712 if (error) {
713 vrele(ZTOV(zp));
716 zfs_exit(zfsvfs, FTAG);
717 return (error);
721 * Check accessibility of directory if we're not coming in via
722 * VOP_CACHEDLOOKUP.
724 if (!cached) {
725 #ifdef NOEXECCHECK
726 if ((cnp->cn_flags & NOEXECCHECK) != 0) {
727 cnp->cn_flags &= ~NOEXECCHECK;
728 } else
729 #endif
730 if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr,
731 NULL))) {
732 zfs_exit(zfsvfs, FTAG);
733 return (error);
737 if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
738 NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
739 zfs_exit(zfsvfs, FTAG);
740 return (SET_ERROR(EILSEQ));
745 * First handle the special cases.
747 if ((cnp->cn_flags & ISDOTDOT) != 0) {
749 * If we are a snapshot mounted under .zfs, return
750 * the vp for the snapshot directory.
752 if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) {
753 struct componentname cn;
754 vnode_t *zfsctl_vp;
755 int ltype;
757 zfs_exit(zfsvfs, FTAG);
758 ltype = VOP_ISLOCKED(dvp);
759 VOP_UNLOCK(dvp);
760 error = zfsctl_root(zfsvfs->z_parent, LK_SHARED,
761 &zfsctl_vp);
762 if (error == 0) {
763 cn.cn_nameptr = "snapshot";
764 cn.cn_namelen = strlen(cn.cn_nameptr);
765 cn.cn_nameiop = cnp->cn_nameiop;
766 cn.cn_flags = cnp->cn_flags & ~ISDOTDOT;
767 cn.cn_lkflags = cnp->cn_lkflags;
768 error = VOP_LOOKUP(zfsctl_vp, vpp, &cn);
769 vput(zfsctl_vp);
771 vn_lock(dvp, ltype | LK_RETRY);
772 return (error);
775 if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) {
776 zfs_exit(zfsvfs, FTAG);
777 if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP)
778 return (SET_ERROR(ENOTSUP));
779 error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp);
780 return (error);
784 * The loop is retry the lookup if the parent-child relationship
785 * changes during the dot-dot locking complexities.
787 for (;;) {
788 uint64_t parent;
790 error = zfs_dirlook(zdp, nm, &zp);
791 if (error == 0)
792 *vpp = ZTOV(zp);
794 zfs_exit(zfsvfs, FTAG);
795 if (error != 0)
796 break;
798 error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags);
799 if (error != 0) {
801 * If we've got a locking error, then the vnode
802 * got reclaimed because of a force unmount.
803 * We never enter doomed vnodes into the name cache.
805 *vpp = NULL;
806 return (error);
809 if ((cnp->cn_flags & ISDOTDOT) == 0)
810 break;
812 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) {
813 vput(ZTOV(zp));
814 *vpp = NULL;
815 return (error);
817 if (zdp->z_sa_hdl == NULL) {
818 error = SET_ERROR(EIO);
819 } else {
820 error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
821 &parent, sizeof (parent));
823 if (error != 0) {
824 zfs_exit(zfsvfs, FTAG);
825 vput(ZTOV(zp));
826 break;
828 if (zp->z_id == parent) {
829 zfs_exit(zfsvfs, FTAG);
830 break;
832 vput(ZTOV(zp));
835 if (error != 0)
836 *vpp = NULL;
838 /* Translate errors and add SAVENAME when needed. */
839 if (cnp->cn_flags & ISLASTCN) {
840 switch (nameiop) {
841 case CREATE:
842 case RENAME:
843 if (error == ENOENT) {
844 error = EJUSTRETURN;
845 #if __FreeBSD_version < 1400068
846 cnp->cn_flags |= SAVENAME;
847 #endif
848 break;
850 zfs_fallthrough;
851 case DELETE:
852 #if __FreeBSD_version < 1400068
853 if (error == 0)
854 cnp->cn_flags |= SAVENAME;
855 #endif
856 break;
860 if ((cnp->cn_flags & ISDOTDOT) != 0) {
862 * FIXME: zfs_lookup_lock relocks vnodes and does nothing to
863 * handle races. In particular different callers may end up
864 * with different vnodes and will try to add conflicting
865 * entries to the namecache.
867 * While finding different result may be acceptable in face
868 * of concurrent modification, adding conflicting entries
869 * trips over an assert in the namecache.
871 * Ultimately let an entry through once everything settles.
873 if (!vn_seqc_consistent(dvp, dvp_seqc)) {
874 cnp->cn_flags &= ~MAKEENTRY;
878 /* Insert name into cache (as non-existent) if appropriate. */
879 if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
880 error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0)
881 cache_enter(dvp, NULL, cnp);
883 /* Insert name into cache if appropriate. */
884 if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
885 error == 0 && (cnp->cn_flags & MAKEENTRY)) {
886 if (!(cnp->cn_flags & ISLASTCN) ||
887 (nameiop != DELETE && nameiop != RENAME)) {
888 cache_enter(dvp, *vpp, cnp);
892 return (error);
896 * Attempt to create a new entry in a directory. If the entry
897 * already exists, truncate the file if permissible, else return
898 * an error. Return the vp of the created or trunc'd file.
900 * IN: dvp - vnode of directory to put new file entry in.
901 * name - name of new file entry.
902 * vap - attributes of new file.
903 * excl - flag indicating exclusive or non-exclusive mode.
904 * mode - mode to open file with.
905 * cr - credentials of caller.
906 * flag - large file flag [UNUSED].
907 * ct - caller context
908 * vsecp - ACL to be set
909 * mnt_ns - Unused on FreeBSD
911 * OUT: vpp - vnode of created or trunc'd entry.
913 * RETURN: 0 on success, error code on failure.
915 * Timestamps:
916 * dvp - ctime|mtime updated if new entry created
917 * vp - ctime|mtime always, atime if new
920 zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode,
921 znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp, zidmap_t *mnt_ns)
923 (void) excl, (void) mode, (void) flag;
924 znode_t *zp;
925 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
926 zilog_t *zilog;
927 objset_t *os;
928 dmu_tx_t *tx;
929 int error;
930 uid_t uid = crgetuid(cr);
931 gid_t gid = crgetgid(cr);
932 uint64_t projid = ZFS_DEFAULT_PROJID;
933 zfs_acl_ids_t acl_ids;
934 boolean_t fuid_dirtied;
935 uint64_t txtype;
936 #ifdef DEBUG_VFS_LOCKS
937 vnode_t *dvp = ZTOV(dzp);
938 #endif
941 * If we have an ephemeral id, ACL, or XVATTR then
942 * make sure file system is at proper version
944 if (zfsvfs->z_use_fuids == B_FALSE &&
945 (vsecp || (vap->va_mask & AT_XVATTR) ||
946 IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
947 return (SET_ERROR(EINVAL));
949 if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
950 return (error);
951 os = zfsvfs->z_os;
952 zilog = zfsvfs->z_log;
954 if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
955 NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
956 zfs_exit(zfsvfs, FTAG);
957 return (SET_ERROR(EILSEQ));
960 if (vap->va_mask & AT_XVATTR) {
961 if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
962 crgetuid(cr), cr, vap->va_type)) != 0) {
963 zfs_exit(zfsvfs, FTAG);
964 return (error);
968 *zpp = NULL;
970 if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr))
971 vap->va_mode &= ~S_ISVTX;
973 error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
974 if (error) {
975 zfs_exit(zfsvfs, FTAG);
976 return (error);
978 ASSERT3P(zp, ==, NULL);
981 * Create a new file object and update the directory
982 * to reference it.
984 if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns))) {
985 goto out;
989 * We only support the creation of regular files in
990 * extended attribute directories.
993 if ((dzp->z_pflags & ZFS_XATTR) &&
994 (vap->va_type != VREG)) {
995 error = SET_ERROR(EINVAL);
996 goto out;
999 if ((error = zfs_acl_ids_create(dzp, 0, vap,
1000 cr, vsecp, &acl_ids, NULL)) != 0)
1001 goto out;
1003 if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode))
1004 projid = zfs_inherit_projid(dzp);
1005 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) {
1006 zfs_acl_ids_free(&acl_ids);
1007 error = SET_ERROR(EDQUOT);
1008 goto out;
1011 getnewvnode_reserve();
1013 tx = dmu_tx_create(os);
1015 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
1016 ZFS_SA_BASE_ATTR_SIZE);
1018 fuid_dirtied = zfsvfs->z_fuid_dirty;
1019 if (fuid_dirtied)
1020 zfs_fuid_txhold(zfsvfs, tx);
1021 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
1022 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
1023 if (!zfsvfs->z_use_sa &&
1024 acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1025 dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
1026 0, acl_ids.z_aclp->z_acl_bytes);
1028 error = dmu_tx_assign(tx, TXG_WAIT);
1029 if (error) {
1030 zfs_acl_ids_free(&acl_ids);
1031 dmu_tx_abort(tx);
1032 getnewvnode_drop_reserve();
1033 zfs_exit(zfsvfs, FTAG);
1034 return (error);
1036 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
1038 error = zfs_link_create(dzp, name, zp, tx, ZNEW);
1039 if (error != 0) {
1041 * Since, we failed to add the directory entry for it,
1042 * delete the newly created dnode.
1044 zfs_znode_delete(zp, tx);
1045 VOP_UNLOCK(ZTOV(zp));
1046 zrele(zp);
1047 zfs_acl_ids_free(&acl_ids);
1048 dmu_tx_commit(tx);
1049 getnewvnode_drop_reserve();
1050 goto out;
1053 if (fuid_dirtied)
1054 zfs_fuid_sync(zfsvfs, tx);
1056 txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
1057 zfs_log_create(zilog, tx, txtype, dzp, zp, name,
1058 vsecp, acl_ids.z_fuidp, vap);
1059 zfs_acl_ids_free(&acl_ids);
1060 dmu_tx_commit(tx);
1062 getnewvnode_drop_reserve();
1064 out:
1065 VNCHECKREF(dvp);
1066 if (error == 0) {
1067 *zpp = zp;
1070 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1071 zil_commit(zilog, 0);
1073 zfs_exit(zfsvfs, FTAG);
1074 return (error);
1078 * Remove an entry from a directory.
1080 * IN: dvp - vnode of directory to remove entry from.
1081 * name - name of entry to remove.
1082 * cr - credentials of caller.
1083 * ct - caller context
1084 * flags - case flags
1086 * RETURN: 0 on success, error code on failure.
1088 * Timestamps:
1089 * dvp - ctime|mtime
1090 * vp - ctime (if nlink > 0)
1092 static int
1093 zfs_remove_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
1095 znode_t *dzp = VTOZ(dvp);
1096 znode_t *zp;
1097 znode_t *xzp;
1098 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1099 zilog_t *zilog;
1100 uint64_t xattr_obj;
1101 uint64_t obj = 0;
1102 dmu_tx_t *tx;
1103 boolean_t unlinked;
1104 uint64_t txtype;
1105 int error;
1108 if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
1109 return (error);
1110 zp = VTOZ(vp);
1111 if ((error = zfs_verify_zp(zp)) != 0) {
1112 zfs_exit(zfsvfs, FTAG);
1113 return (error);
1115 zilog = zfsvfs->z_log;
1117 xattr_obj = 0;
1118 xzp = NULL;
1120 if ((error = zfs_zaccess_delete(dzp, zp, cr, NULL))) {
1121 goto out;
1125 * Need to use rmdir for removing directories.
1127 if (vp->v_type == VDIR) {
1128 error = SET_ERROR(EPERM);
1129 goto out;
1132 vnevent_remove(vp, dvp, name, ct);
1134 obj = zp->z_id;
1136 /* are there any extended attributes? */
1137 error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
1138 &xattr_obj, sizeof (xattr_obj));
1139 if (error == 0 && xattr_obj) {
1140 error = zfs_zget(zfsvfs, xattr_obj, &xzp);
1141 ASSERT0(error);
1145 * We may delete the znode now, or we may put it in the unlinked set;
1146 * it depends on whether we're the last link, and on whether there are
1147 * other holds on the vnode. So we dmu_tx_hold() the right things to
1148 * allow for either case.
1150 tx = dmu_tx_create(zfsvfs->z_os);
1151 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
1152 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1153 zfs_sa_upgrade_txholds(tx, zp);
1154 zfs_sa_upgrade_txholds(tx, dzp);
1156 if (xzp) {
1157 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
1158 dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
1161 /* charge as an update -- would be nice not to charge at all */
1162 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
1165 * Mark this transaction as typically resulting in a net free of space
1167 dmu_tx_mark_netfree(tx);
1169 error = dmu_tx_assign(tx, TXG_WAIT);
1170 if (error) {
1171 dmu_tx_abort(tx);
1172 zfs_exit(zfsvfs, FTAG);
1173 return (error);
1177 * Remove the directory entry.
1179 error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked);
1181 if (error) {
1182 dmu_tx_commit(tx);
1183 goto out;
1186 if (unlinked) {
1187 zfs_unlinked_add(zp, tx);
1188 vp->v_vflag |= VV_NOSYNC;
1190 /* XXX check changes to linux vnops */
1191 txtype = TX_REMOVE;
1192 zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked);
1194 dmu_tx_commit(tx);
1195 out:
1197 if (xzp)
1198 vrele(ZTOV(xzp));
1200 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1201 zil_commit(zilog, 0);
1204 zfs_exit(zfsvfs, FTAG);
1205 return (error);
1209 static int
1210 zfs_lookup_internal(znode_t *dzp, const char *name, vnode_t **vpp,
1211 struct componentname *cnp, int nameiop)
1213 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1214 int error;
1216 cnp->cn_nameptr = __DECONST(char *, name);
1217 cnp->cn_namelen = strlen(name);
1218 cnp->cn_nameiop = nameiop;
1219 cnp->cn_flags = ISLASTCN;
1220 #if __FreeBSD_version < 1400068
1221 cnp->cn_flags |= SAVENAME;
1222 #endif
1223 cnp->cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
1224 cnp->cn_cred = kcred;
1225 #if __FreeBSD_version < 1400037
1226 cnp->cn_thread = curthread;
1227 #endif
1229 if (zfsvfs->z_use_namecache && !zfsvfs->z_replay) {
1230 struct vop_lookup_args a;
1232 a.a_gen.a_desc = &vop_lookup_desc;
1233 a.a_dvp = ZTOV(dzp);
1234 a.a_vpp = vpp;
1235 a.a_cnp = cnp;
1236 error = vfs_cache_lookup(&a);
1237 } else {
1238 error = zfs_lookup(ZTOV(dzp), name, vpp, cnp, nameiop, kcred, 0,
1239 B_FALSE);
1241 #ifdef ZFS_DEBUG
1242 if (error) {
1243 printf("got error %d on name %s on op %d\n", error, name,
1244 nameiop);
1245 kdb_backtrace();
1247 #endif
1248 return (error);
1252 zfs_remove(znode_t *dzp, const char *name, cred_t *cr, int flags)
1254 vnode_t *vp;
1255 int error;
1256 struct componentname cn;
1258 if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
1259 return (error);
1261 error = zfs_remove_(ZTOV(dzp), vp, name, cr);
1262 vput(vp);
1263 return (error);
1266 * Create a new directory and insert it into dvp using the name
1267 * provided. Return a pointer to the inserted directory.
1269 * IN: dvp - vnode of directory to add subdir to.
1270 * dirname - name of new directory.
1271 * vap - attributes of new directory.
1272 * cr - credentials of caller.
1273 * ct - caller context
1274 * flags - case flags
1275 * vsecp - ACL to be set
1276 * mnt_ns - Unused on FreeBSD
1278 * OUT: vpp - vnode of created directory.
1280 * RETURN: 0 on success, error code on failure.
1282 * Timestamps:
1283 * dvp - ctime|mtime updated
1284 * vp - ctime|mtime|atime updated
1287 zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp,
1288 cred_t *cr, int flags, vsecattr_t *vsecp, zidmap_t *mnt_ns)
1290 (void) flags, (void) vsecp;
1291 znode_t *zp;
1292 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1293 zilog_t *zilog;
1294 uint64_t txtype;
1295 dmu_tx_t *tx;
1296 int error;
1297 uid_t uid = crgetuid(cr);
1298 gid_t gid = crgetgid(cr);
1299 zfs_acl_ids_t acl_ids;
1300 boolean_t fuid_dirtied;
1302 ASSERT3U(vap->va_type, ==, VDIR);
1305 * If we have an ephemeral id, ACL, or XVATTR then
1306 * make sure file system is at proper version
1308 if (zfsvfs->z_use_fuids == B_FALSE &&
1309 ((vap->va_mask & AT_XVATTR) ||
1310 IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
1311 return (SET_ERROR(EINVAL));
1313 if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
1314 return (error);
1315 zilog = zfsvfs->z_log;
1317 if (dzp->z_pflags & ZFS_XATTR) {
1318 zfs_exit(zfsvfs, FTAG);
1319 return (SET_ERROR(EINVAL));
1322 if (zfsvfs->z_utf8 && u8_validate(dirname,
1323 strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
1324 zfs_exit(zfsvfs, FTAG);
1325 return (SET_ERROR(EILSEQ));
1328 if (vap->va_mask & AT_XVATTR) {
1329 if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
1330 crgetuid(cr), cr, vap->va_type)) != 0) {
1331 zfs_exit(zfsvfs, FTAG);
1332 return (error);
1336 if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
1337 NULL, &acl_ids, NULL)) != 0) {
1338 zfs_exit(zfsvfs, FTAG);
1339 return (error);
1343 * First make sure the new directory doesn't exist.
1345 * Existence is checked first to make sure we don't return
1346 * EACCES instead of EEXIST which can cause some applications
1347 * to fail.
1349 *zpp = NULL;
1351 if ((error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW))) {
1352 zfs_acl_ids_free(&acl_ids);
1353 zfs_exit(zfsvfs, FTAG);
1354 return (error);
1356 ASSERT3P(zp, ==, NULL);
1358 if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr,
1359 mnt_ns))) {
1360 zfs_acl_ids_free(&acl_ids);
1361 zfs_exit(zfsvfs, FTAG);
1362 return (error);
1365 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) {
1366 zfs_acl_ids_free(&acl_ids);
1367 zfs_exit(zfsvfs, FTAG);
1368 return (SET_ERROR(EDQUOT));
1372 * Add a new entry to the directory.
1374 getnewvnode_reserve();
1375 tx = dmu_tx_create(zfsvfs->z_os);
1376 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
1377 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
1378 fuid_dirtied = zfsvfs->z_fuid_dirty;
1379 if (fuid_dirtied)
1380 zfs_fuid_txhold(zfsvfs, tx);
1381 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1382 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
1383 acl_ids.z_aclp->z_acl_bytes);
1386 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
1387 ZFS_SA_BASE_ATTR_SIZE);
1389 error = dmu_tx_assign(tx, TXG_WAIT);
1390 if (error) {
1391 zfs_acl_ids_free(&acl_ids);
1392 dmu_tx_abort(tx);
1393 getnewvnode_drop_reserve();
1394 zfs_exit(zfsvfs, FTAG);
1395 return (error);
1399 * Create new node.
1401 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
1404 * Now put new name in parent dir.
1406 error = zfs_link_create(dzp, dirname, zp, tx, ZNEW);
1407 if (error != 0) {
1408 zfs_znode_delete(zp, tx);
1409 VOP_UNLOCK(ZTOV(zp));
1410 zrele(zp);
1411 goto out;
1414 if (fuid_dirtied)
1415 zfs_fuid_sync(zfsvfs, tx);
1417 *zpp = zp;
1419 txtype = zfs_log_create_txtype(Z_DIR, NULL, vap);
1420 zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL,
1421 acl_ids.z_fuidp, vap);
1423 out:
1424 zfs_acl_ids_free(&acl_ids);
1426 dmu_tx_commit(tx);
1428 getnewvnode_drop_reserve();
1430 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1431 zil_commit(zilog, 0);
1433 zfs_exit(zfsvfs, FTAG);
1434 return (error);
1438 * Remove a directory subdir entry. If the current working
1439 * directory is the same as the subdir to be removed, the
1440 * remove will fail.
1442 * IN: dvp - vnode of directory to remove from.
1443 * name - name of directory to be removed.
1444 * cwd - vnode of current working directory.
1445 * cr - credentials of caller.
1446 * ct - caller context
1447 * flags - case flags
1449 * RETURN: 0 on success, error code on failure.
1451 * Timestamps:
1452 * dvp - ctime|mtime updated
1454 static int
1455 zfs_rmdir_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
1457 znode_t *dzp = VTOZ(dvp);
1458 znode_t *zp = VTOZ(vp);
1459 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1460 zilog_t *zilog;
1461 dmu_tx_t *tx;
1462 int error;
1464 if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
1465 return (error);
1466 if ((error = zfs_verify_zp(zp)) != 0) {
1467 zfs_exit(zfsvfs, FTAG);
1468 return (error);
1470 zilog = zfsvfs->z_log;
1473 if ((error = zfs_zaccess_delete(dzp, zp, cr, NULL))) {
1474 goto out;
1477 if (vp->v_type != VDIR) {
1478 error = SET_ERROR(ENOTDIR);
1479 goto out;
1482 vnevent_rmdir(vp, dvp, name, ct);
1484 tx = dmu_tx_create(zfsvfs->z_os);
1485 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
1486 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1487 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
1488 zfs_sa_upgrade_txholds(tx, zp);
1489 zfs_sa_upgrade_txholds(tx, dzp);
1490 dmu_tx_mark_netfree(tx);
1491 error = dmu_tx_assign(tx, TXG_WAIT);
1492 if (error) {
1493 dmu_tx_abort(tx);
1494 zfs_exit(zfsvfs, FTAG);
1495 return (error);
1498 error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL);
1500 if (error == 0) {
1501 uint64_t txtype = TX_RMDIR;
1502 zfs_log_remove(zilog, tx, txtype, dzp, name,
1503 ZFS_NO_OBJECT, B_FALSE);
1506 dmu_tx_commit(tx);
1508 if (zfsvfs->z_use_namecache)
1509 cache_vop_rmdir(dvp, vp);
1510 out:
1511 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1512 zil_commit(zilog, 0);
1514 zfs_exit(zfsvfs, FTAG);
1515 return (error);
1519 zfs_rmdir(znode_t *dzp, const char *name, znode_t *cwd, cred_t *cr, int flags)
1521 struct componentname cn;
1522 vnode_t *vp;
1523 int error;
1525 if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
1526 return (error);
1528 error = zfs_rmdir_(ZTOV(dzp), vp, name, cr);
1529 vput(vp);
1530 return (error);
1534 * Read as many directory entries as will fit into the provided
1535 * buffer from the given directory cursor position (specified in
1536 * the uio structure).
1538 * IN: vp - vnode of directory to read.
1539 * uio - structure supplying read location, range info,
1540 * and return buffer.
1541 * cr - credentials of caller.
1542 * ct - caller context
1544 * OUT: uio - updated offset and range, buffer filled.
1545 * eofp - set to true if end-of-file detected.
1546 * ncookies- number of entries in cookies
1547 * cookies - offsets to directory entries
1549 * RETURN: 0 on success, error code on failure.
1551 * Timestamps:
1552 * vp - atime updated
1554 * Note that the low 4 bits of the cookie returned by zap is always zero.
1555 * This allows us to use the low range for "special" directory entries:
1556 * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem,
1557 * we use the offset 2 for the '.zfs' directory.
1559 static int
1560 zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
1561 int *ncookies, cookie_t **cookies)
1563 znode_t *zp = VTOZ(vp);
1564 iovec_t *iovp;
1565 dirent64_t *odp;
1566 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1567 objset_t *os;
1568 caddr_t outbuf;
1569 size_t bufsize;
1570 zap_cursor_t zc;
1571 zap_attribute_t zap;
1572 uint_t bytes_wanted;
1573 uint64_t offset; /* must be unsigned; checks for < 1 */
1574 uint64_t parent;
1575 int local_eof;
1576 int outcount;
1577 int error;
1578 uint8_t prefetch;
1579 uint8_t type;
1580 int ncooks;
1581 cookie_t *cooks = NULL;
1583 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
1584 return (error);
1586 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
1587 &parent, sizeof (parent))) != 0) {
1588 zfs_exit(zfsvfs, FTAG);
1589 return (error);
1593 * If we are not given an eof variable,
1594 * use a local one.
1596 if (eofp == NULL)
1597 eofp = &local_eof;
1600 * Check for valid iov_len.
1602 if (GET_UIO_STRUCT(uio)->uio_iov->iov_len <= 0) {
1603 zfs_exit(zfsvfs, FTAG);
1604 return (SET_ERROR(EINVAL));
1608 * Quit if directory has been removed (posix)
1610 if ((*eofp = zp->z_unlinked) != 0) {
1611 zfs_exit(zfsvfs, FTAG);
1612 return (0);
1615 error = 0;
1616 os = zfsvfs->z_os;
1617 offset = zfs_uio_offset(uio);
1618 prefetch = zp->z_zn_prefetch;
1621 * Initialize the iterator cursor.
1623 if (offset <= 3) {
1625 * Start iteration from the beginning of the directory.
1627 zap_cursor_init(&zc, os, zp->z_id);
1628 } else {
1630 * The offset is a serialized cursor.
1632 zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
1636 * Get space to change directory entries into fs independent format.
1638 iovp = GET_UIO_STRUCT(uio)->uio_iov;
1639 bytes_wanted = iovp->iov_len;
1640 if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1) {
1641 bufsize = bytes_wanted;
1642 outbuf = kmem_alloc(bufsize, KM_SLEEP);
1643 odp = (struct dirent64 *)outbuf;
1644 } else {
1645 bufsize = bytes_wanted;
1646 outbuf = NULL;
1647 odp = (struct dirent64 *)iovp->iov_base;
1650 if (ncookies != NULL) {
1652 * Minimum entry size is dirent size and 1 byte for a file name.
1654 ncooks = zfs_uio_resid(uio) / (sizeof (struct dirent) -
1655 sizeof (((struct dirent *)NULL)->d_name) + 1);
1656 cooks = malloc(ncooks * sizeof (*cooks), M_TEMP, M_WAITOK);
1657 *cookies = cooks;
1658 *ncookies = ncooks;
1662 * Transform to file-system independent format
1664 outcount = 0;
1665 while (outcount < bytes_wanted) {
1666 ino64_t objnum;
1667 ushort_t reclen;
1668 off64_t *next = NULL;
1671 * Special case `.', `..', and `.zfs'.
1673 if (offset == 0) {
1674 (void) strcpy(zap.za_name, ".");
1675 zap.za_normalization_conflict = 0;
1676 objnum = zp->z_id;
1677 type = DT_DIR;
1678 } else if (offset == 1) {
1679 (void) strcpy(zap.za_name, "..");
1680 zap.za_normalization_conflict = 0;
1681 objnum = parent;
1682 type = DT_DIR;
1683 } else if (offset == 2 && zfs_show_ctldir(zp)) {
1684 (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME);
1685 zap.za_normalization_conflict = 0;
1686 objnum = ZFSCTL_INO_ROOT;
1687 type = DT_DIR;
1688 } else {
1690 * Grab next entry.
1692 if ((error = zap_cursor_retrieve(&zc, &zap))) {
1693 if ((*eofp = (error == ENOENT)) != 0)
1694 break;
1695 else
1696 goto update;
1699 if (zap.za_integer_length != 8 ||
1700 zap.za_num_integers != 1) {
1701 cmn_err(CE_WARN, "zap_readdir: bad directory "
1702 "entry, obj = %lld, offset = %lld\n",
1703 (u_longlong_t)zp->z_id,
1704 (u_longlong_t)offset);
1705 error = SET_ERROR(ENXIO);
1706 goto update;
1709 objnum = ZFS_DIRENT_OBJ(zap.za_first_integer);
1711 * MacOS X can extract the object type here such as:
1712 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer);
1714 type = ZFS_DIRENT_TYPE(zap.za_first_integer);
1717 reclen = DIRENT64_RECLEN(strlen(zap.za_name));
1720 * Will this entry fit in the buffer?
1722 if (outcount + reclen > bufsize) {
1724 * Did we manage to fit anything in the buffer?
1726 if (!outcount) {
1727 error = SET_ERROR(EINVAL);
1728 goto update;
1730 break;
1733 * Add normal entry:
1735 odp->d_ino = objnum;
1736 odp->d_reclen = reclen;
1737 odp->d_namlen = strlen(zap.za_name);
1738 /* NOTE: d_off is the offset for the *next* entry. */
1739 next = &odp->d_off;
1740 strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1);
1741 odp->d_type = type;
1742 dirent_terminate(odp);
1743 odp = (dirent64_t *)((intptr_t)odp + reclen);
1745 outcount += reclen;
1747 ASSERT3S(outcount, <=, bufsize);
1749 if (prefetch)
1750 dmu_prefetch_dnode(os, objnum, ZIO_PRIORITY_SYNC_READ);
1753 * Move to the next entry, fill in the previous offset.
1755 if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
1756 zap_cursor_advance(&zc);
1757 offset = zap_cursor_serialize(&zc);
1758 } else {
1759 offset += 1;
1762 /* Fill the offset right after advancing the cursor. */
1763 if (next != NULL)
1764 *next = offset;
1765 if (cooks != NULL) {
1766 *cooks++ = offset;
1767 ncooks--;
1768 KASSERT(ncooks >= 0, ("ncookies=%d", ncooks));
1771 zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */
1773 /* Subtract unused cookies */
1774 if (ncookies != NULL)
1775 *ncookies -= ncooks;
1777 if (zfs_uio_segflg(uio) == UIO_SYSSPACE && zfs_uio_iovcnt(uio) == 1) {
1778 iovp->iov_base += outcount;
1779 iovp->iov_len -= outcount;
1780 zfs_uio_resid(uio) -= outcount;
1781 } else if ((error =
1782 zfs_uiomove(outbuf, (long)outcount, UIO_READ, uio))) {
1784 * Reset the pointer.
1786 offset = zfs_uio_offset(uio);
1789 update:
1790 zap_cursor_fini(&zc);
1791 if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1)
1792 kmem_free(outbuf, bufsize);
1794 if (error == ENOENT)
1795 error = 0;
1797 ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
1799 zfs_uio_setoffset(uio, offset);
1800 zfs_exit(zfsvfs, FTAG);
1801 if (error != 0 && cookies != NULL) {
1802 free(*cookies, M_TEMP);
1803 *cookies = NULL;
1804 *ncookies = 0;
1806 return (error);
1810 * Get the requested file attributes and place them in the provided
1811 * vattr structure.
1813 * IN: vp - vnode of file.
1814 * vap - va_mask identifies requested attributes.
1815 * If AT_XVATTR set, then optional attrs are requested
1816 * flags - ATTR_NOACLCHECK (CIFS server context)
1817 * cr - credentials of caller.
1819 * OUT: vap - attribute values.
1821 * RETURN: 0 (always succeeds).
1823 static int
1824 zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
1826 znode_t *zp = VTOZ(vp);
1827 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1828 int error = 0;
1829 uint32_t blksize;
1830 u_longlong_t nblocks;
1831 uint64_t mtime[2], ctime[2], crtime[2], rdev;
1832 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */
1833 xoptattr_t *xoap = NULL;
1834 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
1835 sa_bulk_attr_t bulk[4];
1836 int count = 0;
1838 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
1839 return (error);
1841 zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);
1843 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
1844 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
1845 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16);
1846 if (vp->v_type == VBLK || vp->v_type == VCHR)
1847 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL,
1848 &rdev, 8);
1850 if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) {
1851 zfs_exit(zfsvfs, FTAG);
1852 return (error);
1856 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
1857 * Also, if we are the owner don't bother, since owner should
1858 * always be allowed to read basic attributes of file.
1860 if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) &&
1861 (vap->va_uid != crgetuid(cr))) {
1862 if ((error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
1863 skipaclchk, cr, NULL))) {
1864 zfs_exit(zfsvfs, FTAG);
1865 return (error);
1870 * Return all attributes. It's cheaper to provide the answer
1871 * than to determine whether we were asked the question.
1874 vap->va_type = IFTOVT(zp->z_mode);
1875 vap->va_mode = zp->z_mode & ~S_IFMT;
1876 vn_fsid(vp, vap);
1877 vap->va_nodeid = zp->z_id;
1878 vap->va_nlink = zp->z_links;
1879 if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp) &&
1880 zp->z_links < ZFS_LINK_MAX)
1881 vap->va_nlink++;
1882 vap->va_size = zp->z_size;
1883 if (vp->v_type == VBLK || vp->v_type == VCHR)
1884 vap->va_rdev = zfs_cmpldev(rdev);
1885 else
1886 vap->va_rdev = 0;
1887 vap->va_gen = zp->z_gen;
1888 vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */
1889 vap->va_filerev = zp->z_seq;
1892 * Add in any requested optional attributes and the create time.
1893 * Also set the corresponding bits in the returned attribute bitmap.
1895 if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) {
1896 if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
1897 xoap->xoa_archive =
1898 ((zp->z_pflags & ZFS_ARCHIVE) != 0);
1899 XVA_SET_RTN(xvap, XAT_ARCHIVE);
1902 if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
1903 xoap->xoa_readonly =
1904 ((zp->z_pflags & ZFS_READONLY) != 0);
1905 XVA_SET_RTN(xvap, XAT_READONLY);
1908 if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
1909 xoap->xoa_system =
1910 ((zp->z_pflags & ZFS_SYSTEM) != 0);
1911 XVA_SET_RTN(xvap, XAT_SYSTEM);
1914 if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
1915 xoap->xoa_hidden =
1916 ((zp->z_pflags & ZFS_HIDDEN) != 0);
1917 XVA_SET_RTN(xvap, XAT_HIDDEN);
1920 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
1921 xoap->xoa_nounlink =
1922 ((zp->z_pflags & ZFS_NOUNLINK) != 0);
1923 XVA_SET_RTN(xvap, XAT_NOUNLINK);
1926 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
1927 xoap->xoa_immutable =
1928 ((zp->z_pflags & ZFS_IMMUTABLE) != 0);
1929 XVA_SET_RTN(xvap, XAT_IMMUTABLE);
1932 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
1933 xoap->xoa_appendonly =
1934 ((zp->z_pflags & ZFS_APPENDONLY) != 0);
1935 XVA_SET_RTN(xvap, XAT_APPENDONLY);
1938 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
1939 xoap->xoa_nodump =
1940 ((zp->z_pflags & ZFS_NODUMP) != 0);
1941 XVA_SET_RTN(xvap, XAT_NODUMP);
1944 if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
1945 xoap->xoa_opaque =
1946 ((zp->z_pflags & ZFS_OPAQUE) != 0);
1947 XVA_SET_RTN(xvap, XAT_OPAQUE);
1950 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
1951 xoap->xoa_av_quarantined =
1952 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0);
1953 XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
1956 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
1957 xoap->xoa_av_modified =
1958 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0);
1959 XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
1962 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) &&
1963 vp->v_type == VREG) {
1964 zfs_sa_get_scanstamp(zp, xvap);
1967 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
1968 xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0);
1969 XVA_SET_RTN(xvap, XAT_REPARSE);
1971 if (XVA_ISSET_REQ(xvap, XAT_GEN)) {
1972 xoap->xoa_generation = zp->z_gen;
1973 XVA_SET_RTN(xvap, XAT_GEN);
1976 if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
1977 xoap->xoa_offline =
1978 ((zp->z_pflags & ZFS_OFFLINE) != 0);
1979 XVA_SET_RTN(xvap, XAT_OFFLINE);
1982 if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
1983 xoap->xoa_sparse =
1984 ((zp->z_pflags & ZFS_SPARSE) != 0);
1985 XVA_SET_RTN(xvap, XAT_SPARSE);
1988 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
1989 xoap->xoa_projinherit =
1990 ((zp->z_pflags & ZFS_PROJINHERIT) != 0);
1991 XVA_SET_RTN(xvap, XAT_PROJINHERIT);
1994 if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
1995 xoap->xoa_projid = zp->z_projid;
1996 XVA_SET_RTN(xvap, XAT_PROJID);
2000 ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime);
2001 ZFS_TIME_DECODE(&vap->va_mtime, mtime);
2002 ZFS_TIME_DECODE(&vap->va_ctime, ctime);
2003 ZFS_TIME_DECODE(&vap->va_birthtime, crtime);
2006 sa_object_size(zp->z_sa_hdl, &blksize, &nblocks);
2007 vap->va_blksize = blksize;
2008 vap->va_bytes = nblocks << 9; /* nblocks * 512 */
2010 if (zp->z_blksz == 0) {
2012 * Block size hasn't been set; suggest maximal I/O transfers.
2014 vap->va_blksize = zfsvfs->z_max_blksz;
2017 zfs_exit(zfsvfs, FTAG);
2018 return (0);
2022 * Set the file attributes to the values contained in the
2023 * vattr structure.
2025 * IN: zp - znode of file to be modified.
2026 * vap - new attribute values.
2027 * If AT_XVATTR set, then optional attrs are being set
2028 * flags - ATTR_UTIME set if non-default time values provided.
2029 * - ATTR_NOACLCHECK (CIFS context only).
2030 * cr - credentials of caller.
2031 * mnt_ns - Unused on FreeBSD
2033 * RETURN: 0 on success, error code on failure.
2035 * Timestamps:
2036 * vp - ctime updated, mtime updated if size changed.
2039 zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr, zidmap_t *mnt_ns)
2041 vnode_t *vp = ZTOV(zp);
2042 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
2043 objset_t *os;
2044 zilog_t *zilog;
2045 dmu_tx_t *tx;
2046 vattr_t oldva;
2047 xvattr_t tmpxvattr;
2048 uint_t mask = vap->va_mask;
2049 uint_t saved_mask = 0;
2050 uint64_t saved_mode;
2051 int trim_mask = 0;
2052 uint64_t new_mode;
2053 uint64_t new_uid, new_gid;
2054 uint64_t xattr_obj;
2055 uint64_t mtime[2], ctime[2];
2056 uint64_t projid = ZFS_INVALID_PROJID;
2057 znode_t *attrzp;
2058 int need_policy = FALSE;
2059 int err, err2;
2060 zfs_fuid_info_t *fuidp = NULL;
2061 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */
2062 xoptattr_t *xoap;
2063 zfs_acl_t *aclp;
2064 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
2065 boolean_t fuid_dirtied = B_FALSE;
2066 sa_bulk_attr_t bulk[7], xattr_bulk[7];
2067 int count = 0, xattr_count = 0;
2069 if (mask == 0)
2070 return (0);
2072 if (mask & AT_NOSET)
2073 return (SET_ERROR(EINVAL));
2075 if ((err = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
2076 return (err);
2078 os = zfsvfs->z_os;
2079 zilog = zfsvfs->z_log;
2082 * Make sure that if we have ephemeral uid/gid or xvattr specified
2083 * that file system is at proper version level
2086 if (zfsvfs->z_use_fuids == B_FALSE &&
2087 (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ||
2088 ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) ||
2089 (mask & AT_XVATTR))) {
2090 zfs_exit(zfsvfs, FTAG);
2091 return (SET_ERROR(EINVAL));
2094 if (mask & AT_SIZE && vp->v_type == VDIR) {
2095 zfs_exit(zfsvfs, FTAG);
2096 return (SET_ERROR(EISDIR));
2099 if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) {
2100 zfs_exit(zfsvfs, FTAG);
2101 return (SET_ERROR(EINVAL));
2105 * If this is an xvattr_t, then get a pointer to the structure of
2106 * optional attributes. If this is NULL, then we have a vattr_t.
2108 xoap = xva_getxoptattr(xvap);
2110 xva_init(&tmpxvattr);
2113 * Immutable files can only alter immutable bit and atime
2115 if ((zp->z_pflags & ZFS_IMMUTABLE) &&
2116 ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) ||
2117 ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
2118 zfs_exit(zfsvfs, FTAG);
2119 return (SET_ERROR(EPERM));
2123 * Note: ZFS_READONLY is handled in zfs_zaccess_common.
2127 * Verify timestamps doesn't overflow 32 bits.
2128 * ZFS can handle large timestamps, but 32bit syscalls can't
2129 * handle times greater than 2039. This check should be removed
2130 * once large timestamps are fully supported.
2132 if (mask & (AT_ATIME | AT_MTIME)) {
2133 if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) ||
2134 ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) {
2135 zfs_exit(zfsvfs, FTAG);
2136 return (SET_ERROR(EOVERFLOW));
2139 if (xoap != NULL && (mask & AT_XVATTR)) {
2140 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME) &&
2141 TIMESPEC_OVERFLOW(&vap->va_birthtime)) {
2142 zfs_exit(zfsvfs, FTAG);
2143 return (SET_ERROR(EOVERFLOW));
2146 if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
2147 if (!dmu_objset_projectquota_enabled(os) ||
2148 (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode))) {
2149 zfs_exit(zfsvfs, FTAG);
2150 return (SET_ERROR(EOPNOTSUPP));
2153 projid = xoap->xoa_projid;
2154 if (unlikely(projid == ZFS_INVALID_PROJID)) {
2155 zfs_exit(zfsvfs, FTAG);
2156 return (SET_ERROR(EINVAL));
2159 if (projid == zp->z_projid && zp->z_pflags & ZFS_PROJID)
2160 projid = ZFS_INVALID_PROJID;
2161 else
2162 need_policy = TRUE;
2165 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT) &&
2166 (xoap->xoa_projinherit !=
2167 ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) &&
2168 (!dmu_objset_projectquota_enabled(os) ||
2169 (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode)))) {
2170 zfs_exit(zfsvfs, FTAG);
2171 return (SET_ERROR(EOPNOTSUPP));
2175 attrzp = NULL;
2176 aclp = NULL;
2178 if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
2179 zfs_exit(zfsvfs, FTAG);
2180 return (SET_ERROR(EROFS));
2184 * First validate permissions
2187 if (mask & AT_SIZE) {
2189 * XXX - Note, we are not providing any open
2190 * mode flags here (like FNDELAY), so we may
2191 * block if there are locks present... this
2192 * should be addressed in openat().
2194 /* XXX - would it be OK to generate a log record here? */
2195 err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
2196 if (err) {
2197 zfs_exit(zfsvfs, FTAG);
2198 return (err);
2202 if (mask & (AT_ATIME|AT_MTIME) ||
2203 ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
2204 XVA_ISSET_REQ(xvap, XAT_READONLY) ||
2205 XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
2206 XVA_ISSET_REQ(xvap, XAT_OFFLINE) ||
2207 XVA_ISSET_REQ(xvap, XAT_SPARSE) ||
2208 XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
2209 XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
2210 need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
2211 skipaclchk, cr, mnt_ns);
2214 if (mask & (AT_UID|AT_GID)) {
2215 int idmask = (mask & (AT_UID|AT_GID));
2216 int take_owner;
2217 int take_group;
2220 * NOTE: even if a new mode is being set,
2221 * we may clear S_ISUID/S_ISGID bits.
2224 if (!(mask & AT_MODE))
2225 vap->va_mode = zp->z_mode;
2228 * Take ownership or chgrp to group we are a member of
2231 take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr));
2232 take_group = (mask & AT_GID) &&
2233 zfs_groupmember(zfsvfs, vap->va_gid, cr);
2236 * If both AT_UID and AT_GID are set then take_owner and
2237 * take_group must both be set in order to allow taking
2238 * ownership.
2240 * Otherwise, send the check through secpolicy_vnode_setattr()
2244 if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) ||
2245 ((idmask == AT_UID) && take_owner) ||
2246 ((idmask == AT_GID) && take_group)) {
2247 if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
2248 skipaclchk, cr, mnt_ns) == 0) {
2250 * Remove setuid/setgid for non-privileged users
2252 secpolicy_setid_clear(vap, vp, cr);
2253 trim_mask = (mask & (AT_UID|AT_GID));
2254 } else {
2255 need_policy = TRUE;
2257 } else {
2258 need_policy = TRUE;
2262 oldva.va_mode = zp->z_mode;
2263 zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
2264 if (mask & AT_XVATTR) {
2266 * Update xvattr mask to include only those attributes
2267 * that are actually changing.
2269 * the bits will be restored prior to actually setting
2270 * the attributes so the caller thinks they were set.
2272 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
2273 if (xoap->xoa_appendonly !=
2274 ((zp->z_pflags & ZFS_APPENDONLY) != 0)) {
2275 need_policy = TRUE;
2276 } else {
2277 XVA_CLR_REQ(xvap, XAT_APPENDONLY);
2278 XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY);
2282 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
2283 if (xoap->xoa_projinherit !=
2284 ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) {
2285 need_policy = TRUE;
2286 } else {
2287 XVA_CLR_REQ(xvap, XAT_PROJINHERIT);
2288 XVA_SET_REQ(&tmpxvattr, XAT_PROJINHERIT);
2292 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
2293 if (xoap->xoa_nounlink !=
2294 ((zp->z_pflags & ZFS_NOUNLINK) != 0)) {
2295 need_policy = TRUE;
2296 } else {
2297 XVA_CLR_REQ(xvap, XAT_NOUNLINK);
2298 XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK);
2302 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
2303 if (xoap->xoa_immutable !=
2304 ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) {
2305 need_policy = TRUE;
2306 } else {
2307 XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
2308 XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE);
2312 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
2313 if (xoap->xoa_nodump !=
2314 ((zp->z_pflags & ZFS_NODUMP) != 0)) {
2315 need_policy = TRUE;
2316 } else {
2317 XVA_CLR_REQ(xvap, XAT_NODUMP);
2318 XVA_SET_REQ(&tmpxvattr, XAT_NODUMP);
2322 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
2323 if (xoap->xoa_av_modified !=
2324 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) {
2325 need_policy = TRUE;
2326 } else {
2327 XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
2328 XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED);
2332 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
2333 if ((vp->v_type != VREG &&
2334 xoap->xoa_av_quarantined) ||
2335 xoap->xoa_av_quarantined !=
2336 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) {
2337 need_policy = TRUE;
2338 } else {
2339 XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
2340 XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED);
2344 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
2345 zfs_exit(zfsvfs, FTAG);
2346 return (SET_ERROR(EPERM));
2349 if (need_policy == FALSE &&
2350 (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) ||
2351 XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
2352 need_policy = TRUE;
2356 if (mask & AT_MODE) {
2357 if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr,
2358 mnt_ns) == 0) {
2359 err = secpolicy_setid_setsticky_clear(vp, vap,
2360 &oldva, cr);
2361 if (err) {
2362 zfs_exit(zfsvfs, FTAG);
2363 return (err);
2365 trim_mask |= AT_MODE;
2366 } else {
2367 need_policy = TRUE;
2371 if (need_policy) {
2373 * If trim_mask is set then take ownership
2374 * has been granted or write_acl is present and user
2375 * has the ability to modify mode. In that case remove
2376 * UID|GID and or MODE from mask so that
2377 * secpolicy_vnode_setattr() doesn't revoke it.
2380 if (trim_mask) {
2381 saved_mask = vap->va_mask;
2382 vap->va_mask &= ~trim_mask;
2383 if (trim_mask & AT_MODE) {
2385 * Save the mode, as secpolicy_vnode_setattr()
2386 * will overwrite it with ova.va_mode.
2388 saved_mode = vap->va_mode;
2391 err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
2392 (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
2393 if (err) {
2394 zfs_exit(zfsvfs, FTAG);
2395 return (err);
2398 if (trim_mask) {
2399 vap->va_mask |= saved_mask;
2400 if (trim_mask & AT_MODE) {
2402 * Recover the mode after
2403 * secpolicy_vnode_setattr().
2405 vap->va_mode = saved_mode;
2411 * secpolicy_vnode_setattr, or take ownership may have
2412 * changed va_mask
2414 mask = vap->va_mask;
2416 if ((mask & (AT_UID | AT_GID)) || projid != ZFS_INVALID_PROJID) {
2417 err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
2418 &xattr_obj, sizeof (xattr_obj));
2420 if (err == 0 && xattr_obj) {
2421 err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp);
2422 if (err == 0) {
2423 err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE);
2424 if (err != 0)
2425 vrele(ZTOV(attrzp));
2427 if (err)
2428 goto out2;
2430 if (mask & AT_UID) {
2431 new_uid = zfs_fuid_create(zfsvfs,
2432 (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
2433 if (new_uid != zp->z_uid &&
2434 zfs_id_overquota(zfsvfs, DMU_USERUSED_OBJECT,
2435 new_uid)) {
2436 if (attrzp)
2437 vput(ZTOV(attrzp));
2438 err = SET_ERROR(EDQUOT);
2439 goto out2;
2443 if (mask & AT_GID) {
2444 new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
2445 cr, ZFS_GROUP, &fuidp);
2446 if (new_gid != zp->z_gid &&
2447 zfs_id_overquota(zfsvfs, DMU_GROUPUSED_OBJECT,
2448 new_gid)) {
2449 if (attrzp)
2450 vput(ZTOV(attrzp));
2451 err = SET_ERROR(EDQUOT);
2452 goto out2;
2456 if (projid != ZFS_INVALID_PROJID &&
2457 zfs_id_overquota(zfsvfs, DMU_PROJECTUSED_OBJECT, projid)) {
2458 if (attrzp)
2459 vput(ZTOV(attrzp));
2460 err = SET_ERROR(EDQUOT);
2461 goto out2;
2464 tx = dmu_tx_create(os);
2466 if (mask & AT_MODE) {
2467 uint64_t pmode = zp->z_mode;
2468 uint64_t acl_obj;
2469 new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);
2471 if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED &&
2472 !(zp->z_pflags & ZFS_ACL_TRIVIAL)) {
2473 err = SET_ERROR(EPERM);
2474 goto out;
2477 if ((err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)))
2478 goto out;
2480 if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) {
2482 * Are we upgrading ACL from old V0 format
2483 * to V1 format?
2485 if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
2486 zfs_znode_acl_version(zp) ==
2487 ZFS_ACL_VERSION_INITIAL) {
2488 dmu_tx_hold_free(tx, acl_obj, 0,
2489 DMU_OBJECT_END);
2490 dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
2491 0, aclp->z_acl_bytes);
2492 } else {
2493 dmu_tx_hold_write(tx, acl_obj, 0,
2494 aclp->z_acl_bytes);
2496 } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
2497 dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
2498 0, aclp->z_acl_bytes);
2500 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
2501 } else {
2502 if (((mask & AT_XVATTR) &&
2503 XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) ||
2504 (projid != ZFS_INVALID_PROJID &&
2505 !(zp->z_pflags & ZFS_PROJID)))
2506 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
2507 else
2508 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
2511 if (attrzp) {
2512 dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE);
2515 fuid_dirtied = zfsvfs->z_fuid_dirty;
2516 if (fuid_dirtied)
2517 zfs_fuid_txhold(zfsvfs, tx);
2519 zfs_sa_upgrade_txholds(tx, zp);
2521 err = dmu_tx_assign(tx, TXG_WAIT);
2522 if (err)
2523 goto out;
2525 count = 0;
2527 * Set each attribute requested.
2528 * We group settings according to the locks they need to acquire.
2530 * Note: you cannot set ctime directly, although it will be
2531 * updated as a side-effect of calling this function.
2534 if (projid != ZFS_INVALID_PROJID && !(zp->z_pflags & ZFS_PROJID)) {
2536 * For the existed object that is upgraded from old system,
2537 * its on-disk layout has no slot for the project ID attribute.
2538 * But quota accounting logic needs to access related slots by
2539 * offset directly. So we need to adjust old objects' layout
2540 * to make the project ID to some unified and fixed offset.
2542 if (attrzp)
2543 err = sa_add_projid(attrzp->z_sa_hdl, tx, projid);
2544 if (err == 0)
2545 err = sa_add_projid(zp->z_sa_hdl, tx, projid);
2547 if (unlikely(err == EEXIST))
2548 err = 0;
2549 else if (err != 0)
2550 goto out;
2551 else
2552 projid = ZFS_INVALID_PROJID;
2555 if (mask & (AT_UID|AT_GID|AT_MODE))
2556 mutex_enter(&zp->z_acl_lock);
2558 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
2559 &zp->z_pflags, sizeof (zp->z_pflags));
2561 if (attrzp) {
2562 if (mask & (AT_UID|AT_GID|AT_MODE))
2563 mutex_enter(&attrzp->z_acl_lock);
2564 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2565 SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,
2566 sizeof (attrzp->z_pflags));
2567 if (projid != ZFS_INVALID_PROJID) {
2568 attrzp->z_projid = projid;
2569 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2570 SA_ZPL_PROJID(zfsvfs), NULL, &attrzp->z_projid,
2571 sizeof (attrzp->z_projid));
2575 if (mask & (AT_UID|AT_GID)) {
2577 if (mask & AT_UID) {
2578 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
2579 &new_uid, sizeof (new_uid));
2580 zp->z_uid = new_uid;
2581 if (attrzp) {
2582 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2583 SA_ZPL_UID(zfsvfs), NULL, &new_uid,
2584 sizeof (new_uid));
2585 attrzp->z_uid = new_uid;
2589 if (mask & AT_GID) {
2590 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs),
2591 NULL, &new_gid, sizeof (new_gid));
2592 zp->z_gid = new_gid;
2593 if (attrzp) {
2594 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2595 SA_ZPL_GID(zfsvfs), NULL, &new_gid,
2596 sizeof (new_gid));
2597 attrzp->z_gid = new_gid;
2600 if (!(mask & AT_MODE)) {
2601 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs),
2602 NULL, &new_mode, sizeof (new_mode));
2603 new_mode = zp->z_mode;
2605 err = zfs_acl_chown_setattr(zp);
2606 ASSERT0(err);
2607 if (attrzp) {
2608 vn_seqc_write_begin(ZTOV(attrzp));
2609 err = zfs_acl_chown_setattr(attrzp);
2610 vn_seqc_write_end(ZTOV(attrzp));
2611 ASSERT0(err);
2615 if (mask & AT_MODE) {
2616 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
2617 &new_mode, sizeof (new_mode));
2618 zp->z_mode = new_mode;
2619 ASSERT3P(aclp, !=, NULL);
2620 err = zfs_aclset_common(zp, aclp, cr, tx);
2621 ASSERT0(err);
2622 if (zp->z_acl_cached)
2623 zfs_acl_free(zp->z_acl_cached);
2624 zp->z_acl_cached = aclp;
2625 aclp = NULL;
2629 if (mask & AT_ATIME) {
2630 ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime);
2631 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
2632 &zp->z_atime, sizeof (zp->z_atime));
2635 if (mask & AT_MTIME) {
2636 ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
2637 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
2638 mtime, sizeof (mtime));
2641 if (projid != ZFS_INVALID_PROJID) {
2642 zp->z_projid = projid;
2643 SA_ADD_BULK_ATTR(bulk, count,
2644 SA_ZPL_PROJID(zfsvfs), NULL, &zp->z_projid,
2645 sizeof (zp->z_projid));
2648 /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */
2649 if (mask & AT_SIZE && !(mask & AT_MTIME)) {
2650 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
2651 NULL, mtime, sizeof (mtime));
2652 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
2653 &ctime, sizeof (ctime));
2654 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
2655 } else if (mask != 0) {
2656 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
2657 &ctime, sizeof (ctime));
2658 zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime);
2659 if (attrzp) {
2660 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2661 SA_ZPL_CTIME(zfsvfs), NULL,
2662 &ctime, sizeof (ctime));
2663 zfs_tstamp_update_setup(attrzp, STATE_CHANGED,
2664 mtime, ctime);
2669 * Do this after setting timestamps to prevent timestamp
2670 * update from toggling bit
2673 if (xoap && (mask & AT_XVATTR)) {
2675 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME))
2676 xoap->xoa_createtime = vap->va_birthtime;
2678 * restore trimmed off masks
2679 * so that return masks can be set for caller.
2682 if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) {
2683 XVA_SET_REQ(xvap, XAT_APPENDONLY);
2685 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) {
2686 XVA_SET_REQ(xvap, XAT_NOUNLINK);
2688 if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) {
2689 XVA_SET_REQ(xvap, XAT_IMMUTABLE);
2691 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) {
2692 XVA_SET_REQ(xvap, XAT_NODUMP);
2694 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) {
2695 XVA_SET_REQ(xvap, XAT_AV_MODIFIED);
2697 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) {
2698 XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
2700 if (XVA_ISSET_REQ(&tmpxvattr, XAT_PROJINHERIT)) {
2701 XVA_SET_REQ(xvap, XAT_PROJINHERIT);
2704 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
2705 ASSERT3S(vp->v_type, ==, VREG);
2707 zfs_xvattr_set(zp, xvap, tx);
2710 if (fuid_dirtied)
2711 zfs_fuid_sync(zfsvfs, tx);
2713 if (mask != 0)
2714 zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);
2716 if (mask & (AT_UID|AT_GID|AT_MODE))
2717 mutex_exit(&zp->z_acl_lock);
2719 if (attrzp) {
2720 if (mask & (AT_UID|AT_GID|AT_MODE))
2721 mutex_exit(&attrzp->z_acl_lock);
2723 out:
2724 if (err == 0 && attrzp) {
2725 err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk,
2726 xattr_count, tx);
2727 ASSERT0(err2);
2730 if (attrzp)
2731 vput(ZTOV(attrzp));
2733 if (aclp)
2734 zfs_acl_free(aclp);
2736 if (fuidp) {
2737 zfs_fuid_info_free(fuidp);
2738 fuidp = NULL;
2741 if (err) {
2742 dmu_tx_abort(tx);
2743 } else {
2744 err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
2745 dmu_tx_commit(tx);
2748 out2:
2749 if (os->os_sync == ZFS_SYNC_ALWAYS)
2750 zil_commit(zilog, 0);
2752 zfs_exit(zfsvfs, FTAG);
2753 return (err);
2757 * Look up the directory entries corresponding to the source and target
2758 * directory/name pairs.
2760 static int
2761 zfs_rename_relock_lookup(znode_t *sdzp, const struct componentname *scnp,
2762 znode_t **szpp, znode_t *tdzp, const struct componentname *tcnp,
2763 znode_t **tzpp)
2765 zfsvfs_t *zfsvfs;
2766 znode_t *szp, *tzp;
2767 int error;
2770 * Before using sdzp and tdzp we must ensure that they are live.
2771 * As a porting legacy from illumos we have two things to worry
2772 * about. One is typical for FreeBSD and it is that the vnode is
2773 * not reclaimed (doomed). The other is that the znode is live.
2774 * The current code can invalidate the znode without acquiring the
2775 * corresponding vnode lock if the object represented by the znode
2776 * and vnode is no longer valid after a rollback or receive operation.
2777 * z_teardown_lock hidden behind zfs_enter and zfs_exit is the lock
2778 * that protects the znodes from the invalidation.
2780 zfsvfs = sdzp->z_zfsvfs;
2781 ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs);
2782 if ((error = zfs_enter_verify_zp(zfsvfs, sdzp, FTAG)) != 0)
2783 return (error);
2784 if ((error = zfs_verify_zp(tdzp)) != 0) {
2785 zfs_exit(zfsvfs, FTAG);
2786 return (error);
2790 * Re-resolve svp to be certain it still exists and fetch the
2791 * correct vnode.
2793 error = zfs_dirent_lookup(sdzp, scnp->cn_nameptr, &szp, ZEXISTS);
2794 if (error != 0) {
2795 /* Source entry invalid or not there. */
2796 if ((scnp->cn_flags & ISDOTDOT) != 0 ||
2797 (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.'))
2798 error = SET_ERROR(EINVAL);
2799 goto out;
2801 *szpp = szp;
2804 * Re-resolve tvp, if it disappeared we just carry on.
2806 error = zfs_dirent_lookup(tdzp, tcnp->cn_nameptr, &tzp, 0);
2807 if (error != 0) {
2808 vrele(ZTOV(szp));
2809 if ((tcnp->cn_flags & ISDOTDOT) != 0)
2810 error = SET_ERROR(EINVAL);
2811 goto out;
2813 *tzpp = tzp;
2814 out:
2815 zfs_exit(zfsvfs, FTAG);
2816 return (error);
2820 * We acquire all but fdvp locks using non-blocking acquisitions. If we
2821 * fail to acquire any lock in the path we will drop all held locks,
2822 * acquire the new lock in a blocking fashion, and then release it and
2823 * restart the rename. This acquire/release step ensures that we do not
2824 * spin on a lock waiting for release. On error release all vnode locks
2825 * and decrement references the way tmpfs_rename() would do.
2827 static int
2828 zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp,
2829 struct vnode *tdvp, struct vnode **tvpp,
2830 const struct componentname *scnp, const struct componentname *tcnp)
2832 struct vnode *nvp, *svp, *tvp;
2833 znode_t *sdzp, *tdzp, *szp, *tzp;
2834 int error;
2836 VOP_UNLOCK(tdvp);
2837 if (*tvpp != NULL && *tvpp != tdvp)
2838 VOP_UNLOCK(*tvpp);
2840 relock:
2841 error = vn_lock(sdvp, LK_EXCLUSIVE);
2842 if (error)
2843 goto out;
2844 error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT);
2845 if (error != 0) {
2846 VOP_UNLOCK(sdvp);
2847 if (error != EBUSY)
2848 goto out;
2849 error = vn_lock(tdvp, LK_EXCLUSIVE);
2850 if (error)
2851 goto out;
2852 VOP_UNLOCK(tdvp);
2853 goto relock;
2855 tdzp = VTOZ(tdvp);
2856 sdzp = VTOZ(sdvp);
2858 error = zfs_rename_relock_lookup(sdzp, scnp, &szp, tdzp, tcnp, &tzp);
2859 if (error != 0) {
2860 VOP_UNLOCK(sdvp);
2861 VOP_UNLOCK(tdvp);
2862 goto out;
2864 svp = ZTOV(szp);
2865 tvp = tzp != NULL ? ZTOV(tzp) : NULL;
2868 * Now try acquire locks on svp and tvp.
2870 nvp = svp;
2871 error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
2872 if (error != 0) {
2873 VOP_UNLOCK(sdvp);
2874 VOP_UNLOCK(tdvp);
2875 if (tvp != NULL)
2876 vrele(tvp);
2877 if (error != EBUSY) {
2878 vrele(nvp);
2879 goto out;
2881 error = vn_lock(nvp, LK_EXCLUSIVE);
2882 if (error != 0) {
2883 vrele(nvp);
2884 goto out;
2886 VOP_UNLOCK(nvp);
2888 * Concurrent rename race.
2889 * XXX ?
2891 if (nvp == tdvp) {
2892 vrele(nvp);
2893 error = SET_ERROR(EINVAL);
2894 goto out;
2896 vrele(*svpp);
2897 *svpp = nvp;
2898 goto relock;
2900 vrele(*svpp);
2901 *svpp = nvp;
2903 if (*tvpp != NULL)
2904 vrele(*tvpp);
2905 *tvpp = NULL;
2906 if (tvp != NULL) {
2907 nvp = tvp;
2908 error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
2909 if (error != 0) {
2910 VOP_UNLOCK(sdvp);
2911 VOP_UNLOCK(tdvp);
2912 VOP_UNLOCK(*svpp);
2913 if (error != EBUSY) {
2914 vrele(nvp);
2915 goto out;
2917 error = vn_lock(nvp, LK_EXCLUSIVE);
2918 if (error != 0) {
2919 vrele(nvp);
2920 goto out;
2922 vput(nvp);
2923 goto relock;
2925 *tvpp = nvp;
2928 return (0);
2930 out:
2931 return (error);
2935 * Note that we must use VRELE_ASYNC in this function as it walks
2936 * up the directory tree and vrele may need to acquire an exclusive
2937 * lock if a last reference to a vnode is dropped.
2939 static int
2940 zfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp)
2942 zfsvfs_t *zfsvfs;
2943 znode_t *zp, *zp1;
2944 uint64_t parent;
2945 int error;
2947 zfsvfs = tdzp->z_zfsvfs;
2948 if (tdzp == szp)
2949 return (SET_ERROR(EINVAL));
2950 if (tdzp == sdzp)
2951 return (0);
2952 if (tdzp->z_id == zfsvfs->z_root)
2953 return (0);
2954 zp = tdzp;
2955 for (;;) {
2956 ASSERT(!zp->z_unlinked);
2957 if ((error = sa_lookup(zp->z_sa_hdl,
2958 SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
2959 break;
2961 if (parent == szp->z_id) {
2962 error = SET_ERROR(EINVAL);
2963 break;
2965 if (parent == zfsvfs->z_root)
2966 break;
2967 if (parent == sdzp->z_id)
2968 break;
2970 error = zfs_zget(zfsvfs, parent, &zp1);
2971 if (error != 0)
2972 break;
2974 if (zp != tdzp)
2975 VN_RELE_ASYNC(ZTOV(zp),
2976 dsl_pool_zrele_taskq(
2977 dmu_objset_pool(zfsvfs->z_os)));
2978 zp = zp1;
2981 if (error == ENOTDIR)
2982 panic("checkpath: .. not a directory\n");
2983 if (zp != tdzp)
2984 VN_RELE_ASYNC(ZTOV(zp),
2985 dsl_pool_zrele_taskq(dmu_objset_pool(zfsvfs->z_os)));
2986 return (error);
2989 static int
2990 zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
2991 vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
2992 cred_t *cr);
2995 * Move an entry from the provided source directory to the target
2996 * directory. Change the entry name as indicated.
2998 * IN: sdvp - Source directory containing the "old entry".
2999 * scnp - Old entry name.
3000 * tdvp - Target directory to contain the "new entry".
3001 * tcnp - New entry name.
3002 * cr - credentials of caller.
3003 * INOUT: svpp - Source file
3004 * tvpp - Target file, may point to NULL initially
3006 * RETURN: 0 on success, error code on failure.
3008 * Timestamps:
3009 * sdvp,tdvp - ctime|mtime updated
3011 static int
3012 zfs_do_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
3013 vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
3014 cred_t *cr)
3016 int error;
3018 ASSERT_VOP_ELOCKED(tdvp, __func__);
3019 if (*tvpp != NULL)
3020 ASSERT_VOP_ELOCKED(*tvpp, __func__);
3022 /* Reject renames across filesystems. */
3023 if ((*svpp)->v_mount != tdvp->v_mount ||
3024 ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) {
3025 error = SET_ERROR(EXDEV);
3026 goto out;
3029 if (zfsctl_is_node(tdvp)) {
3030 error = SET_ERROR(EXDEV);
3031 goto out;
3035 * Lock all four vnodes to ensure safety and semantics of renaming.
3037 error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp);
3038 if (error != 0) {
3039 /* no vnodes are locked in the case of error here */
3040 return (error);
3043 error = zfs_do_rename_impl(sdvp, svpp, scnp, tdvp, tvpp, tcnp, cr);
3044 VOP_UNLOCK(sdvp);
3045 VOP_UNLOCK(*svpp);
3046 out:
3047 if (*tvpp != NULL)
3048 VOP_UNLOCK(*tvpp);
3049 if (tdvp != *tvpp)
3050 VOP_UNLOCK(tdvp);
3052 return (error);
3055 static int
3056 zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
3057 vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
3058 cred_t *cr)
3060 dmu_tx_t *tx;
3061 zfsvfs_t *zfsvfs;
3062 zilog_t *zilog;
3063 znode_t *tdzp, *sdzp, *tzp, *szp;
3064 const char *snm = scnp->cn_nameptr;
3065 const char *tnm = tcnp->cn_nameptr;
3066 int error;
3068 tdzp = VTOZ(tdvp);
3069 sdzp = VTOZ(sdvp);
3070 zfsvfs = tdzp->z_zfsvfs;
3072 if ((error = zfs_enter_verify_zp(zfsvfs, tdzp, FTAG)) != 0)
3073 return (error);
3074 if ((error = zfs_verify_zp(sdzp)) != 0) {
3075 zfs_exit(zfsvfs, FTAG);
3076 return (error);
3078 zilog = zfsvfs->z_log;
3080 if (zfsvfs->z_utf8 && u8_validate(tnm,
3081 strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3082 error = SET_ERROR(EILSEQ);
3083 goto out;
3086 /* If source and target are the same file, there is nothing to do. */
3087 if ((*svpp) == (*tvpp)) {
3088 error = 0;
3089 goto out;
3092 if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) ||
3093 ((*tvpp) != NULL && (*tvpp)->v_type == VDIR &&
3094 (*tvpp)->v_mountedhere != NULL)) {
3095 error = SET_ERROR(EXDEV);
3096 goto out;
3099 szp = VTOZ(*svpp);
3100 if ((error = zfs_verify_zp(szp)) != 0) {
3101 zfs_exit(zfsvfs, FTAG);
3102 return (error);
3104 tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp);
3105 if (tzp != NULL) {
3106 if ((error = zfs_verify_zp(tzp)) != 0) {
3107 zfs_exit(zfsvfs, FTAG);
3108 return (error);
3113 * This is to prevent the creation of links into attribute space
3114 * by renaming a linked file into/outof an attribute directory.
3115 * See the comment in zfs_link() for why this is considered bad.
3117 if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
3118 error = SET_ERROR(EINVAL);
3119 goto out;
3123 * If we are using project inheritance, means if the directory has
3124 * ZFS_PROJINHERIT set, then its descendant directories will inherit
3125 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
3126 * such case, we only allow renames into our tree when the project
3127 * IDs are the same.
3129 if (tdzp->z_pflags & ZFS_PROJINHERIT &&
3130 tdzp->z_projid != szp->z_projid) {
3131 error = SET_ERROR(EXDEV);
3132 goto out;
3136 * Must have write access at the source to remove the old entry
3137 * and write access at the target to create the new entry.
3138 * Note that if target and source are the same, this can be
3139 * done in a single check.
3141 if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr, NULL)))
3142 goto out;
3144 if ((*svpp)->v_type == VDIR) {
3146 * Avoid ".", "..", and aliases of "." for obvious reasons.
3148 if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') ||
3149 sdzp == szp ||
3150 (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
3151 error = EINVAL;
3152 goto out;
3156 * Check to make sure rename is valid.
3157 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
3159 if ((error = zfs_rename_check(szp, sdzp, tdzp)))
3160 goto out;
3164 * Does target exist?
3166 if (tzp) {
3168 * Source and target must be the same type.
3170 if ((*svpp)->v_type == VDIR) {
3171 if ((*tvpp)->v_type != VDIR) {
3172 error = SET_ERROR(ENOTDIR);
3173 goto out;
3174 } else {
3175 cache_purge(tdvp);
3176 if (sdvp != tdvp)
3177 cache_purge(sdvp);
3179 } else {
3180 if ((*tvpp)->v_type == VDIR) {
3181 error = SET_ERROR(EISDIR);
3182 goto out;
3187 vn_seqc_write_begin(*svpp);
3188 vn_seqc_write_begin(sdvp);
3189 if (*tvpp != NULL)
3190 vn_seqc_write_begin(*tvpp);
3191 if (tdvp != *tvpp)
3192 vn_seqc_write_begin(tdvp);
3194 vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct);
3195 if (tzp)
3196 vnevent_rename_dest(*tvpp, tdvp, tnm, ct);
3199 * notify the target directory if it is not the same
3200 * as source directory.
3202 if (tdvp != sdvp) {
3203 vnevent_rename_dest_dir(tdvp, ct);
3206 tx = dmu_tx_create(zfsvfs->z_os);
3207 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
3208 dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
3209 dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
3210 dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
3211 if (sdzp != tdzp) {
3212 dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
3213 zfs_sa_upgrade_txholds(tx, tdzp);
3215 if (tzp) {
3216 dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
3217 zfs_sa_upgrade_txholds(tx, tzp);
3220 zfs_sa_upgrade_txholds(tx, szp);
3221 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
3222 error = dmu_tx_assign(tx, TXG_WAIT);
3223 if (error) {
3224 dmu_tx_abort(tx);
3225 goto out_seq;
3228 if (tzp) /* Attempt to remove the existing target */
3229 error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL);
3231 if (error == 0) {
3232 error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING);
3233 if (error == 0) {
3234 szp->z_pflags |= ZFS_AV_MODIFIED;
3236 error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
3237 (void *)&szp->z_pflags, sizeof (uint64_t), tx);
3238 ASSERT0(error);
3240 error = zfs_link_destroy(sdzp, snm, szp, tx, ZRENAMING,
3241 NULL);
3242 if (error == 0) {
3243 zfs_log_rename(zilog, tx, TX_RENAME, sdzp,
3244 snm, tdzp, tnm, szp);
3245 } else {
3247 * At this point, we have successfully created
3248 * the target name, but have failed to remove
3249 * the source name. Since the create was done
3250 * with the ZRENAMING flag, there are
3251 * complications; for one, the link count is
3252 * wrong. The easiest way to deal with this
3253 * is to remove the newly created target, and
3254 * return the original error. This must
3255 * succeed; fortunately, it is very unlikely to
3256 * fail, since we just created it.
3258 VERIFY0(zfs_link_destroy(tdzp, tnm, szp, tx,
3259 ZRENAMING, NULL));
3262 if (error == 0) {
3263 cache_vop_rename(sdvp, *svpp, tdvp, *tvpp, scnp, tcnp);
3267 dmu_tx_commit(tx);
3269 out_seq:
3270 vn_seqc_write_end(*svpp);
3271 vn_seqc_write_end(sdvp);
3272 if (*tvpp != NULL)
3273 vn_seqc_write_end(*tvpp);
3274 if (tdvp != *tvpp)
3275 vn_seqc_write_end(tdvp);
3277 out:
3278 if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3279 zil_commit(zilog, 0);
3280 zfs_exit(zfsvfs, FTAG);
3282 return (error);
3286 zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname,
3287 cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap, zidmap_t *mnt_ns)
3289 struct componentname scn, tcn;
3290 vnode_t *sdvp, *tdvp;
3291 vnode_t *svp, *tvp;
3292 int error;
3293 svp = tvp = NULL;
3295 if (rflags != 0 || wo_vap != NULL)
3296 return (SET_ERROR(EINVAL));
3298 sdvp = ZTOV(sdzp);
3299 tdvp = ZTOV(tdzp);
3300 error = zfs_lookup_internal(sdzp, sname, &svp, &scn, DELETE);
3301 if (sdzp->z_zfsvfs->z_replay == B_FALSE)
3302 VOP_UNLOCK(sdvp);
3303 if (error != 0)
3304 goto fail;
3305 VOP_UNLOCK(svp);
3307 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
3308 error = zfs_lookup_internal(tdzp, tname, &tvp, &tcn, RENAME);
3309 if (error == EJUSTRETURN)
3310 tvp = NULL;
3311 else if (error != 0) {
3312 VOP_UNLOCK(tdvp);
3313 goto fail;
3316 error = zfs_do_rename(sdvp, &svp, &scn, tdvp, &tvp, &tcn, cr);
3317 fail:
3318 if (svp != NULL)
3319 vrele(svp);
3320 if (tvp != NULL)
3321 vrele(tvp);
3323 return (error);
3327 * Insert the indicated symbolic reference entry into the directory.
3329 * IN: dvp - Directory to contain new symbolic link.
3330 * link - Name for new symlink entry.
3331 * vap - Attributes of new entry.
3332 * cr - credentials of caller.
3333 * ct - caller context
3334 * flags - case flags
3335 * mnt_ns - Unused on FreeBSD
3337 * RETURN: 0 on success, error code on failure.
3339 * Timestamps:
3340 * dvp - ctime|mtime updated
3343 zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
3344 const char *link, znode_t **zpp, cred_t *cr, int flags, zidmap_t *mnt_ns)
3346 (void) flags;
3347 znode_t *zp;
3348 dmu_tx_t *tx;
3349 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
3350 zilog_t *zilog;
3351 uint64_t len = strlen(link);
3352 int error;
3353 zfs_acl_ids_t acl_ids;
3354 boolean_t fuid_dirtied;
3355 uint64_t txtype = TX_SYMLINK;
3357 ASSERT3S(vap->va_type, ==, VLNK);
3359 if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
3360 return (error);
3361 zilog = zfsvfs->z_log;
3363 if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
3364 NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3365 zfs_exit(zfsvfs, FTAG);
3366 return (SET_ERROR(EILSEQ));
3369 if (len > MAXPATHLEN) {
3370 zfs_exit(zfsvfs, FTAG);
3371 return (SET_ERROR(ENAMETOOLONG));
3374 if ((error = zfs_acl_ids_create(dzp, 0,
3375 vap, cr, NULL, &acl_ids, NULL)) != 0) {
3376 zfs_exit(zfsvfs, FTAG);
3377 return (error);
3381 * Attempt to lock directory; fail if entry already exists.
3383 error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
3384 if (error) {
3385 zfs_acl_ids_free(&acl_ids);
3386 zfs_exit(zfsvfs, FTAG);
3387 return (error);
3390 if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns))) {
3391 zfs_acl_ids_free(&acl_ids);
3392 zfs_exit(zfsvfs, FTAG);
3393 return (error);
3396 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids,
3397 0 /* projid */)) {
3398 zfs_acl_ids_free(&acl_ids);
3399 zfs_exit(zfsvfs, FTAG);
3400 return (SET_ERROR(EDQUOT));
3403 getnewvnode_reserve();
3404 tx = dmu_tx_create(zfsvfs->z_os);
3405 fuid_dirtied = zfsvfs->z_fuid_dirty;
3406 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
3407 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
3408 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
3409 ZFS_SA_BASE_ATTR_SIZE + len);
3410 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
3411 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
3412 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
3413 acl_ids.z_aclp->z_acl_bytes);
3415 if (fuid_dirtied)
3416 zfs_fuid_txhold(zfsvfs, tx);
3417 error = dmu_tx_assign(tx, TXG_WAIT);
3418 if (error) {
3419 zfs_acl_ids_free(&acl_ids);
3420 dmu_tx_abort(tx);
3421 getnewvnode_drop_reserve();
3422 zfs_exit(zfsvfs, FTAG);
3423 return (error);
3427 * Create a new object for the symlink.
3428 * for version 4 ZPL datasets the symlink will be an SA attribute
3430 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
3432 if (fuid_dirtied)
3433 zfs_fuid_sync(zfsvfs, tx);
3435 if (zp->z_is_sa)
3436 error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
3437 __DECONST(void *, link), len, tx);
3438 else
3439 zfs_sa_symlink(zp, __DECONST(char *, link), len, tx);
3441 zp->z_size = len;
3442 (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
3443 &zp->z_size, sizeof (zp->z_size), tx);
3445 * Insert the new object into the directory.
3447 error = zfs_link_create(dzp, name, zp, tx, ZNEW);
3448 if (error != 0) {
3449 zfs_znode_delete(zp, tx);
3450 VOP_UNLOCK(ZTOV(zp));
3451 zrele(zp);
3452 } else {
3453 zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
3456 zfs_acl_ids_free(&acl_ids);
3458 dmu_tx_commit(tx);
3460 getnewvnode_drop_reserve();
3462 if (error == 0) {
3463 *zpp = zp;
3465 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3466 zil_commit(zilog, 0);
3469 zfs_exit(zfsvfs, FTAG);
3470 return (error);
3474 * Return, in the buffer contained in the provided uio structure,
3475 * the symbolic path referred to by vp.
3477 * IN: vp - vnode of symbolic link.
3478 * uio - structure to contain the link path.
3479 * cr - credentials of caller.
3480 * ct - caller context
3482 * OUT: uio - structure containing the link path.
3484 * RETURN: 0 on success, error code on failure.
3486 * Timestamps:
3487 * vp - atime updated
3489 static int
3490 zfs_readlink(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, caller_context_t *ct)
3492 (void) cr, (void) ct;
3493 znode_t *zp = VTOZ(vp);
3494 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3495 int error;
3497 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
3498 return (error);
3500 if (zp->z_is_sa)
3501 error = sa_lookup_uio(zp->z_sa_hdl,
3502 SA_ZPL_SYMLINK(zfsvfs), uio);
3503 else
3504 error = zfs_sa_readlink(zp, uio);
3506 ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
3508 zfs_exit(zfsvfs, FTAG);
3509 return (error);
3513 * Insert a new entry into directory tdvp referencing svp.
3515 * IN: tdvp - Directory to contain new entry.
3516 * svp - vnode of new entry.
3517 * name - name of new entry.
3518 * cr - credentials of caller.
3520 * RETURN: 0 on success, error code on failure.
3522 * Timestamps:
3523 * tdvp - ctime|mtime updated
3524 * svp - ctime updated
3527 zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr,
3528 int flags)
3530 (void) flags;
3531 znode_t *tzp;
3532 zfsvfs_t *zfsvfs = tdzp->z_zfsvfs;
3533 zilog_t *zilog;
3534 dmu_tx_t *tx;
3535 int error;
3536 uint64_t parent;
3537 uid_t owner;
3539 ASSERT3S(ZTOV(tdzp)->v_type, ==, VDIR);
3541 if ((error = zfs_enter_verify_zp(zfsvfs, tdzp, FTAG)) != 0)
3542 return (error);
3543 zilog = zfsvfs->z_log;
3546 * POSIX dictates that we return EPERM here.
3547 * Better choices include ENOTSUP or EISDIR.
3549 if (ZTOV(szp)->v_type == VDIR) {
3550 zfs_exit(zfsvfs, FTAG);
3551 return (SET_ERROR(EPERM));
3554 if ((error = zfs_verify_zp(szp)) != 0) {
3555 zfs_exit(zfsvfs, FTAG);
3556 return (error);
3560 * If we are using project inheritance, means if the directory has
3561 * ZFS_PROJINHERIT set, then its descendant directories will inherit
3562 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
3563 * such case, we only allow hard link creation in our tree when the
3564 * project IDs are the same.
3566 if (tdzp->z_pflags & ZFS_PROJINHERIT &&
3567 tdzp->z_projid != szp->z_projid) {
3568 zfs_exit(zfsvfs, FTAG);
3569 return (SET_ERROR(EXDEV));
3572 if (szp->z_pflags & (ZFS_APPENDONLY |
3573 ZFS_IMMUTABLE | ZFS_READONLY)) {
3574 zfs_exit(zfsvfs, FTAG);
3575 return (SET_ERROR(EPERM));
3578 /* Prevent links to .zfs/shares files */
3580 if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
3581 &parent, sizeof (uint64_t))) != 0) {
3582 zfs_exit(zfsvfs, FTAG);
3583 return (error);
3585 if (parent == zfsvfs->z_shares_dir) {
3586 zfs_exit(zfsvfs, FTAG);
3587 return (SET_ERROR(EPERM));
3590 if (zfsvfs->z_utf8 && u8_validate(name,
3591 strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3592 zfs_exit(zfsvfs, FTAG);
3593 return (SET_ERROR(EILSEQ));
3597 * We do not support links between attributes and non-attributes
3598 * because of the potential security risk of creating links
3599 * into "normal" file space in order to circumvent restrictions
3600 * imposed in attribute space.
3602 if ((szp->z_pflags & ZFS_XATTR) != (tdzp->z_pflags & ZFS_XATTR)) {
3603 zfs_exit(zfsvfs, FTAG);
3604 return (SET_ERROR(EINVAL));
3608 owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER);
3609 if (owner != crgetuid(cr) && secpolicy_basic_link(ZTOV(szp), cr) != 0) {
3610 zfs_exit(zfsvfs, FTAG);
3611 return (SET_ERROR(EPERM));
3614 if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr, NULL))) {
3615 zfs_exit(zfsvfs, FTAG);
3616 return (error);
3620 * Attempt to lock directory; fail if entry already exists.
3622 error = zfs_dirent_lookup(tdzp, name, &tzp, ZNEW);
3623 if (error) {
3624 zfs_exit(zfsvfs, FTAG);
3625 return (error);
3628 tx = dmu_tx_create(zfsvfs->z_os);
3629 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
3630 dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, name);
3631 zfs_sa_upgrade_txholds(tx, szp);
3632 zfs_sa_upgrade_txholds(tx, tdzp);
3633 error = dmu_tx_assign(tx, TXG_WAIT);
3634 if (error) {
3635 dmu_tx_abort(tx);
3636 zfs_exit(zfsvfs, FTAG);
3637 return (error);
3640 error = zfs_link_create(tdzp, name, szp, tx, 0);
3642 if (error == 0) {
3643 uint64_t txtype = TX_LINK;
3644 zfs_log_link(zilog, tx, txtype, tdzp, szp, name);
3647 dmu_tx_commit(tx);
3649 if (error == 0) {
3650 vnevent_link(ZTOV(szp), ct);
3653 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3654 zil_commit(zilog, 0);
3656 zfs_exit(zfsvfs, FTAG);
3657 return (error);
3661 * Free or allocate space in a file. Currently, this function only
3662 * supports the `F_FREESP' command. However, this command is somewhat
3663 * misnamed, as its functionality includes the ability to allocate as
3664 * well as free space.
3666 * IN: ip - inode of file to free data in.
3667 * cmd - action to take (only F_FREESP supported).
3668 * bfp - section of file to free/alloc.
3669 * flag - current file open mode flags.
3670 * offset - current file offset.
3671 * cr - credentials of caller.
3673 * RETURN: 0 on success, error code on failure.
3675 * Timestamps:
3676 * ip - ctime|mtime updated
3679 zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
3680 offset_t offset, cred_t *cr)
3682 (void) offset;
3683 zfsvfs_t *zfsvfs = ZTOZSB(zp);
3684 uint64_t off, len;
3685 int error;
3687 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
3688 return (error);
3690 if (cmd != F_FREESP) {
3691 zfs_exit(zfsvfs, FTAG);
3692 return (SET_ERROR(EINVAL));
3696 * Callers might not be able to detect properly that we are read-only,
3697 * so check it explicitly here.
3699 if (zfs_is_readonly(zfsvfs)) {
3700 zfs_exit(zfsvfs, FTAG);
3701 return (SET_ERROR(EROFS));
3704 if (bfp->l_len < 0) {
3705 zfs_exit(zfsvfs, FTAG);
3706 return (SET_ERROR(EINVAL));
3710 * Permissions aren't checked on Solaris because on this OS
3711 * zfs_space() can only be called with an opened file handle.
3712 * On Linux we can get here through truncate_range() which
3713 * operates directly on inodes, so we need to check access rights.
3715 if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr, NULL))) {
3716 zfs_exit(zfsvfs, FTAG);
3717 return (error);
3720 off = bfp->l_start;
3721 len = bfp->l_len; /* 0 means from off to end of file */
3723 error = zfs_freesp(zp, off, len, flag, TRUE);
3725 zfs_exit(zfsvfs, FTAG);
3726 return (error);
3729 static void
3730 zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
3732 (void) cr, (void) ct;
3733 znode_t *zp = VTOZ(vp);
3734 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3735 int error;
3737 ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
3738 if (zp->z_sa_hdl == NULL) {
3740 * The fs has been unmounted, or we did a
3741 * suspend/resume and this file no longer exists.
3743 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
3744 vrecycle(vp);
3745 return;
3748 if (zp->z_unlinked) {
3750 * Fast path to recycle a vnode of a removed file.
3752 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
3753 vrecycle(vp);
3754 return;
3757 if (zp->z_atime_dirty && zp->z_unlinked == 0) {
3758 dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
3760 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
3761 zfs_sa_upgrade_txholds(tx, zp);
3762 error = dmu_tx_assign(tx, TXG_WAIT);
3763 if (error) {
3764 dmu_tx_abort(tx);
3765 } else {
3766 (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
3767 (void *)&zp->z_atime, sizeof (zp->z_atime), tx);
3768 zp->z_atime_dirty = 0;
3769 dmu_tx_commit(tx);
3772 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
3776 _Static_assert(sizeof (struct zfid_short) <= sizeof (struct fid),
3777 "struct zfid_short bigger than struct fid");
3778 _Static_assert(sizeof (struct zfid_long) <= sizeof (struct fid),
3779 "struct zfid_long bigger than struct fid");
3781 static int
3782 zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
3784 (void) ct;
3785 znode_t *zp = VTOZ(vp);
3786 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3787 uint32_t gen;
3788 uint64_t gen64;
3789 uint64_t object = zp->z_id;
3790 zfid_short_t *zfid;
3791 int size, i, error;
3793 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
3794 return (error);
3796 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
3797 &gen64, sizeof (uint64_t))) != 0) {
3798 zfs_exit(zfsvfs, FTAG);
3799 return (error);
3802 gen = (uint32_t)gen64;
3804 size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN;
3805 fidp->fid_len = size;
3807 zfid = (zfid_short_t *)fidp;
3809 zfid->zf_len = size;
3811 for (i = 0; i < sizeof (zfid->zf_object); i++)
3812 zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
3814 /* Must have a non-zero generation number to distinguish from .zfs */
3815 if (gen == 0)
3816 gen = 1;
3817 for (i = 0; i < sizeof (zfid->zf_gen); i++)
3818 zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
3820 if (size == LONG_FID_LEN) {
3821 uint64_t objsetid = dmu_objset_id(zfsvfs->z_os);
3822 zfid_long_t *zlfid;
3824 zlfid = (zfid_long_t *)fidp;
3826 for (i = 0; i < sizeof (zlfid->zf_setid); i++)
3827 zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));
3829 /* XXX - this should be the generation number for the objset */
3830 for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
3831 zlfid->zf_setgen[i] = 0;
3834 zfs_exit(zfsvfs, FTAG);
3835 return (0);
3838 static int
3839 zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
3840 caller_context_t *ct)
3842 znode_t *zp;
3843 zfsvfs_t *zfsvfs;
3844 int error;
3846 switch (cmd) {
3847 case _PC_LINK_MAX:
3848 *valp = MIN(LONG_MAX, ZFS_LINK_MAX);
3849 return (0);
3851 case _PC_FILESIZEBITS:
3852 *valp = 64;
3853 return (0);
3854 case _PC_MIN_HOLE_SIZE:
3855 *valp = (int)SPA_MINBLOCKSIZE;
3856 return (0);
3857 case _PC_ACL_EXTENDED:
3858 #if 0 /* POSIX ACLs are not implemented for ZFS on FreeBSD yet. */
3859 zp = VTOZ(vp);
3860 zfsvfs = zp->z_zfsvfs;
3861 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
3862 return (error);
3863 *valp = zfsvfs->z_acl_type == ZFSACLTYPE_POSIX ? 1 : 0;
3864 zfs_exit(zfsvfs, FTAG);
3865 #else
3866 *valp = 0;
3867 #endif
3868 return (0);
3870 case _PC_ACL_NFS4:
3871 zp = VTOZ(vp);
3872 zfsvfs = zp->z_zfsvfs;
3873 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
3874 return (error);
3875 *valp = zfsvfs->z_acl_type == ZFS_ACLTYPE_NFSV4 ? 1 : 0;
3876 zfs_exit(zfsvfs, FTAG);
3877 return (0);
3879 case _PC_ACL_PATH_MAX:
3880 *valp = ACL_MAX_ENTRIES;
3881 return (0);
3883 default:
3884 return (EOPNOTSUPP);
3888 static int
3889 zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
3890 int *rahead)
3892 znode_t *zp = VTOZ(vp);
3893 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3894 zfs_locked_range_t *lr;
3895 vm_object_t object;
3896 off_t start, end, obj_size;
3897 uint_t blksz;
3898 int pgsin_b, pgsin_a;
3899 int error;
3901 if (zfs_enter_verify_zp(zfsvfs, zp, FTAG) != 0)
3902 return (zfs_vm_pagerret_error);
3904 start = IDX_TO_OFF(ma[0]->pindex);
3905 end = IDX_TO_OFF(ma[count - 1]->pindex + 1);
3908 * Lock a range covering all required and optional pages.
3909 * Note that we need to handle the case of the block size growing.
3911 for (;;) {
3912 blksz = zp->z_blksz;
3913 lr = zfs_rangelock_tryenter(&zp->z_rangelock,
3914 rounddown(start, blksz),
3915 roundup(end, blksz) - rounddown(start, blksz), RL_READER);
3916 if (lr == NULL) {
3917 if (rahead != NULL) {
3918 *rahead = 0;
3919 rahead = NULL;
3921 if (rbehind != NULL) {
3922 *rbehind = 0;
3923 rbehind = NULL;
3925 break;
3927 if (blksz == zp->z_blksz)
3928 break;
3929 zfs_rangelock_exit(lr);
3932 object = ma[0]->object;
3933 zfs_vmobject_wlock(object);
3934 obj_size = object->un_pager.vnp.vnp_size;
3935 zfs_vmobject_wunlock(object);
3936 if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) {
3937 if (lr != NULL)
3938 zfs_rangelock_exit(lr);
3939 zfs_exit(zfsvfs, FTAG);
3940 return (zfs_vm_pagerret_bad);
3943 pgsin_b = 0;
3944 if (rbehind != NULL) {
3945 pgsin_b = OFF_TO_IDX(start - rounddown(start, blksz));
3946 pgsin_b = MIN(*rbehind, pgsin_b);
3949 pgsin_a = 0;
3950 if (rahead != NULL) {
3951 pgsin_a = OFF_TO_IDX(roundup(end, blksz) - end);
3952 if (end + IDX_TO_OFF(pgsin_a) >= obj_size)
3953 pgsin_a = OFF_TO_IDX(round_page(obj_size) - end);
3954 pgsin_a = MIN(*rahead, pgsin_a);
3958 * NB: we need to pass the exact byte size of the data that we expect
3959 * to read after accounting for the file size. This is required because
3960 * ZFS will panic if we request DMU to read beyond the end of the last
3961 * allocated block.
3963 error = dmu_read_pages(zfsvfs->z_os, zp->z_id, ma, count, &pgsin_b,
3964 &pgsin_a, MIN(end, obj_size) - (end - PAGE_SIZE));
3966 if (lr != NULL)
3967 zfs_rangelock_exit(lr);
3968 ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
3970 dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, count*PAGE_SIZE);
3972 zfs_exit(zfsvfs, FTAG);
3974 if (error != 0)
3975 return (zfs_vm_pagerret_error);
3977 VM_CNT_INC(v_vnodein);
3978 VM_CNT_ADD(v_vnodepgsin, count + pgsin_b + pgsin_a);
3979 if (rbehind != NULL)
3980 *rbehind = pgsin_b;
3981 if (rahead != NULL)
3982 *rahead = pgsin_a;
3983 return (zfs_vm_pagerret_ok);
3986 #ifndef _SYS_SYSPROTO_H_
3987 struct vop_getpages_args {
3988 struct vnode *a_vp;
3989 vm_page_t *a_m;
3990 int a_count;
3991 int *a_rbehind;
3992 int *a_rahead;
3994 #endif
3996 static int
3997 zfs_freebsd_getpages(struct vop_getpages_args *ap)
4000 return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind,
4001 ap->a_rahead));
4004 static int
4005 zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
4006 int *rtvals)
4008 znode_t *zp = VTOZ(vp);
4009 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4010 zfs_locked_range_t *lr;
4011 dmu_tx_t *tx;
4012 struct sf_buf *sf;
4013 vm_object_t object;
4014 vm_page_t m;
4015 caddr_t va;
4016 size_t tocopy;
4017 size_t lo_len;
4018 vm_ooffset_t lo_off;
4019 vm_ooffset_t off;
4020 uint_t blksz;
4021 int ncount;
4022 int pcount;
4023 int err;
4024 int i;
4026 object = vp->v_object;
4027 KASSERT(ma[0]->object == object, ("mismatching object"));
4028 KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length"));
4030 pcount = btoc(len);
4031 ncount = pcount;
4032 for (i = 0; i < pcount; i++)
4033 rtvals[i] = zfs_vm_pagerret_error;
4035 if (zfs_enter_verify_zp(zfsvfs, zp, FTAG) != 0)
4036 return (zfs_vm_pagerret_error);
4038 off = IDX_TO_OFF(ma[0]->pindex);
4039 blksz = zp->z_blksz;
4040 lo_off = rounddown(off, blksz);
4041 lo_len = roundup(len + (off - lo_off), blksz);
4042 lr = zfs_rangelock_enter(&zp->z_rangelock, lo_off, lo_len, RL_WRITER);
4044 zfs_vmobject_wlock(object);
4045 if (len + off > object->un_pager.vnp.vnp_size) {
4046 if (object->un_pager.vnp.vnp_size > off) {
4047 int pgoff;
4049 len = object->un_pager.vnp.vnp_size - off;
4050 ncount = btoc(len);
4051 if ((pgoff = (int)len & PAGE_MASK) != 0) {
4053 * If the object is locked and the following
4054 * conditions hold, then the page's dirty
4055 * field cannot be concurrently changed by a
4056 * pmap operation.
4058 m = ma[ncount - 1];
4059 vm_page_assert_sbusied(m);
4060 KASSERT(!pmap_page_is_write_mapped(m),
4061 ("zfs_putpages: page %p is not read-only",
4062 m));
4063 vm_page_clear_dirty(m, pgoff, PAGE_SIZE -
4064 pgoff);
4066 } else {
4067 len = 0;
4068 ncount = 0;
4070 if (ncount < pcount) {
4071 for (i = ncount; i < pcount; i++) {
4072 rtvals[i] = zfs_vm_pagerret_bad;
4076 zfs_vmobject_wunlock(object);
4078 boolean_t commit = (flags & (zfs_vm_pagerput_sync |
4079 zfs_vm_pagerput_inval)) != 0 ||
4080 zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS;
4082 if (ncount == 0)
4083 goto out;
4085 if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT, zp->z_uid) ||
4086 zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT, zp->z_gid) ||
4087 (zp->z_projid != ZFS_DEFAULT_PROJID &&
4088 zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT,
4089 zp->z_projid))) {
4090 goto out;
4093 tx = dmu_tx_create(zfsvfs->z_os);
4094 dmu_tx_hold_write(tx, zp->z_id, off, len);
4096 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
4097 zfs_sa_upgrade_txholds(tx, zp);
4098 err = dmu_tx_assign(tx, TXG_WAIT);
4099 if (err != 0) {
4100 dmu_tx_abort(tx);
4101 goto out;
4104 if (zp->z_blksz < PAGE_SIZE) {
4105 for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) {
4106 tocopy = len > PAGE_SIZE ? PAGE_SIZE : len;
4107 va = zfs_map_page(ma[i], &sf);
4108 dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx);
4109 zfs_unmap_page(sf);
4111 } else {
4112 err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx);
4115 if (err == 0) {
4116 uint64_t mtime[2], ctime[2];
4117 sa_bulk_attr_t bulk[3];
4118 int count = 0;
4120 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
4121 &mtime, 16);
4122 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
4123 &ctime, 16);
4124 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
4125 &zp->z_pflags, 8);
4126 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
4127 err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
4128 ASSERT0(err);
4130 * XXX we should be passing a callback to undirty
4131 * but that would make the locking messier
4133 zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off,
4134 len, commit, NULL, NULL);
4136 zfs_vmobject_wlock(object);
4137 for (i = 0; i < ncount; i++) {
4138 rtvals[i] = zfs_vm_pagerret_ok;
4139 vm_page_undirty(ma[i]);
4141 zfs_vmobject_wunlock(object);
4142 VM_CNT_INC(v_vnodeout);
4143 VM_CNT_ADD(v_vnodepgsout, ncount);
4145 dmu_tx_commit(tx);
4147 out:
4148 zfs_rangelock_exit(lr);
4149 if (commit)
4150 zil_commit(zfsvfs->z_log, zp->z_id);
4152 dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, len);
4154 zfs_exit(zfsvfs, FTAG);
4155 return (rtvals[0]);
4158 #ifndef _SYS_SYSPROTO_H_
4159 struct vop_putpages_args {
4160 struct vnode *a_vp;
4161 vm_page_t *a_m;
4162 int a_count;
4163 int a_sync;
4164 int *a_rtvals;
4166 #endif
4168 static int
4169 zfs_freebsd_putpages(struct vop_putpages_args *ap)
4172 return (zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync,
4173 ap->a_rtvals));
4176 #ifndef _SYS_SYSPROTO_H_
4177 struct vop_bmap_args {
4178 struct vnode *a_vp;
4179 daddr_t a_bn;
4180 struct bufobj **a_bop;
4181 daddr_t *a_bnp;
4182 int *a_runp;
4183 int *a_runb;
4185 #endif
4187 static int
4188 zfs_freebsd_bmap(struct vop_bmap_args *ap)
4191 if (ap->a_bop != NULL)
4192 *ap->a_bop = &ap->a_vp->v_bufobj;
4193 if (ap->a_bnp != NULL)
4194 *ap->a_bnp = ap->a_bn;
4195 if (ap->a_runp != NULL)
4196 *ap->a_runp = 0;
4197 if (ap->a_runb != NULL)
4198 *ap->a_runb = 0;
4200 return (0);
4203 #ifndef _SYS_SYSPROTO_H_
4204 struct vop_open_args {
4205 struct vnode *a_vp;
4206 int a_mode;
4207 struct ucred *a_cred;
4208 struct thread *a_td;
4210 #endif
4212 static int
4213 zfs_freebsd_open(struct vop_open_args *ap)
4215 vnode_t *vp = ap->a_vp;
4216 znode_t *zp = VTOZ(vp);
4217 int error;
4219 error = zfs_open(&vp, ap->a_mode, ap->a_cred);
4220 if (error == 0)
4221 vnode_create_vobject(vp, zp->z_size, ap->a_td);
4222 return (error);
#ifndef _SYS_SYSPROTO_H_
struct vop_close_args {
	struct vnode *a_vp;
	int  a_fflag;
	struct ucred *a_cred;
	struct thread *a_td;
};
#endif

/*
 * VOP_CLOSE entry point: forward to zfs_close() with a count of 1 and an
 * offset of 0.
 */
static int
zfs_freebsd_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp;
	int fflag = ap->a_fflag;

	return (zfs_close(vp, fflag, 1, 0, ap->a_cred));
}
4241 #ifndef _SYS_SYSPROTO_H_
4242 struct vop_ioctl_args {
4243 struct vnode *a_vp;
4244 ulong_t a_command;
4245 caddr_t a_data;
4246 int a_fflag;
4247 struct ucred *cred;
4248 struct thread *td;
4250 #endif
4252 static int
4253 zfs_freebsd_ioctl(struct vop_ioctl_args *ap)
4256 return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data,
4257 ap->a_fflag, ap->a_cred, NULL));
4260 static int
4261 ioflags(int ioflags)
4263 int flags = 0;
4265 if (ioflags & IO_APPEND)
4266 flags |= O_APPEND;
4267 if (ioflags & IO_NDELAY)
4268 flags |= O_NONBLOCK;
4269 if (ioflags & IO_SYNC)
4270 flags |= O_SYNC;
4272 return (flags);
4275 #ifndef _SYS_SYSPROTO_H_
4276 struct vop_read_args {
4277 struct vnode *a_vp;
4278 struct uio *a_uio;
4279 int a_ioflag;
4280 struct ucred *a_cred;
4282 #endif
4284 static int
4285 zfs_freebsd_read(struct vop_read_args *ap)
4287 zfs_uio_t uio;
4288 zfs_uio_init(&uio, ap->a_uio);
4289 return (zfs_read(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag),
4290 ap->a_cred));
4293 #ifndef _SYS_SYSPROTO_H_
4294 struct vop_write_args {
4295 struct vnode *a_vp;
4296 struct uio *a_uio;
4297 int a_ioflag;
4298 struct ucred *a_cred;
4300 #endif
4302 static int
4303 zfs_freebsd_write(struct vop_write_args *ap)
4305 zfs_uio_t uio;
4306 zfs_uio_init(&uio, ap->a_uio);
4307 return (zfs_write(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag),
4308 ap->a_cred));
4312 * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
4313 * the comment above cache_fplookup for details.
4315 static int
4316 zfs_freebsd_fplookup_vexec(struct vop_fplookup_vexec_args *v)
4318 vnode_t *vp;
4319 znode_t *zp;
4320 uint64_t pflags;
4322 vp = v->a_vp;
4323 zp = VTOZ_SMR(vp);
4324 if (__predict_false(zp == NULL))
4325 return (EAGAIN);
4326 pflags = atomic_load_64(&zp->z_pflags);
4327 if (pflags & ZFS_AV_QUARANTINED)
4328 return (EAGAIN);
4329 if (pflags & ZFS_XATTR)
4330 return (EAGAIN);
4331 if ((pflags & ZFS_NO_EXECS_DENIED) == 0)
4332 return (EAGAIN);
4333 return (0);
4336 static int
4337 zfs_freebsd_fplookup_symlink(struct vop_fplookup_symlink_args *v)
4339 vnode_t *vp;
4340 znode_t *zp;
4341 char *target;
4343 vp = v->a_vp;
4344 zp = VTOZ_SMR(vp);
4345 if (__predict_false(zp == NULL)) {
4346 return (EAGAIN);
4349 target = atomic_load_consume_ptr(&zp->z_cached_symlink);
4350 if (target == NULL) {
4351 return (EAGAIN);
4353 return (cache_symlink_resolve(v->a_fpl, target, strlen(target)));
4356 #ifndef _SYS_SYSPROTO_H_
4357 struct vop_access_args {
4358 struct vnode *a_vp;
4359 accmode_t a_accmode;
4360 struct ucred *a_cred;
4361 struct thread *a_td;
4363 #endif
4365 static int
4366 zfs_freebsd_access(struct vop_access_args *ap)
4368 vnode_t *vp = ap->a_vp;
4369 znode_t *zp = VTOZ(vp);
4370 accmode_t accmode;
4371 int error = 0;
4374 if (ap->a_accmode == VEXEC) {
4375 if (zfs_fastaccesschk_execute(zp, ap->a_cred) == 0)
4376 return (0);
4380 * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND,
4382 accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND);
4383 if (accmode != 0)
4384 error = zfs_access(zp, accmode, 0, ap->a_cred);
4387 * VADMIN has to be handled by vaccess().
4389 if (error == 0) {
4390 accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND);
4391 if (accmode != 0) {
4392 error = vaccess(vp->v_type, zp->z_mode, zp->z_uid,
4393 zp->z_gid, accmode, ap->a_cred);
4398 * For VEXEC, ensure that at least one execute bit is set for
4399 * non-directories.
4401 if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR &&
4402 (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) {
4403 error = EACCES;
4406 return (error);
4409 #ifndef _SYS_SYSPROTO_H_
4410 struct vop_lookup_args {
4411 struct vnode *a_dvp;
4412 struct vnode **a_vpp;
4413 struct componentname *a_cnp;
4415 #endif
4417 static int
4418 zfs_freebsd_lookup(struct vop_lookup_args *ap, boolean_t cached)
4420 struct componentname *cnp = ap->a_cnp;
4421 char nm[NAME_MAX + 1];
4423 ASSERT3U(cnp->cn_namelen, <, sizeof (nm));
4424 strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof (nm)));
4426 return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop,
4427 cnp->cn_cred, 0, cached));
4430 static int
4431 zfs_freebsd_cachedlookup(struct vop_cachedlookup_args *ap)
4434 return (zfs_freebsd_lookup((struct vop_lookup_args *)ap, B_TRUE));
4437 #ifndef _SYS_SYSPROTO_H_
4438 struct vop_lookup_args {
4439 struct vnode *a_dvp;
4440 struct vnode **a_vpp;
4441 struct componentname *a_cnp;
4443 #endif
4445 static int
4446 zfs_cache_lookup(struct vop_lookup_args *ap)
4448 zfsvfs_t *zfsvfs;
4450 zfsvfs = ap->a_dvp->v_mount->mnt_data;
4451 if (zfsvfs->z_use_namecache)
4452 return (vfs_cache_lookup(ap));
4453 else
4454 return (zfs_freebsd_lookup(ap, B_FALSE));
4457 #ifndef _SYS_SYSPROTO_H_
4458 struct vop_create_args {
4459 struct vnode *a_dvp;
4460 struct vnode **a_vpp;
4461 struct componentname *a_cnp;
4462 struct vattr *a_vap;
4464 #endif
4466 static int
4467 zfs_freebsd_create(struct vop_create_args *ap)
4469 zfsvfs_t *zfsvfs;
4470 struct componentname *cnp = ap->a_cnp;
4471 vattr_t *vap = ap->a_vap;
4472 znode_t *zp = NULL;
4473 int rc, mode;
4475 #if __FreeBSD_version < 1400068
4476 ASSERT(cnp->cn_flags & SAVENAME);
4477 #endif
4479 vattr_init_mask(vap);
4480 mode = vap->va_mode & ALLPERMS;
4481 zfsvfs = ap->a_dvp->v_mount->mnt_data;
4482 *ap->a_vpp = NULL;
4484 rc = zfs_create(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap, 0, mode,
4485 &zp, cnp->cn_cred, 0 /* flag */, NULL /* vsecattr */, NULL);
4486 if (rc == 0)
4487 *ap->a_vpp = ZTOV(zp);
4488 if (zfsvfs->z_use_namecache &&
4489 rc == 0 && (cnp->cn_flags & MAKEENTRY) != 0)
4490 cache_enter(ap->a_dvp, *ap->a_vpp, cnp);
4492 return (rc);
4495 #ifndef _SYS_SYSPROTO_H_
4496 struct vop_remove_args {
4497 struct vnode *a_dvp;
4498 struct vnode *a_vp;
4499 struct componentname *a_cnp;
4501 #endif
4503 static int
4504 zfs_freebsd_remove(struct vop_remove_args *ap)
4507 #if __FreeBSD_version < 1400068
4508 ASSERT(ap->a_cnp->cn_flags & SAVENAME);
4509 #endif
4511 return (zfs_remove_(ap->a_dvp, ap->a_vp, ap->a_cnp->cn_nameptr,
4512 ap->a_cnp->cn_cred));
4515 #ifndef _SYS_SYSPROTO_H_
4516 struct vop_mkdir_args {
4517 struct vnode *a_dvp;
4518 struct vnode **a_vpp;
4519 struct componentname *a_cnp;
4520 struct vattr *a_vap;
4522 #endif
4524 static int
4525 zfs_freebsd_mkdir(struct vop_mkdir_args *ap)
4527 vattr_t *vap = ap->a_vap;
4528 znode_t *zp = NULL;
4529 int rc;
4531 #if __FreeBSD_version < 1400068
4532 ASSERT(ap->a_cnp->cn_flags & SAVENAME);
4533 #endif
4535 vattr_init_mask(vap);
4536 *ap->a_vpp = NULL;
4538 rc = zfs_mkdir(VTOZ(ap->a_dvp), ap->a_cnp->cn_nameptr, vap, &zp,
4539 ap->a_cnp->cn_cred, 0, NULL, NULL);
4541 if (rc == 0)
4542 *ap->a_vpp = ZTOV(zp);
4543 return (rc);
4546 #ifndef _SYS_SYSPROTO_H_
4547 struct vop_rmdir_args {
4548 struct vnode *a_dvp;
4549 struct vnode *a_vp;
4550 struct componentname *a_cnp;
4552 #endif
4554 static int
4555 zfs_freebsd_rmdir(struct vop_rmdir_args *ap)
4557 struct componentname *cnp = ap->a_cnp;
4559 #if __FreeBSD_version < 1400068
4560 ASSERT(cnp->cn_flags & SAVENAME);
4561 #endif
4563 return (zfs_rmdir_(ap->a_dvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred));
4566 #ifndef _SYS_SYSPROTO_H_
4567 struct vop_readdir_args {
4568 struct vnode *a_vp;
4569 struct uio *a_uio;
4570 struct ucred *a_cred;
4571 int *a_eofflag;
4572 int *a_ncookies;
4573 cookie_t **a_cookies;
4575 #endif
4577 static int
4578 zfs_freebsd_readdir(struct vop_readdir_args *ap)
4580 zfs_uio_t uio;
4581 zfs_uio_init(&uio, ap->a_uio);
4582 return (zfs_readdir(ap->a_vp, &uio, ap->a_cred, ap->a_eofflag,
4583 ap->a_ncookies, ap->a_cookies));
4586 #ifndef _SYS_SYSPROTO_H_
4587 struct vop_fsync_args {
4588 struct vnode *a_vp;
4589 int a_waitfor;
4590 struct thread *a_td;
4592 #endif
4594 static int
4595 zfs_freebsd_fsync(struct vop_fsync_args *ap)
4598 return (zfs_fsync(VTOZ(ap->a_vp), 0, ap->a_td->td_ucred));
4601 #ifndef _SYS_SYSPROTO_H_
4602 struct vop_getattr_args {
4603 struct vnode *a_vp;
4604 struct vattr *a_vap;
4605 struct ucred *a_cred;
4607 #endif
4609 static int
4610 zfs_freebsd_getattr(struct vop_getattr_args *ap)
4612 vattr_t *vap = ap->a_vap;
4613 xvattr_t xvap;
4614 ulong_t fflags = 0;
4615 int error;
4617 xva_init(&xvap);
4618 xvap.xva_vattr = *vap;
4619 xvap.xva_vattr.va_mask |= AT_XVATTR;
4621 /* Convert chflags into ZFS-type flags. */
4622 /* XXX: what about SF_SETTABLE?. */
4623 XVA_SET_REQ(&xvap, XAT_IMMUTABLE);
4624 XVA_SET_REQ(&xvap, XAT_APPENDONLY);
4625 XVA_SET_REQ(&xvap, XAT_NOUNLINK);
4626 XVA_SET_REQ(&xvap, XAT_NODUMP);
4627 XVA_SET_REQ(&xvap, XAT_READONLY);
4628 XVA_SET_REQ(&xvap, XAT_ARCHIVE);
4629 XVA_SET_REQ(&xvap, XAT_SYSTEM);
4630 XVA_SET_REQ(&xvap, XAT_HIDDEN);
4631 XVA_SET_REQ(&xvap, XAT_REPARSE);
4632 XVA_SET_REQ(&xvap, XAT_OFFLINE);
4633 XVA_SET_REQ(&xvap, XAT_SPARSE);
4635 error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred);
4636 if (error != 0)
4637 return (error);
4639 /* Convert ZFS xattr into chflags. */
4640 #define FLAG_CHECK(fflag, xflag, xfield) do { \
4641 if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0) \
4642 fflags |= (fflag); \
4643 } while (0)
4644 FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE,
4645 xvap.xva_xoptattrs.xoa_immutable);
4646 FLAG_CHECK(SF_APPEND, XAT_APPENDONLY,
4647 xvap.xva_xoptattrs.xoa_appendonly);
4648 FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK,
4649 xvap.xva_xoptattrs.xoa_nounlink);
4650 FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE,
4651 xvap.xva_xoptattrs.xoa_archive);
4652 FLAG_CHECK(UF_NODUMP, XAT_NODUMP,
4653 xvap.xva_xoptattrs.xoa_nodump);
4654 FLAG_CHECK(UF_READONLY, XAT_READONLY,
4655 xvap.xva_xoptattrs.xoa_readonly);
4656 FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM,
4657 xvap.xva_xoptattrs.xoa_system);
4658 FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN,
4659 xvap.xva_xoptattrs.xoa_hidden);
4660 FLAG_CHECK(UF_REPARSE, XAT_REPARSE,
4661 xvap.xva_xoptattrs.xoa_reparse);
4662 FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE,
4663 xvap.xva_xoptattrs.xoa_offline);
4664 FLAG_CHECK(UF_SPARSE, XAT_SPARSE,
4665 xvap.xva_xoptattrs.xoa_sparse);
4667 #undef FLAG_CHECK
4668 *vap = xvap.xva_vattr;
4669 vap->va_flags = fflags;
4670 return (0);
4673 #ifndef _SYS_SYSPROTO_H_
4674 struct vop_setattr_args {
4675 struct vnode *a_vp;
4676 struct vattr *a_vap;
4677 struct ucred *a_cred;
4679 #endif
4681 static int
4682 zfs_freebsd_setattr(struct vop_setattr_args *ap)
4684 vnode_t *vp = ap->a_vp;
4685 vattr_t *vap = ap->a_vap;
4686 cred_t *cred = ap->a_cred;
4687 xvattr_t xvap;
4688 ulong_t fflags;
4689 uint64_t zflags;
4691 vattr_init_mask(vap);
4692 vap->va_mask &= ~AT_NOSET;
4694 xva_init(&xvap);
4695 xvap.xva_vattr = *vap;
4697 zflags = VTOZ(vp)->z_pflags;
4699 if (vap->va_flags != VNOVAL) {
4700 zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs;
4701 int error;
4703 if (zfsvfs->z_use_fuids == B_FALSE)
4704 return (EOPNOTSUPP);
4706 fflags = vap->va_flags;
4708 * XXX KDM
4709 * We need to figure out whether it makes sense to allow
4710 * UF_REPARSE through, since we don't really have other
4711 * facilities to handle reparse points and zfs_setattr()
4712 * doesn't currently allow setting that attribute anyway.
4714 if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE|
4715 UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE|
4716 UF_OFFLINE|UF_SPARSE)) != 0)
4717 return (EOPNOTSUPP);
4719 * Unprivileged processes are not permitted to unset system
4720 * flags, or modify flags if any system flags are set.
4721 * Privileged non-jail processes may not modify system flags
4722 * if securelevel > 0 and any existing system flags are set.
4723 * Privileged jail processes behave like privileged non-jail
4724 * processes if the PR_ALLOW_CHFLAGS permission bit is set;
4725 * otherwise, they behave like unprivileged processes.
4727 if (secpolicy_fs_owner(vp->v_mount, cred) == 0 ||
4728 priv_check_cred(cred, PRIV_VFS_SYSFLAGS) == 0) {
4729 if (zflags &
4730 (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) {
4731 error = securelevel_gt(cred, 0);
4732 if (error != 0)
4733 return (error);
4735 } else {
4737 * Callers may only modify the file flags on
4738 * objects they have VADMIN rights for.
4740 if ((error = VOP_ACCESS(vp, VADMIN, cred,
4741 curthread)) != 0)
4742 return (error);
4743 if (zflags &
4744 (ZFS_IMMUTABLE | ZFS_APPENDONLY |
4745 ZFS_NOUNLINK)) {
4746 return (EPERM);
4748 if (fflags &
4749 (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) {
4750 return (EPERM);
4754 #define FLAG_CHANGE(fflag, zflag, xflag, xfield) do { \
4755 if (((fflags & (fflag)) && !(zflags & (zflag))) || \
4756 ((zflags & (zflag)) && !(fflags & (fflag)))) { \
4757 XVA_SET_REQ(&xvap, (xflag)); \
4758 (xfield) = ((fflags & (fflag)) != 0); \
4760 } while (0)
4761 /* Convert chflags into ZFS-type flags. */
4762 /* XXX: what about SF_SETTABLE?. */
4763 FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE,
4764 xvap.xva_xoptattrs.xoa_immutable);
4765 FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY,
4766 xvap.xva_xoptattrs.xoa_appendonly);
4767 FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK,
4768 xvap.xva_xoptattrs.xoa_nounlink);
4769 FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE,
4770 xvap.xva_xoptattrs.xoa_archive);
4771 FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP,
4772 xvap.xva_xoptattrs.xoa_nodump);
4773 FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY,
4774 xvap.xva_xoptattrs.xoa_readonly);
4775 FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM,
4776 xvap.xva_xoptattrs.xoa_system);
4777 FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN,
4778 xvap.xva_xoptattrs.xoa_hidden);
4779 FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE,
4780 xvap.xva_xoptattrs.xoa_reparse);
4781 FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE,
4782 xvap.xva_xoptattrs.xoa_offline);
4783 FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE,
4784 xvap.xva_xoptattrs.xoa_sparse);
4785 #undef FLAG_CHANGE
4787 if (vap->va_birthtime.tv_sec != VNOVAL) {
4788 xvap.xva_vattr.va_mask |= AT_XVATTR;
4789 XVA_SET_REQ(&xvap, XAT_CREATETIME);
4791 return (zfs_setattr(VTOZ(vp), (vattr_t *)&xvap, 0, cred, NULL));
4794 #ifndef _SYS_SYSPROTO_H_
4795 struct vop_rename_args {
4796 struct vnode *a_fdvp;
4797 struct vnode *a_fvp;
4798 struct componentname *a_fcnp;
4799 struct vnode *a_tdvp;
4800 struct vnode *a_tvp;
4801 struct componentname *a_tcnp;
4803 #endif
4805 static int
4806 zfs_freebsd_rename(struct vop_rename_args *ap)
4808 vnode_t *fdvp = ap->a_fdvp;
4809 vnode_t *fvp = ap->a_fvp;
4810 vnode_t *tdvp = ap->a_tdvp;
4811 vnode_t *tvp = ap->a_tvp;
4812 int error;
4814 #if __FreeBSD_version < 1400068
4815 ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART));
4816 ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART));
4817 #endif
4819 error = zfs_do_rename(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp,
4820 ap->a_tcnp, ap->a_fcnp->cn_cred);
4822 vrele(fdvp);
4823 vrele(fvp);
4824 vrele(tdvp);
4825 if (tvp != NULL)
4826 vrele(tvp);
4828 return (error);
4831 #ifndef _SYS_SYSPROTO_H_
4832 struct vop_symlink_args {
4833 struct vnode *a_dvp;
4834 struct vnode **a_vpp;
4835 struct componentname *a_cnp;
4836 struct vattr *a_vap;
4837 char *a_target;
4839 #endif
4841 static int
4842 zfs_freebsd_symlink(struct vop_symlink_args *ap)
4844 struct componentname *cnp = ap->a_cnp;
4845 vattr_t *vap = ap->a_vap;
4846 znode_t *zp = NULL;
4847 char *symlink;
4848 size_t symlink_len;
4849 int rc;
4851 #if __FreeBSD_version < 1400068
4852 ASSERT(cnp->cn_flags & SAVENAME);
4853 #endif
4855 vap->va_type = VLNK; /* FreeBSD: Syscall only sets va_mode. */
4856 vattr_init_mask(vap);
4857 *ap->a_vpp = NULL;
4859 rc = zfs_symlink(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap,
4860 ap->a_target, &zp, cnp->cn_cred, 0 /* flags */, NULL);
4861 if (rc == 0) {
4862 *ap->a_vpp = ZTOV(zp);
4863 ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
4864 MPASS(zp->z_cached_symlink == NULL);
4865 symlink_len = strlen(ap->a_target);
4866 symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
4867 if (symlink != NULL) {
4868 memcpy(symlink, ap->a_target, symlink_len);
4869 symlink[symlink_len] = '\0';
4870 atomic_store_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
4871 (uintptr_t)symlink);
4874 return (rc);
4877 #ifndef _SYS_SYSPROTO_H_
4878 struct vop_readlink_args {
4879 struct vnode *a_vp;
4880 struct uio *a_uio;
4881 struct ucred *a_cred;
4883 #endif
4885 static int
4886 zfs_freebsd_readlink(struct vop_readlink_args *ap)
4888 zfs_uio_t uio;
4889 int error;
4890 znode_t *zp = VTOZ(ap->a_vp);
4891 char *symlink, *base;
4892 size_t symlink_len;
4893 bool trycache;
4895 zfs_uio_init(&uio, ap->a_uio);
4896 trycache = false;
4897 if (zfs_uio_segflg(&uio) == UIO_SYSSPACE &&
4898 zfs_uio_iovcnt(&uio) == 1) {
4899 base = zfs_uio_iovbase(&uio, 0);
4900 symlink_len = zfs_uio_iovlen(&uio, 0);
4901 trycache = true;
4903 error = zfs_readlink(ap->a_vp, &uio, ap->a_cred, NULL);
4904 if (atomic_load_ptr(&zp->z_cached_symlink) != NULL ||
4905 error != 0 || !trycache) {
4906 return (error);
4908 symlink_len -= zfs_uio_resid(&uio);
4909 symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
4910 if (symlink != NULL) {
4911 memcpy(symlink, base, symlink_len);
4912 symlink[symlink_len] = '\0';
4913 if (!atomic_cmpset_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
4914 (uintptr_t)NULL, (uintptr_t)symlink)) {
4915 cache_symlink_free(symlink, symlink_len + 1);
4918 return (error);
4921 #ifndef _SYS_SYSPROTO_H_
4922 struct vop_link_args {
4923 struct vnode *a_tdvp;
4924 struct vnode *a_vp;
4925 struct componentname *a_cnp;
4927 #endif
4929 static int
4930 zfs_freebsd_link(struct vop_link_args *ap)
4932 struct componentname *cnp = ap->a_cnp;
4933 vnode_t *vp = ap->a_vp;
4934 vnode_t *tdvp = ap->a_tdvp;
4936 if (tdvp->v_mount != vp->v_mount)
4937 return (EXDEV);
4939 #if __FreeBSD_version < 1400068
4940 ASSERT(cnp->cn_flags & SAVENAME);
4941 #endif
4943 return (zfs_link(VTOZ(tdvp), VTOZ(vp),
4944 cnp->cn_nameptr, cnp->cn_cred, 0));
4947 #ifndef _SYS_SYSPROTO_H_
4948 struct vop_inactive_args {
4949 struct vnode *a_vp;
4950 struct thread *a_td;
4952 #endif
4954 static int
4955 zfs_freebsd_inactive(struct vop_inactive_args *ap)
4957 vnode_t *vp = ap->a_vp;
4959 zfs_inactive(vp, curthread->td_ucred, NULL);
4960 return (0);
4963 #ifndef _SYS_SYSPROTO_H_
4964 struct vop_need_inactive_args {
4965 struct vnode *a_vp;
4966 struct thread *a_td;
4968 #endif
4970 static int
4971 zfs_freebsd_need_inactive(struct vop_need_inactive_args *ap)
4973 vnode_t *vp = ap->a_vp;
4974 znode_t *zp = VTOZ(vp);
4975 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4976 int need;
4978 if (vn_need_pageq_flush(vp))
4979 return (1);
4981 if (!ZFS_TEARDOWN_INACTIVE_TRY_ENTER_READ(zfsvfs))
4982 return (1);
4983 need = (zp->z_sa_hdl == NULL || zp->z_unlinked || zp->z_atime_dirty);
4984 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
4986 return (need);
4989 #ifndef _SYS_SYSPROTO_H_
4990 struct vop_reclaim_args {
4991 struct vnode *a_vp;
4992 struct thread *a_td;
4994 #endif
4996 static int
4997 zfs_freebsd_reclaim(struct vop_reclaim_args *ap)
4999 vnode_t *vp = ap->a_vp;
5000 znode_t *zp = VTOZ(vp);
5001 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
5003 ASSERT3P(zp, !=, NULL);
5006 * z_teardown_inactive_lock protects from a race with
5007 * zfs_znode_dmu_fini in zfsvfs_teardown during
5008 * force unmount.
5010 ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
5011 if (zp->z_sa_hdl == NULL)
5012 zfs_znode_free(zp);
5013 else
5014 zfs_zinactive(zp);
5015 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
5017 vp->v_data = NULL;
5018 return (0);
5021 #ifndef _SYS_SYSPROTO_H_
5022 struct vop_fid_args {
5023 struct vnode *a_vp;
5024 struct fid *a_fid;
5026 #endif
5028 static int
5029 zfs_freebsd_fid(struct vop_fid_args *ap)
5032 return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL));
5036 #ifndef _SYS_SYSPROTO_H_
5037 struct vop_pathconf_args {
5038 struct vnode *a_vp;
5039 int a_name;
5040 register_t *a_retval;
5041 } *ap;
5042 #endif
5044 static int
5045 zfs_freebsd_pathconf(struct vop_pathconf_args *ap)
5047 ulong_t val;
5048 int error;
5050 error = zfs_pathconf(ap->a_vp, ap->a_name, &val,
5051 curthread->td_ucred, NULL);
5052 if (error == 0) {
5053 *ap->a_retval = val;
5054 return (error);
5056 if (error != EOPNOTSUPP)
5057 return (error);
5059 switch (ap->a_name) {
5060 case _PC_NAME_MAX:
5061 *ap->a_retval = NAME_MAX;
5062 return (0);
5063 #if __FreeBSD_version >= 1400032
5064 case _PC_DEALLOC_PRESENT:
5065 *ap->a_retval = 1;
5066 return (0);
5067 #endif
5068 case _PC_PIPE_BUF:
5069 if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) {
5070 *ap->a_retval = PIPE_BUF;
5071 return (0);
5073 return (EINVAL);
5074 default:
5075 return (vop_stdpathconf(ap));
5079 static int zfs_xattr_compat = 1;
5081 static int
5082 zfs_check_attrname(const char *name)
5084 /* We don't allow '/' character in attribute name. */
5085 if (strchr(name, '/') != NULL)
5086 return (SET_ERROR(EINVAL));
5087 /* We don't allow attribute names that start with a namespace prefix. */
5088 if (ZFS_XA_NS_PREFIX_FORBIDDEN(name))
5089 return (SET_ERROR(EINVAL));
5090 return (0);
5094 * FreeBSD's extended attributes namespace defines file name prefix for ZFS'
5095 * extended attribute name:
5097 * NAMESPACE XATTR_COMPAT PREFIX
5098 * system * freebsd:system:
5099 * user 1 (none, can be used to access ZFS
5100 * fsattr(5) attributes created on Solaris)
5101 * user 0 user.
5103 static int
5104 zfs_create_attrname(int attrnamespace, const char *name, char *attrname,
5105 size_t size, boolean_t compat)
5107 const char *namespace, *prefix, *suffix;
5109 memset(attrname, 0, size);
5111 switch (attrnamespace) {
5112 case EXTATTR_NAMESPACE_USER:
5113 if (compat) {
5115 * This is the default namespace by which we can access
5116 * all attributes created on Solaris.
5118 prefix = namespace = suffix = "";
5119 } else {
5121 * This is compatible with the user namespace encoding
5122 * on Linux prior to xattr_compat, but nothing
5123 * else.
5125 prefix = "";
5126 namespace = "user";
5127 suffix = ".";
5129 break;
5130 case EXTATTR_NAMESPACE_SYSTEM:
5131 prefix = "freebsd:";
5132 namespace = EXTATTR_NAMESPACE_SYSTEM_STRING;
5133 suffix = ":";
5134 break;
5135 case EXTATTR_NAMESPACE_EMPTY:
5136 default:
5137 return (SET_ERROR(EINVAL));
5139 if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix,
5140 name) >= size) {
5141 return (SET_ERROR(ENAMETOOLONG));
5143 return (0);
5146 static int
5147 zfs_ensure_xattr_cached(znode_t *zp)
5149 int error = 0;
5151 ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
5153 if (zp->z_xattr_cached != NULL)
5154 return (0);
5156 if (rw_write_held(&zp->z_xattr_lock))
5157 return (zfs_sa_get_xattr(zp));
5159 if (!rw_tryupgrade(&zp->z_xattr_lock)) {
5160 rw_exit(&zp->z_xattr_lock);
5161 rw_enter(&zp->z_xattr_lock, RW_WRITER);
5163 if (zp->z_xattr_cached == NULL)
5164 error = zfs_sa_get_xattr(zp);
5165 rw_downgrade(&zp->z_xattr_lock);
5166 return (error);
5169 #ifndef _SYS_SYSPROTO_H_
5170 struct vop_getextattr {
5171 IN struct vnode *a_vp;
5172 IN int a_attrnamespace;
5173 IN const char *a_name;
5174 INOUT struct uio *a_uio;
5175 OUT size_t *a_size;
5176 IN struct ucred *a_cred;
5177 IN struct thread *a_td;
5179 #endif
5181 static int
5182 zfs_getextattr_dir(struct vop_getextattr_args *ap, const char *attrname)
5184 struct thread *td = ap->a_td;
5185 struct nameidata nd;
5186 struct vattr va;
5187 vnode_t *xvp = NULL, *vp;
5188 int error, flags;
5190 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5191 LOOKUP_XATTR, B_FALSE);
5192 if (error != 0)
5193 return (error);
5195 flags = FREAD;
5196 #if __FreeBSD_version < 1400043
5197 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname,
5198 xvp, td);
5199 #else
5200 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp);
5201 #endif
5202 error = vn_open_cred(&nd, &flags, 0, VN_OPEN_INVFS, ap->a_cred, NULL);
5203 if (error != 0)
5204 return (SET_ERROR(error));
5205 vp = nd.ni_vp;
5206 NDFREE_PNBUF(&nd);
5208 if (ap->a_size != NULL) {
5209 error = VOP_GETATTR(vp, &va, ap->a_cred);
5210 if (error == 0)
5211 *ap->a_size = (size_t)va.va_size;
5212 } else if (ap->a_uio != NULL)
5213 error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred);
5215 VOP_UNLOCK(vp);
5216 vn_close(vp, flags, ap->a_cred, td);
5217 return (error);
5220 static int
5221 zfs_getextattr_sa(struct vop_getextattr_args *ap, const char *attrname)
5223 znode_t *zp = VTOZ(ap->a_vp);
5224 uchar_t *nv_value;
5225 uint_t nv_size;
5226 int error;
5228 error = zfs_ensure_xattr_cached(zp);
5229 if (error != 0)
5230 return (error);
5232 ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
5233 ASSERT3P(zp->z_xattr_cached, !=, NULL);
5235 error = nvlist_lookup_byte_array(zp->z_xattr_cached, attrname,
5236 &nv_value, &nv_size);
5237 if (error != 0)
5238 return (SET_ERROR(error));
5240 if (ap->a_size != NULL)
5241 *ap->a_size = nv_size;
5242 else if (ap->a_uio != NULL)
5243 error = uiomove(nv_value, nv_size, ap->a_uio);
5244 if (error != 0)
5245 return (SET_ERROR(error));
5247 return (0);
5250 static int
5251 zfs_getextattr_impl(struct vop_getextattr_args *ap, boolean_t compat)
5253 znode_t *zp = VTOZ(ap->a_vp);
5254 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5255 char attrname[EXTATTR_MAXNAMELEN+1];
5256 int error;
5258 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5259 sizeof (attrname), compat);
5260 if (error != 0)
5261 return (error);
5263 error = ENOENT;
5264 if (zfsvfs->z_use_sa && zp->z_is_sa)
5265 error = zfs_getextattr_sa(ap, attrname);
5266 if (error == ENOENT)
5267 error = zfs_getextattr_dir(ap, attrname);
5268 return (error);
5272 * Vnode operation to retrieve a named extended attribute.
5274 static int
5275 zfs_getextattr(struct vop_getextattr_args *ap)
5277 znode_t *zp = VTOZ(ap->a_vp);
5278 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5279 int error;
5282 * If the xattr property is off, refuse the request.
5284 if (!(zfsvfs->z_flags & ZSB_XATTR))
5285 return (SET_ERROR(EOPNOTSUPP));
5287 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5288 ap->a_cred, ap->a_td, VREAD);
5289 if (error != 0)
5290 return (SET_ERROR(error));
5292 error = zfs_check_attrname(ap->a_name);
5293 if (error != 0)
5294 return (error);
5296 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
5297 return (error);
5298 error = ENOENT;
5299 rw_enter(&zp->z_xattr_lock, RW_READER);
5301 error = zfs_getextattr_impl(ap, zfs_xattr_compat);
5302 if ((error == ENOENT || error == ENOATTR) &&
5303 ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
5305 * Fall back to the alternate namespace format if we failed to
5306 * find a user xattr.
5308 error = zfs_getextattr_impl(ap, !zfs_xattr_compat);
5311 rw_exit(&zp->z_xattr_lock);
5312 zfs_exit(zfsvfs, FTAG);
5313 if (error == ENOENT)
5314 error = SET_ERROR(ENOATTR);
5315 return (error);
5318 #ifndef _SYS_SYSPROTO_H_
5319 struct vop_deleteextattr {
5320 IN struct vnode *a_vp;
5321 IN int a_attrnamespace;
5322 IN const char *a_name;
5323 IN struct ucred *a_cred;
5324 IN struct thread *a_td;
5326 #endif
5328 static int
5329 zfs_deleteextattr_dir(struct vop_deleteextattr_args *ap, const char *attrname)
5331 struct nameidata nd;
5332 vnode_t *xvp = NULL, *vp;
5333 int error;
5335 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5336 LOOKUP_XATTR, B_FALSE);
5337 if (error != 0)
5338 return (error);
5340 #if __FreeBSD_version < 1400043
5341 NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF,
5342 UIO_SYSSPACE, attrname, xvp, ap->a_td);
5343 #else
5344 NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF,
5345 UIO_SYSSPACE, attrname, xvp);
5346 #endif
5347 error = namei(&nd);
5348 if (error != 0)
5349 return (SET_ERROR(error));
5351 vp = nd.ni_vp;
5352 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
5353 NDFREE_PNBUF(&nd);
5355 vput(nd.ni_dvp);
5356 if (vp == nd.ni_dvp)
5357 vrele(vp);
5358 else
5359 vput(vp);
5361 return (error);
5364 static int
5365 zfs_deleteextattr_sa(struct vop_deleteextattr_args *ap, const char *attrname)
5367 znode_t *zp = VTOZ(ap->a_vp);
5368 nvlist_t *nvl;
5369 int error;
5371 error = zfs_ensure_xattr_cached(zp);
5372 if (error != 0)
5373 return (error);
5375 ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
5376 ASSERT3P(zp->z_xattr_cached, !=, NULL);
5378 nvl = zp->z_xattr_cached;
5379 error = nvlist_remove(nvl, attrname, DATA_TYPE_BYTE_ARRAY);
5380 if (error != 0)
5381 error = SET_ERROR(error);
5382 else
5383 error = zfs_sa_set_xattr(zp, attrname, NULL, 0);
5384 if (error != 0) {
5385 zp->z_xattr_cached = NULL;
5386 nvlist_free(nvl);
5388 return (error);
5391 static int
5392 zfs_deleteextattr_impl(struct vop_deleteextattr_args *ap, boolean_t compat)
5394 znode_t *zp = VTOZ(ap->a_vp);
5395 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5396 char attrname[EXTATTR_MAXNAMELEN+1];
5397 int error;
5399 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5400 sizeof (attrname), compat);
5401 if (error != 0)
5402 return (error);
5404 error = ENOENT;
5405 if (zfsvfs->z_use_sa && zp->z_is_sa)
5406 error = zfs_deleteextattr_sa(ap, attrname);
5407 if (error == ENOENT)
5408 error = zfs_deleteextattr_dir(ap, attrname);
5409 return (error);
5413 * Vnode operation to remove a named attribute.
5415 static int
5416 zfs_deleteextattr(struct vop_deleteextattr_args *ap)
5418 znode_t *zp = VTOZ(ap->a_vp);
5419 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5420 int error;
5423 * If the xattr property is off, refuse the request.
5425 if (!(zfsvfs->z_flags & ZSB_XATTR))
5426 return (SET_ERROR(EOPNOTSUPP));
5428 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5429 ap->a_cred, ap->a_td, VWRITE);
5430 if (error != 0)
5431 return (SET_ERROR(error));
5433 error = zfs_check_attrname(ap->a_name);
5434 if (error != 0)
5435 return (error);
5437 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
5438 return (error);
5439 rw_enter(&zp->z_xattr_lock, RW_WRITER);
5441 error = zfs_deleteextattr_impl(ap, zfs_xattr_compat);
5442 if ((error == ENOENT || error == ENOATTR) &&
5443 ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
5445 * Fall back to the alternate namespace format if we failed to
5446 * find a user xattr.
5448 error = zfs_deleteextattr_impl(ap, !zfs_xattr_compat);
5451 rw_exit(&zp->z_xattr_lock);
5452 zfs_exit(zfsvfs, FTAG);
5453 if (error == ENOENT)
5454 error = SET_ERROR(ENOATTR);
5455 return (error);
5458 #ifndef _SYS_SYSPROTO_H_
5459 struct vop_setextattr {
5460 IN struct vnode *a_vp;
5461 IN int a_attrnamespace;
5462 IN const char *a_name;
5463 INOUT struct uio *a_uio;
5464 IN struct ucred *a_cred;
5465 IN struct thread *a_td;
5467 #endif
5469 static int
5470 zfs_setextattr_dir(struct vop_setextattr_args *ap, const char *attrname)
5472 struct thread *td = ap->a_td;
5473 struct nameidata nd;
5474 struct vattr va;
5475 vnode_t *xvp = NULL, *vp;
5476 int error, flags;
5478 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5479 LOOKUP_XATTR | CREATE_XATTR_DIR, B_FALSE);
5480 if (error != 0)
5481 return (error);
5483 flags = FFLAGS(O_WRONLY | O_CREAT);
5484 #if __FreeBSD_version < 1400043
5485 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp, td);
5486 #else
5487 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp);
5488 #endif
5489 error = vn_open_cred(&nd, &flags, 0600, VN_OPEN_INVFS, ap->a_cred,
5490 NULL);
5491 if (error != 0)
5492 return (SET_ERROR(error));
5493 vp = nd.ni_vp;
5494 NDFREE_PNBUF(&nd);
5496 VATTR_NULL(&va);
5497 va.va_size = 0;
5498 error = VOP_SETATTR(vp, &va, ap->a_cred);
5499 if (error == 0)
5500 VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred);
5502 VOP_UNLOCK(vp);
5503 vn_close(vp, flags, ap->a_cred, td);
5504 return (error);
5507 static int
5508 zfs_setextattr_sa(struct vop_setextattr_args *ap, const char *attrname)
5510 znode_t *zp = VTOZ(ap->a_vp);
5511 nvlist_t *nvl;
5512 size_t sa_size;
5513 int error;
5515 error = zfs_ensure_xattr_cached(zp);
5516 if (error != 0)
5517 return (error);
5519 ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
5520 ASSERT3P(zp->z_xattr_cached, !=, NULL);
5522 nvl = zp->z_xattr_cached;
5523 size_t entry_size = ap->a_uio->uio_resid;
5524 if (entry_size > DXATTR_MAX_ENTRY_SIZE)
5525 return (SET_ERROR(EFBIG));
5526 error = nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);
5527 if (error != 0)
5528 return (SET_ERROR(error));
5529 if (sa_size > DXATTR_MAX_SA_SIZE)
5530 return (SET_ERROR(EFBIG));
5531 uchar_t *buf = kmem_alloc(entry_size, KM_SLEEP);
5532 error = uiomove(buf, entry_size, ap->a_uio);
5533 if (error != 0) {
5534 error = SET_ERROR(error);
5535 } else {
5536 error = nvlist_add_byte_array(nvl, attrname, buf, entry_size);
5537 if (error != 0)
5538 error = SET_ERROR(error);
5540 if (error == 0)
5541 error = zfs_sa_set_xattr(zp, attrname, buf, entry_size);
5542 kmem_free(buf, entry_size);
5543 if (error != 0) {
5544 zp->z_xattr_cached = NULL;
5545 nvlist_free(nvl);
5547 return (error);
5550 static int
5551 zfs_setextattr_impl(struct vop_setextattr_args *ap, boolean_t compat)
5553 znode_t *zp = VTOZ(ap->a_vp);
5554 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5555 char attrname[EXTATTR_MAXNAMELEN+1];
5556 int error;
5558 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5559 sizeof (attrname), compat);
5560 if (error != 0)
5561 return (error);
5563 struct vop_deleteextattr_args vda = {
5564 .a_vp = ap->a_vp,
5565 .a_attrnamespace = ap->a_attrnamespace,
5566 .a_name = ap->a_name,
5567 .a_cred = ap->a_cred,
5568 .a_td = ap->a_td,
5570 error = ENOENT;
5571 if (zfsvfs->z_use_sa && zp->z_is_sa && zfsvfs->z_xattr_sa) {
5572 error = zfs_setextattr_sa(ap, attrname);
5573 if (error == 0) {
5575 * Successfully put into SA, we need to clear the one
5576 * in dir if present.
5578 zfs_deleteextattr_dir(&vda, attrname);
5581 if (error != 0) {
5582 error = zfs_setextattr_dir(ap, attrname);
5583 if (error == 0 && zp->z_is_sa) {
5585 * Successfully put into dir, we need to clear the one
5586 * in SA if present.
5588 zfs_deleteextattr_sa(&vda, attrname);
5591 if (error == 0 && ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
5593 * Also clear all versions of the alternate compat name.
5595 zfs_deleteextattr_impl(&vda, !compat);
5597 return (error);
5601 * Vnode operation to set a named attribute.
5603 static int
5604 zfs_setextattr(struct vop_setextattr_args *ap)
5606 znode_t *zp = VTOZ(ap->a_vp);
5607 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5608 int error;
5611 * If the xattr property is off, refuse the request.
5613 if (!(zfsvfs->z_flags & ZSB_XATTR))
5614 return (SET_ERROR(EOPNOTSUPP));
5616 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5617 ap->a_cred, ap->a_td, VWRITE);
5618 if (error != 0)
5619 return (SET_ERROR(error));
5621 error = zfs_check_attrname(ap->a_name);
5622 if (error != 0)
5623 return (error);
5625 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
5626 return (error);
5627 rw_enter(&zp->z_xattr_lock, RW_WRITER);
5629 error = zfs_setextattr_impl(ap, zfs_xattr_compat);
5631 rw_exit(&zp->z_xattr_lock);
5632 zfs_exit(zfsvfs, FTAG);
5633 return (error);
5636 #ifndef _SYS_SYSPROTO_H_
5637 struct vop_listextattr {
5638 IN struct vnode *a_vp;
5639 IN int a_attrnamespace;
5640 INOUT struct uio *a_uio;
5641 OUT size_t *a_size;
5642 IN struct ucred *a_cred;
5643 IN struct thread *a_td;
5645 #endif
5647 static int
5648 zfs_listextattr_dir(struct vop_listextattr_args *ap, const char *attrprefix)
5650 struct thread *td = ap->a_td;
5651 struct nameidata nd;
5652 uint8_t dirbuf[sizeof (struct dirent)];
5653 struct iovec aiov;
5654 struct uio auio;
5655 vnode_t *xvp = NULL, *vp;
5656 int error, eof;
5658 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5659 LOOKUP_XATTR, B_FALSE);
5660 if (error != 0) {
5662 * ENOATTR means that the EA directory does not yet exist,
5663 * i.e. there are no extended attributes there.
5665 if (error == ENOATTR)
5666 error = 0;
5667 return (error);
5670 #if __FreeBSD_version < 1400043
5671 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED,
5672 UIO_SYSSPACE, ".", xvp, td);
5673 #else
5674 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED,
5675 UIO_SYSSPACE, ".", xvp);
5676 #endif
5677 error = namei(&nd);
5678 if (error != 0)
5679 return (SET_ERROR(error));
5680 vp = nd.ni_vp;
5681 NDFREE_PNBUF(&nd);
5683 auio.uio_iov = &aiov;
5684 auio.uio_iovcnt = 1;
5685 auio.uio_segflg = UIO_SYSSPACE;
5686 auio.uio_td = td;
5687 auio.uio_rw = UIO_READ;
5688 auio.uio_offset = 0;
5690 size_t plen = strlen(attrprefix);
5692 do {
5693 aiov.iov_base = (void *)dirbuf;
5694 aiov.iov_len = sizeof (dirbuf);
5695 auio.uio_resid = sizeof (dirbuf);
5696 error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL);
5697 if (error != 0)
5698 break;
5699 int done = sizeof (dirbuf) - auio.uio_resid;
5700 for (int pos = 0; pos < done; ) {
5701 struct dirent *dp = (struct dirent *)(dirbuf + pos);
5702 pos += dp->d_reclen;
5704 * XXX: Temporarily we also accept DT_UNKNOWN, as this
5705 * is what we get when attribute was created on Solaris.
5707 if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN)
5708 continue;
5709 else if (plen == 0 &&
5710 ZFS_XA_NS_PREFIX_FORBIDDEN(dp->d_name))
5711 continue;
5712 else if (strncmp(dp->d_name, attrprefix, plen) != 0)
5713 continue;
5714 uint8_t nlen = dp->d_namlen - plen;
5715 if (ap->a_size != NULL) {
5716 *ap->a_size += 1 + nlen;
5717 } else if (ap->a_uio != NULL) {
5719 * Format of extattr name entry is one byte for
5720 * length and the rest for name.
5722 error = uiomove(&nlen, 1, ap->a_uio);
5723 if (error == 0) {
5724 char *namep = dp->d_name + plen;
5725 error = uiomove(namep, nlen, ap->a_uio);
5727 if (error != 0) {
5728 error = SET_ERROR(error);
5729 break;
5733 } while (!eof && error == 0);
5735 vput(vp);
5736 return (error);
5739 static int
5740 zfs_listextattr_sa(struct vop_listextattr_args *ap, const char *attrprefix)
5742 znode_t *zp = VTOZ(ap->a_vp);
5743 int error;
5745 error = zfs_ensure_xattr_cached(zp);
5746 if (error != 0)
5747 return (error);
5749 ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
5750 ASSERT3P(zp->z_xattr_cached, !=, NULL);
5752 size_t plen = strlen(attrprefix);
5753 nvpair_t *nvp = NULL;
5754 while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) {
5755 ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY);
5757 const char *name = nvpair_name(nvp);
5758 if (plen == 0 && ZFS_XA_NS_PREFIX_FORBIDDEN(name))
5759 continue;
5760 else if (strncmp(name, attrprefix, plen) != 0)
5761 continue;
5762 uint8_t nlen = strlen(name) - plen;
5763 if (ap->a_size != NULL) {
5764 *ap->a_size += 1 + nlen;
5765 } else if (ap->a_uio != NULL) {
5767 * Format of extattr name entry is one byte for
5768 * length and the rest for name.
5770 error = uiomove(&nlen, 1, ap->a_uio);
5771 if (error == 0) {
5772 char *namep = __DECONST(char *, name) + plen;
5773 error = uiomove(namep, nlen, ap->a_uio);
5775 if (error != 0) {
5776 error = SET_ERROR(error);
5777 break;
5782 return (error);
5785 static int
5786 zfs_listextattr_impl(struct vop_listextattr_args *ap, boolean_t compat)
5788 znode_t *zp = VTOZ(ap->a_vp);
5789 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5790 char attrprefix[16];
5791 int error;
5793 error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix,
5794 sizeof (attrprefix), compat);
5795 if (error != 0)
5796 return (error);
5798 if (zfsvfs->z_use_sa && zp->z_is_sa)
5799 error = zfs_listextattr_sa(ap, attrprefix);
5800 if (error == 0)
5801 error = zfs_listextattr_dir(ap, attrprefix);
5802 return (error);
5806 * Vnode operation to retrieve extended attributes on a vnode.
5808 static int
5809 zfs_listextattr(struct vop_listextattr_args *ap)
5811 znode_t *zp = VTOZ(ap->a_vp);
5812 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5813 int error;
5815 if (ap->a_size != NULL)
5816 *ap->a_size = 0;
5819 * If the xattr property is off, refuse the request.
5821 if (!(zfsvfs->z_flags & ZSB_XATTR))
5822 return (SET_ERROR(EOPNOTSUPP));
5824 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5825 ap->a_cred, ap->a_td, VREAD);
5826 if (error != 0)
5827 return (SET_ERROR(error));
5829 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
5830 return (error);
5831 rw_enter(&zp->z_xattr_lock, RW_READER);
5833 error = zfs_listextattr_impl(ap, zfs_xattr_compat);
5834 if (error == 0 && ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
5835 /* Also list user xattrs with the alternate format. */
5836 error = zfs_listextattr_impl(ap, !zfs_xattr_compat);
5839 rw_exit(&zp->z_xattr_lock);
5840 zfs_exit(zfsvfs, FTAG);
5841 return (error);
5844 #ifndef _SYS_SYSPROTO_H_
5845 struct vop_getacl_args {
5846 struct vnode *vp;
5847 acl_type_t type;
5848 struct acl *aclp;
5849 struct ucred *cred;
5850 struct thread *td;
5852 #endif
5854 static int
5855 zfs_freebsd_getacl(struct vop_getacl_args *ap)
5857 int error;
5858 vsecattr_t vsecattr;
5860 if (ap->a_type != ACL_TYPE_NFS4)
5861 return (EINVAL);
5863 vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT;
5864 if ((error = zfs_getsecattr(VTOZ(ap->a_vp),
5865 &vsecattr, 0, ap->a_cred)))
5866 return (error);
5868 error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp,
5869 vsecattr.vsa_aclcnt);
5870 if (vsecattr.vsa_aclentp != NULL)
5871 kmem_free(vsecattr.vsa_aclentp, vsecattr.vsa_aclentsz);
5873 return (error);
5876 #ifndef _SYS_SYSPROTO_H_
5877 struct vop_setacl_args {
5878 struct vnode *vp;
5879 acl_type_t type;
5880 struct acl *aclp;
5881 struct ucred *cred;
5882 struct thread *td;
5884 #endif
5886 static int
5887 zfs_freebsd_setacl(struct vop_setacl_args *ap)
5889 int error;
5890 vsecattr_t vsecattr;
5891 int aclbsize; /* size of acl list in bytes */
5892 aclent_t *aaclp;
5894 if (ap->a_type != ACL_TYPE_NFS4)
5895 return (EINVAL);
5897 if (ap->a_aclp == NULL)
5898 return (EINVAL);
5900 if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES)
5901 return (EINVAL);
5904 * With NFSv4 ACLs, chmod(2) may need to add additional entries,
5905 * splitting every entry into two and appending "canonical six"
5906 * entries at the end. Don't allow for setting an ACL that would
5907 * cause chmod(2) to run out of ACL entries.
5909 if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES)
5910 return (ENOSPC);
5912 error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR);
5913 if (error != 0)
5914 return (error);
5916 vsecattr.vsa_mask = VSA_ACE;
5917 aclbsize = ap->a_aclp->acl_cnt * sizeof (ace_t);
5918 vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP);
5919 aaclp = vsecattr.vsa_aclentp;
5920 vsecattr.vsa_aclentsz = aclbsize;
5922 aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp);
5923 error = zfs_setsecattr(VTOZ(ap->a_vp), &vsecattr, 0, ap->a_cred);
5924 kmem_free(aaclp, aclbsize);
5926 return (error);
5929 #ifndef _SYS_SYSPROTO_H_
5930 struct vop_aclcheck_args {
5931 struct vnode *vp;
5932 acl_type_t type;
5933 struct acl *aclp;
5934 struct ucred *cred;
5935 struct thread *td;
5937 #endif
5939 static int
5940 zfs_freebsd_aclcheck(struct vop_aclcheck_args *ap)
5943 return (EOPNOTSUPP);
5946 static int
5947 zfs_vptocnp(struct vop_vptocnp_args *ap)
5949 vnode_t *covered_vp;
5950 vnode_t *vp = ap->a_vp;
5951 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
5952 znode_t *zp = VTOZ(vp);
5953 int ltype;
5954 int error;
5956 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
5957 return (error);
5960 * If we are a snapshot mounted under .zfs, run the operation
5961 * on the covered vnode.
5963 if (zp->z_id != zfsvfs->z_root || zfsvfs->z_parent == zfsvfs) {
5964 char name[MAXNAMLEN + 1];
5965 znode_t *dzp;
5966 size_t len;
5968 error = zfs_znode_parent_and_name(zp, &dzp, name);
5969 if (error == 0) {
5970 len = strlen(name);
5971 if (*ap->a_buflen < len)
5972 error = SET_ERROR(ENOMEM);
5974 if (error == 0) {
5975 *ap->a_buflen -= len;
5976 memcpy(ap->a_buf + *ap->a_buflen, name, len);
5977 *ap->a_vpp = ZTOV(dzp);
5979 zfs_exit(zfsvfs, FTAG);
5980 return (error);
5982 zfs_exit(zfsvfs, FTAG);
5984 covered_vp = vp->v_mount->mnt_vnodecovered;
5985 enum vgetstate vs = vget_prep(covered_vp);
5986 ltype = VOP_ISLOCKED(vp);
5987 VOP_UNLOCK(vp);
5988 error = vget_finish(covered_vp, LK_SHARED, vs);
5989 if (error == 0) {
5990 error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_buf,
5991 ap->a_buflen);
5992 vput(covered_vp);
5994 vn_lock(vp, ltype | LK_RETRY);
5995 if (VN_IS_DOOMED(vp))
5996 error = SET_ERROR(ENOENT);
5997 return (error);
#if __FreeBSD_version >= 1400032
/*
 * VOP_DEALLOCATE: free the range [*a_offset, *a_offset + *a_len), clamped
 * to the current file size.
 *
 * On success *a_offset is advanced past the freed range and *a_len is set
 * to 0.  A request lying entirely beyond the file size succeeds immediately
 * with *a_len set to 0.  Returns EROFS on a read-only filesystem.
 */
static int
zfs_deallocate(struct vop_deallocate_args *ap)
{
	znode_t *zp = VTOZ(ap->a_vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	zilog_t *zilog;
	off_t start, nbytes, fsize;
	int rc;

	rc = zfs_enter_verify_zp(zfsvfs, zp, FTAG);
	if (rc != 0)
		return (rc);

	/*
	 * Callers might not be able to detect properly that we are read-only,
	 * so check it explicitly here.
	 */
	if (zfs_is_readonly(zfsvfs)) {
		rc = SET_ERROR(EROFS);
		goto out;
	}

	zilog = zfsvfs->z_log;
	start = *ap->a_offset;
	nbytes = *ap->a_len;
	fsize = zp->z_size;
	if (start + nbytes > fsize)
		nbytes = fsize - start;

	/* Fast path for out-of-range request. */
	if (nbytes <= 0) {
		*ap->a_len = 0;
		rc = 0;
		goto out;
	}

	rc = zfs_freesp(zp, start, nbytes, O_RDWR, TRUE);
	if (rc != 0)
		goto out;

	/* Commit the log when the dataset or the caller asks for sync I/O. */
	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS ||
	    (ap->a_ioflag & IO_SYNC) != 0)
		zil_commit(zilog, zp->z_id);
	*ap->a_offset = start + nbytes;
	*ap->a_len = 0;

out:
	zfs_exit(zfsvfs, FTAG);
	return (rc);
}
#endif
6049 #ifndef _SYS_SYSPROTO_H_
6050 struct vop_copy_file_range_args {
6051 struct vnode *a_invp;
6052 off_t *a_inoffp;
6053 struct vnode *a_outvp;
6054 off_t *a_outoffp;
6055 size_t *a_lenp;
6056 unsigned int a_flags;
6057 struct ucred *a_incred;
6058 struct ucred *a_outcred;
6059 struct thread *a_fsizetd;
6061 #endif
6063 * TODO: FreeBSD will only call file system-specific copy_file_range() if both
6064 * files resides under the same mountpoint. In case of ZFS we want to be called
6065 * even is files are in different datasets (but on the same pools, but we need
6066 * to check that ourselves).
6068 static int
6069 zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap)
6071 zfsvfs_t *outzfsvfs;
6072 struct vnode *invp = ap->a_invp;
6073 struct vnode *outvp = ap->a_outvp;
6074 struct mount *mp;
6075 int error;
6076 uint64_t len = *ap->a_lenp;
6078 if (!zfs_bclone_enabled) {
6079 mp = NULL;
6080 goto bad_write_fallback;
6084 * TODO: If offset/length is not aligned to recordsize, use
6085 * vn_generic_copy_file_range() on this fragment.
6086 * It would be better to do this after we lock the vnodes, but then we
6087 * need something else than vn_generic_copy_file_range().
6090 vn_start_write(outvp, &mp, V_WAIT);
6091 if (__predict_true(mp == outvp->v_mount)) {
6092 outzfsvfs = (zfsvfs_t *)mp->mnt_data;
6093 if (!spa_feature_is_enabled(dmu_objset_spa(outzfsvfs->z_os),
6094 SPA_FEATURE_BLOCK_CLONING)) {
6095 goto bad_write_fallback;
6098 if (invp == outvp) {
6099 if (vn_lock(outvp, LK_EXCLUSIVE) != 0) {
6100 goto bad_write_fallback;
6102 } else {
6103 #if (__FreeBSD_version >= 1302506 && __FreeBSD_version < 1400000) || \
6104 __FreeBSD_version >= 1400086
6105 vn_lock_pair(invp, false, LK_EXCLUSIVE, outvp, false,
6106 LK_EXCLUSIVE);
6107 #else
6108 vn_lock_pair(invp, false, outvp, false);
6109 #endif
6110 if (VN_IS_DOOMED(invp) || VN_IS_DOOMED(outvp)) {
6111 goto bad_locked_fallback;
6115 #ifdef MAC
6116 error = mac_vnode_check_write(curthread->td_ucred, ap->a_outcred,
6117 outvp);
6118 if (error != 0)
6119 goto out_locked;
6120 #endif
6122 error = zfs_clone_range(VTOZ(invp), ap->a_inoffp, VTOZ(outvp),
6123 ap->a_outoffp, &len, ap->a_outcred);
6124 if (error == EXDEV || error == EAGAIN || error == EINVAL ||
6125 error == EOPNOTSUPP)
6126 goto bad_locked_fallback;
6127 *ap->a_lenp = (size_t)len;
6128 out_locked:
6129 if (invp != outvp)
6130 VOP_UNLOCK(invp);
6131 VOP_UNLOCK(outvp);
6132 if (mp != NULL)
6133 vn_finished_write(mp);
6134 return (error);
6136 bad_locked_fallback:
6137 if (invp != outvp)
6138 VOP_UNLOCK(invp);
6139 VOP_UNLOCK(outvp);
6140 bad_write_fallback:
6141 if (mp != NULL)
6142 vn_finished_write(mp);
6143 error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp,
6144 ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags,
6145 ap->a_incred, ap->a_outcred, ap->a_fsizetd);
6146 return (error);
6149 struct vop_vector zfs_vnodeops;
6150 struct vop_vector zfs_fifoops;
6151 struct vop_vector zfs_shareops;
6153 struct vop_vector zfs_vnodeops = {
6154 .vop_default = &default_vnodeops,
6155 .vop_inactive = zfs_freebsd_inactive,
6156 .vop_need_inactive = zfs_freebsd_need_inactive,
6157 .vop_reclaim = zfs_freebsd_reclaim,
6158 .vop_fplookup_vexec = zfs_freebsd_fplookup_vexec,
6159 .vop_fplookup_symlink = zfs_freebsd_fplookup_symlink,
6160 .vop_access = zfs_freebsd_access,
6161 .vop_allocate = VOP_EINVAL,
6162 #if __FreeBSD_version >= 1400032
6163 .vop_deallocate = zfs_deallocate,
6164 #endif
6165 .vop_lookup = zfs_cache_lookup,
6166 .vop_cachedlookup = zfs_freebsd_cachedlookup,
6167 .vop_getattr = zfs_freebsd_getattr,
6168 .vop_setattr = zfs_freebsd_setattr,
6169 .vop_create = zfs_freebsd_create,
6170 .vop_mknod = (vop_mknod_t *)zfs_freebsd_create,
6171 .vop_mkdir = zfs_freebsd_mkdir,
6172 .vop_readdir = zfs_freebsd_readdir,
6173 .vop_fsync = zfs_freebsd_fsync,
6174 .vop_open = zfs_freebsd_open,
6175 .vop_close = zfs_freebsd_close,
6176 .vop_rmdir = zfs_freebsd_rmdir,
6177 .vop_ioctl = zfs_freebsd_ioctl,
6178 .vop_link = zfs_freebsd_link,
6179 .vop_symlink = zfs_freebsd_symlink,
6180 .vop_readlink = zfs_freebsd_readlink,
6181 .vop_read = zfs_freebsd_read,
6182 .vop_write = zfs_freebsd_write,
6183 .vop_remove = zfs_freebsd_remove,
6184 .vop_rename = zfs_freebsd_rename,
6185 .vop_pathconf = zfs_freebsd_pathconf,
6186 .vop_bmap = zfs_freebsd_bmap,
6187 .vop_fid = zfs_freebsd_fid,
6188 .vop_getextattr = zfs_getextattr,
6189 .vop_deleteextattr = zfs_deleteextattr,
6190 .vop_setextattr = zfs_setextattr,
6191 .vop_listextattr = zfs_listextattr,
6192 .vop_getacl = zfs_freebsd_getacl,
6193 .vop_setacl = zfs_freebsd_setacl,
6194 .vop_aclcheck = zfs_freebsd_aclcheck,
6195 .vop_getpages = zfs_freebsd_getpages,
6196 .vop_putpages = zfs_freebsd_putpages,
6197 .vop_vptocnp = zfs_vptocnp,
6198 .vop_lock1 = vop_lock,
6199 .vop_unlock = vop_unlock,
6200 .vop_islocked = vop_islocked,
6201 #if __FreeBSD_version >= 1400043
6202 .vop_add_writecount = vop_stdadd_writecount_nomsync,
6203 #endif
6204 .vop_copy_file_range = zfs_freebsd_copy_file_range,
6206 VFS_VOP_VECTOR_REGISTER(zfs_vnodeops);
6208 struct vop_vector zfs_fifoops = {
6209 .vop_default = &fifo_specops,
6210 .vop_fsync = zfs_freebsd_fsync,
6211 .vop_fplookup_vexec = zfs_freebsd_fplookup_vexec,
6212 .vop_fplookup_symlink = zfs_freebsd_fplookup_symlink,
6213 .vop_access = zfs_freebsd_access,
6214 .vop_getattr = zfs_freebsd_getattr,
6215 .vop_inactive = zfs_freebsd_inactive,
6216 .vop_read = VOP_PANIC,
6217 .vop_reclaim = zfs_freebsd_reclaim,
6218 .vop_setattr = zfs_freebsd_setattr,
6219 .vop_write = VOP_PANIC,
6220 .vop_pathconf = zfs_freebsd_pathconf,
6221 .vop_fid = zfs_freebsd_fid,
6222 .vop_getacl = zfs_freebsd_getacl,
6223 .vop_setacl = zfs_freebsd_setacl,
6224 .vop_aclcheck = zfs_freebsd_aclcheck,
6225 #if __FreeBSD_version >= 1400043
6226 .vop_add_writecount = vop_stdadd_writecount_nomsync,
6227 #endif
6229 VFS_VOP_VECTOR_REGISTER(zfs_fifoops);
6232 * special share hidden files vnode operations template
6234 struct vop_vector zfs_shareops = {
6235 .vop_default = &default_vnodeops,
6236 .vop_fplookup_vexec = VOP_EAGAIN,
6237 .vop_fplookup_symlink = VOP_EAGAIN,
6238 .vop_access = zfs_freebsd_access,
6239 .vop_inactive = zfs_freebsd_inactive,
6240 .vop_reclaim = zfs_freebsd_reclaim,
6241 .vop_fid = zfs_freebsd_fid,
6242 .vop_pathconf = zfs_freebsd_pathconf,
6243 #if __FreeBSD_version >= 1400043
6244 .vop_add_writecount = vop_stdadd_writecount_nomsync,
6245 #endif
6247 VFS_VOP_VECTOR_REGISTER(zfs_shareops);
6249 ZFS_MODULE_PARAM(zfs, zfs_, xattr_compat, INT, ZMOD_RW,
6250 "Use legacy ZFS xattr naming for writing new user namespace xattrs");