4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
25 * Copyright 2017 Nexenta Systems, Inc.
28 #include <sys/types.h>
29 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/sysmacros.h>
33 #include <sys/resource.h>
35 #include <sys/vnode.h>
39 #include <sys/cmn_err.h>
40 #include <sys/errno.h>
42 #include <sys/unistd.h>
43 #include <sys/sunddi.h>
44 #include <sys/random.h>
45 #include <sys/policy.h>
46 #include <sys/condvar.h>
47 #include <sys/callb.h>
49 #include <sys/zfs_dir.h>
50 #include <sys/zfs_acl.h>
51 #include <sys/fs/zfs.h>
54 #include <sys/atomic.h>
55 #include <sys/zfs_ctldir.h>
56 #include <sys/zfs_fuid.h>
58 #include <sys/zfs_sa.h>
59 #include <sys/dmu_objset.h>
60 #include <sys/dsl_dir.h>
62 #include <sys/ccompat.h>
65 * zfs_match_find() is used by zfs_dirent_lookup() to perform zap lookups
66 * of names after deciding which is the appropriate lookup interface.
69 zfs_match_find(zfsvfs_t
*zfsvfs
, znode_t
*dzp
, const char *name
,
70 matchtype_t mt
, uint64_t *zoid
)
77 * In the non-mixed case we only expect there would ever
78 * be one match, but we need to use the normalizing lookup.
80 error
= zap_lookup_norm(zfsvfs
->z_os
, dzp
->z_id
, name
, 8, 1,
81 zoid
, mt
, NULL
, 0, NULL
);
83 error
= zap_lookup(zfsvfs
->z_os
, dzp
->z_id
, name
, 8, 1, zoid
);
85 *zoid
= ZFS_DIRENT_OBJ(*zoid
);
91 * Look up a directory entry under a locked vnode.
92 * dvp being locked gives us a guarantee that there are no concurrent
93 * modifications of the directory and, thus, if a node can be found in
94 * the directory, then it must not be unlinked.
97 * dzp - znode for directory
98 * name - name of entry to look up
99 * flag - ZNEW: if the entry already exists, fail with EEXIST.
100 * ZEXISTS: if the entry does not exist, fail with ENOENT.
101 * ZXATTR: we want dzp's xattr directory
104 * zpp - pointer to the znode for the entry (NULL if there isn't one)
106 * Return value: 0 on success or errno on failure.
108 * NOTE: Always checks for, and rejects, '.' and '..'.
111 zfs_dirent_lookup(znode_t
*dzp
, const char *name
, znode_t
**zpp
, int flag
)
113 zfsvfs_t
*zfsvfs
= dzp
->z_zfsvfs
;
119 if (zfsvfs
->z_replay
== B_FALSE
)
120 ASSERT_VOP_LOCKED(ZTOV(dzp
), __func__
);
125 * Verify that we are not trying to look up '.', '..', or '.zfs'
127 if (name
[0] == '.' &&
128 (((name
[1] == '\0') || (name
[1] == '.' && name
[2] == '\0')) ||
129 (zfs_has_ctldir(dzp
) && strcmp(name
, ZFS_CTLDIR_NAME
) == 0)))
130 return (SET_ERROR(EEXIST
));
133 * Case sensitivity and normalization preferences are set when
134 * the file system is created. These are stored in the
135 * zfsvfs->z_case and zfsvfs->z_norm fields. These choices
136 * affect how we perform zap lookups.
138 * When matching we may need to normalize & change case according to
141 * Note that a normalized match is necessary for a case insensitive
142 * filesystem when the lookup request is not exact because normalization
143 * can fold case independent of normalizing code point sequences.
145 * See the table above zfs_dropname().
147 if (zfsvfs
->z_norm
!= 0) {
151 * Determine if the match needs to honor the case specified in
152 * lookup, and if so keep track of that so that during
153 * normalization we don't fold case.
155 if (zfsvfs
->z_case
== ZFS_CASE_MIXED
) {
161 * Only look in or update the DNLC if we are looking for the
162 * name on a file system that does not require normalization
163 * or case folding. We can also look there if we happen to be
164 * on a non-normalizing, mixed sensitivity file system IF we
165 * are looking for the exact name.
167 * NB: we do not need to worry about this flag for ZFS_CASE_SENSITIVE
168 * because in that case MT_EXACT and MT_FIRST should produce exactly
172 if (dzp
->z_unlinked
&& !(flag
& ZXATTR
))
175 error
= sa_lookup(dzp
->z_sa_hdl
, SA_ZPL_XATTR(zfsvfs
), &zoid
,
178 error
= (zoid
== 0 ? ENOENT
: 0);
180 error
= zfs_match_find(zfsvfs
, dzp
, name
, mt
, &zoid
);
183 if (error
!= ENOENT
|| (flag
& ZEXISTS
)) {
188 return (SET_ERROR(EEXIST
));
190 error
= zfs_zget(zfsvfs
, zoid
, &zp
);
193 ASSERT(!zp
->z_unlinked
);
201 zfs_dd_lookup(znode_t
*dzp
, znode_t
**zpp
)
203 zfsvfs_t
*zfsvfs
= dzp
->z_zfsvfs
;
209 if (zfsvfs
->z_replay
== B_FALSE
)
210 ASSERT_VOP_LOCKED(ZTOV(dzp
), __func__
);
215 if ((error
= sa_lookup(dzp
->z_sa_hdl
,
216 SA_ZPL_PARENT(zfsvfs
), &parent
, sizeof (parent
))) != 0)
219 error
= zfs_zget(zfsvfs
, parent
, &zp
);
226 zfs_dirlook(znode_t
*dzp
, const char *name
, znode_t
**zpp
)
228 zfsvfs_t
*zfsvfs __unused
= dzp
->z_zfsvfs
;
233 if (zfsvfs
->z_replay
== B_FALSE
)
234 ASSERT_VOP_LOCKED(ZTOV(dzp
), __func__
);
237 return (SET_ERROR(ENOENT
));
239 if (name
[0] == 0 || (name
[0] == '.' && name
[1] == 0)) {
241 } else if (name
[0] == '.' && name
[1] == '.' && name
[2] == 0) {
242 error
= zfs_dd_lookup(dzp
, &zp
);
246 error
= zfs_dirent_lookup(dzp
, name
, &zp
, ZEXISTS
);
248 dzp
->z_zn_prefetch
= B_TRUE
; /* enable prefetching */
256 * unlinked Set (formerly known as the "delete queue") Error Handling
258 * When dealing with the unlinked set, we dmu_tx_hold_zap(), but we
259 * don't specify the name of the entry that we will be manipulating. We
260 * also fib and say that we won't be adding any new entries to the
261 * unlinked set, even though we might (this is to lower the minimum file
262 * size that can be deleted in a full filesystem). So on the small
263 * chance that the nlink list is using a fat zap (ie. has more than
264 * 2000 entries), we *may* not pre-read a block that's needed.
265 * Therefore it is remotely possible for some of the assertions
266 * regarding the unlinked set below to fail due to i/o error. On a
267 * nondebug system, this will result in the space being leaked.
270 zfs_unlinked_add(znode_t
*zp
, dmu_tx_t
*tx
)
272 zfsvfs_t
*zfsvfs
= zp
->z_zfsvfs
;
274 ASSERT(zp
->z_unlinked
);
275 ASSERT3U(zp
->z_links
, ==, 0);
277 VERIFY0(zap_add_int(zfsvfs
->z_os
, zfsvfs
->z_unlinkedobj
, zp
->z_id
, tx
));
279 dataset_kstats_update_nunlinks_kstat(&zfsvfs
->z_kstat
, 1);
283 * Clean up any znodes that had no links when we either crashed or
284 * (force) umounted the file system.
287 zfs_unlinked_drain(zfsvfs_t
*zfsvfs
)
290 zap_attribute_t
*zap
;
291 dmu_object_info_t doi
;
297 * Iterate over the contents of the unlinked set.
299 zap
= zap_attribute_alloc();
300 for (zap_cursor_init(&zc
, zfsvfs
->z_os
, zfsvfs
->z_unlinkedobj
);
301 zap_cursor_retrieve(&zc
, zap
) == 0;
302 zap_cursor_advance(&zc
)) {
305 * See what kind of object we have in the list
308 error
= dmu_object_info(zfsvfs
->z_os
,
309 zap
->za_first_integer
, &doi
);
313 ASSERT((doi
.doi_type
== DMU_OT_PLAIN_FILE_CONTENTS
) ||
314 (doi
.doi_type
== DMU_OT_DIRECTORY_CONTENTS
));
316 * We need to re-mark these list entries for deletion,
317 * so we pull them back into core and set zp->z_unlinked.
319 error
= zfs_zget(zfsvfs
, zap
->za_first_integer
, &zp
);
322 * We may pick up znodes that are already marked for deletion.
323 * This could happen during the purge of an extended attribute
324 * directory. All we need to do is skip over them, since they
325 * are already in the system marked z_unlinked.
330 vn_lock(ZTOV(zp
), LK_EXCLUSIVE
| LK_RETRY
);
333 * Due to changes in zfs_rmnode we need to make sure the
334 * link count is set to zero here.
336 if (zp
->z_links
!= 0) {
337 tx
= dmu_tx_create(zfsvfs
->z_os
);
338 dmu_tx_hold_sa(tx
, zp
->z_sa_hdl
, B_FALSE
);
339 error
= dmu_tx_assign(tx
, TXG_WAIT
);
346 VERIFY0(sa_update(zp
->z_sa_hdl
, SA_ZPL_LINKS(zfsvfs
),
347 &zp
->z_links
, sizeof (zp
->z_links
), tx
));
351 zp
->z_unlinked
= B_TRUE
;
354 zap_cursor_fini(&zc
);
355 zap_attribute_free(zap
);
359 * Delete the entire contents of a directory. Return a count
360 * of the number of entries that could not be deleted. If we encounter
361 * an error, return a count of at least one so that the directory stays
362 * in the unlinked set.
364 * NOTE: this function assumes that the directory is inactive,
365 * so there is no need to lock its entries before deletion.
366 * Also, it assumes the directory contents are *only* regular
370 zfs_purgedir(znode_t
*dzp
)
373 zap_attribute_t
*zap
;
376 zfsvfs_t
*zfsvfs
= dzp
->z_zfsvfs
;
380 zap
= zap_attribute_alloc();
381 for (zap_cursor_init(&zc
, zfsvfs
->z_os
, dzp
->z_id
);
382 (error
= zap_cursor_retrieve(&zc
, zap
)) == 0;
383 zap_cursor_advance(&zc
)) {
384 error
= zfs_zget(zfsvfs
,
385 ZFS_DIRENT_OBJ(zap
->za_first_integer
), &xzp
);
391 vn_lock(ZTOV(xzp
), LK_EXCLUSIVE
| LK_RETRY
);
392 ASSERT((ZTOV(xzp
)->v_type
== VREG
) ||
393 (ZTOV(xzp
)->v_type
== VLNK
));
395 tx
= dmu_tx_create(zfsvfs
->z_os
);
396 dmu_tx_hold_sa(tx
, dzp
->z_sa_hdl
, B_FALSE
);
397 dmu_tx_hold_zap(tx
, dzp
->z_id
, FALSE
, zap
->za_name
);
398 dmu_tx_hold_sa(tx
, xzp
->z_sa_hdl
, B_FALSE
);
399 dmu_tx_hold_zap(tx
, zfsvfs
->z_unlinkedobj
, FALSE
, NULL
);
400 /* Is this really needed ? */
401 zfs_sa_upgrade_txholds(tx
, xzp
);
402 dmu_tx_mark_netfree(tx
);
403 error
= dmu_tx_assign(tx
, TXG_WAIT
);
411 error
= zfs_link_destroy(dzp
, zap
->za_name
, xzp
, tx
, 0, NULL
);
418 zap_cursor_fini(&zc
);
419 zap_attribute_free(zap
);
425 extern taskq_t
*zfsvfs_taskq
;
428 zfs_rmnode(znode_t
*zp
)
430 zfsvfs_t
*zfsvfs
= zp
->z_zfsvfs
;
431 objset_t
*os
= zfsvfs
->z_os
;
433 uint64_t z_id
= zp
->z_id
;
439 ASSERT3U(zp
->z_links
, ==, 0);
440 if (zfsvfs
->z_replay
== B_FALSE
)
441 ASSERT_VOP_ELOCKED(ZTOV(zp
), __func__
);
444 * If this is an attribute directory, purge its contents.
446 if (ZTOV(zp
) != NULL
&& ZTOV(zp
)->v_type
== VDIR
&&
447 (zp
->z_pflags
& ZFS_XATTR
)) {
448 if (zfs_purgedir(zp
) != 0) {
450 * Not enough space to delete some xattrs.
451 * Leave it in the unlinked set.
453 ZFS_OBJ_HOLD_ENTER(zfsvfs
, z_id
);
454 zfs_znode_dmu_fini(zp
);
456 ZFS_OBJ_HOLD_EXIT(zfsvfs
, z_id
);
461 * Free up all the data in the file. We don't do this for
462 * XATTR directories because we need truncate and remove to be
463 * in the same tx, like in zfs_znode_delete(). Otherwise, if
464 * we crash here we'll end up with an inconsistent truncated
465 * zap object in the delete queue. Note a truncated file is
466 * harmless since it only contains user data.
468 error
= dmu_free_long_range(os
, zp
->z_id
, 0, DMU_OBJECT_END
);
471 * Not enough space or we were interrupted by unmount.
472 * Leave the file in the unlinked set.
474 ZFS_OBJ_HOLD_ENTER(zfsvfs
, z_id
);
475 zfs_znode_dmu_fini(zp
);
477 ZFS_OBJ_HOLD_EXIT(zfsvfs
, z_id
);
483 * If the file has extended attributes, we're going to unlink
486 error
= sa_lookup(zp
->z_sa_hdl
, SA_ZPL_XATTR(zfsvfs
),
487 &xattr_obj
, sizeof (xattr_obj
));
491 acl_obj
= zfs_external_acl(zp
);
494 * Set up the final transaction.
496 tx
= dmu_tx_create(os
);
497 dmu_tx_hold_free(tx
, zp
->z_id
, 0, DMU_OBJECT_END
);
498 dmu_tx_hold_zap(tx
, zfsvfs
->z_unlinkedobj
, FALSE
, NULL
);
500 dmu_tx_hold_zap(tx
, zfsvfs
->z_unlinkedobj
, TRUE
, NULL
);
502 dmu_tx_hold_free(tx
, acl_obj
, 0, DMU_OBJECT_END
);
504 zfs_sa_upgrade_txholds(tx
, zp
);
505 error
= dmu_tx_assign(tx
, TXG_WAIT
);
508 * Not enough space to delete the file. Leave it in the
509 * unlinked set, leaking it until the fs is remounted (at
510 * which point we'll call zfs_unlinked_drain() to process it).
513 ZFS_OBJ_HOLD_ENTER(zfsvfs
, z_id
);
514 zfs_znode_dmu_fini(zp
);
516 ZFS_OBJ_HOLD_EXIT(zfsvfs
, z_id
);
521 * FreeBSD's implementation of zfs_zget requires a vnode to back it.
522 * This means that we could end up calling into getnewvnode while
523 * calling zfs_rmnode as a result of a prior call to getnewvnode
524 * trying to clear vnodes out of the cache. If this repeats we can
525 * recurse enough that we overflow our stack. To avoid this, we
526 * avoid calling zfs_zget on the xattr znode and instead simply add
527 * it to the unlinked set and schedule a call to zfs_unlinked_drain.
530 /* Add extended attribute directory to the unlinked set. */
532 zap_add_int(os
, zfsvfs
->z_unlinkedobj
, xattr_obj
, tx
));
535 mutex_enter(&os
->os_dsl_dataset
->ds_dir
->dd_activity_lock
);
537 /* Remove this znode from the unlinked set */
539 zap_remove_int(os
, zfsvfs
->z_unlinkedobj
, zp
->z_id
, tx
));
541 if (zap_count(os
, zfsvfs
->z_unlinkedobj
, &count
) == 0 && count
== 0) {
542 cv_broadcast(&os
->os_dsl_dataset
->ds_dir
->dd_activity_cv
);
545 mutex_exit(&os
->os_dsl_dataset
->ds_dir
->dd_activity_lock
);
547 dataset_kstats_update_nunlinked_kstat(&zfsvfs
->z_kstat
, 1);
549 zfs_znode_delete(zp
, tx
);
556 * We're using the FreeBSD taskqueue API here instead of
557 * the Solaris taskq API since the FreeBSD API allows for a
558 * task to be enqueued multiple times but executed once.
560 taskqueue_enqueue(zfsvfs_taskq
->tq_queue
,
561 &zfsvfs
->z_unlinked_drain_task
);
566 zfs_dirent(znode_t
*zp
, uint64_t mode
)
568 uint64_t de
= zp
->z_id
;
570 if (zp
->z_zfsvfs
->z_version
>= ZPL_VERSION_DIRENT_TYPE
)
571 de
|= IFTODT(mode
) << 60;
576 * Link zp into dzp. Can fail if zp is unlinked, on EMLINK, or in zap_add().
579 zfs_link_create(znode_t
*dzp
, const char *name
, znode_t
*zp
, dmu_tx_t
*tx
,
582 zfsvfs_t
*zfsvfs
= zp
->z_zfsvfs
;
583 vnode_t
*vp
= ZTOV(zp
);
585 int zp_is_dir
= (vp
->v_type
== VDIR
);
586 sa_bulk_attr_t bulk
[5];
587 uint64_t mtime
[2], ctime
[2];
591 if (zfsvfs
->z_replay
== B_FALSE
) {
592 ASSERT_VOP_ELOCKED(ZTOV(dzp
), __func__
);
593 ASSERT_VOP_ELOCKED(ZTOV(zp
), __func__
);
596 if (dzp
->z_links
>= ZFS_LINK_MAX
)
597 return (SET_ERROR(EMLINK
));
599 if (!(flag
& ZRENAMING
)) {
600 if (zp
->z_unlinked
) { /* no new links to unlinked zp */
601 ASSERT(!(flag
& (ZNEW
| ZEXISTS
)));
602 return (SET_ERROR(ENOENT
));
604 if (zp
->z_links
>= ZFS_LINK_MAX
- zp_is_dir
) {
605 return (SET_ERROR(EMLINK
));
608 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_LINKS(zfsvfs
), NULL
,
609 &zp
->z_links
, sizeof (zp
->z_links
));
612 ASSERT(!zp
->z_unlinked
);
614 value
= zfs_dirent(zp
, zp
->z_mode
);
615 error
= zap_add(zp
->z_zfsvfs
->z_os
, dzp
->z_id
, name
,
619 * zap_add could fail to add the entry if it exceeds the capacity of the
620 * leaf-block and zap_leaf_split() failed to help.
621 * The caller of this routine is responsible for failing the transaction
622 * which will rollback the SA updates done above.
625 if (!(flag
& ZRENAMING
) && !(flag
& ZNEW
))
631 * If we added a longname activate the SPA_FEATURE_LONGNAME.
633 if (strlen(name
) >= ZAP_MAXNAMELEN
) {
634 dsl_dataset_t
*ds
= dmu_objset_ds(zfsvfs
->z_os
);
635 ds
->ds_feature_activation
[SPA_FEATURE_LONGNAME
] =
639 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_PARENT(zfsvfs
), NULL
,
640 &dzp
->z_id
, sizeof (dzp
->z_id
));
641 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_FLAGS(zfsvfs
), NULL
,
642 &zp
->z_pflags
, sizeof (zp
->z_pflags
));
644 if (!(flag
& ZNEW
)) {
645 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_CTIME(zfsvfs
), NULL
,
646 ctime
, sizeof (ctime
));
647 zfs_tstamp_update_setup(zp
, STATE_CHANGED
, mtime
,
650 error
= sa_bulk_update(zp
->z_sa_hdl
, bulk
, count
, tx
);
654 dzp
->z_links
+= zp_is_dir
;
656 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_SIZE(zfsvfs
), NULL
,
657 &dzp
->z_size
, sizeof (dzp
->z_size
));
658 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_LINKS(zfsvfs
), NULL
,
659 &dzp
->z_links
, sizeof (dzp
->z_links
));
660 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_MTIME(zfsvfs
), NULL
,
661 mtime
, sizeof (mtime
));
662 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_CTIME(zfsvfs
), NULL
,
663 ctime
, sizeof (ctime
));
664 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_FLAGS(zfsvfs
), NULL
,
665 &dzp
->z_pflags
, sizeof (dzp
->z_pflags
));
666 zfs_tstamp_update_setup(dzp
, CONTENT_MODIFIED
, mtime
, ctime
);
667 error
= sa_bulk_update(dzp
->z_sa_hdl
, bulk
, count
, tx
);
673 * The match type in the code for this function should conform to:
675 * ------------------------------------------------------------------------
676 * fs type | z_norm | lookup type | match type
677 * ---------|-------------|-------------|----------------------------------
678 * CS !norm | 0 | 0 | 0 (exact)
679 * CS norm | formX | 0 | MT_NORMALIZE
680 * CI !norm | upper | !ZCIEXACT | MT_NORMALIZE
681 * CI !norm | upper | ZCIEXACT | MT_NORMALIZE | MT_MATCH_CASE
682 * CI norm | upper|formX | !ZCIEXACT | MT_NORMALIZE
683 * CI norm | upper|formX | ZCIEXACT | MT_NORMALIZE | MT_MATCH_CASE
684 * CM !norm | upper | !ZCILOOK | MT_NORMALIZE | MT_MATCH_CASE
685 * CM !norm | upper | ZCILOOK | MT_NORMALIZE
686 * CM norm | upper|formX | !ZCILOOK | MT_NORMALIZE | MT_MATCH_CASE
687 * CM norm | upper|formX | ZCILOOK | MT_NORMALIZE
690 * CS = Case Sensitive, CI = Case Insensitive, CM = Case Mixed
691 * upper = case folding set by fs type on creation (U8_TEXTPREP_TOUPPER)
692 * formX = unicode normalization form set on fs creation
695 zfs_dropname(znode_t
*dzp
, const char *name
, znode_t
*zp
, dmu_tx_t
*tx
,
700 if (zp
->z_zfsvfs
->z_norm
) {
701 matchtype_t mt
= MT_NORMALIZE
;
703 if (zp
->z_zfsvfs
->z_case
== ZFS_CASE_MIXED
) {
707 error
= zap_remove_norm(zp
->z_zfsvfs
->z_os
, dzp
->z_id
,
710 error
= zap_remove(zp
->z_zfsvfs
->z_os
, dzp
->z_id
, name
, tx
);
717 * Unlink zp from dzp, and mark zp for deletion if this was the last link.
718 * Can fail if zp is a mount point (EBUSY) or a non-empty directory (ENOTEMPTY).
719 * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list.
720 * If it's non-NULL, we use it to indicate whether the znode needs deletion,
721 * and it's the caller's job to do it.
724 zfs_link_destroy(znode_t
*dzp
, const char *name
, znode_t
*zp
, dmu_tx_t
*tx
,
725 int flag
, boolean_t
*unlinkedp
)
727 zfsvfs_t
*zfsvfs
= dzp
->z_zfsvfs
;
728 vnode_t
*vp
= ZTOV(zp
);
729 int zp_is_dir
= (vp
->v_type
== VDIR
);
730 boolean_t unlinked
= B_FALSE
;
731 sa_bulk_attr_t bulk
[5];
732 uint64_t mtime
[2], ctime
[2];
736 if (zfsvfs
->z_replay
== B_FALSE
) {
737 ASSERT_VOP_ELOCKED(ZTOV(dzp
), __func__
);
738 ASSERT_VOP_ELOCKED(ZTOV(zp
), __func__
);
740 if (!(flag
& ZRENAMING
)) {
742 if (zp_is_dir
&& !zfs_dirempty(zp
))
743 return (SET_ERROR(ENOTEMPTY
));
746 * If we get here, we are going to try to remove the object.
747 * First try removing the name from the directory; if that
748 * fails, return the error.
750 error
= zfs_dropname(dzp
, name
, zp
, tx
, flag
);
755 if (zp
->z_links
<= zp_is_dir
) {
756 zfs_panic_recover("zfs: link count on vnode %p is %u, "
757 "should be at least %u", zp
->z_vnode
,
760 zp
->z_links
= zp_is_dir
+ 1;
762 if (--zp
->z_links
== zp_is_dir
) {
763 zp
->z_unlinked
= B_TRUE
;
767 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_CTIME(zfsvfs
),
768 NULL
, &ctime
, sizeof (ctime
));
769 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_FLAGS(zfsvfs
),
770 NULL
, &zp
->z_pflags
, sizeof (zp
->z_pflags
));
771 zfs_tstamp_update_setup(zp
, STATE_CHANGED
, mtime
,
774 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_LINKS(zfsvfs
),
775 NULL
, &zp
->z_links
, sizeof (zp
->z_links
));
776 error
= sa_bulk_update(zp
->z_sa_hdl
, bulk
, count
, tx
);
780 ASSERT(!zp
->z_unlinked
);
781 error
= zfs_dropname(dzp
, name
, zp
, tx
, flag
);
786 dzp
->z_size
--; /* one dirent removed */
787 dzp
->z_links
-= zp_is_dir
; /* ".." link from zp */
788 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_LINKS(zfsvfs
),
789 NULL
, &dzp
->z_links
, sizeof (dzp
->z_links
));
790 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_SIZE(zfsvfs
),
791 NULL
, &dzp
->z_size
, sizeof (dzp
->z_size
));
792 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_CTIME(zfsvfs
),
793 NULL
, ctime
, sizeof (ctime
));
794 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_MTIME(zfsvfs
),
795 NULL
, mtime
, sizeof (mtime
));
796 SA_ADD_BULK_ATTR(bulk
, count
, SA_ZPL_FLAGS(zfsvfs
),
797 NULL
, &dzp
->z_pflags
, sizeof (dzp
->z_pflags
));
798 zfs_tstamp_update_setup(dzp
, CONTENT_MODIFIED
, mtime
, ctime
);
799 error
= sa_bulk_update(dzp
->z_sa_hdl
, bulk
, count
, tx
);
802 if (unlinkedp
!= NULL
)
803 *unlinkedp
= unlinked
;
805 zfs_unlinked_add(zp
, tx
);
811 * Indicate whether the directory is empty.
814 zfs_dirempty(znode_t
*dzp
)
816 return (dzp
->z_size
== 2);
820 zfs_make_xattrdir(znode_t
*zp
, vattr_t
*vap
, znode_t
**xvpp
, cred_t
*cr
)
822 zfsvfs_t
*zfsvfs
= zp
->z_zfsvfs
;
826 zfs_acl_ids_t acl_ids
;
827 boolean_t fuid_dirtied
;
828 uint64_t parent __maybe_unused
;
832 if ((error
= zfs_acl_ids_create(zp
, IS_XATTR
, vap
, cr
, NULL
,
833 &acl_ids
, NULL
)) != 0)
835 if (zfs_acl_ids_overquota(zfsvfs
, &acl_ids
, 0)) {
836 zfs_acl_ids_free(&acl_ids
);
837 return (SET_ERROR(EDQUOT
));
840 getnewvnode_reserve();
842 tx
= dmu_tx_create(zfsvfs
->z_os
);
843 dmu_tx_hold_sa_create(tx
, acl_ids
.z_aclp
->z_acl_bytes
+
844 ZFS_SA_BASE_ATTR_SIZE
);
845 dmu_tx_hold_sa(tx
, zp
->z_sa_hdl
, B_TRUE
);
846 dmu_tx_hold_zap(tx
, DMU_NEW_OBJECT
, FALSE
, NULL
);
847 fuid_dirtied
= zfsvfs
->z_fuid_dirty
;
849 zfs_fuid_txhold(zfsvfs
, tx
);
850 error
= dmu_tx_assign(tx
, TXG_WAIT
);
852 zfs_acl_ids_free(&acl_ids
);
854 getnewvnode_drop_reserve();
857 zfs_mknode(zp
, vap
, tx
, cr
, IS_XATTR
, &xzp
, &acl_ids
);
860 zfs_fuid_sync(zfsvfs
, tx
);
862 ASSERT0(sa_lookup(xzp
->z_sa_hdl
, SA_ZPL_PARENT(zfsvfs
), &parent
,
864 ASSERT3U(parent
, ==, zp
->z_id
);
866 VERIFY0(sa_update(zp
->z_sa_hdl
, SA_ZPL_XATTR(zfsvfs
), &xzp
->z_id
,
867 sizeof (xzp
->z_id
), tx
));
869 zfs_log_create(zfsvfs
->z_log
, tx
, TX_MKXATTR
, zp
, xzp
, "", NULL
,
870 acl_ids
.z_fuidp
, vap
);
872 zfs_acl_ids_free(&acl_ids
);
875 getnewvnode_drop_reserve();
883 * Return a znode for the extended attribute directory for zp.
884 * ** If missing, it is created only when flags has CREATE_XATTR_DIR **
886 * IN: zp - znode to obtain attribute directory from
887 * cr - credentials of caller
888 * flags - flags from the VOP_LOOKUP call
890 * OUT: xzpp - pointer to extended attribute znode
892 * RETURN: 0 on success
893 * error number on failure
896 zfs_get_xattrdir(znode_t
*zp
, znode_t
**xzpp
, cred_t
*cr
, int flags
)
898 zfsvfs_t
*zfsvfs
= zp
->z_zfsvfs
;
903 error
= zfs_dirent_lookup(zp
, "", &xzp
, ZXATTR
);
913 if (!(flags
& CREATE_XATTR_DIR
))
914 return (SET_ERROR(ENOATTR
));
916 if (zfsvfs
->z_vfs
->vfs_flag
& VFS_RDONLY
) {
917 return (SET_ERROR(EROFS
));
921 * The ability to 'create' files in an attribute
922 * directory comes from the write_xattr permission on the base file.
924 * The ability to 'search' an attribute directory requires
925 * read_xattr permission on the base file.
927 * Once in a directory the ability to read/write attributes
928 * is controlled by the permissions on the attribute file.
930 va
.va_mask
= AT_MODE
| AT_UID
| AT_GID
;
932 va
.va_mode
= S_IFDIR
| S_ISVTX
| 0777;
933 zfs_fuid_map_ids(zp
, cr
, &va
.va_uid
, &va
.va_gid
);
935 error
= zfs_make_xattrdir(zp
, &va
, xzpp
, cr
);
937 if (error
== ERESTART
) {
938 /* NB: we already did dmu_tx_wait() if necessary */
942 VOP_UNLOCK(ZTOV(*xzpp
));
948 * Decide whether it is okay to remove within a sticky directory.
950 * In sticky directories, write access is not sufficient;
951 * you can remove entries from a directory only if:
953 * you own the directory,
955 * the entry is a plain file and you have write access,
956 * or you are privileged (checked in secpolicy...).
958 * The function returns 0 if remove access is granted.
961 zfs_sticky_remove_access(znode_t
*zdp
, znode_t
*zp
, cred_t
*cr
)
966 zfsvfs_t
*zfsvfs
= zdp
->z_zfsvfs
;
968 if (zdp
->z_zfsvfs
->z_replay
)
971 if ((zdp
->z_mode
& S_ISVTX
) == 0)
974 downer
= zfs_fuid_map_id(zfsvfs
, zdp
->z_uid
, cr
, ZFS_OWNER
);
975 fowner
= zfs_fuid_map_id(zfsvfs
, zp
->z_uid
, cr
, ZFS_OWNER
);
977 if ((uid
= crgetuid(cr
)) == downer
|| uid
== fowner
||
978 (ZTOV(zp
)->v_type
== VREG
&&
979 zfs_zaccess(zp
, ACE_WRITE_DATA
, 0, B_FALSE
, cr
, NULL
) == 0))
982 return (secpolicy_vnode_remove(ZTOV(zp
), cr
));