 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/* Portions Copyright 2007 Jeremy Teo */
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/resource.h>
#include <sys/mntent.h>
#include <sys/u8_textprep.h>
#include <sys/dsl_dataset.h>
#include <sys/vnode.h>
#include <sys/errno.h>
#include <sys/unistd.h>
#include <sys/atomic.h>
#include <sys/zfs_dir.h>
#include <sys/zfs_acl.h>
#include <sys/zfs_ioctl.h>
#include <sys/zfs_rlock.h>
#include <sys/zfs_fuid.h>
#include <sys/fs/zfs.h>
#include <sys/kidmap.h>
#include <sys/refcount.h>
#include <sys/zfs_znode.h>
#if defined(_KERNEL) && defined(__NetBSD__)
#include <miscfs/specfs/specdev.h>

static const struct genfs_ops zfs_genfsops = {
	.gop_write = genfs_compat_gop_write,
};

extern int (**zfs_vnodeop_p)(void *);
extern int (**zfs_fifoop_p)(void *);
extern int (**zfs_specop_p)(void *);
/*
 * Define ZNODE_STATS to turn on statistic gathering. By default, it is only
 * turned on when DEBUG is also defined.
 */
#ifdef	ZNODE_STATS
#define	ZNODE_STAT_ADD(stat)			((stat)++)
#else
#define	ZNODE_STAT_ADD(stat)			/* nothing */
#endif	/* ZNODE_STATS */
#define	POINTER_IS_VALID(p)	(!((uintptr_t)(p) & 0x3))
#define	POINTER_INVALIDATE(pp)	(*(pp) = (void *)((uintptr_t)(*(pp)) | 0x1))
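/*
 * Illustrative note (not part of the original code): znode_t objects come
 * from a kmem cache and are at least 4-byte aligned, so a valid z_zfsvfs
 * back pointer always has its two low bits clear.  Setting the low bit
 * marks the pointer invalid without losing the old value, e.g.:
 *
 *	zp->z_zfsvfs = zfsvfs;			low bits 00 -> valid
 *	POINTER_INVALIDATE(&zp->z_zfsvfs);	low bit  1  -> invalid
 *	POINTER_IS_VALID(zp->z_zfsvfs)		now evaluates to 0
 */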
/*
 * Functions needed for userland (ie: libzpool) are not put under
 * #ifdef _KERNEL; the rest of the functions have dependencies
 * (such as VFS logic) that will not compile easily in userland.
 */

static kmem_cache_t *znode_cache = NULL;
static void
znode_evict_error(dmu_buf_t *dbuf, void *user_ptr)
{
	/*
	 * We should never drop all dbuf refs without first clearing
	 * the eviction callback.
	 */
	panic("evicting znode %p\n", user_ptr);
}
static int
zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
{
	znode_t *zp = buf;

	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));

	list_link_init(&zp->z_link_node);

	mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL);
	rw_init(&zp->z_map_lock, NULL, RW_DEFAULT, NULL);
	rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL);
	rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL);
	mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);

	mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL);
	avl_create(&zp->z_range_avl, zfs_range_compare,
	    sizeof (rl_t), offsetof(rl_t, r_node));

	zp->z_dirlocks = NULL;
	return (0);
}
static void
zfs_znode_cache_destructor(void *buf, void *arg)
{
	znode_t *zp = buf;

	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
	ASSERT(ZTOV(zp) == NULL);

	ASSERT(!list_link_active(&zp->z_link_node));
	mutex_destroy(&zp->z_lock);
	rw_destroy(&zp->z_map_lock);
	rw_destroy(&zp->z_parent_lock);
	rw_destroy(&zp->z_name_lock);
	mutex_destroy(&zp->z_acl_lock);
	avl_destroy(&zp->z_range_avl);
	mutex_destroy(&zp->z_range_lock);

	ASSERT(zp->z_dbuf == NULL);
	ASSERT(zp->z_dirlocks == NULL);
}
#ifdef	ZNODE_STATS
static struct {
	uint64_t zms_zfsvfs_invalid;
	uint64_t zms_zfsvfs_unmounted;
	uint64_t zms_zfsvfs_recheck_invalid;
	uint64_t zms_obj_held;
	uint64_t zms_vnode_locked;
	uint64_t zms_not_only_dnlc;
} znode_move_stats;
#endif	/* ZNODE_STATS */
static void
zfs_znode_move_impl(znode_t *ozp, znode_t *nzp)
{
	vnode_t *vp;

	nzp->z_zfsvfs = ozp->z_zfsvfs;

	vp = nzp->z_vnode;
	nzp->z_vnode = ozp->z_vnode;
	ozp->z_vnode = vp;	/* let destructor free the overwritten vnode */
	ZTOV(ozp)->v_data = ozp;
	ZTOV(nzp)->v_data = nzp;

	nzp->z_id = ozp->z_id;
	ASSERT(ozp->z_dirlocks == NULL); /* znode not in use */
	ASSERT(avl_numnodes(&ozp->z_range_avl) == 0);
	nzp->z_unlinked = ozp->z_unlinked;
	nzp->z_atime_dirty = ozp->z_atime_dirty;
	nzp->z_zn_prefetch = ozp->z_zn_prefetch;
	nzp->z_blksz = ozp->z_blksz;
	nzp->z_seq = ozp->z_seq;
	nzp->z_mapcnt = ozp->z_mapcnt;
	nzp->z_last_itx = ozp->z_last_itx;
	nzp->z_gen = ozp->z_gen;
	nzp->z_sync_cnt = ozp->z_sync_cnt;
	nzp->z_phys = ozp->z_phys;
	nzp->z_dbuf = ozp->z_dbuf;

	/* Update back pointers. */
	(void) dmu_buf_update_user(nzp->z_dbuf, ozp, nzp, &nzp->z_phys,
	    znode_evict_error);

	/*
	 * Invalidate the original znode by clearing fields that provide a
	 * pointer back to the znode. Set the low bit of the vfs pointer to
	 * ensure that zfs_znode_move() recognizes the znode as invalid in any
	 * subsequent callback.
	 */
	ozp->z_dbuf = NULL;
	POINTER_INVALIDATE(&ozp->z_zfsvfs);
}
/*
 * Wrapper function for ZFS_ENTER that returns 0 if successful and otherwise
 * returns a non-zero error code.
 */
static int
zfs_enter(zfsvfs_t *zfsvfs)
{
	ZFS_ENTER(zfsvfs);
	return (0);
}
static kmem_cbrc_t
zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg)
{
	znode_t *ozp = buf, *nzp = newbuf;
	zfsvfs_t *zfsvfs;
	vnode_t *vp;

	/*
	 * The znode is on the file system's list of known znodes if the vfs
	 * pointer is valid. We set the low bit of the vfs pointer when freeing
	 * the znode to invalidate it, and the memory patterns written by kmem
	 * (baddcafe and deadbeef) set at least one of the two low bits. A newly
	 * created znode sets the vfs pointer last of all to indicate that the
	 * znode is known and in a valid state to be moved by this function.
	 */
	zfsvfs = ozp->z_zfsvfs;
	if (!POINTER_IS_VALID(zfsvfs)) {
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid);
		return (KMEM_CBRC_DONT_KNOW);
	}

	/*
	 * Ensure that the filesystem is not unmounted during the move.
	 */
	if (zfs_enter(zfsvfs) != 0) {		/* ZFS_ENTER */
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted);
		return (KMEM_CBRC_DONT_KNOW);
	}

	mutex_enter(&zfsvfs->z_znodes_lock);
	/*
	 * Recheck the vfs pointer in case the znode was removed just before
	 * acquiring the lock.
	 */
	if (zfsvfs != ozp->z_zfsvfs) {
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck_invalid);
		return (KMEM_CBRC_DONT_KNOW);
	}

	/*
	 * At this point we know that as long as we hold z_znodes_lock, the
	 * znode cannot be freed and fields within the znode can be safely
	 * accessed. Now, prevent a race with zfs_zget().
	 */
	if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) {
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_obj_held);
		return (KMEM_CBRC_LATER);
	}

	vp = ZTOV(ozp);
	if (mutex_tryenter(&vp->v_lock) == 0) {
		ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked);
		return (KMEM_CBRC_LATER);
	}

	/* Only move znodes that are referenced _only_ by the DNLC. */
	if (vp->v_count != 1 || !vn_in_dnlc(vp)) {
		mutex_exit(&vp->v_lock);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc);
		return (KMEM_CBRC_LATER);
	}

	/*
	 * The znode is known and in a valid state to move. We're holding the
	 * locks needed to execute the critical section.
	 */
	zfs_znode_move_impl(ozp, nzp);
	mutex_exit(&vp->v_lock);
	ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);

	list_link_replace(&ozp->z_link_node, &nzp->z_link_node);
	mutex_exit(&zfsvfs->z_znodes_lock);
	ZFS_EXIT(zfsvfs);

	return (KMEM_CBRC_YES);
}
#endif	/* !__NetBSD__ */
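/*
 * Illustrative note (not from the original source): as the callback above
 * shows, the kmem move hook answers KMEM_CBRC_DONT_KNOW when the vfs back
 * pointer is invalid or the filesystem is being unmounted, KMEM_CBRC_LATER
 * when the object mutex or vnode lock is busy or the vnode is referenced by
 * more than the DNLC, and KMEM_CBRC_YES once zfs_znode_move_impl() has
 * relocated the znode.
 */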
void
zfs_znode_init(void)
{
	ASSERT(znode_cache == NULL);
	znode_cache = kmem_cache_create("zfs_znode_cache",
	    sizeof (znode_t), 0, zfs_znode_cache_constructor,
	    zfs_znode_cache_destructor, NULL, NULL, NULL, 0);
}

void
zfs_znode_fini(void)
{
	kmem_cache_destroy(znode_cache);
}
struct vnodeops *zfs_dvnodeops;
struct vnodeops *zfs_fvnodeops;
struct vnodeops *zfs_symvnodeops;
struct vnodeops *zfs_xdvnodeops;
struct vnodeops *zfs_evnodeops;
void
zfs_remove_op_tables()
{
	(void) vfs_freevfsops_by_type(zfsfstype);

	if (zfs_dvnodeops)
		vn_freevnodeops(zfs_dvnodeops);
	if (zfs_fvnodeops)
		vn_freevnodeops(zfs_fvnodeops);
	if (zfs_symvnodeops)
		vn_freevnodeops(zfs_symvnodeops);
	if (zfs_xdvnodeops)
		vn_freevnodeops(zfs_xdvnodeops);
	if (zfs_evnodeops)
		vn_freevnodeops(zfs_evnodeops);

	zfs_dvnodeops = NULL;
	zfs_fvnodeops = NULL;
	zfs_symvnodeops = NULL;
	zfs_xdvnodeops = NULL;
	zfs_evnodeops = NULL;
}
extern const fs_operation_def_t zfs_dvnodeops_template[];
extern const fs_operation_def_t zfs_fvnodeops_template[];
extern const fs_operation_def_t zfs_xdvnodeops_template[];
extern const fs_operation_def_t zfs_symvnodeops_template[];
extern const fs_operation_def_t zfs_evnodeops_template[];
int
zfs_create_op_tables()
{
	int error;

	/*
	 * zfs_dvnodeops can be set if mod_remove() calls mod_installfs()
	 * due to a failure to remove the 2nd modlinkage (zfs_modldrv).
	 * In this case we just return as the ops vectors are already set up.
	 */
	if (zfs_dvnodeops)
		return (0);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_dvnodeops_template,
	    &zfs_dvnodeops);
	if (error)
		return (error);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_fvnodeops_template,
	    &zfs_fvnodeops);
	if (error)
		return (error);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_symvnodeops_template,
	    &zfs_symvnodeops);
	if (error)
		return (error);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_xdvnodeops_template,
	    &zfs_xdvnodeops);
	if (error)
		return (error);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_evnodeops_template,
	    &zfs_evnodeops);

	return (error);
}
/*
 * zfs_init_fs - Initialize the zfsvfs struct and the file system
 *	incore "master" object.  Verify version compatibility.
 */
int
zfs_init_fs(zfsvfs_t *zfsvfs, znode_t **zpp)
{
	extern int zfsfstype;

	objset_t	*os = zfsvfs->z_os;
	uint64_t	fsid_guid;
	uint64_t	zval;
	int		i, error;

	error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
	if (error) {
		return (error);
	} else if (zfsvfs->z_version > ZPL_VERSION) {
		(void) printf("Mismatched versions: File system "
		    "is version %llu on-disk format, which is "
		    "incompatible with this software version %lld!",
		    (u_longlong_t)zfsvfs->z_version, ZPL_VERSION);
		return (ENOTSUP);
	}

	if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0)
		return (error);
	zfsvfs->z_norm = (int)zval;
	if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0)
		return (error);
	zfsvfs->z_utf8 = (zval != 0);
	if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0)
		return (error);
	zfsvfs->z_case = (uint_t)zval;

	/*
	 * Fold case on file systems that are always or sometimes case
	 * insensitive.
	 */
	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
	    zfsvfs->z_case == ZFS_CASE_MIXED)
		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;

	/*
	 * The fsid is 64 bits, composed of an 8-bit fs type, which
	 * separates our fsid from any other filesystem types, and a
	 * 56-bit objset unique ID.  The objset unique ID is unique to
	 * all objsets open on this system, provided by unique_create().
	 * The 8-bit fs type must be put in the low bits of fsid[1]
	 * because that's where other Solaris filesystems put it.
	 */
	fsid_guid = dmu_objset_fsid_guid(os);
	ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0);
	zfsvfs->z_vfs->mnt_stat.f_fsidx.__fsid_val[0] = fsid_guid;
	zfsvfs->z_vfs->mnt_stat.f_fsidx.__fsid_val[1] = ((fsid_guid >> 32) << 8) |
	    zfsfstype & 0xFF;
	zfsvfs->z_vfs->mnt_stat.f_fsid = fsid_guid;
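	/*
	 * Illustrative note (not part of the original code): per the comment
	 * above, the 64-bit fsid breaks down as
	 *
	 *	bits  0..55	objset unique ID from dmu_objset_fsid_guid()
	 *	bits 56..63	zero (asserted above)
	 *
	 * while __fsid_val[1] carries the upper bits of the unique ID shifted
	 * up by one byte, with the 8-bit filesystem type number in its low
	 * bits.
	 */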
484 error
= zap_lookup(os
, MASTER_NODE_OBJ
, ZFS_ROOT_OBJ
, 8, 1,
488 ASSERT(zfsvfs
->z_root
!= 0);
490 error
= zap_lookup(os
, MASTER_NODE_OBJ
, ZFS_UNLINKED_SET
, 8, 1,
491 &zfsvfs
->z_unlinkedobj
);
496 * Initialize zget mutex's
498 for (i
= 0; i
!= ZFS_OBJ_MTX_SZ
; i
++)
499 mutex_init(&zfsvfs
->z_hold_mtx
[i
], NULL
, MUTEX_DEFAULT
, NULL
);
501 error
= zfs_zget(zfsvfs
, zfsvfs
->z_root
, zpp
);
504 * On error, we destroy the mutexes here since it's not
505 * possible for the caller to determine if the mutexes were
506 * initialized properly.
508 for (i
= 0; i
!= ZFS_OBJ_MTX_SZ
; i
++)
509 mutex_destroy(&zfsvfs
->z_hold_mtx
[i
]);
512 ASSERT3U((*zpp
)->z_id
, ==, zfsvfs
->z_root
);
513 error
= zap_lookup(os
, MASTER_NODE_OBJ
, ZFS_FUID_TABLES
, 8, 1,
514 &zfsvfs
->z_fuid_obj
);
/*
 * define a couple of values we need available
 * for both 64 and 32 bit environments.
 */
#define	NBITSMINOR64	32
#define	MAXMAJ64	0xffffffffUL
#define	MAXMIN64	0xffffffffUL
/*
 * Create special expldev for ZFS private use.
 * Can't use standard expldev since it doesn't do
 * what we want.  The standard expldev() takes a
 * dev32_t in LP64 and expands it to a long dev_t.
 * We need an interface that takes a dev32_t in ILP32
 * and expands it to a long dev_t.
 */
uint64_t
zfs_expldev(dev_t dev)
{
	return ((uint64_t)major(dev) << NBITSMINOR64) |
	    minor(dev);
}
/*
 * Special cmpldev for ZFS private use.
 * Can't use standard cmpldev since it takes
 * a long dev_t and compresses it to dev32_t in
 * LP64.  We need to do a compaction of a long dev_t
 * to a dev32_t in ILP32.
 */
dev_t
zfs_cmpldev(uint64_t dev)
{
	minor_t minor = (minor_t)dev & MAXMIN64;
	major_t major = (major_t)(dev >> NBITSMINOR64) & MAXMAJ64;

	return makedev(minor, major);
}
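/*
 * Illustrative note (not from the original source): zfs_expldev() packs the
 * major number into the upper 32 bits of the on-disk value and the minor
 * number into the lower 32 bits, e.g. with major 13 and minor 2:
 *
 *	((uint64_t)13 << NBITSMINOR64) | 2  ==  0x0000000d00000002
 *
 * zfs_cmpldev() reverses the split by masking with MAXMIN64 and shifting by
 * NBITSMINOR64 before handing the pieces back to makedev().
 */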
static void
zfs_znode_dmu_init(zfsvfs_t *zfsvfs, znode_t *zp, dmu_buf_t *db)
{
	znode_t *nzp;

	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs));
	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)));

	mutex_enter(&zp->z_lock);

	ASSERT(zp->z_dbuf == NULL);
	nzp = dmu_buf_set_user_ie(db, zp, &zp->z_phys, znode_evict_error);

	/*
	 * There should be no
	 * concurrent zgets on this object.
	 */
	if (nzp != NULL)
		panic("existing znode %p for dbuf %p", (void *)nzp, (void *)db);

	/*
	 * Slap on VROOT if we are the root znode
	 */
	if (zp->z_id == zfsvfs->z_root)
		ZTOV(zp)->v_flag |= VROOT;

	mutex_exit(&zp->z_lock);
}
void
zfs_znode_dmu_fini(znode_t *zp)
{
	dmu_buf_t *db = zp->z_dbuf;

	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) ||
	    RW_WRITE_HELD(&zp->z_zfsvfs->z_teardown_inactive_lock));
	ASSERT(zp->z_dbuf != NULL);

	VERIFY(zp == dmu_buf_update_user(db, zp, NULL, NULL, NULL));
	dmu_buf_rele(db, NULL);
}
/*
 * Construct a new znode/vnode and initialize.
 *
 * This does not do a call to dmu_set_user(); that is
 * up to the caller to do, in case you don't want to
 * return the znode.
 */
static znode_t *
zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz)
{
	znode_t	*zp;
	vnode_t	*vp;
	int	error;

	zp = kmem_cache_alloc(znode_cache, KM_SLEEP);

	for (;;) {
		error = getnewvnode(VT_ZFS, zfsvfs->z_parent->z_vfs,
		    zfs_vnodeop_p, &zp->z_vnode);
		if (__predict_true(error == 0))
			break;
		printf("WARNING: zfs_znode_alloc: unable to get vnode, "
		    "error=%d\n", error);
		(void)kpause("zfsnewvn", false, hz, NULL);
	}

	ASSERT(zp->z_dirlocks == NULL);
	ASSERT(zp->z_dbuf == NULL);
	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));

	/*
	 * Defer setting z_zfsvfs until the znode is ready to be a candidate for
	 * the zfs_znode_move() callback.
	 */
	zp->z_atime_dirty = 0;
	zp->z_id = db->db_object;
	zp->z_seq = 0x7A4653;

	zfs_znode_dmu_init(zfsvfs, zp, db);

	zp->z_gen = zp->z_phys->zp_gen;

	vp = ZTOV(zp);
	vp->v_vfsp = zfsvfs->z_parent->z_vfs;
	vp->v_type = IFTOVT((mode_t)zp->z_phys->zp_mode);

	switch (vp->v_type) {
	case VDIR:
		zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */
		break;
	case VBLK:
	case VCHR:
		/* XXX NetBSD	vp->v_op = zfs_specop_p; */
		spec_node_init(vp, zfs_cmpldev(zp->z_phys->zp_rdev));
		break;
	case VFIFO:
		/* XXX NetBSD	vp->v_op = zfs_fifoop_p; */
		break;
	}

	dprintf("zfs_znode_alloc znode %p -- vnode %p\n", zp, vp);
	dprintf("zfs_znode_alloc z_id %ld\n", zp->z_id);

	uvm_vnp_setsize(vp, zp->z_phys->zp_size);

	mutex_enter(&zfsvfs->z_znodes_lock);
	list_insert_tail(&zfsvfs->z_all_znodes, zp);
	/*
	 * Everything else must be valid before assigning z_zfsvfs makes the
	 * znode eligible for zfs_znode_move().
	 */
	zp->z_zfsvfs = zfsvfs;
	mutex_exit(&zfsvfs->z_znodes_lock);

	return (zp);
}
/*
 * Create a new DMU object to hold a zfs znode.
 *
 *	IN:	dzp	- parent directory for new znode
 *		vap	- file attributes for new znode
 *		tx	- dmu transaction id for zap operations
 *		cr	- credentials of caller
 *		flag	- flags:
 *			  IS_ROOT_NODE	- new object will be root
 *			  IS_XATTR	- new object is an attribute
 *			  IS_REPLAY	- intent log replay
 *		bonuslen - length of bonus buffer
 *		setaclp	 - File/Dir initial ACL
 *		fuidp	 - Tracks fuid allocation.
 *
 *	OUT:	zpp	- allocated znode
 */
void
zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
    uint_t flag, znode_t **zpp, int bonuslen, zfs_acl_t *setaclp,
    zfs_fuid_info_t **fuidp)
{
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	znode_phys_t	*pzp;
	dmu_buf_t	*db;
	timestruc_t	now;
	uint64_t	obj, gen;
	int		err;

	ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));

	if (zfsvfs->z_assign >= TXG_INITIAL) {		/* ZIL replay */
		obj = vap->va_nodeid;
		now = vap->va_ctime;		/* see zfs_replay_create() */
		gen = vap->va_nblocks;		/* ditto */
	} else {
		obj = 0;
		gethrestime(&now);
		gen = dmu_tx_get_txg(tx);
	}

	/*
	 * Create a new DMU object.
	 *
	 * There's currently no mechanism for pre-reading the blocks that will
	 * be needed to allocate a new object, so we accept the small chance
	 * that there will be an i/o error and we will fail one of the
	 * assertions below.
	 */
	if (vap->va_type == VDIR) {
		if (flag & IS_REPLAY) {
			err = zap_create_claim_norm(zfsvfs->z_os, obj,
			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
			    DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx);
			ASSERT3U(err, ==, 0);
		} else {
			obj = zap_create_norm(zfsvfs->z_os,
			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
			    DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx);
		}
	} else {
		if (flag & IS_REPLAY) {
			err = dmu_object_claim(zfsvfs->z_os, obj,
			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
			    DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx);
			ASSERT3U(err, ==, 0);
		} else {
			obj = dmu_object_alloc(zfsvfs->z_os,
			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
			    DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx);
		}
	}
	VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, obj, NULL, &db));
	dmu_buf_will_dirty(db, tx);

	/*
	 * Initialize the znode physical data to zero.
	 */
	ASSERT(db->db_size >= sizeof (znode_phys_t));
	bzero(db->db_data, db->db_size);

	/*
	 * If this is the root, fix up the half-initialized parent pointer
	 * to reference the just-allocated physical data area.
	 */
	if (flag & IS_ROOT_NODE) {
		dzp->z_dbuf = db;
		dzp->z_phys = db->db_data;
		dzp->z_id = obj;
	}
	pzp = db->db_data;

	/*
	 * If parent is an xattr, so am I.
	 */
	if (dzp->z_phys->zp_flags & ZFS_XATTR)
		flag |= IS_XATTR;

	if (vap->va_type == VBLK || vap->va_type == VCHR) {
		pzp->zp_rdev = zfs_expldev(vap->va_rdev);
	}

	if (zfsvfs->z_use_fuids)
		pzp->zp_flags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;

	if (vap->va_type == VDIR) {
		pzp->zp_size = 2;		/* contents ("." and "..") */
		pzp->zp_links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1;
	}

	pzp->zp_parent = dzp->z_id;
	if (flag & IS_XATTR)
		pzp->zp_flags |= ZFS_XATTR;

	ZFS_TIME_ENCODE(&now, pzp->zp_crtime);
	ZFS_TIME_ENCODE(&now, pzp->zp_ctime);

	if (vap->va_mask & AT_ATIME) {
		ZFS_TIME_ENCODE(&vap->va_atime, pzp->zp_atime);
	} else {
		ZFS_TIME_ENCODE(&now, pzp->zp_atime);
	}

	if (vap->va_mask & AT_MTIME) {
		ZFS_TIME_ENCODE(&vap->va_mtime, pzp->zp_mtime);
	} else {
		ZFS_TIME_ENCODE(&now, pzp->zp_mtime);
	}

	pzp->zp_mode = MAKEIMODE(vap->va_type, vap->va_mode);
	if (!(flag & IS_ROOT_NODE)) {
		dprintf("zfs_mknode parent vp %p - zp %p\n", ZTOV(dzp), dzp);
		dprintf("Going to lock %p with %ld\n", ZFS_OBJ_MUTEX(zfsvfs, obj), obj);

		ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
		*zpp = zfs_znode_alloc(zfsvfs, db, 0);

		genfs_node_init(ZTOV(*zpp), &zfs_genfsops);

		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
	} else {
		/*
		 * If we are creating the root node, the "parent" we
		 * passed in is the znode for the root.
		 */
		*zpp = dzp;
	}
	zfs_perm_init(*zpp, dzp, flag, vap, tx, cr, setaclp, fuidp);
}
static void
zfs_xvattr_set(znode_t *zp, xvattr_t *xvap)
{
	xoptattr_t *xoap;

	xoap = xva_getxoptattr(xvap);

	if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
		ZFS_TIME_ENCODE(&xoap->xoa_createtime, zp->z_phys->zp_crtime);
		XVA_SET_RTN(xvap, XAT_CREATETIME);
	}
	if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
		ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly);
		XVA_SET_RTN(xvap, XAT_READONLY);
	}
	if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
		ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden);
		XVA_SET_RTN(xvap, XAT_HIDDEN);
	}
	if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
		ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system);
		XVA_SET_RTN(xvap, XAT_SYSTEM);
	}
	if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
		ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive);
		XVA_SET_RTN(xvap, XAT_ARCHIVE);
	}
	if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
		ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable);
		XVA_SET_RTN(xvap, XAT_IMMUTABLE);
	}
	if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
		ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink);
		XVA_SET_RTN(xvap, XAT_NOUNLINK);
	}
	if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
		ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly);
		XVA_SET_RTN(xvap, XAT_APPENDONLY);
	}
	if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
		ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump);
		XVA_SET_RTN(xvap, XAT_NODUMP);
	}
	if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
		ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque);
		XVA_SET_RTN(xvap, XAT_OPAQUE);
	}
	if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
		ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED,
		    xoap->xoa_av_quarantined);
		XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
	}
	if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
		ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified);
		XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
	}
	if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
		(void) memcpy(zp->z_phys + 1, xoap->xoa_av_scanstamp,
		    sizeof (xoap->xoa_av_scanstamp));
		zp->z_phys->zp_flags |= ZFS_BONUS_SCANSTAMP;
		XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
	}
}
int
zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
{
	dmu_object_info_t doi;
	dmu_buf_t	*db;
	znode_t		*zp;
	vnode_t		*vp;
	int		err;

again:
	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);

	err = dmu_bonus_hold(zfsvfs->z_os, obj_num, NULL, &db);
	if (err) {
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (err);
	}

	dmu_object_info_from_db(db, &doi);
	if (doi.doi_bonus_type != DMU_OT_ZNODE ||
	    doi.doi_bonus_size < sizeof (znode_phys_t)) {
		dmu_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (EINVAL);
	}

	zp = dmu_buf_get_user(db);
	if (zp != NULL) {
		mutex_enter(&zp->z_lock);

		/*
		 * Since we do immediate eviction of the z_dbuf, we
		 * should never find a dbuf with a znode that doesn't
		 * know about the dbuf.
		 */
		ASSERT3P(zp->z_dbuf, ==, db);
		ASSERT3U(zp->z_id, ==, obj_num);
		if (zp->z_unlinked) {
			err = ENOENT;
		} else if ((vp = ZTOV(zp)) != NULL) {
			mutex_enter(&vp->v_interlock);
			mutex_exit(&zp->z_lock);
			if (vget(vp, LK_INTERLOCK) != 0) {
				dmu_buf_rele(db, NULL);
				mutex_exit(&vp->v_interlock);
				goto again;
			}
			mutex_enter(&zp->z_lock);
		} else {
			ZFS_LOG(1, "dying znode detected (zp=%p)", zp);

			/*
			 * znode is dying so we can't reuse it, we must
			 * wait until destruction is completed.
			 */
			dmu_buf_rele(db, NULL);
			mutex_exit(&zp->z_lock);
			ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
			kpause("zcollide", 0, 1, NULL);
			goto again;
		}

		*zpp = zp;
		dmu_buf_rele(db, NULL);
		mutex_exit(&zp->z_lock);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (err);
	}

	/*
	 * Not found, create new znode/vnode
	 */
	zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size);
	vp = ZTOV(zp);
	genfs_node_init(vp, &zfs_genfsops);
	*zpp = zp;
	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
	return (0);
}
int
zfs_rezget(znode_t *zp)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	dmu_object_info_t doi;
	dmu_buf_t *db;
	uint64_t obj_num = zp->z_id;
	int err;

	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);

	err = dmu_bonus_hold(zfsvfs->z_os, obj_num, NULL, &db);
	if (err) {
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (err);
	}

	dmu_object_info_from_db(db, &doi);
	if (doi.doi_bonus_type != DMU_OT_ZNODE ||
	    doi.doi_bonus_size < sizeof (znode_phys_t)) {
		dmu_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (EINVAL);
	}

	if (((znode_phys_t *)db->db_data)->zp_gen != zp->z_gen) {
		dmu_buf_rele(db, NULL);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
		return (EIO);
	}

	zfs_znode_dmu_init(zfsvfs, zp, db);
	zp->z_unlinked = (zp->z_phys->zp_links == 0);
	zp->z_blksz = doi.doi_data_block_size;

	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);

	return (0);
}
void
zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	objset_t *os = zfsvfs->z_os;
	uint64_t obj = zp->z_id;
	uint64_t acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj;

	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
	if (acl_obj)
		VERIFY(0 == dmu_object_free(os, acl_obj, tx));
	VERIFY(0 == dmu_object_free(os, obj, tx));
	zfs_znode_dmu_fini(zp);
	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
}
/*
 * zfs_zinactive must be called with ZFS_OBJ_HOLD_ENTER held, and this lock
 * will be released in zfs_zinactive.
 */
void
zfs_zinactive(znode_t *zp)
{
	vnode_t	*vp = ZTOV(zp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;

	ASSERT(zp->z_dbuf && zp->z_phys);

	//printf("zfs_zinactive vp %p - zp %p\n", vp, zp);
	//printf("Going to lock %p with %ld\n", ZFS_OBJ_MUTEX(zfsvfs, z_id), z_id);

	mutex_enter(&zp->z_lock);
	/*
	 * If this was the last reference to a file with no links,
	 * remove the file from the file system.
	 */
	if (zp->z_unlinked) {
		mutex_exit(&zp->z_lock);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id);
		zfs_rmnode(zp);
		return;
	}
	mutex_exit(&zp->z_lock);
	ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id);
}
void
zfs_znode_free(znode_t *zp)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;

	ASSERT(ZTOV(zp) == NULL);

	dprintf("destroying znode %p\n", zp);

	mutex_enter(&zfsvfs->z_znodes_lock);
	POINTER_INVALIDATE(&zp->z_zfsvfs);
	list_remove(&zfsvfs->z_all_znodes, zp);
	mutex_exit(&zfsvfs->z_znodes_lock);

	kmem_cache_free(znode_cache, zp);

	VFS_RELE(zfsvfs->z_vfs);
}
void
zfs_time_stamper_locked(znode_t *zp, uint_t flag, dmu_tx_t *tx)
{
	timestruc_t	now;

	ASSERT(MUTEX_HELD(&zp->z_lock));

	gethrestime(&now);

	if (tx) {
		dmu_buf_will_dirty(zp->z_dbuf, tx);
		zp->z_atime_dirty = 0;
	} else {
		zp->z_atime_dirty = 1;
	}

	if (flag & AT_ATIME)
		ZFS_TIME_ENCODE(&now, zp->z_phys->zp_atime);

	if (flag & AT_MTIME) {
		ZFS_TIME_ENCODE(&now, zp->z_phys->zp_mtime);
		if (zp->z_zfsvfs->z_use_fuids)
			zp->z_phys->zp_flags |= (ZFS_ARCHIVE | ZFS_AV_MODIFIED);
	}

	if (flag & AT_CTIME) {
		ZFS_TIME_ENCODE(&now, zp->z_phys->zp_ctime);
		if (zp->z_zfsvfs->z_use_fuids)
			zp->z_phys->zp_flags |= ZFS_ARCHIVE;
	}
}
/*
 * Update the requested znode timestamps with the current time.
 * If we are in a transaction, then go ahead and mark the znode
 * dirty in the transaction so the timestamps will go to disk.
 * Otherwise, we will get pushed next time the znode is updated
 * in a transaction, or when this znode eventually goes inactive.
 *
 *	1 - Only the ACCESS time is ever updated outside of a transaction.
 *	2 - Multiple consecutive updates will be collapsed into a single
 *	    znode update by the transaction grouping semantics of the DMU.
 */
void
zfs_time_stamper(znode_t *zp, uint_t flag, dmu_tx_t *tx)
{
	mutex_enter(&zp->z_lock);
	zfs_time_stamper_locked(zp, flag, tx);
	mutex_exit(&zp->z_lock);
}
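/*
 * Illustrative note (not from the original source): the flag argument is a
 * mask of AT_ATIME, AT_MTIME and AT_CTIME bits, as the checks in
 * zfs_time_stamper_locked() show.  A caller updating only the access time
 * outside of a transaction can pass AT_ATIME with tx == NULL, which simply
 * marks z_atime_dirty so the atime is pushed out with a later update.
 */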
/*
 * Grow the block size for a file.
 *
 *	IN:	zp	- znode of file to free data in.
 *		size	- requested block size
 *		tx	- open transaction.
 *
 * NOTE: this function assumes that the znode is write locked.
 */
void
zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx)
{
	int		error;
	u_longlong_t	dummy;

	if (size <= zp->z_blksz)
		return;
	/*
	 * If the file size is already greater than the current blocksize,
	 * we will not grow.  If there is more than one block in a file,
	 * the blocksize cannot change.
	 */
	if (zp->z_blksz && zp->z_phys->zp_size > zp->z_blksz)
		return;

	error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id,
	    size, 0, tx);
	if (error == ENOTSUP)
		return;
	ASSERT3U(error, ==, 0);

	/* What blocksize did we actually get? */
	dmu_object_size_from_db(zp->z_dbuf, &zp->z_blksz, &dummy);
}
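/*
 * Illustrative note (not from the original source): the growth rule above
 * only applies while the file still fits in a single block.  For example, a
 * file whose length is under its current 4K block size may have that block
 * grown toward the requested size, while a file that already spans more
 * than one block keeps its block size unchanged.
 */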
/*
 * Increase the file length
 *
 *	IN:	zp	- znode of file to free data in.
 *		end	- new end-of-file
 *
 *	RETURN:	0 if success
 *		error code if failure
 */
static int
zfs_extend(znode_t *zp, uint64_t end)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	dmu_tx_t *tx;
	rl_t *rl;
	uint64_t newblksz;
	int error;

	/*
	 * We will change zp_size, lock the whole file.
	 */
	rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);

	/*
	 * Nothing to do if file already at desired length.
	 */
	if (end <= zp->z_phys->zp_size) {
		zfs_range_unlock(rl);
		return (0);
	}
top:
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_bonus(tx, zp->z_id);
	if (end > zp->z_blksz &&
	    (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) {
		/*
		 * We are growing the file past the current block size.
		 */
		if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) {
			ASSERT(!ISP2(zp->z_blksz));
			newblksz = MIN(end, SPA_MAXBLOCKSIZE);
		} else {
			newblksz = MIN(end, zp->z_zfsvfs->z_max_blksz);
		}
		dmu_tx_hold_write(tx, zp->z_id, 0, newblksz);
	} else {
		newblksz = 0;
	}

	error = dmu_tx_assign(tx, zfsvfs->z_assign);
	if (error) {
		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		zfs_range_unlock(rl);
		return (error);
	}
	dmu_buf_will_dirty(zp->z_dbuf, tx);

	if (newblksz)
		zfs_grow_blocksize(zp, newblksz, tx);

	zp->z_phys->zp_size = end;

	zfs_range_unlock(rl);

	dmu_tx_commit(tx);

	rw_enter(&zp->z_map_lock, RW_WRITER);
	uvm_vnp_setsize(ZTOV(zp), end);
	rw_exit(&zp->z_map_lock);

	return (0);
}
/*
 * Free space in a file.
 *
 *	IN:	zp	- znode of file to free data in.
 *		off	- start of section to free.
 *		len	- length of section to free.
 *
 *	RETURN:	0 if success
 *		error code if failure
 */
static int
zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	rl_t *rl;
	int error;

	/*
	 * Lock the range being freed.
	 */
	rl = zfs_range_lock(zp, off, len, RL_WRITER);

	/*
	 * Nothing to do if file already at desired length.
	 */
	if (off >= zp->z_phys->zp_size) {
		zfs_range_unlock(rl);
		return (0);
	}

	if (off + len > zp->z_phys->zp_size)
		len = zp->z_phys->zp_size - off;

	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len);

	/*
	 * In NetBSD we cannot free a block in the middle of a file,
	 * but only at the end of a file.
	 */
	rw_enter(&zp->z_map_lock, RW_WRITER);
	uvm_vnp_setsize(ZTOV(zp), off);
	rw_exit(&zp->z_map_lock);

	zfs_range_unlock(rl);

	return (error);
}
/*
 *	IN:	zp	- znode of file to free data in.
 *		end	- new end-of-file.
 *
 *	RETURN:	0 if success
 *		error code if failure
 */
static int
zfs_trunc(znode_t *zp, uint64_t end)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	vnode_t *vp = ZTOV(zp);
	dmu_tx_t *tx;
	rl_t *rl;
	int error;

	/*
	 * We will change zp_size, lock the whole file.
	 */
	rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);

	/*
	 * Nothing to do if file already at desired length.
	 */
	if (end >= zp->z_phys->zp_size) {
		zfs_range_unlock(rl);
		return (0);
	}

	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end, -1);
	if (error) {
		zfs_range_unlock(rl);
		return (error);
	}
top:
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_bonus(tx, zp->z_id);
	error = dmu_tx_assign(tx, zfsvfs->z_assign);
	if (error) {
		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		zfs_range_unlock(rl);
		return (error);
	}
	dmu_buf_will_dirty(zp->z_dbuf, tx);

	zp->z_phys->zp_size = end;

	dmu_tx_commit(tx);

	zfs_range_unlock(rl);

	/*
	 * Clear any mapped pages in the truncated region.  This has to
	 * happen outside of the transaction to avoid the possibility of
	 * a deadlock with someone trying to push a page that we are
	 * about to invalidate.
	 */
	rw_enter(&zp->z_map_lock, RW_WRITER);
	uvm_vnp_setsize(vp, end);
	rw_exit(&zp->z_map_lock);

	return (0);
}
/*
 * Free space in a file
 *
 *	IN:	zp	- znode of file to free data in.
 *		off	- start of range
 *		len	- end of range (0 => EOF)
 *		flag	- current file open mode flags.
 *		log	- TRUE if this action should be logged
 *
 *	RETURN:	0 if success
 *		error code if failure
 */
int
zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
{
	vnode_t *vp = ZTOV(zp);
	dmu_tx_t *tx;
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	zilog_t *zilog = zfsvfs->z_log;
	int error;

	if (off > zp->z_phys->zp_size) {
		error = zfs_extend(zp, off+len);
		if (error == 0 && log)
			goto log;
		else
			return (error);
	}

	if (len == 0) {
		error = zfs_trunc(zp, off);
	} else {
		if ((error = zfs_free_range(zp, off, len)) == 0 &&
		    off + len > zp->z_phys->zp_size)
			error = zfs_extend(zp, off+len);
	}
	if (error || !log)
		return (error);
log:
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_bonus(tx, zp->z_id);
	error = dmu_tx_assign(tx, zfsvfs->z_assign);
	if (error) {
		if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) {
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto log;
		}
		dmu_tx_abort(tx);
		return (error);
	}

	zfs_time_stamper(zp, CONTENT_MODIFIED, tx);
	zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);

	dmu_tx_commit(tx);
	return (0);
}
void
zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
{
	zfsvfs_t	zfsvfs;
	uint64_t	moid, doid, version;
	uint64_t	sense = ZFS_CASE_SENSITIVE;
	uint64_t	norm = 0;
	nvpair_t	*elem;
	int		error;
	znode_t		*rootzp = NULL;
	znode_t		*zp;
	vattr_t		vattr;

	/*
	 * First attempt to create master node.
	 */
	/*
	 * In an empty objset, there are no blocks to read and thus
	 * there can be no i/o errors (which we assert below).
	 */
	moid = MASTER_NODE_OBJ;
	error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE,
	    DMU_OT_NONE, 0, tx);
	ASSERT(error == 0);

	/*
	 * Set starting attributes.
	 */
	if (spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID)
		version = ZPL_VERSION;
	else
		version = ZPL_VERSION_FUID - 1;
	error = zap_update(os, moid, ZPL_VERSION_STR,
	    8, 1, &version, tx);

	elem = NULL;
	while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) {
		/* For the moment we expect all zpl props to be uint64_ts */
		uint64_t val;
		char *name;

		ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64);
		VERIFY(nvpair_value_uint64(elem, &val) == 0);
		name = nvpair_name(elem);
		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) {
			version = val;
			error = zap_update(os, moid, ZPL_VERSION_STR,
			    8, 1, &version, tx);
		} else {
			error = zap_update(os, moid, name, 8, 1, &val, tx);
		}
		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0)
			norm = val;
		else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0)
			sense = val;
	}
	ASSERT(version != 0);

	/*
	 * Create a delete queue.
	 */
	doid = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);

	error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &doid, tx);

	/*
	 * Create root znode.  Create minimal znode/vnode/zfsvfs
	 * to allow zfs_mknode to work.
	 */
	vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
	vattr.va_type = VDIR;
	vattr.va_mode = S_IFDIR|0755;
	vattr.va_uid = crgetuid(cr);
	vattr.va_gid = crgetgid(cr);

	rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP);
	rootzp->z_unlinked = 0;
	rootzp->z_atime_dirty = 0;

	for (;;) {
		error = getnewvnode(VT_ZFS, NULL, zfs_vnodeop_p,
		    &rootzp->z_vnode);
		if (error == 0)
			break;
		printf("WARNING: zfs_create_fs: unable to get vnode, "
		    "error=%d\n", error);
		kpause("zfsvn", false, hz, NULL);
	}

	bzero(&zfsvfs, sizeof (zfsvfs_t));

	zfsvfs.z_os = os;
	zfsvfs.z_assign = TXG_NOWAIT;
	zfsvfs.z_parent = &zfsvfs;
	zfsvfs.z_version = version;
	zfsvfs.z_use_fuids = USE_FUIDS(version, os);
	zfsvfs.z_norm = norm;
	/*
	 * Fold case on file systems that are always or sometimes case
	 * insensitive.
	 */
	if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
		zfsvfs.z_norm |= U8_TEXTPREP_TOUPPER;

	mutex_init(&zfsvfs.z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&zfsvfs.z_all_znodes, sizeof (znode_t),
	    offsetof(znode_t, z_link_node));

	ASSERT(!POINTER_IS_VALID(rootzp->z_zfsvfs));
	rootzp->z_zfsvfs = &zfsvfs;
	zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, 0, NULL, NULL);
	ASSERT3P(zp, ==, rootzp);
	error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
	POINTER_INVALIDATE(&rootzp->z_zfsvfs);

	dmu_buf_rele(rootzp->z_dbuf, NULL);
	rootzp->z_dbuf = NULL;

	kmem_cache_free(znode_cache, rootzp);
}
#endif /* _KERNEL */
/*
 * Given an object number, return its parent object number and whether
 * or not the object is an extended attribute directory.
 */
static int
zfs_obj_to_pobj(objset_t *osp, uint64_t obj, uint64_t *pobjp, int *is_xattrdir)
{
	dmu_buf_t *db;
	dmu_object_info_t doi;
	znode_phys_t *zp;
	int error;

	if ((error = dmu_bonus_hold(osp, obj, FTAG, &db)) != 0)
		return (error);

	dmu_object_info_from_db(db, &doi);
	if (doi.doi_bonus_type != DMU_OT_ZNODE ||
	    doi.doi_bonus_size < sizeof (znode_phys_t)) {
		dmu_buf_rele(db, FTAG);
		return (EINVAL);
	}

	zp = db->db_data;
	*pobjp = zp->zp_parent;
	*is_xattrdir = ((zp->zp_flags & ZFS_XATTR) != 0) &&
	    S_ISDIR(zp->zp_mode);
	dmu_buf_rele(db, FTAG);

	return (0);
}
int
zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len)
{
	char *path = buf + len - 1;
	int error;

	*path = '\0';

	for (;;) {
		uint64_t pobj;
		char component[MAXNAMELEN + 2];
		size_t complen;
		int is_xattrdir;

		if ((error = zfs_obj_to_pobj(osp, obj, &pobj,
		    &is_xattrdir)) != 0)
			break;

		if (pobj == obj)
			break;

		component[0] = '/';
		if (is_xattrdir) {
			(void) sprintf(component + 1, "<xattrdir>");
		} else {
			error = zap_value_search(osp, pobj, obj,
			    ZFS_DIRENT_OBJ(-1ULL), component + 1);
			if (error != 0)
				break;
		}

		complen = strlen(component);
		path -= complen;
		ASSERT(path >= buf);
		bcopy(component, path, complen);
		obj = pobj;
	}

	if (error == 0)
		(void) memmove(buf, path, buf + len - path);
	return (error);
}