4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
25 * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
26 * Copyright 2017 Nexenta Systems, Inc.
32 #include <sys/zfs_context.h>
34 #include <sys/zap_impl.h>
35 #include <sys/zap_leaf.h>
38 #include <sys/dmu_objset.h>
41 #include <sys/sunddi.h>
44 static int mzap_upgrade(zap_t
**zapp
,
45 void *tag
, dmu_tx_t
*tx
, zap_flags_t flags
);
48 zap_getflags(zap_t
*zap
)
52 return (zap_f_phys(zap
)->zap_flags
);
56 zap_hashbits(zap_t
*zap
)
58 if (zap_getflags(zap
) & ZAP_FLAG_HASH64
)
67 if (zap_getflags(zap
) & ZAP_FLAG_HASH64
)
74 zap_hash(zap_name_t
*zn
)
76 zap_t
*zap
= zn
->zn_zap
;
79 if (zap_getflags(zap
) & ZAP_FLAG_PRE_HASHED_KEY
) {
80 ASSERT(zap_getflags(zap
) & ZAP_FLAG_UINT64_KEY
);
81 h
= *(uint64_t *)zn
->zn_key_orig
;
85 ASSERT(zfs_crc64_table
[128] == ZFS_CRC64_POLY
);
87 if (zap_getflags(zap
) & ZAP_FLAG_UINT64_KEY
) {
88 const uint64_t *wp
= zn
->zn_key_norm
;
90 ASSERT(zn
->zn_key_intlen
== 8);
91 for (int i
= 0; i
< zn
->zn_key_norm_numints
;
95 for (int j
= 0; j
< zn
->zn_key_intlen
; j
++) {
97 zfs_crc64_table
[(h
^ word
) & 0xFF];
102 const uint8_t *cp
= zn
->zn_key_norm
;
105 * We previously stored the terminating null on
106 * disk, but didn't hash it, so we need to
107 * continue to not hash it. (The
108 * zn_key_*_numints includes the terminating
109 * null for non-binary keys.)
111 int len
= zn
->zn_key_norm_numints
- 1;
113 ASSERT(zn
->zn_key_intlen
== 1);
114 for (int i
= 0; i
< len
; cp
++, i
++) {
116 zfs_crc64_table
[(h
^ *cp
) & 0xFF];
121 * Don't use all 64 bits, since we need some in the cookie for
122 * the collision differentiator. We MUST use the high bits,
123 * since those are the ones that we first pay attention to when
124 * choosing the bucket.
126 h
&= ~((1ULL << (64 - zap_hashbits(zap
))) - 1);
132 zap_normalize(zap_t
*zap
, const char *name
, char *namenorm
, int normflags
)
134 ASSERT(!(zap_getflags(zap
) & ZAP_FLAG_UINT64_KEY
));
136 size_t inlen
= strlen(name
) + 1;
137 size_t outlen
= ZAP_MAXNAMELEN
;
140 (void) u8_textprep_str((char *)name
, &inlen
, namenorm
, &outlen
,
141 normflags
| U8_TEXTPREP_IGNORE_NULL
| U8_TEXTPREP_IGNORE_INVALID
,
142 U8_UNICODE_LATEST
, &err
);
148 zap_match(zap_name_t
*zn
, const char *matchname
)
150 ASSERT(!(zap_getflags(zn
->zn_zap
) & ZAP_FLAG_UINT64_KEY
));
152 if (zn
->zn_matchtype
& MT_NORMALIZE
) {
153 char norm
[ZAP_MAXNAMELEN
];
155 if (zap_normalize(zn
->zn_zap
, matchname
, norm
,
156 zn
->zn_normflags
) != 0)
159 return (strcmp(zn
->zn_key_norm
, norm
) == 0);
161 return (strcmp(zn
->zn_key_orig
, matchname
) == 0);
166 zap_name_free(zap_name_t
*zn
)
168 kmem_free(zn
, sizeof (zap_name_t
));
172 zap_name_alloc(zap_t
*zap
, const char *key
, matchtype_t mt
)
174 zap_name_t
*zn
= kmem_alloc(sizeof (zap_name_t
), KM_SLEEP
);
177 zn
->zn_key_intlen
= sizeof (*key
);
178 zn
->zn_key_orig
= key
;
179 zn
->zn_key_orig_numints
= strlen(zn
->zn_key_orig
) + 1;
180 zn
->zn_matchtype
= mt
;
181 zn
->zn_normflags
= zap
->zap_normflags
;
184 * If we're dealing with a case sensitive lookup on a mixed or
185 * insensitive fs, remove U8_TEXTPREP_TOUPPER or the lookup
186 * will fold case to all caps overriding the lookup request.
188 if (mt
& MT_MATCH_CASE
)
189 zn
->zn_normflags
&= ~U8_TEXTPREP_TOUPPER
;
191 if (zap
->zap_normflags
) {
193 * We *must* use zap_normflags because this normalization is
194 * what the hash is computed from.
196 if (zap_normalize(zap
, key
, zn
->zn_normbuf
,
197 zap
->zap_normflags
) != 0) {
201 zn
->zn_key_norm
= zn
->zn_normbuf
;
202 zn
->zn_key_norm_numints
= strlen(zn
->zn_key_norm
) + 1;
208 zn
->zn_key_norm
= zn
->zn_key_orig
;
209 zn
->zn_key_norm_numints
= zn
->zn_key_orig_numints
;
212 zn
->zn_hash
= zap_hash(zn
);
214 if (zap
->zap_normflags
!= zn
->zn_normflags
) {
216 * We *must* use zn_normflags because this normalization is
217 * what the matching is based on. (Not the hash!)
219 if (zap_normalize(zap
, key
, zn
->zn_normbuf
,
220 zn
->zn_normflags
) != 0) {
224 zn
->zn_key_norm_numints
= strlen(zn
->zn_key_norm
) + 1;
231 zap_name_alloc_uint64(zap_t
*zap
, const uint64_t *key
, int numints
)
233 zap_name_t
*zn
= kmem_alloc(sizeof (zap_name_t
), KM_SLEEP
);
235 ASSERT(zap
->zap_normflags
== 0);
237 zn
->zn_key_intlen
= sizeof (*key
);
238 zn
->zn_key_orig
= zn
->zn_key_norm
= key
;
239 zn
->zn_key_orig_numints
= zn
->zn_key_norm_numints
= numints
;
240 zn
->zn_matchtype
= 0;
242 zn
->zn_hash
= zap_hash(zn
);
247 mzap_byteswap(mzap_phys_t
*buf
, size_t size
)
249 buf
->mz_block_type
= BSWAP_64(buf
->mz_block_type
);
250 buf
->mz_salt
= BSWAP_64(buf
->mz_salt
);
251 buf
->mz_normflags
= BSWAP_64(buf
->mz_normflags
);
252 int max
= (size
/ MZAP_ENT_LEN
) - 1;
253 for (int i
= 0; i
< max
; i
++) {
254 buf
->mz_chunk
[i
].mze_value
=
255 BSWAP_64(buf
->mz_chunk
[i
].mze_value
);
256 buf
->mz_chunk
[i
].mze_cd
=
257 BSWAP_32(buf
->mz_chunk
[i
].mze_cd
);
262 zap_byteswap(void *buf
, size_t size
)
264 uint64_t block_type
= *(uint64_t *)buf
;
266 if (block_type
== ZBT_MICRO
|| block_type
== BSWAP_64(ZBT_MICRO
)) {
267 /* ASSERT(magic == ZAP_LEAF_MAGIC); */
268 mzap_byteswap(buf
, size
);
270 fzap_byteswap(buf
, size
);
275 mze_compare(const void *arg1
, const void *arg2
)
277 const mzap_ent_t
*mze1
= arg1
;
278 const mzap_ent_t
*mze2
= arg2
;
280 int cmp
= TREE_CMP(mze1
->mze_hash
, mze2
->mze_hash
);
284 return (TREE_CMP(mze1
->mze_cd
, mze2
->mze_cd
));
288 mze_insert(zap_t
*zap
, int chunkid
, uint64_t hash
)
290 ASSERT(zap
->zap_ismicro
);
291 ASSERT(RW_WRITE_HELD(&zap
->zap_rwlock
));
293 mzap_ent_t
*mze
= kmem_alloc(sizeof (mzap_ent_t
), KM_SLEEP
);
294 mze
->mze_chunkid
= chunkid
;
295 mze
->mze_hash
= hash
;
296 mze
->mze_cd
= MZE_PHYS(zap
, mze
)->mze_cd
;
297 ASSERT(MZE_PHYS(zap
, mze
)->mze_name
[0] != 0);
298 avl_add(&zap
->zap_m
.zap_avl
, mze
);
302 mze_find(zap_name_t
*zn
)
304 mzap_ent_t mze_tofind
;
307 avl_tree_t
*avl
= &zn
->zn_zap
->zap_m
.zap_avl
;
309 ASSERT(zn
->zn_zap
->zap_ismicro
);
310 ASSERT(RW_LOCK_HELD(&zn
->zn_zap
->zap_rwlock
));
312 mze_tofind
.mze_hash
= zn
->zn_hash
;
313 mze_tofind
.mze_cd
= 0;
315 mze
= avl_find(avl
, &mze_tofind
, &idx
);
317 mze
= avl_nearest(avl
, idx
, AVL_AFTER
);
318 for (; mze
&& mze
->mze_hash
== zn
->zn_hash
; mze
= AVL_NEXT(avl
, mze
)) {
319 ASSERT3U(mze
->mze_cd
, ==, MZE_PHYS(zn
->zn_zap
, mze
)->mze_cd
);
320 if (zap_match(zn
, MZE_PHYS(zn
->zn_zap
, mze
)->mze_name
))
328 mze_find_unused_cd(zap_t
*zap
, uint64_t hash
)
330 mzap_ent_t mze_tofind
;
332 avl_tree_t
*avl
= &zap
->zap_m
.zap_avl
;
334 ASSERT(zap
->zap_ismicro
);
335 ASSERT(RW_LOCK_HELD(&zap
->zap_rwlock
));
337 mze_tofind
.mze_hash
= hash
;
338 mze_tofind
.mze_cd
= 0;
341 for (mzap_ent_t
*mze
= avl_find(avl
, &mze_tofind
, &idx
);
342 mze
&& mze
->mze_hash
== hash
; mze
= AVL_NEXT(avl
, mze
)) {
343 if (mze
->mze_cd
!= cd
)
/*
 * Upper bound on the fatzap leaf chunks one microzap entry needs after
 * an upgrade: one chunk for the entry itself, plus the array chunks for
 * a name of MZAP_NAME_LEN bytes, plus the array chunks for one uint64_t
 * value.
 */
#define	MZAP_ENT_CHUNKS	(1 + ZAP_LEAF_ARRAY_NCHUNKS(MZAP_NAME_LEN) + \
	ZAP_LEAF_ARRAY_NCHUNKS(sizeof (uint64_t)))
359 * Check if the current entry keeps the colliding entries under the fatzap leaf
363 mze_canfit_fzap_leaf(zap_name_t
*zn
, uint64_t hash
)
365 zap_t
*zap
= zn
->zn_zap
;
366 mzap_ent_t mze_tofind
;
369 avl_tree_t
*avl
= &zap
->zap_m
.zap_avl
;
370 uint32_t mzap_ents
= 0;
372 mze_tofind
.mze_hash
= hash
;
373 mze_tofind
.mze_cd
= 0;
375 for (mze
= avl_find(avl
, &mze_tofind
, &idx
);
376 mze
&& mze
->mze_hash
== hash
; mze
= AVL_NEXT(avl
, mze
)) {
380 /* Include the new entry being added */
383 return (ZAP_LEAF_NUMCHUNKS_DEF
> (mzap_ents
* MZAP_ENT_CHUNKS
));
387 mze_remove(zap_t
*zap
, mzap_ent_t
*mze
)
389 ASSERT(zap
->zap_ismicro
);
390 ASSERT(RW_WRITE_HELD(&zap
->zap_rwlock
));
392 avl_remove(&zap
->zap_m
.zap_avl
, mze
);
393 kmem_free(mze
, sizeof (mzap_ent_t
));
397 mze_destroy(zap_t
*zap
)
400 void *avlcookie
= NULL
;
402 while ((mze
= avl_destroy_nodes(&zap
->zap_m
.zap_avl
, &avlcookie
)))
403 kmem_free(mze
, sizeof (mzap_ent_t
));
404 avl_destroy(&zap
->zap_m
.zap_avl
);
408 mzap_open(objset_t
*os
, uint64_t obj
, dmu_buf_t
*db
)
411 uint64_t *zap_hdr
= (uint64_t *)db
->db_data
;
412 uint64_t zap_block_type
= zap_hdr
[0];
413 uint64_t zap_magic
= zap_hdr
[1];
415 ASSERT3U(MZAP_ENT_LEN
, ==, sizeof (mzap_ent_phys_t
));
417 zap_t
*zap
= kmem_zalloc(sizeof (zap_t
), KM_SLEEP
);
418 rw_init(&zap
->zap_rwlock
, NULL
, RW_DEFAULT
, NULL
);
419 rw_enter(&zap
->zap_rwlock
, RW_WRITER
);
420 zap
->zap_objset
= os
;
421 zap
->zap_object
= obj
;
424 if (zap_block_type
!= ZBT_MICRO
) {
425 mutex_init(&zap
->zap_f
.zap_num_entries_mtx
, 0, MUTEX_DEFAULT
,
427 zap
->zap_f
.zap_block_shift
= highbit64(db
->db_size
) - 1;
428 if (zap_block_type
!= ZBT_HEADER
|| zap_magic
!= ZAP_MAGIC
) {
429 winner
= NULL
; /* No actual winner here... */
433 zap
->zap_ismicro
= TRUE
;
437 * Make sure that zap_ismicro is set before we let others see
438 * it, because zap_lockdir() checks zap_ismicro without the lock
441 dmu_buf_init_user(&zap
->zap_dbu
, zap_evict_sync
, NULL
, &zap
->zap_dbuf
);
442 winner
= dmu_buf_set_user(db
, &zap
->zap_dbu
);
447 if (zap
->zap_ismicro
) {
448 zap
->zap_salt
= zap_m_phys(zap
)->mz_salt
;
449 zap
->zap_normflags
= zap_m_phys(zap
)->mz_normflags
;
450 zap
->zap_m
.zap_num_chunks
= db
->db_size
/ MZAP_ENT_LEN
- 1;
451 avl_create(&zap
->zap_m
.zap_avl
, mze_compare
,
452 sizeof (mzap_ent_t
), offsetof(mzap_ent_t
, mze_node
));
454 for (int i
= 0; i
< zap
->zap_m
.zap_num_chunks
; i
++) {
455 mzap_ent_phys_t
*mze
=
456 &zap_m_phys(zap
)->mz_chunk
[i
];
457 if (mze
->mze_name
[0]) {
460 zap
->zap_m
.zap_num_entries
++;
461 zn
= zap_name_alloc(zap
, mze
->mze_name
, 0);
462 mze_insert(zap
, i
, zn
->zn_hash
);
467 zap
->zap_salt
= zap_f_phys(zap
)->zap_salt
;
468 zap
->zap_normflags
= zap_f_phys(zap
)->zap_normflags
;
470 ASSERT3U(sizeof (struct zap_leaf_header
), ==,
471 2*ZAP_LEAF_CHUNKSIZE
);
474 * The embedded pointer table should not overlap the
477 ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap
, 0), >,
478 &zap_f_phys(zap
)->zap_salt
);
481 * The embedded pointer table should end at the end of
484 ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap
,
485 1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap
)) -
486 (uintptr_t)zap_f_phys(zap
), ==,
487 zap
->zap_dbuf
->db_size
);
489 rw_exit(&zap
->zap_rwlock
);
493 rw_exit(&zap
->zap_rwlock
);
494 rw_destroy(&zap
->zap_rwlock
);
495 if (!zap
->zap_ismicro
)
496 mutex_destroy(&zap
->zap_f
.zap_num_entries_mtx
);
497 kmem_free(zap
, sizeof (zap_t
));
502 * This routine "consumes" the caller's hold on the dbuf, which must
503 * have the specified tag.
506 zap_lockdir_impl(dmu_buf_t
*db
, void *tag
, dmu_tx_t
*tx
,
507 krw_t lti
, boolean_t fatreader
, boolean_t adding
, zap_t
**zapp
)
509 ASSERT0(db
->db_offset
);
510 objset_t
*os
= dmu_buf_get_objset(db
);
511 uint64_t obj
= db
->db_object
;
512 dmu_object_info_t doi
;
516 dmu_object_info_from_db(db
, &doi
);
517 if (DMU_OT_BYTESWAP(doi
.doi_type
) != DMU_BSWAP_ZAP
)
518 return (SET_ERROR(EINVAL
));
520 zap_t
*zap
= dmu_buf_get_user(db
);
522 zap
= mzap_open(os
, obj
, db
);
525 * mzap_open() didn't like what it saw on-disk.
526 * Check for corruption!
528 return (SET_ERROR(EIO
));
533 * We're checking zap_ismicro without the lock held, in order to
534 * tell what type of lock we want. Once we have some sort of
535 * lock, see if it really is the right type. In practice this
536 * can only be different if it was upgraded from micro to fat,
537 * and micro wanted WRITER but fat only needs READER.
539 krw_t lt
= (!zap
->zap_ismicro
&& fatreader
) ? RW_READER
: lti
;
540 rw_enter(&zap
->zap_rwlock
, lt
);
541 if (lt
!= ((!zap
->zap_ismicro
&& fatreader
) ? RW_READER
: lti
)) {
542 /* it was upgraded, now we only need reader */
543 ASSERT(lt
== RW_WRITER
);
545 ((!zap
->zap_ismicro
&& fatreader
) ? RW_READER
: lti
));
546 rw_downgrade(&zap
->zap_rwlock
);
550 zap
->zap_objset
= os
;
553 dmu_buf_will_dirty(db
, tx
);
555 ASSERT3P(zap
->zap_dbuf
, ==, db
);
557 ASSERT(!zap
->zap_ismicro
||
558 zap
->zap_m
.zap_num_entries
<= zap
->zap_m
.zap_num_chunks
);
559 if (zap
->zap_ismicro
&& tx
&& adding
&&
560 zap
->zap_m
.zap_num_entries
== zap
->zap_m
.zap_num_chunks
) {
561 uint64_t newsz
= db
->db_size
+ SPA_MINBLOCKSIZE
;
562 if (newsz
> MZAP_MAX_BLKSZ
) {
563 dprintf("upgrading obj %llu: num_entries=%u\n",
564 (u_longlong_t
)obj
, zap
->zap_m
.zap_num_entries
);
566 int err
= mzap_upgrade(zapp
, tag
, tx
, 0);
568 rw_exit(&zap
->zap_rwlock
);
571 VERIFY0(dmu_object_set_blocksize(os
, obj
, newsz
, 0, tx
));
572 zap
->zap_m
.zap_num_chunks
=
573 db
->db_size
/ MZAP_ENT_LEN
- 1;
581 zap_lockdir_by_dnode(dnode_t
*dn
, dmu_tx_t
*tx
,
582 krw_t lti
, boolean_t fatreader
, boolean_t adding
, void *tag
, zap_t
**zapp
)
586 int err
= dmu_buf_hold_by_dnode(dn
, 0, tag
, &db
, DMU_READ_NO_PREFETCH
);
592 dmu_object_info_t doi
;
593 dmu_object_info_from_db(db
, &doi
);
594 ASSERT3U(DMU_OT_BYTESWAP(doi
.doi_type
), ==, DMU_BSWAP_ZAP
);
598 err
= zap_lockdir_impl(db
, tag
, tx
, lti
, fatreader
, adding
, zapp
);
600 dmu_buf_rele(db
, tag
);
606 zap_lockdir(objset_t
*os
, uint64_t obj
, dmu_tx_t
*tx
,
607 krw_t lti
, boolean_t fatreader
, boolean_t adding
, void *tag
, zap_t
**zapp
)
611 int err
= dmu_buf_hold(os
, obj
, 0, tag
, &db
, DMU_READ_NO_PREFETCH
);
616 dmu_object_info_t doi
;
617 dmu_object_info_from_db(db
, &doi
);
618 ASSERT3U(DMU_OT_BYTESWAP(doi
.doi_type
), ==, DMU_BSWAP_ZAP
);
621 err
= zap_lockdir_impl(db
, tag
, tx
, lti
, fatreader
, adding
, zapp
);
623 dmu_buf_rele(db
, tag
);
628 zap_unlockdir(zap_t
*zap
, void *tag
)
630 rw_exit(&zap
->zap_rwlock
);
631 dmu_buf_rele(zap
->zap_dbuf
, tag
);
635 mzap_upgrade(zap_t
**zapp
, void *tag
, dmu_tx_t
*tx
, zap_flags_t flags
)
640 ASSERT(RW_WRITE_HELD(&zap
->zap_rwlock
));
642 int sz
= zap
->zap_dbuf
->db_size
;
643 mzap_phys_t
*mzp
= vmem_alloc(sz
, KM_SLEEP
);
644 memcpy(mzp
, zap
->zap_dbuf
->db_data
, sz
);
645 int nchunks
= zap
->zap_m
.zap_num_chunks
;
648 err
= dmu_object_set_blocksize(zap
->zap_objset
, zap
->zap_object
,
649 1ULL << fzap_default_block_shift
, 0, tx
);
656 dprintf("upgrading obj=%llu with %u chunks\n",
657 (u_longlong_t
)zap
->zap_object
, nchunks
);
658 /* XXX destroy the avl later, so we can use the stored hash value */
661 fzap_upgrade(zap
, tx
, flags
);
663 for (int i
= 0; i
< nchunks
; i
++) {
664 mzap_ent_phys_t
*mze
= &mzp
->mz_chunk
[i
];
665 if (mze
->mze_name
[0] == 0)
667 dprintf("adding %s=%llu\n",
668 mze
->mze_name
, (u_longlong_t
)mze
->mze_value
);
669 zap_name_t
*zn
= zap_name_alloc(zap
, mze
->mze_name
, 0);
670 /* If we fail here, we would end up losing entries */
671 VERIFY0(fzap_add_cd(zn
, 8, 1, &mze
->mze_value
, mze
->mze_cd
,
673 zap
= zn
->zn_zap
; /* fzap_add_cd() may change zap */
682 * The "normflags" determine the behavior of the matchtype_t which is
683 * passed to zap_lookup_norm(). Names which have the same normalized
684 * version will be stored with the same hash value, and therefore we can
685 * perform normalization-insensitive lookups. We can be Unicode form-
686 * insensitive and/or case-insensitive. The following flags are valid for
693 * U8_TEXTPREP_TOUPPER
695 * The *_NF* (Normalization Form) flags are mutually exclusive; at most one
696 * of them may be supplied.
699 mzap_create_impl(dnode_t
*dn
, int normflags
, zap_flags_t flags
, dmu_tx_t
*tx
)
703 VERIFY0(dmu_buf_hold_by_dnode(dn
, 0, FTAG
, &db
, DMU_READ_NO_PREFETCH
));
705 dmu_buf_will_dirty(db
, tx
);
706 mzap_phys_t
*zp
= db
->db_data
;
707 zp
->mz_block_type
= ZBT_MICRO
;
709 ((uintptr_t)db
^ (uintptr_t)tx
^ (dn
->dn_object
<< 1)) | 1ULL;
710 zp
->mz_normflags
= normflags
;
714 /* Only fat zap supports flags; upgrade immediately. */
715 VERIFY0(zap_lockdir_impl(db
, FTAG
, tx
, RW_WRITER
,
716 B_FALSE
, B_FALSE
, &zap
));
717 VERIFY0(mzap_upgrade(&zap
, FTAG
, tx
, flags
));
718 zap_unlockdir(zap
, FTAG
);
720 dmu_buf_rele(db
, FTAG
);
725 zap_create_impl(objset_t
*os
, int normflags
, zap_flags_t flags
,
726 dmu_object_type_t ot
, int leaf_blockshift
, int indirect_blockshift
,
727 dmu_object_type_t bonustype
, int bonuslen
, int dnodesize
,
728 dnode_t
**allocated_dnode
, void *tag
, dmu_tx_t
*tx
)
732 ASSERT3U(DMU_OT_BYTESWAP(ot
), ==, DMU_BSWAP_ZAP
);
734 if (allocated_dnode
== NULL
) {
736 obj
= dmu_object_alloc_hold(os
, ot
, 1ULL << leaf_blockshift
,
737 indirect_blockshift
, bonustype
, bonuslen
, dnodesize
,
739 mzap_create_impl(dn
, normflags
, flags
, tx
);
740 dnode_rele(dn
, FTAG
);
742 obj
= dmu_object_alloc_hold(os
, ot
, 1ULL << leaf_blockshift
,
743 indirect_blockshift
, bonustype
, bonuslen
, dnodesize
,
744 allocated_dnode
, tag
, tx
);
745 mzap_create_impl(*allocated_dnode
, normflags
, flags
, tx
);
752 zap_create_claim(objset_t
*os
, uint64_t obj
, dmu_object_type_t ot
,
753 dmu_object_type_t bonustype
, int bonuslen
, dmu_tx_t
*tx
)
755 return (zap_create_claim_dnsize(os
, obj
, ot
, bonustype
, bonuslen
,
760 zap_create_claim_dnsize(objset_t
*os
, uint64_t obj
, dmu_object_type_t ot
,
761 dmu_object_type_t bonustype
, int bonuslen
, int dnodesize
, dmu_tx_t
*tx
)
763 return (zap_create_claim_norm_dnsize(os
, obj
,
764 0, ot
, bonustype
, bonuslen
, dnodesize
, tx
));
768 zap_create_claim_norm(objset_t
*os
, uint64_t obj
, int normflags
,
769 dmu_object_type_t ot
,
770 dmu_object_type_t bonustype
, int bonuslen
, dmu_tx_t
*tx
)
772 return (zap_create_claim_norm_dnsize(os
, obj
, normflags
, ot
, bonustype
,
777 zap_create_claim_norm_dnsize(objset_t
*os
, uint64_t obj
, int normflags
,
778 dmu_object_type_t ot
, dmu_object_type_t bonustype
, int bonuslen
,
779 int dnodesize
, dmu_tx_t
*tx
)
784 ASSERT3U(DMU_OT_BYTESWAP(ot
), ==, DMU_BSWAP_ZAP
);
785 error
= dmu_object_claim_dnsize(os
, obj
, ot
, 0, bonustype
, bonuslen
,
790 error
= dnode_hold(os
, obj
, FTAG
, &dn
);
794 mzap_create_impl(dn
, normflags
, 0, tx
);
796 dnode_rele(dn
, FTAG
);
802 zap_create(objset_t
*os
, dmu_object_type_t ot
,
803 dmu_object_type_t bonustype
, int bonuslen
, dmu_tx_t
*tx
)
805 return (zap_create_norm(os
, 0, ot
, bonustype
, bonuslen
, tx
));
809 zap_create_dnsize(objset_t
*os
, dmu_object_type_t ot
,
810 dmu_object_type_t bonustype
, int bonuslen
, int dnodesize
, dmu_tx_t
*tx
)
812 return (zap_create_norm_dnsize(os
, 0, ot
, bonustype
, bonuslen
,
817 zap_create_norm(objset_t
*os
, int normflags
, dmu_object_type_t ot
,
818 dmu_object_type_t bonustype
, int bonuslen
, dmu_tx_t
*tx
)
820 return (zap_create_norm_dnsize(os
, normflags
, ot
, bonustype
, bonuslen
,
825 zap_create_norm_dnsize(objset_t
*os
, int normflags
, dmu_object_type_t ot
,
826 dmu_object_type_t bonustype
, int bonuslen
, int dnodesize
, dmu_tx_t
*tx
)
828 return (zap_create_impl(os
, normflags
, 0, ot
, 0, 0,
829 bonustype
, bonuslen
, dnodesize
, NULL
, NULL
, tx
));
833 zap_create_flags(objset_t
*os
, int normflags
, zap_flags_t flags
,
834 dmu_object_type_t ot
, int leaf_blockshift
, int indirect_blockshift
,
835 dmu_object_type_t bonustype
, int bonuslen
, dmu_tx_t
*tx
)
837 return (zap_create_flags_dnsize(os
, normflags
, flags
, ot
,
838 leaf_blockshift
, indirect_blockshift
, bonustype
, bonuslen
, 0, tx
));
842 zap_create_flags_dnsize(objset_t
*os
, int normflags
, zap_flags_t flags
,
843 dmu_object_type_t ot
, int leaf_blockshift
, int indirect_blockshift
,
844 dmu_object_type_t bonustype
, int bonuslen
, int dnodesize
, dmu_tx_t
*tx
)
846 return (zap_create_impl(os
, normflags
, flags
, ot
, leaf_blockshift
,
847 indirect_blockshift
, bonustype
, bonuslen
, dnodesize
, NULL
, NULL
,
852 * Create a zap object and return a pointer to the newly allocated dnode via
853 * the allocated_dnode argument. The returned dnode will be held and the
854 * caller is responsible for releasing the hold by calling dnode_rele().
857 zap_create_hold(objset_t
*os
, int normflags
, zap_flags_t flags
,
858 dmu_object_type_t ot
, int leaf_blockshift
, int indirect_blockshift
,
859 dmu_object_type_t bonustype
, int bonuslen
, int dnodesize
,
860 dnode_t
**allocated_dnode
, void *tag
, dmu_tx_t
*tx
)
862 return (zap_create_impl(os
, normflags
, flags
, ot
, leaf_blockshift
,
863 indirect_blockshift
, bonustype
, bonuslen
, dnodesize
,
864 allocated_dnode
, tag
, tx
));
868 zap_destroy(objset_t
*os
, uint64_t zapobj
, dmu_tx_t
*tx
)
871 * dmu_object_free will free the object number and free the
872 * data. Freeing the data will cause our pageout function to be
873 * called, which will destroy our data (zap_leaf_t's and zap_t).
876 return (dmu_object_free(os
, zapobj
, tx
));
880 zap_evict_sync(void *dbu
)
884 rw_destroy(&zap
->zap_rwlock
);
886 if (zap
->zap_ismicro
)
889 mutex_destroy(&zap
->zap_f
.zap_num_entries_mtx
);
891 kmem_free(zap
, sizeof (zap_t
));
895 zap_count(objset_t
*os
, uint64_t zapobj
, uint64_t *count
)
900 zap_lockdir(os
, zapobj
, NULL
, RW_READER
, TRUE
, FALSE
, FTAG
, &zap
);
903 if (!zap
->zap_ismicro
) {
904 err
= fzap_count(zap
, count
);
906 *count
= zap
->zap_m
.zap_num_entries
;
908 zap_unlockdir(zap
, FTAG
);
913 * zn may be NULL; if not specified, it will be computed if needed.
914 * See also the comment above zap_entry_normalization_conflict().
917 mzap_normalization_conflict(zap_t
*zap
, zap_name_t
*zn
, mzap_ent_t
*mze
)
919 int direction
= AVL_BEFORE
;
920 boolean_t allocdzn
= B_FALSE
;
922 if (zap
->zap_normflags
== 0)
926 for (mzap_ent_t
*other
= avl_walk(&zap
->zap_m
.zap_avl
, mze
, direction
);
927 other
&& other
->mze_hash
== mze
->mze_hash
;
928 other
= avl_walk(&zap
->zap_m
.zap_avl
, other
, direction
)) {
931 zn
= zap_name_alloc(zap
, MZE_PHYS(zap
, mze
)->mze_name
,
935 if (zap_match(zn
, MZE_PHYS(zap
, other
)->mze_name
)) {
942 if (direction
== AVL_BEFORE
) {
943 direction
= AVL_AFTER
;
953 * Routines for manipulating attributes.
957 zap_lookup(objset_t
*os
, uint64_t zapobj
, const char *name
,
958 uint64_t integer_size
, uint64_t num_integers
, void *buf
)
960 return (zap_lookup_norm(os
, zapobj
, name
, integer_size
,
961 num_integers
, buf
, 0, NULL
, 0, NULL
));
965 zap_lookup_impl(zap_t
*zap
, const char *name
,
966 uint64_t integer_size
, uint64_t num_integers
, void *buf
,
967 matchtype_t mt
, char *realname
, int rn_len
,
972 zap_name_t
*zn
= zap_name_alloc(zap
, name
, mt
);
974 return (SET_ERROR(ENOTSUP
));
976 if (!zap
->zap_ismicro
) {
977 err
= fzap_lookup(zn
, integer_size
, num_integers
, buf
,
978 realname
, rn_len
, ncp
);
980 mzap_ent_t
*mze
= mze_find(zn
);
982 err
= SET_ERROR(ENOENT
);
984 if (num_integers
< 1) {
985 err
= SET_ERROR(EOVERFLOW
);
986 } else if (integer_size
!= 8) {
987 err
= SET_ERROR(EINVAL
);
990 MZE_PHYS(zap
, mze
)->mze_value
;
991 (void) strlcpy(realname
,
992 MZE_PHYS(zap
, mze
)->mze_name
, rn_len
);
994 *ncp
= mzap_normalization_conflict(zap
,
1005 zap_lookup_norm(objset_t
*os
, uint64_t zapobj
, const char *name
,
1006 uint64_t integer_size
, uint64_t num_integers
, void *buf
,
1007 matchtype_t mt
, char *realname
, int rn_len
,
1013 zap_lockdir(os
, zapobj
, NULL
, RW_READER
, TRUE
, FALSE
, FTAG
, &zap
);
1016 err
= zap_lookup_impl(zap
, name
, integer_size
,
1017 num_integers
, buf
, mt
, realname
, rn_len
, ncp
);
1018 zap_unlockdir(zap
, FTAG
);
1023 zap_prefetch(objset_t
*os
, uint64_t zapobj
, const char *name
)
1029 err
= zap_lockdir(os
, zapobj
, NULL
, RW_READER
, TRUE
, FALSE
, FTAG
, &zap
);
1032 zn
= zap_name_alloc(zap
, name
, 0);
1034 zap_unlockdir(zap
, FTAG
);
1035 return (SET_ERROR(ENOTSUP
));
1040 zap_unlockdir(zap
, FTAG
);
1045 zap_lookup_by_dnode(dnode_t
*dn
, const char *name
,
1046 uint64_t integer_size
, uint64_t num_integers
, void *buf
)
1048 return (zap_lookup_norm_by_dnode(dn
, name
, integer_size
,
1049 num_integers
, buf
, 0, NULL
, 0, NULL
));
1053 zap_lookup_norm_by_dnode(dnode_t
*dn
, const char *name
,
1054 uint64_t integer_size
, uint64_t num_integers
, void *buf
,
1055 matchtype_t mt
, char *realname
, int rn_len
,
1060 int err
= zap_lockdir_by_dnode(dn
, NULL
, RW_READER
, TRUE
, FALSE
,
1064 err
= zap_lookup_impl(zap
, name
, integer_size
,
1065 num_integers
, buf
, mt
, realname
, rn_len
, ncp
);
1066 zap_unlockdir(zap
, FTAG
);
1071 zap_prefetch_uint64(objset_t
*os
, uint64_t zapobj
, const uint64_t *key
,
1077 zap_lockdir(os
, zapobj
, NULL
, RW_READER
, TRUE
, FALSE
, FTAG
, &zap
);
1080 zap_name_t
*zn
= zap_name_alloc_uint64(zap
, key
, key_numints
);
1082 zap_unlockdir(zap
, FTAG
);
1083 return (SET_ERROR(ENOTSUP
));
1088 zap_unlockdir(zap
, FTAG
);
1093 zap_lookup_uint64(objset_t
*os
, uint64_t zapobj
, const uint64_t *key
,
1094 int key_numints
, uint64_t integer_size
, uint64_t num_integers
, void *buf
)
1099 zap_lockdir(os
, zapobj
, NULL
, RW_READER
, TRUE
, FALSE
, FTAG
, &zap
);
1102 zap_name_t
*zn
= zap_name_alloc_uint64(zap
, key
, key_numints
);
1104 zap_unlockdir(zap
, FTAG
);
1105 return (SET_ERROR(ENOTSUP
));
1108 err
= fzap_lookup(zn
, integer_size
, num_integers
, buf
,
1111 zap_unlockdir(zap
, FTAG
);
1116 zap_contains(objset_t
*os
, uint64_t zapobj
, const char *name
)
1118 int err
= zap_lookup_norm(os
, zapobj
, name
, 0,
1119 0, NULL
, 0, NULL
, 0, NULL
);
1120 if (err
== EOVERFLOW
|| err
== EINVAL
)
1121 err
= 0; /* found, but skipped reading the value */
1126 zap_length(objset_t
*os
, uint64_t zapobj
, const char *name
,
1127 uint64_t *integer_size
, uint64_t *num_integers
)
1132 zap_lockdir(os
, zapobj
, NULL
, RW_READER
, TRUE
, FALSE
, FTAG
, &zap
);
1135 zap_name_t
*zn
= zap_name_alloc(zap
, name
, 0);
1137 zap_unlockdir(zap
, FTAG
);
1138 return (SET_ERROR(ENOTSUP
));
1140 if (!zap
->zap_ismicro
) {
1141 err
= fzap_length(zn
, integer_size
, num_integers
);
1143 mzap_ent_t
*mze
= mze_find(zn
);
1145 err
= SET_ERROR(ENOENT
);
1154 zap_unlockdir(zap
, FTAG
);
1159 zap_length_uint64(objset_t
*os
, uint64_t zapobj
, const uint64_t *key
,
1160 int key_numints
, uint64_t *integer_size
, uint64_t *num_integers
)
1165 zap_lockdir(os
, zapobj
, NULL
, RW_READER
, TRUE
, FALSE
, FTAG
, &zap
);
1168 zap_name_t
*zn
= zap_name_alloc_uint64(zap
, key
, key_numints
);
1170 zap_unlockdir(zap
, FTAG
);
1171 return (SET_ERROR(ENOTSUP
));
1173 err
= fzap_length(zn
, integer_size
, num_integers
);
1175 zap_unlockdir(zap
, FTAG
);
1180 mzap_addent(zap_name_t
*zn
, uint64_t value
)
1182 zap_t
*zap
= zn
->zn_zap
;
1183 int start
= zap
->zap_m
.zap_alloc_next
;
1185 ASSERT(RW_WRITE_HELD(&zap
->zap_rwlock
));
1188 for (int i
= 0; i
< zap
->zap_m
.zap_num_chunks
; i
++) {
1189 mzap_ent_phys_t
*mze
= &zap_m_phys(zap
)->mz_chunk
[i
];
1190 ASSERT(strcmp(zn
->zn_key_orig
, mze
->mze_name
) != 0);
1194 uint32_t cd
= mze_find_unused_cd(zap
, zn
->zn_hash
);
1195 /* given the limited size of the microzap, this can't happen */
1196 ASSERT(cd
< zap_maxcd(zap
));
1199 for (int i
= start
; i
< zap
->zap_m
.zap_num_chunks
; i
++) {
1200 mzap_ent_phys_t
*mze
= &zap_m_phys(zap
)->mz_chunk
[i
];
1201 if (mze
->mze_name
[0] == 0) {
1202 mze
->mze_value
= value
;
1204 (void) strlcpy(mze
->mze_name
, zn
->zn_key_orig
,
1205 sizeof (mze
->mze_name
));
1206 zap
->zap_m
.zap_num_entries
++;
1207 zap
->zap_m
.zap_alloc_next
= i
+1;
1208 if (zap
->zap_m
.zap_alloc_next
==
1209 zap
->zap_m
.zap_num_chunks
)
1210 zap
->zap_m
.zap_alloc_next
= 0;
1211 mze_insert(zap
, i
, zn
->zn_hash
);
1219 cmn_err(CE_PANIC
, "out of entries!");
1223 zap_add_impl(zap_t
*zap
, const char *key
,
1224 int integer_size
, uint64_t num_integers
,
1225 const void *val
, dmu_tx_t
*tx
, void *tag
)
1227 const uint64_t *intval
= val
;
1230 zap_name_t
*zn
= zap_name_alloc(zap
, key
, 0);
1232 zap_unlockdir(zap
, tag
);
1233 return (SET_ERROR(ENOTSUP
));
1235 if (!zap
->zap_ismicro
) {
1236 err
= fzap_add(zn
, integer_size
, num_integers
, val
, tag
, tx
);
1237 zap
= zn
->zn_zap
; /* fzap_add() may change zap */
1238 } else if (integer_size
!= 8 || num_integers
!= 1 ||
1239 strlen(key
) >= MZAP_NAME_LEN
||
1240 !mze_canfit_fzap_leaf(zn
, zn
->zn_hash
)) {
1241 err
= mzap_upgrade(&zn
->zn_zap
, tag
, tx
, 0);
1243 err
= fzap_add(zn
, integer_size
, num_integers
, val
,
1246 zap
= zn
->zn_zap
; /* fzap_add() may change zap */
1248 if (mze_find(zn
) != NULL
) {
1249 err
= SET_ERROR(EEXIST
);
1251 mzap_addent(zn
, *intval
);
1254 ASSERT(zap
== zn
->zn_zap
);
1256 if (zap
!= NULL
) /* may be NULL if fzap_add() failed */
1257 zap_unlockdir(zap
, tag
);
1262 zap_add(objset_t
*os
, uint64_t zapobj
, const char *key
,
1263 int integer_size
, uint64_t num_integers
,
1264 const void *val
, dmu_tx_t
*tx
)
1269 err
= zap_lockdir(os
, zapobj
, tx
, RW_WRITER
, TRUE
, TRUE
, FTAG
, &zap
);
1272 err
= zap_add_impl(zap
, key
, integer_size
, num_integers
, val
, tx
, FTAG
);
1273 /* zap_add_impl() calls zap_unlockdir() */
1278 zap_add_by_dnode(dnode_t
*dn
, const char *key
,
1279 int integer_size
, uint64_t num_integers
,
1280 const void *val
, dmu_tx_t
*tx
)
1285 err
= zap_lockdir_by_dnode(dn
, tx
, RW_WRITER
, TRUE
, TRUE
, FTAG
, &zap
);
1288 err
= zap_add_impl(zap
, key
, integer_size
, num_integers
, val
, tx
, FTAG
);
1289 /* zap_add_impl() calls zap_unlockdir() */
1294 zap_add_uint64(objset_t
*os
, uint64_t zapobj
, const uint64_t *key
,
1295 int key_numints
, int integer_size
, uint64_t num_integers
,
1296 const void *val
, dmu_tx_t
*tx
)
1301 zap_lockdir(os
, zapobj
, tx
, RW_WRITER
, TRUE
, TRUE
, FTAG
, &zap
);
1304 zap_name_t
*zn
= zap_name_alloc_uint64(zap
, key
, key_numints
);
1306 zap_unlockdir(zap
, FTAG
);
1307 return (SET_ERROR(ENOTSUP
));
1309 err
= fzap_add(zn
, integer_size
, num_integers
, val
, FTAG
, tx
);
1310 zap
= zn
->zn_zap
; /* fzap_add() may change zap */
1312 if (zap
!= NULL
) /* may be NULL if fzap_add() failed */
1313 zap_unlockdir(zap
, FTAG
);
1318 zap_update(objset_t
*os
, uint64_t zapobj
, const char *name
,
1319 int integer_size
, uint64_t num_integers
, const void *val
, dmu_tx_t
*tx
)
1322 const uint64_t *intval
= val
;
1325 zap_lockdir(os
, zapobj
, tx
, RW_WRITER
, TRUE
, TRUE
, FTAG
, &zap
);
1328 zap_name_t
*zn
= zap_name_alloc(zap
, name
, 0);
1330 zap_unlockdir(zap
, FTAG
);
1331 return (SET_ERROR(ENOTSUP
));
1333 if (!zap
->zap_ismicro
) {
1334 err
= fzap_update(zn
, integer_size
, num_integers
, val
,
1336 zap
= zn
->zn_zap
; /* fzap_update() may change zap */
1337 } else if (integer_size
!= 8 || num_integers
!= 1 ||
1338 strlen(name
) >= MZAP_NAME_LEN
) {
1339 dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n",
1340 (u_longlong_t
)zapobj
, integer_size
,
1341 (u_longlong_t
)num_integers
, name
);
1342 err
= mzap_upgrade(&zn
->zn_zap
, FTAG
, tx
, 0);
1344 err
= fzap_update(zn
, integer_size
, num_integers
,
1347 zap
= zn
->zn_zap
; /* fzap_update() may change zap */
1349 mzap_ent_t
*mze
= mze_find(zn
);
1351 MZE_PHYS(zap
, mze
)->mze_value
= *intval
;
1353 mzap_addent(zn
, *intval
);
1356 ASSERT(zap
== zn
->zn_zap
);
1358 if (zap
!= NULL
) /* may be NULL if fzap_upgrade() failed */
1359 zap_unlockdir(zap
, FTAG
);
1364 zap_update_uint64(objset_t
*os
, uint64_t zapobj
, const uint64_t *key
,
1366 int integer_size
, uint64_t num_integers
, const void *val
, dmu_tx_t
*tx
)
1371 zap_lockdir(os
, zapobj
, tx
, RW_WRITER
, TRUE
, TRUE
, FTAG
, &zap
);
1374 zap_name_t
*zn
= zap_name_alloc_uint64(zap
, key
, key_numints
);
1376 zap_unlockdir(zap
, FTAG
);
1377 return (SET_ERROR(ENOTSUP
));
1379 err
= fzap_update(zn
, integer_size
, num_integers
, val
, FTAG
, tx
);
1380 zap
= zn
->zn_zap
; /* fzap_update() may change zap */
1382 if (zap
!= NULL
) /* may be NULL if fzap_upgrade() failed */
1383 zap_unlockdir(zap
, FTAG
);
1388 zap_remove(objset_t
*os
, uint64_t zapobj
, const char *name
, dmu_tx_t
*tx
)
1390 return (zap_remove_norm(os
, zapobj
, name
, 0, tx
));
1394 zap_remove_impl(zap_t
*zap
, const char *name
,
1395 matchtype_t mt
, dmu_tx_t
*tx
)
1399 zap_name_t
*zn
= zap_name_alloc(zap
, name
, mt
);
1401 return (SET_ERROR(ENOTSUP
));
1402 if (!zap
->zap_ismicro
) {
1403 err
= fzap_remove(zn
, tx
);
1405 mzap_ent_t
*mze
= mze_find(zn
);
1407 err
= SET_ERROR(ENOENT
);
1409 zap
->zap_m
.zap_num_entries
--;
1410 memset(&zap_m_phys(zap
)->mz_chunk
[mze
->mze_chunkid
], 0,
1411 sizeof (mzap_ent_phys_t
));
1412 mze_remove(zap
, mze
);
1420 zap_remove_norm(objset_t
*os
, uint64_t zapobj
, const char *name
,
1421 matchtype_t mt
, dmu_tx_t
*tx
)
1426 err
= zap_lockdir(os
, zapobj
, tx
, RW_WRITER
, TRUE
, FALSE
, FTAG
, &zap
);
1429 err
= zap_remove_impl(zap
, name
, mt
, tx
);
1430 zap_unlockdir(zap
, FTAG
);
1435 zap_remove_by_dnode(dnode_t
*dn
, const char *name
, dmu_tx_t
*tx
)
1440 err
= zap_lockdir_by_dnode(dn
, tx
, RW_WRITER
, TRUE
, FALSE
, FTAG
, &zap
);
1443 err
= zap_remove_impl(zap
, name
, 0, tx
);
1444 zap_unlockdir(zap
, FTAG
);
1449 zap_remove_uint64(objset_t
*os
, uint64_t zapobj
, const uint64_t *key
,
1450 int key_numints
, dmu_tx_t
*tx
)
1455 zap_lockdir(os
, zapobj
, tx
, RW_WRITER
, TRUE
, FALSE
, FTAG
, &zap
);
1458 zap_name_t
*zn
= zap_name_alloc_uint64(zap
, key
, key_numints
);
1460 zap_unlockdir(zap
, FTAG
);
1461 return (SET_ERROR(ENOTSUP
));
1463 err
= fzap_remove(zn
, tx
);
1465 zap_unlockdir(zap
, FTAG
);
/*
 * Routines for iterating over the attributes.
 */
1474 zap_cursor_init_impl(zap_cursor_t
*zc
, objset_t
*os
, uint64_t zapobj
,
1475 uint64_t serialized
, boolean_t prefetch
)
1480 zc
->zc_zapobj
= zapobj
;
1481 zc
->zc_serialized
= serialized
;
1484 zc
->zc_prefetch
= prefetch
;
1487 zap_cursor_init_serialized(zap_cursor_t
*zc
, objset_t
*os
, uint64_t zapobj
,
1488 uint64_t serialized
)
1490 zap_cursor_init_impl(zc
, os
, zapobj
, serialized
, B_TRUE
);
1494 * Initialize a cursor at the beginning of the ZAP object. The entire
1495 * ZAP object will be prefetched.
1498 zap_cursor_init(zap_cursor_t
*zc
, objset_t
*os
, uint64_t zapobj
)
1500 zap_cursor_init_impl(zc
, os
, zapobj
, 0, B_TRUE
);
1504 * Initialize a cursor at the beginning, but request that we not prefetch
1505 * the entire ZAP object.
1508 zap_cursor_init_noprefetch(zap_cursor_t
*zc
, objset_t
*os
, uint64_t zapobj
)
1510 zap_cursor_init_impl(zc
, os
, zapobj
, 0, B_FALSE
);
1514 zap_cursor_fini(zap_cursor_t
*zc
)
1517 rw_enter(&zc
->zc_zap
->zap_rwlock
, RW_READER
);
1518 zap_unlockdir(zc
->zc_zap
, NULL
);
1522 rw_enter(&zc
->zc_leaf
->l_rwlock
, RW_READER
);
1523 zap_put_leaf(zc
->zc_leaf
);
1526 zc
->zc_objset
= NULL
;
1530 zap_cursor_serialize(zap_cursor_t
*zc
)
1532 if (zc
->zc_hash
== -1ULL)
1534 if (zc
->zc_zap
== NULL
)
1535 return (zc
->zc_serialized
);
1536 ASSERT((zc
->zc_hash
& zap_maxcd(zc
->zc_zap
)) == 0);
1537 ASSERT(zc
->zc_cd
< zap_maxcd(zc
->zc_zap
));
1540 * We want to keep the high 32 bits of the cursor zero if we can, so
1541 * that 32-bit programs can access this. So usually use a small
1542 * (28-bit) hash value so we can fit 4 bits of cd into the low 32-bits
1545 * [ collision differentiator | zap_hashbits()-bit hash value ]
1547 return ((zc
->zc_hash
>> (64 - zap_hashbits(zc
->zc_zap
))) |
1548 ((uint64_t)zc
->zc_cd
<< zap_hashbits(zc
->zc_zap
)));
1552 zap_cursor_retrieve(zap_cursor_t
*zc
, zap_attribute_t
*za
)
1556 if (zc
->zc_hash
== -1ULL)
1557 return (SET_ERROR(ENOENT
));
1559 if (zc
->zc_zap
== NULL
) {
1561 err
= zap_lockdir(zc
->zc_objset
, zc
->zc_zapobj
, NULL
,
1562 RW_READER
, TRUE
, FALSE
, NULL
, &zc
->zc_zap
);
1567 * To support zap_cursor_init_serialized, advance, retrieve,
1568 * we must add to the existing zc_cd, which may already
1569 * be 1 due to the zap_cursor_advance.
1571 ASSERT(zc
->zc_hash
== 0);
1572 hb
= zap_hashbits(zc
->zc_zap
);
1573 zc
->zc_hash
= zc
->zc_serialized
<< (64 - hb
);
1574 zc
->zc_cd
+= zc
->zc_serialized
>> hb
;
1575 if (zc
->zc_cd
>= zap_maxcd(zc
->zc_zap
)) /* corrupt serialized */
1578 rw_enter(&zc
->zc_zap
->zap_rwlock
, RW_READER
);
1580 if (!zc
->zc_zap
->zap_ismicro
) {
1581 err
= fzap_cursor_retrieve(zc
->zc_zap
, zc
, za
);
1584 mzap_ent_t mze_tofind
;
1586 mze_tofind
.mze_hash
= zc
->zc_hash
;
1587 mze_tofind
.mze_cd
= zc
->zc_cd
;
1590 avl_find(&zc
->zc_zap
->zap_m
.zap_avl
, &mze_tofind
, &idx
);
1592 mze
= avl_nearest(&zc
->zc_zap
->zap_m
.zap_avl
,
1596 mzap_ent_phys_t
*mzep
= MZE_PHYS(zc
->zc_zap
, mze
);
1597 ASSERT3U(mze
->mze_cd
, ==, mzep
->mze_cd
);
1598 za
->za_normalization_conflict
=
1599 mzap_normalization_conflict(zc
->zc_zap
, NULL
, mze
);
1600 za
->za_integer_length
= 8;
1601 za
->za_num_integers
= 1;
1602 za
->za_first_integer
= mzep
->mze_value
;
1603 (void) strlcpy(za
->za_name
, mzep
->mze_name
,
1604 sizeof (za
->za_name
));
1605 zc
->zc_hash
= mze
->mze_hash
;
1606 zc
->zc_cd
= mze
->mze_cd
;
1609 zc
->zc_hash
= -1ULL;
1610 err
= SET_ERROR(ENOENT
);
1613 rw_exit(&zc
->zc_zap
->zap_rwlock
);
1618 zap_cursor_advance(zap_cursor_t
*zc
)
1620 if (zc
->zc_hash
== -1ULL)
1626 zap_get_stats(objset_t
*os
, uint64_t zapobj
, zap_stats_t
*zs
)
1631 zap_lockdir(os
, zapobj
, NULL
, RW_READER
, TRUE
, FALSE
, FTAG
, &zap
);
1635 memset(zs
, 0, sizeof (zap_stats_t
));
1637 if (zap
->zap_ismicro
) {
1638 zs
->zs_blocksize
= zap
->zap_dbuf
->db_size
;
1639 zs
->zs_num_entries
= zap
->zap_m
.zap_num_entries
;
1640 zs
->zs_num_blocks
= 1;
1642 fzap_get_stats(zap
, zs
);
1644 zap_unlockdir(zap
, FTAG
);
1648 #if defined(_KERNEL)
1649 EXPORT_SYMBOL(zap_create
);
1650 EXPORT_SYMBOL(zap_create_dnsize
);
1651 EXPORT_SYMBOL(zap_create_norm
);
1652 EXPORT_SYMBOL(zap_create_norm_dnsize
);
1653 EXPORT_SYMBOL(zap_create_flags
);
1654 EXPORT_SYMBOL(zap_create_flags_dnsize
);
1655 EXPORT_SYMBOL(zap_create_claim
);
1656 EXPORT_SYMBOL(zap_create_claim_norm
);
1657 EXPORT_SYMBOL(zap_create_claim_norm_dnsize
);
1658 EXPORT_SYMBOL(zap_create_hold
);
1659 EXPORT_SYMBOL(zap_destroy
);
1660 EXPORT_SYMBOL(zap_lookup
);
1661 EXPORT_SYMBOL(zap_lookup_by_dnode
);
1662 EXPORT_SYMBOL(zap_lookup_norm
);
1663 EXPORT_SYMBOL(zap_lookup_uint64
);
1664 EXPORT_SYMBOL(zap_contains
);
1665 EXPORT_SYMBOL(zap_prefetch
);
1666 EXPORT_SYMBOL(zap_prefetch_uint64
);
1667 EXPORT_SYMBOL(zap_add
);
1668 EXPORT_SYMBOL(zap_add_by_dnode
);
1669 EXPORT_SYMBOL(zap_add_uint64
);
1670 EXPORT_SYMBOL(zap_update
);
1671 EXPORT_SYMBOL(zap_update_uint64
);
1672 EXPORT_SYMBOL(zap_length
);
1673 EXPORT_SYMBOL(zap_length_uint64
);
1674 EXPORT_SYMBOL(zap_remove
);
1675 EXPORT_SYMBOL(zap_remove_by_dnode
);
1676 EXPORT_SYMBOL(zap_remove_norm
);
1677 EXPORT_SYMBOL(zap_remove_uint64
);
1678 EXPORT_SYMBOL(zap_count
);
1679 EXPORT_SYMBOL(zap_value_search
);
1680 EXPORT_SYMBOL(zap_join
);
1681 EXPORT_SYMBOL(zap_join_increment
);
1682 EXPORT_SYMBOL(zap_add_int
);
1683 EXPORT_SYMBOL(zap_remove_int
);
1684 EXPORT_SYMBOL(zap_lookup_int
);
1685 EXPORT_SYMBOL(zap_increment_int
);
1686 EXPORT_SYMBOL(zap_add_int_key
);
1687 EXPORT_SYMBOL(zap_lookup_int_key
);
1688 EXPORT_SYMBOL(zap_increment
);
1689 EXPORT_SYMBOL(zap_cursor_init
);
1690 EXPORT_SYMBOL(zap_cursor_fini
);
1691 EXPORT_SYMBOL(zap_cursor_retrieve
);
1692 EXPORT_SYMBOL(zap_cursor_advance
);
1693 EXPORT_SYMBOL(zap_cursor_serialize
);
1694 EXPORT_SYMBOL(zap_cursor_init_serialized
);
1695 EXPORT_SYMBOL(zap_get_stats
);