/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
 * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
 * Copyright 2017 Nexenta Systems, Inc.
 */
#include <sys/zfs_context.h>
#include <sys/zap_impl.h>
#include <sys/zap_leaf.h>
#include <sys/btree.h>
#include <sys/dmu_objset.h>
#include <sys/sunddi.h>
int zap_micro_max_size = MZAP_MAX_BLKSZ;
static int mzap_upgrade(zap_t **zapp,
    const void *tag, dmu_tx_t *tx, zap_flags_t flags);
uint64_t
zap_getflags(zap_t *zap)
{
	if (zap->zap_ismicro)
		return (0);
	return (zap_f_phys(zap)->zap_flags);
}
int
zap_hashbits(zap_t *zap)
{
	if (zap_getflags(zap) & ZAP_FLAG_HASH64)
		return (48);
	else
		return (28);
}

uint32_t
zap_maxcd(zap_t *zap)
{
	if (zap_getflags(zap) & ZAP_FLAG_HASH64)
		return ((1<<16)-1);
	else
		return (-1U);
}
static uint64_t
zap_hash(zap_name_t *zn)
{
	zap_t *zap = zn->zn_zap;

	uint64_t h = 0;
	if (zap_getflags(zap) & ZAP_FLAG_PRE_HASHED_KEY) {
		ASSERT(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY);
		h = *(uint64_t *)zn->zn_key_orig;
	} else {
		h = zap->zap_salt;
		ASSERT(h != 0);
		ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);

		if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) {
			const uint64_t *wp = zn->zn_key_norm;

			ASSERT(zn->zn_key_intlen == 8);
			for (int i = 0; i < zn->zn_key_norm_numints;
			    i++, wp++) {
				uint64_t word = *wp;

				for (int j = 0; j < 8; j++) {
					h = (h >> 8) ^
					    zfs_crc64_table[(h ^ word) & 0xFF];
					word >>= NBBY;
				}
			}
		} else {
			const uint8_t *cp = zn->zn_key_norm;

			/*
			 * We previously stored the terminating null on
			 * disk, but didn't hash it, so we need to
			 * continue to not hash it.  (The
			 * zn_key_*_numints includes the terminating
			 * null for non-binary keys.)
			 */
			int len = zn->zn_key_norm_numints - 1;

			ASSERT(zn->zn_key_intlen == 1);
			for (int i = 0; i < len; cp++, i++) {
				h = (h >> 8) ^
				    zfs_crc64_table[(h ^ *cp) & 0xFF];
			}
		}
	}
	/*
	 * Don't use all 64 bits, since we need some in the cookie for
	 * the collision differentiator.  We MUST use the high bits,
	 * since those are the ones that we first pay attention to when
	 * choosing the bucket.
	 */
	h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1);

	return (h);
}
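/*
 * Illustrative sketch (user-space, not part of this file): with the
 * default 28-bit hash from zap_hashbits(), the mask above clears the
 * low 36 bits, so only the high bits of the CRC survive and the low
 * bits of a cursor cookie stay free for the collision differentiator.
 *
 *	#include <stdint.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		uint64_t h = 0x0123456789abcdefULL;
 *		int hashbits = 28;
 *		h &= ~((1ULL << (64 - hashbits)) - 1);
 *		(void) printf("%016llx\n", (unsigned long long)h);
 *		return (0);
 *	}
 *
 * This prints 0123456000000000: only the top 28 bits remain.
 */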
static int
zap_normalize(zap_t *zap, const char *name, char *namenorm, int normflags)
{
	ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY));

	size_t inlen = strlen(name) + 1;
	size_t outlen = ZAP_MAXNAMELEN;

	int err = 0;
	(void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen,
	    normflags | U8_TEXTPREP_IGNORE_NULL | U8_TEXTPREP_IGNORE_INVALID,
	    U8_UNICODE_LATEST, &err);

	return (err);
}
static boolean_t
zap_match(zap_name_t *zn, const char *matchname)
{
	ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY));

	if (zn->zn_matchtype & MT_NORMALIZE) {
		char norm[ZAP_MAXNAMELEN];

		if (zap_normalize(zn->zn_zap, matchname, norm,
		    zn->zn_normflags) != 0)
			return (B_FALSE);

		return (strcmp(zn->zn_key_norm, norm) == 0);
	} else {
		return (strcmp(zn->zn_key_orig, matchname) == 0);
	}
}
static zap_name_t *
zap_name_alloc(zap_t *zap)
{
	zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP);
	zn->zn_zap = zap;
	return (zn);
}
static void
zap_name_free(zap_name_t *zn)
{
	kmem_free(zn, sizeof (zap_name_t));
}
static int
zap_name_init_str(zap_name_t *zn, const char *key, matchtype_t mt)
{
	zap_t *zap = zn->zn_zap;

	zn->zn_key_intlen = sizeof (*key);
	zn->zn_key_orig = key;
	zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1;
	zn->zn_matchtype = mt;
	zn->zn_normflags = zap->zap_normflags;

	/*
	 * If we're dealing with a case sensitive lookup on a mixed or
	 * insensitive fs, remove U8_TEXTPREP_TOUPPER or the lookup
	 * will fold case to all caps overriding the lookup request.
	 */
	if (mt & MT_MATCH_CASE)
		zn->zn_normflags &= ~U8_TEXTPREP_TOUPPER;

	if (zap->zap_normflags) {
		/*
		 * We *must* use zap_normflags because this normalization is
		 * what the hash is computed from.
		 */
		if (zap_normalize(zap, key, zn->zn_normbuf,
		    zap->zap_normflags) != 0)
			return (SET_ERROR(ENOTSUP));
		zn->zn_key_norm = zn->zn_normbuf;
		zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
	} else {
		if (mt != 0)
			return (SET_ERROR(ENOTSUP));
		zn->zn_key_norm = zn->zn_key_orig;
		zn->zn_key_norm_numints = zn->zn_key_orig_numints;
	}

	zn->zn_hash = zap_hash(zn);

	if (zap->zap_normflags != zn->zn_normflags) {
		/*
		 * We *must* use zn_normflags because this normalization is
		 * what the matching is based on.  (Not the hash!)
		 */
		if (zap_normalize(zap, key, zn->zn_normbuf,
		    zn->zn_normflags) != 0)
			return (SET_ERROR(ENOTSUP));
		zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
	}

	return (0);
}
zap_name_t *
zap_name_alloc_str(zap_t *zap, const char *key, matchtype_t mt)
{
	zap_name_t *zn = zap_name_alloc(zap);
	if (zap_name_init_str(zn, key, mt) != 0) {
		zap_name_free(zn);
		return (NULL);
	}
	return (zn);
}
static zap_name_t *
zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints)
{
	zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP);

	ASSERT(zap->zap_normflags == 0);
	zn->zn_zap = zap;
	zn->zn_key_intlen = sizeof (*key);
	zn->zn_key_orig = zn->zn_key_norm = key;
	zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints;
	zn->zn_matchtype = 0;

	zn->zn_hash = zap_hash(zn);
	return (zn);
}
static void
mzap_byteswap(mzap_phys_t *buf, size_t size)
{
	buf->mz_block_type = BSWAP_64(buf->mz_block_type);
	buf->mz_salt = BSWAP_64(buf->mz_salt);
	buf->mz_normflags = BSWAP_64(buf->mz_normflags);
	int max = (size / MZAP_ENT_LEN) - 1;
	for (int i = 0; i < max; i++) {
		buf->mz_chunk[i].mze_value =
		    BSWAP_64(buf->mz_chunk[i].mze_value);
		buf->mz_chunk[i].mze_cd =
		    BSWAP_32(buf->mz_chunk[i].mze_cd);
	}
}
void
zap_byteswap(void *buf, size_t size)
{
	uint64_t block_type = *(uint64_t *)buf;

	if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) {
		/* ASSERT(magic == ZAP_LEAF_MAGIC); */
		mzap_byteswap(buf, size);
	} else {
		fzap_byteswap(buf, size);
	}
}
__attribute__((always_inline)) inline
static int
mze_compare(const void *arg1, const void *arg2)
{
	const mzap_ent_t *mze1 = arg1;
	const mzap_ent_t *mze2 = arg2;

	return (TREE_CMP((uint64_t)(mze1->mze_hash) << 32 | mze1->mze_cd,
	    (uint64_t)(mze2->mze_hash) << 32 | mze2->mze_cd));
}

ZFS_BTREE_FIND_IN_BUF_FUNC(mze_find_in_buf, mzap_ent_t,
    mze_compare)
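/*
 * Illustrative sketch: entries are ordered by the composite 64-bit key
 * (mze_hash << 32 | mze_cd), i.e. primarily by the upper 32 bits of
 * the name hash and secondarily by collision differentiator.  cmp64()
 * below is a hypothetical stand-in with the same -1/0/1 convention as
 * TREE_CMP():
 *
 *	static int
 *	cmp64(uint64_t a, uint64_t b)
 *	{
 *		return ((a > b) - (a < b));
 *	}
 *
 * cmp64((1ULL << 32) | 5, (2ULL << 32) | 0) == -1, so an entry with
 * the lower hash sorts first regardless of its cd.
 */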
static void
mze_insert(zap_t *zap, uint16_t chunkid, uint64_t hash)
{
	mzap_ent_t mze;

	ASSERT(zap->zap_ismicro);
	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));

	mze.mze_chunkid = chunkid;
	ASSERT0(hash & 0xffffffff);
	mze.mze_hash = hash >> 32;
	ASSERT3U(MZE_PHYS(zap, &mze)->mze_cd, <=, 0xffff);
	mze.mze_cd = (uint16_t)MZE_PHYS(zap, &mze)->mze_cd;
	ASSERT(MZE_PHYS(zap, &mze)->mze_name[0] != 0);
	zfs_btree_add(&zap->zap_m.zap_tree, &mze);
}
static mzap_ent_t *
mze_find(zap_name_t *zn, zfs_btree_index_t *idx)
{
	mzap_ent_t mze_tofind;
	mzap_ent_t *mze;
	zfs_btree_t *tree = &zn->zn_zap->zap_m.zap_tree;

	ASSERT(zn->zn_zap->zap_ismicro);
	ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock));

	ASSERT0(zn->zn_hash & 0xffffffff);
	mze_tofind.mze_hash = zn->zn_hash >> 32;
	mze_tofind.mze_cd = 0;

	mze = zfs_btree_find(tree, &mze_tofind, idx);
	if (mze == NULL)
		mze = zfs_btree_next(tree, idx, idx);
	for (; mze && mze->mze_hash == mze_tofind.mze_hash;
	    mze = zfs_btree_next(tree, idx, idx)) {
		ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd);
		if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name))
			return (mze);
	}

	return (NULL);
}
static uint32_t
mze_find_unused_cd(zap_t *zap, uint64_t hash)
{
	mzap_ent_t mze_tofind;
	zfs_btree_index_t idx;
	zfs_btree_t *tree = &zap->zap_m.zap_tree;

	ASSERT(zap->zap_ismicro);
	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));

	ASSERT0(hash & 0xffffffff);
	hash >>= 32;
	mze_tofind.mze_hash = hash;
	mze_tofind.mze_cd = 0;

	uint32_t cd = 0;
	for (mzap_ent_t *mze = zfs_btree_find(tree, &mze_tofind, &idx);
	    mze && mze->mze_hash == hash;
	    mze = zfs_btree_next(tree, &idx, &idx)) {
		if (mze->mze_cd != cd)
			break;
		cd++;
	}

	return (cd);
}
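/*
 * Illustrative sketch (user-space): because the tree is ordered by
 * (hash, cd), entries with the same hash are visited with ascending
 * cds, so the loop above returns the first gap.  A hypothetical
 * first_free_cd() over a sorted array behaves the same way:
 *
 *	static uint32_t
 *	first_free_cd(const uint32_t *cds, int n)
 *	{
 *		uint32_t cd = 0;
 *		for (int i = 0; i < n && cds[i] == cd; i++)
 *			cd++;
 *		return (cd);
 *	}
 *
 * For cds {0, 1, 2, 4} this returns 3.
 */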
/*
 * Each mzap entry requires at most 4 chunks:
 * 3 chunks for the name + 1 chunk for the value.
 */
#define	MZAP_ENT_CHUNKS	(1 + ZAP_LEAF_ARRAY_NCHUNKS(MZAP_NAME_LEN) + \
	ZAP_LEAF_ARRAY_NCHUNKS(sizeof (uint64_t)))
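/*
 * Worked example (assuming ZAP_LEAF_ARRAY_BYTES == ZAP_LEAF_CHUNKSIZE - 3
 * == 21, as defined in zap_leaf.h): ZAP_LEAF_ARRAY_NCHUNKS() is a
 * ceiling division,
 *
 *	nchunks(len) = (len + 21 - 1) / 21
 *
 * so the MZAP_NAME_LEN-byte name array and the 8-byte value each map
 * to a small fixed chunk count, plus one chunk for the entry itself.
 */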
/*
 * Check if the current entry keeps the colliding entries under the fatzap leaf
 * size.
 */
static boolean_t
mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash)
{
	zap_t *zap = zn->zn_zap;
	mzap_ent_t mze_tofind;
	zfs_btree_index_t idx;
	zfs_btree_t *tree = &zap->zap_m.zap_tree;
	uint32_t mzap_ents = 0;

	ASSERT0(hash & 0xffffffff);
	hash >>= 32;
	mze_tofind.mze_hash = hash;
	mze_tofind.mze_cd = 0;

	for (mzap_ent_t *mze = zfs_btree_find(tree, &mze_tofind, &idx);
	    mze && mze->mze_hash == hash;
	    mze = zfs_btree_next(tree, &idx, &idx)) {
		mzap_ents++;
	}

	/* Include the new entry being added */
	mzap_ents++;

	return (ZAP_LEAF_NUMCHUNKS_DEF > (mzap_ents * MZAP_ENT_CHUNKS));
}
static void
mze_destroy(zap_t *zap)
{
	zfs_btree_clear(&zap->zap_m.zap_tree);
	zfs_btree_destroy(&zap->zap_m.zap_tree);
}
static zap_t *
mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
{
	zap_t *winner;
	uint64_t *zap_hdr = (uint64_t *)db->db_data;
	uint64_t zap_block_type = zap_hdr[0];
	uint64_t zap_magic = zap_hdr[1];

	ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t));

	zap_t *zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP);
	rw_init(&zap->zap_rwlock, NULL, RW_DEFAULT, NULL);
	rw_enter(&zap->zap_rwlock, RW_WRITER);
	zap->zap_objset = os;
	zap->zap_object = obj;
	zap->zap_dbuf = db;

	if (zap_block_type != ZBT_MICRO) {
		mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, MUTEX_DEFAULT,
		    0);
		zap->zap_f.zap_block_shift = highbit64(db->db_size) - 1;
		if (zap_block_type != ZBT_HEADER || zap_magic != ZAP_MAGIC) {
			winner = NULL;	/* No actual winner here... */
			goto handle_winner;
		}
	} else {
		zap->zap_ismicro = TRUE;
	}

	/*
	 * Make sure that zap_ismicro is set before we let others see
	 * it, because zap_lockdir() checks zap_ismicro without the lock
	 * held.
	 */
	dmu_buf_init_user(&zap->zap_dbu, zap_evict_sync, NULL, &zap->zap_dbuf);
	winner = dmu_buf_set_user(db, &zap->zap_dbu);

	if (winner != NULL)
		goto handle_winner;

	if (zap->zap_ismicro) {
		zap->zap_salt = zap_m_phys(zap)->mz_salt;
		zap->zap_normflags = zap_m_phys(zap)->mz_normflags;
		zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1;

		/*
		 * Reduce B-tree leaf from 4KB to 512 bytes to reduce memmove()
		 * overhead on massive inserts below.  It still allows to store
		 * 62 entries before we have to add 2KB B-tree core node.
		 */
		zfs_btree_create_custom(&zap->zap_m.zap_tree, mze_compare,
		    mze_find_in_buf, sizeof (mzap_ent_t), 512);

		zap_name_t *zn = zap_name_alloc(zap);
		for (uint16_t i = 0; i < zap->zap_m.zap_num_chunks; i++) {
			mzap_ent_phys_t *mze =
			    &zap_m_phys(zap)->mz_chunk[i];
			if (mze->mze_name[0]) {
				zap->zap_m.zap_num_entries++;
				zap_name_init_str(zn, mze->mze_name, 0);
				mze_insert(zap, i, zn->zn_hash);
			}
		}
		zap_name_free(zn);
	} else {
		zap->zap_salt = zap_f_phys(zap)->zap_salt;
		zap->zap_normflags = zap_f_phys(zap)->zap_normflags;

		ASSERT3U(sizeof (struct zap_leaf_header), ==,
		    2*ZAP_LEAF_CHUNKSIZE);

		/*
		 * The embedded pointer table should not overlap the
		 * other members.
		 */
		ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >,
		    &zap_f_phys(zap)->zap_salt);

		/*
		 * The embedded pointer table should end at the end of
		 * the block.
		 */
		ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap,
		    1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) -
		    (uintptr_t)zap_f_phys(zap), ==,
		    zap->zap_dbuf->db_size);
	}
	rw_exit(&zap->zap_rwlock);
	return (zap);

handle_winner:
	rw_exit(&zap->zap_rwlock);
	rw_destroy(&zap->zap_rwlock);
	if (!zap->zap_ismicro)
		mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
	kmem_free(zap, sizeof (zap_t));
	return (winner);
}
/*
 * This routine "consumes" the caller's hold on the dbuf, which must
 * have the specified tag.
 */
static int
zap_lockdir_impl(dmu_buf_t *db, const void *tag, dmu_tx_t *tx,
    krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp)
{
	ASSERT0(db->db_offset);
	objset_t *os = dmu_buf_get_objset(db);
	uint64_t obj = db->db_object;
	dmu_object_info_t doi;

	*zapp = NULL;

	dmu_object_info_from_db(db, &doi);
	if (DMU_OT_BYTESWAP(doi.doi_type) != DMU_BSWAP_ZAP)
		return (SET_ERROR(EINVAL));

	zap_t *zap = dmu_buf_get_user(db);
	if (zap == NULL) {
		zap = mzap_open(os, obj, db);
		if (zap == NULL) {
			/*
			 * mzap_open() didn't like what it saw on-disk.
			 * Check for corruption!
			 */
			return (SET_ERROR(EIO));
		}
	}

	/*
	 * We're checking zap_ismicro without the lock held, in order to
	 * tell what type of lock we want.  Once we have some sort of
	 * lock, see if it really is the right type.  In practice this
	 * can only be different if it was upgraded from micro to fat,
	 * and micro wanted WRITER but fat only needs READER.
	 */
	krw_t lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti;
	rw_enter(&zap->zap_rwlock, lt);
	if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) {
		/* it was upgraded, now we only need reader */
		ASSERT(lt == RW_WRITER);
		ASSERT(RW_READER ==
		    ((!zap->zap_ismicro && fatreader) ? RW_READER : lti));
		rw_downgrade(&zap->zap_rwlock);
		lt = RW_READER;
	}

	zap->zap_objset = os;

	if (lt == RW_WRITER)
		dmu_buf_will_dirty(db, tx);

	ASSERT3P(zap->zap_dbuf, ==, db);

	ASSERT(!zap->zap_ismicro ||
	    zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks);
	if (zap->zap_ismicro && tx && adding &&
	    zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) {
		uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE;
		if (newsz > zap_micro_max_size) {
			dprintf("upgrading obj %llu: num_entries=%u\n",
			    (u_longlong_t)obj, zap->zap_m.zap_num_entries);
			*zapp = zap;
			int err = mzap_upgrade(zapp, tag, tx, 0);
			if (err != 0)
				rw_exit(&zap->zap_rwlock);
			return (err);
		}
		VERIFY0(dmu_object_set_blocksize(os, obj, newsz, 0, tx));
		zap->zap_m.zap_num_chunks =
		    db->db_size / MZAP_ENT_LEN - 1;
	}

	*zapp = zap;
	return (0);
}
static int
zap_lockdir_by_dnode(dnode_t *dn, dmu_tx_t *tx,
    krw_t lti, boolean_t fatreader, boolean_t adding, const void *tag,
    zap_t **zapp)
{
	dmu_buf_t *db;

	int err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH);
	if (err != 0)
		return (err);

	dmu_object_info_t doi;
	dmu_object_info_from_db(db, &doi);
	ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);

	err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp);
	if (err != 0)
		dmu_buf_rele(db, tag);
	return (err);
}
int
zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
    krw_t lti, boolean_t fatreader, boolean_t adding, const void *tag,
    zap_t **zapp)
{
	dmu_buf_t *db;

	int err = dmu_buf_hold(os, obj, 0, tag, &db, DMU_READ_NO_PREFETCH);
	if (err != 0)
		return (err);

	dmu_object_info_t doi;
	dmu_object_info_from_db(db, &doi);
	ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);

	err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp);
	if (err != 0)
		dmu_buf_rele(db, tag);
	return (err);
}
void
zap_unlockdir(zap_t *zap, const void *tag)
{
	rw_exit(&zap->zap_rwlock);
	dmu_buf_rele(zap->zap_dbuf, tag);
}
static int
mzap_upgrade(zap_t **zapp, const void *tag, dmu_tx_t *tx, zap_flags_t flags)
{
	int err = 0;
	zap_t *zap = *zapp;

	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));

	int sz = zap->zap_dbuf->db_size;
	mzap_phys_t *mzp = vmem_alloc(sz, KM_SLEEP);
	memcpy(mzp, zap->zap_dbuf->db_data, sz);
	int nchunks = zap->zap_m.zap_num_chunks;

	if (!flags) {
		err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object,
		    1ULL << fzap_default_block_shift, 0, tx);
		if (err != 0) {
			vmem_free(mzp, sz);
			return (err);
		}
	}

	dprintf("upgrading obj=%llu with %u chunks\n",
	    (u_longlong_t)zap->zap_object, nchunks);
	/* XXX destroy the tree later, so we can use the stored hash value */
	mze_destroy(zap);

	fzap_upgrade(zap, tx, flags);

	zap_name_t *zn = zap_name_alloc(zap);
	for (int i = 0; i < nchunks; i++) {
		mzap_ent_phys_t *mze = &mzp->mz_chunk[i];
		if (mze->mze_name[0] == 0)
			continue;
		dprintf("adding %s=%llu\n",
		    mze->mze_name, (u_longlong_t)mze->mze_value);
		zap_name_init_str(zn, mze->mze_name, 0);
		/* If we fail here, we would end up losing entries */
		VERIFY0(fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd,
		    tag, tx));
		zap = zn->zn_zap;	/* fzap_add_cd() may change zap */
	}
	zap_name_free(zn);
	vmem_free(mzp, sz);
	*zapp = zap;
	return (err);
}
/*
 * The "normflags" determine the behavior of the matchtype_t which is
 * passed to zap_lookup_norm().  Names which have the same normalized
 * version will be stored with the same hash value, and therefore we can
 * perform normalization-insensitive lookups.  We can be Unicode form-
 * insensitive and/or case-insensitive.  The following flags are valid for
 * "normflags":
 *
 * U8_TEXTPREP_NFC
 * U8_TEXTPREP_NFD
 * U8_TEXTPREP_NFKC
 * U8_TEXTPREP_NFKD
 * U8_TEXTPREP_TOUPPER
 *
 * The *_NF* (Normalization Form) flags are mutually exclusive; at most one
 * of them may be supplied.
 */
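/*
 * Illustrative sketch (assuming a held objset_t *os and an assigned
 * dmu_tx_t *tx; the object and key names are hypothetical): create a
 * case-insensitive ZAP and look a name up case-insensitively with
 * MT_NORMALIZE.
 *
 *	uint64_t obj = zap_create_norm(os, U8_TEXTPREP_TOUPPER,
 *	    DMU_OT_DIRECTORY_CONTENTS, DMU_OT_NONE, 0, tx);
 *
 *	uint64_t value;
 *	char real[ZAP_MAXNAMELEN];
 *	int err = zap_lookup_norm(os, obj, "FiLeNaMe", 8, 1, &value,
 *	    MT_NORMALIZE, real, sizeof (real), NULL);
 *
 * On success "real" receives the name as actually stored on disk.
 */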
static void
mzap_create_impl(dnode_t *dn, int normflags, zap_flags_t flags, dmu_tx_t *tx)
{
	dmu_buf_t *db;

	VERIFY0(dmu_buf_hold_by_dnode(dn, 0, FTAG, &db, DMU_READ_NO_PREFETCH));

	dmu_buf_will_dirty(db, tx);
	mzap_phys_t *zp = db->db_data;
	zp->mz_block_type = ZBT_MICRO;
	zp->mz_salt =
	    ((uintptr_t)db ^ (uintptr_t)tx ^ (dn->dn_object << 1)) | 1ULL;
	zp->mz_normflags = normflags;

	if (flags != 0) {
		zap_t *zap;
		/* Only fat zap supports flags; upgrade immediately. */
		VERIFY0(zap_lockdir_impl(db, FTAG, tx, RW_WRITER,
		    B_FALSE, B_FALSE, &zap));
		VERIFY0(mzap_upgrade(&zap, FTAG, tx, flags));
		zap_unlockdir(zap, FTAG);
	} else {
		dmu_buf_rele(db, FTAG);
	}
}
static uint64_t
zap_create_impl(objset_t *os, int normflags, zap_flags_t flags,
    dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
    dmu_object_type_t bonustype, int bonuslen, int dnodesize,
    dnode_t **allocated_dnode, const void *tag, dmu_tx_t *tx)
{
	uint64_t obj;

	ASSERT3U(DMU_OT_BYTESWAP(ot), ==, DMU_BSWAP_ZAP);

	if (allocated_dnode == NULL) {
		dnode_t *dn;
		obj = dmu_object_alloc_hold(os, ot, 1ULL << leaf_blockshift,
		    indirect_blockshift, bonustype, bonuslen, dnodesize,
		    &dn, FTAG, tx);
		mzap_create_impl(dn, normflags, flags, tx);
		dnode_rele(dn, FTAG);
	} else {
		obj = dmu_object_alloc_hold(os, ot, 1ULL << leaf_blockshift,
		    indirect_blockshift, bonustype, bonuslen, dnodesize,
		    allocated_dnode, tag, tx);
		mzap_create_impl(*allocated_dnode, normflags, flags, tx);
	}

	return (obj);
}
int
zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot,
    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
	return (zap_create_claim_dnsize(os, obj, ot, bonustype, bonuslen,
	    0, tx));
}
int
zap_create_claim_dnsize(objset_t *os, uint64_t obj, dmu_object_type_t ot,
    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
{
	return (zap_create_claim_norm_dnsize(os, obj,
	    0, ot, bonustype, bonuslen, dnodesize, tx));
}
int
zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags,
    dmu_object_type_t ot,
    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
	return (zap_create_claim_norm_dnsize(os, obj, normflags, ot, bonustype,
	    bonuslen, 0, tx));
}
int
zap_create_claim_norm_dnsize(objset_t *os, uint64_t obj, int normflags,
    dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen,
    int dnodesize, dmu_tx_t *tx)
{
	dnode_t *dn;
	int error;

	ASSERT3U(DMU_OT_BYTESWAP(ot), ==, DMU_BSWAP_ZAP);
	error = dmu_object_claim_dnsize(os, obj, ot, 0, bonustype, bonuslen,
	    dnodesize, tx);
	if (error != 0)
		return (error);

	error = dnode_hold(os, obj, FTAG, &dn);
	if (error != 0)
		return (error);

	mzap_create_impl(dn, normflags, 0, tx);

	dnode_rele(dn, FTAG);

	return (0);
}
uint64_t
zap_create(objset_t *os, dmu_object_type_t ot,
    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
	return (zap_create_norm(os, 0, ot, bonustype, bonuslen, tx));
}
uint64_t
zap_create_dnsize(objset_t *os, dmu_object_type_t ot,
    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
{
	return (zap_create_norm_dnsize(os, 0, ot, bonustype, bonuslen,
	    dnodesize, tx));
}
uint64_t
zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot,
    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
	return (zap_create_norm_dnsize(os, normflags, ot, bonustype, bonuslen,
	    0, tx));
}
uint64_t
zap_create_norm_dnsize(objset_t *os, int normflags, dmu_object_type_t ot,
    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
{
	return (zap_create_impl(os, normflags, 0, ot, 0, 0,
	    bonustype, bonuslen, dnodesize, NULL, NULL, tx));
}
uint64_t
zap_create_flags(objset_t *os, int normflags, zap_flags_t flags,
    dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
	return (zap_create_flags_dnsize(os, normflags, flags, ot,
	    leaf_blockshift, indirect_blockshift, bonustype, bonuslen, 0, tx));
}
uint64_t
zap_create_flags_dnsize(objset_t *os, int normflags, zap_flags_t flags,
    dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
{
	return (zap_create_impl(os, normflags, flags, ot, leaf_blockshift,
	    indirect_blockshift, bonustype, bonuslen, dnodesize, NULL, NULL,
	    tx));
}
/*
 * Create a zap object and return a pointer to the newly allocated dnode via
 * the allocated_dnode argument.  The returned dnode will be held and the
 * caller is responsible for releasing the hold by calling dnode_rele().
 */
uint64_t
zap_create_hold(objset_t *os, int normflags, zap_flags_t flags,
    dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
    dmu_object_type_t bonustype, int bonuslen, int dnodesize,
    dnode_t **allocated_dnode, const void *tag, dmu_tx_t *tx)
{
	return (zap_create_impl(os, normflags, flags, ot, leaf_blockshift,
	    indirect_blockshift, bonustype, bonuslen, dnodesize,
	    allocated_dnode, tag, tx));
}
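/*
 * Illustrative sketch (parameter values are hypothetical): the caller
 * receives the held dnode through allocated_dnode and must release it
 * itself once done with it.
 *
 *	dnode_t *dn;
 *	uint64_t obj = zap_create_hold(os, 0, 0, DMU_OT_ZAP_OTHER,
 *	    SPA_MINBLOCKSHIFT, 0, DMU_OT_NONE, 0, 0, &dn, FTAG, tx);
 *	... use dn ...
 *	dnode_rele(dn, FTAG);
 */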
int
zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx)
{
	/*
	 * dmu_object_free will free the object number and free the
	 * data.  Freeing the data will cause our pageout function to be
	 * called, which will destroy our data (zap_leaf_t's and zap_t).
	 */

	return (dmu_object_free(os, zapobj, tx));
}
void
zap_evict_sync(void *dbu)
{
	zap_t *zap = dbu;

	rw_destroy(&zap->zap_rwlock);

	if (zap->zap_ismicro)
		mze_destroy(zap);
	else
		mutex_destroy(&zap->zap_f.zap_num_entries_mtx);

	kmem_free(zap, sizeof (zap_t));
}
int
zap_count(objset_t *os, uint64_t zapobj, uint64_t *count)
{
	zap_t *zap;

	int err =
	    zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
	if (err != 0)
		return (err);
	if (!zap->zap_ismicro) {
		err = fzap_count(zap, count);
	} else {
		*count = zap->zap_m.zap_num_entries;
	}
	zap_unlockdir(zap, FTAG);
	return (err);
}
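/*
 * Illustrative sketch: micro and fat ZAPs report through the same
 * interface, so callers only need to check the error.
 *
 *	uint64_t count;
 *	if (zap_count(os, zapobj, &count) == 0)
 *		dprintf("zap %llu holds %llu entries\n",
 *		    (u_longlong_t)zapobj, (u_longlong_t)count);
 */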
/*
 * zn may be NULL; if not specified, it will be computed if needed.
 * See also the comment above zap_entry_normalization_conflict().
 */
static boolean_t
mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze,
    zfs_btree_index_t *idx)
{
	boolean_t allocdzn = B_FALSE;
	mzap_ent_t *other;
	zfs_btree_index_t oidx;

	if (zap->zap_normflags == 0)
		return (B_FALSE);

	for (other = zfs_btree_prev(&zap->zap_m.zap_tree, idx, &oidx);
	    other && other->mze_hash == mze->mze_hash;
	    other = zfs_btree_prev(&zap->zap_m.zap_tree, &oidx, &oidx)) {
		if (zn == NULL) {
			zn = zap_name_alloc_str(zap,
			    MZE_PHYS(zap, mze)->mze_name, MT_NORMALIZE);
			allocdzn = B_TRUE;
		}
		if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) {
			if (allocdzn)
				zap_name_free(zn);
			return (B_TRUE);
		}
	}

	for (other = zfs_btree_next(&zap->zap_m.zap_tree, idx, &oidx);
	    other && other->mze_hash == mze->mze_hash;
	    other = zfs_btree_next(&zap->zap_m.zap_tree, &oidx, &oidx)) {
		if (zn == NULL) {
			zn = zap_name_alloc_str(zap,
			    MZE_PHYS(zap, mze)->mze_name, MT_NORMALIZE);
			allocdzn = B_TRUE;
		}
		if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) {
			if (allocdzn)
				zap_name_free(zn);
			return (B_TRUE);
		}
	}

	if (allocdzn)
		zap_name_free(zn);
	return (B_FALSE);
}
/*
 * Routines for manipulating attributes.
 */

int
zap_lookup(objset_t *os, uint64_t zapobj, const char *name,
    uint64_t integer_size, uint64_t num_integers, void *buf)
{
	return (zap_lookup_norm(os, zapobj, name, integer_size,
	    num_integers, buf, 0, NULL, 0, NULL));
}
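/*
 * Illustrative sketch (assuming a held objset_t *os and an assigned
 * dmu_tx_t *tx; the key is hypothetical): store and fetch one 64-bit
 * integer under a string key.
 *
 *	uint64_t in = 42;
 *	VERIFY0(zap_add(os, zapobj, "answer", sizeof (uint64_t), 1,
 *	    &in, tx));
 *
 *	uint64_t out;
 *	int err = zap_lookup(os, zapobj, "answer", sizeof (uint64_t), 1,
 *	    &out);
 */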
static int
zap_lookup_impl(zap_t *zap, const char *name,
    uint64_t integer_size, uint64_t num_integers, void *buf,
    matchtype_t mt, char *realname, int rn_len,
    boolean_t *ncp)
{
	int err = 0;

	zap_name_t *zn = zap_name_alloc_str(zap, name, mt);
	if (zn == NULL)
		return (SET_ERROR(ENOTSUP));

	if (!zap->zap_ismicro) {
		err = fzap_lookup(zn, integer_size, num_integers, buf,
		    realname, rn_len, ncp);
	} else {
		zfs_btree_index_t idx;
		mzap_ent_t *mze = mze_find(zn, &idx);
		if (mze == NULL) {
			err = SET_ERROR(ENOENT);
		} else {
			if (num_integers < 1) {
				err = SET_ERROR(EOVERFLOW);
			} else if (integer_size != 8) {
				err = SET_ERROR(EINVAL);
			} else {
				*(uint64_t *)buf =
				    MZE_PHYS(zap, mze)->mze_value;
				if (realname != NULL)
					(void) strlcpy(realname,
					    MZE_PHYS(zap, mze)->mze_name,
					    rn_len);
				if (ncp) {
					*ncp = mzap_normalization_conflict(zap,
					    zn, mze, &idx);
				}
			}
		}
	}
	zap_name_free(zn);
	return (err);
}
int
zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name,
    uint64_t integer_size, uint64_t num_integers, void *buf,
    matchtype_t mt, char *realname, int rn_len,
    boolean_t *ncp)
{
	zap_t *zap;

	int err =
	    zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
	if (err != 0)
		return (err);
	err = zap_lookup_impl(zap, name, integer_size,
	    num_integers, buf, mt, realname, rn_len, ncp);
	zap_unlockdir(zap, FTAG);
	return (err);
}
int
zap_prefetch(objset_t *os, uint64_t zapobj, const char *name)
{
	zap_t *zap;
	int err;
	zap_name_t *zn;

	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
	if (err)
		return (err);
	zn = zap_name_alloc_str(zap, name, 0);
	if (zn == NULL) {
		zap_unlockdir(zap, FTAG);
		return (SET_ERROR(ENOTSUP));
	}

	fzap_prefetch(zn);
	zap_name_free(zn);
	zap_unlockdir(zap, FTAG);
	return (err);
}
int
zap_lookup_by_dnode(dnode_t *dn, const char *name,
    uint64_t integer_size, uint64_t num_integers, void *buf)
{
	return (zap_lookup_norm_by_dnode(dn, name, integer_size,
	    num_integers, buf, 0, NULL, 0, NULL));
}
int
zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
    uint64_t integer_size, uint64_t num_integers, void *buf,
    matchtype_t mt, char *realname, int rn_len,
    boolean_t *ncp)
{
	zap_t *zap;

	int err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
	    FTAG, &zap);
	if (err != 0)
		return (err);
	err = zap_lookup_impl(zap, name, integer_size,
	    num_integers, buf, mt, realname, rn_len, ncp);
	zap_unlockdir(zap, FTAG);
	return (err);
}
int
zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
    int key_numints)
{
	zap_t *zap;

	int err =
	    zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
	if (err != 0)
		return (err);
	zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
	if (zn == NULL) {
		zap_unlockdir(zap, FTAG);
		return (SET_ERROR(ENOTSUP));
	}

	fzap_prefetch(zn);
	zap_name_free(zn);
	zap_unlockdir(zap, FTAG);
	return (err);
}
int
zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
    int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf)
{
	zap_t *zap;

	int err =
	    zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
	if (err != 0)
		return (err);
	zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
	if (zn == NULL) {
		zap_unlockdir(zap, FTAG);
		return (SET_ERROR(ENOTSUP));
	}

	err = fzap_lookup(zn, integer_size, num_integers, buf,
	    NULL, 0, NULL);
	zap_name_free(zn);
	zap_unlockdir(zap, FTAG);
	return (err);
}
int
zap_contains(objset_t *os, uint64_t zapobj, const char *name)
{
	int err = zap_lookup_norm(os, zapobj, name, 0,
	    0, NULL, 0, NULL, 0, NULL);
	if (err == EOVERFLOW || err == EINVAL)
		err = 0; /* found, but skipped reading the value */
	return (err);
}
int
zap_length(objset_t *os, uint64_t zapobj, const char *name,
    uint64_t *integer_size, uint64_t *num_integers)
{
	zap_t *zap;

	int err =
	    zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
	if (err != 0)
		return (err);
	zap_name_t *zn = zap_name_alloc_str(zap, name, 0);
	if (zn == NULL) {
		zap_unlockdir(zap, FTAG);
		return (SET_ERROR(ENOTSUP));
	}
	if (!zap->zap_ismicro) {
		err = fzap_length(zn, integer_size, num_integers);
	} else {
		zfs_btree_index_t idx;
		mzap_ent_t *mze = mze_find(zn, &idx);
		if (mze == NULL) {
			err = SET_ERROR(ENOENT);
		} else {
			if (integer_size)
				*integer_size = 8;
			if (num_integers)
				*num_integers = 1;
		}
	}
	zap_name_free(zn);
	zap_unlockdir(zap, FTAG);
	return (err);
}
int
zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
    int key_numints, uint64_t *integer_size, uint64_t *num_integers)
{
	zap_t *zap;

	int err =
	    zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
	if (err != 0)
		return (err);
	zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
	if (zn == NULL) {
		zap_unlockdir(zap, FTAG);
		return (SET_ERROR(ENOTSUP));
	}
	err = fzap_length(zn, integer_size, num_integers);
	zap_name_free(zn);
	zap_unlockdir(zap, FTAG);
	return (err);
}
static void
mzap_addent(zap_name_t *zn, uint64_t value)
{
	zap_t *zap = zn->zn_zap;
	uint16_t start = zap->zap_m.zap_alloc_next;

	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));

#ifdef ZFS_DEBUG
	for (int i = 0; i < zap->zap_m.zap_num_chunks; i++) {
		mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i];
		ASSERT(strcmp(zn->zn_key_orig, mze->mze_name) != 0);
	}
#endif

	uint32_t cd = mze_find_unused_cd(zap, zn->zn_hash);
	/* given the limited size of the microzap, this can't happen */
	ASSERT(cd < zap_maxcd(zap));

again:
	for (uint16_t i = start; i < zap->zap_m.zap_num_chunks; i++) {
		mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i];
		if (mze->mze_name[0] == 0) {
			mze->mze_value = value;
			mze->mze_cd = cd;
			(void) strlcpy(mze->mze_name, zn->zn_key_orig,
			    sizeof (mze->mze_name));
			zap->zap_m.zap_num_entries++;
			zap->zap_m.zap_alloc_next = i+1;
			if (zap->zap_m.zap_alloc_next ==
			    zap->zap_m.zap_num_chunks)
				zap->zap_m.zap_alloc_next = 0;
			mze_insert(zap, i, zn->zn_hash);
			return;
		}
	}
	if (start != 0) {
		start = 0;
		goto again;
	}
	cmn_err(CE_PANIC, "out of entries!");
}
static int
zap_add_impl(zap_t *zap, const char *key,
    int integer_size, uint64_t num_integers,
    const void *val, dmu_tx_t *tx, const void *tag)
{
	const uint64_t *intval = val;
	int err = 0;

	zap_name_t *zn = zap_name_alloc_str(zap, key, 0);
	if (zn == NULL) {
		zap_unlockdir(zap, tag);
		return (SET_ERROR(ENOTSUP));
	}
	if (!zap->zap_ismicro) {
		err = fzap_add(zn, integer_size, num_integers, val, tag, tx);
		zap = zn->zn_zap;	/* fzap_add() may change zap */
	} else if (integer_size != 8 || num_integers != 1 ||
	    strlen(key) >= MZAP_NAME_LEN ||
	    !mze_canfit_fzap_leaf(zn, zn->zn_hash)) {
		err = mzap_upgrade(&zn->zn_zap, tag, tx, 0);
		if (err == 0) {
			err = fzap_add(zn, integer_size, num_integers, val,
			    tag, tx);
		}
		zap = zn->zn_zap;	/* fzap_add() may change zap */
	} else {
		zfs_btree_index_t idx;
		if (mze_find(zn, &idx) != NULL) {
			err = SET_ERROR(EEXIST);
		} else {
			mzap_addent(zn, *intval);
		}
	}
	ASSERT(zap == zn->zn_zap);
	zap_name_free(zn);
	if (zap != NULL)	/* may be NULL if fzap_add() failed */
		zap_unlockdir(zap, tag);
	return (err);
}
int
zap_add(objset_t *os, uint64_t zapobj, const char *key,
    int integer_size, uint64_t num_integers,
    const void *val, dmu_tx_t *tx)
{
	zap_t *zap;
	int err;

	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
	if (err != 0)
		return (err);
	err = zap_add_impl(zap, key, integer_size, num_integers, val, tx, FTAG);
	/* zap_add_impl() calls zap_unlockdir() */
	return (err);
}
int
zap_add_by_dnode(dnode_t *dn, const char *key,
    int integer_size, uint64_t num_integers,
    const void *val, dmu_tx_t *tx)
{
	zap_t *zap;
	int err;

	err = zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
	if (err != 0)
		return (err);
	err = zap_add_impl(zap, key, integer_size, num_integers, val, tx, FTAG);
	/* zap_add_impl() calls zap_unlockdir() */
	return (err);
}
int
zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
    int key_numints, int integer_size, uint64_t num_integers,
    const void *val, dmu_tx_t *tx)
{
	zap_t *zap;

	int err =
	    zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
	if (err != 0)
		return (err);
	zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
	if (zn == NULL) {
		zap_unlockdir(zap, FTAG);
		return (SET_ERROR(ENOTSUP));
	}
	err = fzap_add(zn, integer_size, num_integers, val, FTAG, tx);
	zap = zn->zn_zap;	/* fzap_add() may change zap */
	zap_name_free(zn);
	if (zap != NULL)	/* may be NULL if fzap_add() failed */
		zap_unlockdir(zap, FTAG);
	return (err);
}
int
zap_update(objset_t *os, uint64_t zapobj, const char *name,
    int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
{
	zap_t *zap;

	const uint64_t *intval = val;

	int err =
	    zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
	if (err != 0)
		return (err);
	zap_name_t *zn = zap_name_alloc_str(zap, name, 0);
	if (zn == NULL) {
		zap_unlockdir(zap, FTAG);
		return (SET_ERROR(ENOTSUP));
	}
	if (!zap->zap_ismicro) {
		err = fzap_update(zn, integer_size, num_integers, val,
		    FTAG, tx);
		zap = zn->zn_zap;	/* fzap_update() may change zap */
	} else if (integer_size != 8 || num_integers != 1 ||
	    strlen(name) >= MZAP_NAME_LEN) {
		dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n",
		    (u_longlong_t)zapobj, integer_size,
		    (u_longlong_t)num_integers, name);
		err = mzap_upgrade(&zn->zn_zap, FTAG, tx, 0);
		if (err == 0) {
			err = fzap_update(zn, integer_size, num_integers,
			    val, FTAG, tx);
		}
		zap = zn->zn_zap;	/* fzap_update() may change zap */
	} else {
		zfs_btree_index_t idx;
		mzap_ent_t *mze = mze_find(zn, &idx);
		if (mze != NULL) {
			MZE_PHYS(zap, mze)->mze_value = *intval;
		} else {
			mzap_addent(zn, *intval);
		}
	}
	ASSERT(zap == zn->zn_zap);
	zap_name_free(zn);
	if (zap != NULL)	/* may be NULL if fzap_upgrade() failed */
		zap_unlockdir(zap, FTAG);
	return (err);
}
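/*
 * Illustrative sketch: unlike zap_add(), which returns EEXIST for a
 * duplicate key, zap_update() overwrites in place, so it suits
 * "upsert"-style bookkeeping (the key here is hypothetical):
 *
 *	uint64_t gen = 7;
 *	VERIFY0(zap_update(os, zapobj, "generation",
 *	    sizeof (uint64_t), 1, &gen, tx));
 */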
int
zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
    int key_numints,
    int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
{
	zap_t *zap;

	int err =
	    zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
	if (err != 0)
		return (err);
	zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
	if (zn == NULL) {
		zap_unlockdir(zap, FTAG);
		return (SET_ERROR(ENOTSUP));
	}
	err = fzap_update(zn, integer_size, num_integers, val, FTAG, tx);
	zap = zn->zn_zap;	/* fzap_update() may change zap */
	zap_name_free(zn);
	if (zap != NULL)	/* may be NULL if fzap_upgrade() failed */
		zap_unlockdir(zap, FTAG);
	return (err);
}
int
zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx)
{
	return (zap_remove_norm(os, zapobj, name, 0, tx));
}
static int
zap_remove_impl(zap_t *zap, const char *name,
    matchtype_t mt, dmu_tx_t *tx)
{
	int err = 0;

	zap_name_t *zn = zap_name_alloc_str(zap, name, mt);
	if (zn == NULL)
		return (SET_ERROR(ENOTSUP));
	if (!zap->zap_ismicro) {
		err = fzap_remove(zn, tx);
	} else {
		zfs_btree_index_t idx;
		mzap_ent_t *mze = mze_find(zn, &idx);
		if (mze == NULL) {
			err = SET_ERROR(ENOENT);
		} else {
			zap->zap_m.zap_num_entries--;
			memset(MZE_PHYS(zap, mze), 0, sizeof (mzap_ent_phys_t));
			zfs_btree_remove_idx(&zap->zap_m.zap_tree, &idx);
		}
	}
	zap_name_free(zn);
	return (err);
}
int
zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name,
    matchtype_t mt, dmu_tx_t *tx)
{
	zap_t *zap;
	int err;

	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
	if (err)
		return (err);
	err = zap_remove_impl(zap, name, mt, tx);
	zap_unlockdir(zap, FTAG);
	return (err);
}
int
zap_remove_by_dnode(dnode_t *dn, const char *name, dmu_tx_t *tx)
{
	zap_t *zap;
	int err;

	err = zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
	if (err)
		return (err);
	err = zap_remove_impl(zap, name, 0, tx);
	zap_unlockdir(zap, FTAG);
	return (err);
}
int
zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
    int key_numints, dmu_tx_t *tx)
{
	zap_t *zap;

	int err =
	    zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
	if (err != 0)
		return (err);
	zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
	if (zn == NULL) {
		zap_unlockdir(zap, FTAG);
		return (SET_ERROR(ENOTSUP));
	}
	err = fzap_remove(zn, tx);
	zap_name_free(zn);
	zap_unlockdir(zap, FTAG);
	return (err);
}
/*
 * Routines for iterating over the attributes.
 */

static void
zap_cursor_init_impl(zap_cursor_t *zc, objset_t *os, uint64_t zapobj,
    uint64_t serialized, boolean_t prefetch)
{
	zc->zc_objset = os;
	zc->zc_zap = NULL;
	zc->zc_leaf = NULL;
	zc->zc_zapobj = zapobj;
	zc->zc_serialized = serialized;
	zc->zc_hash = 0;
	zc->zc_cd = 0;
	zc->zc_prefetch = prefetch;
}
void
zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj,
    uint64_t serialized)
{
	zap_cursor_init_impl(zc, os, zapobj, serialized, B_TRUE);
}
/*
 * Initialize a cursor at the beginning of the ZAP object.  The entire
 * ZAP object will be prefetched.
 */
void
zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj)
{
	zap_cursor_init_impl(zc, os, zapobj, 0, B_TRUE);
}
/*
 * Initialize a cursor at the beginning, but request that we not prefetch
 * the entire ZAP object.
 */
void
zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os, uint64_t zapobj)
{
	zap_cursor_init_impl(zc, os, zapobj, 0, B_FALSE);
}
void
zap_cursor_fini(zap_cursor_t *zc)
{
	if (zc->zc_zap) {
		rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
		zap_unlockdir(zc->zc_zap, NULL);
		zc->zc_zap = NULL;
	}
	if (zc->zc_leaf) {
		rw_enter(&zc->zc_leaf->l_rwlock, RW_READER);
		zap_put_leaf(zc->zc_leaf);
		zc->zc_leaf = NULL;
	}
	zc->zc_objset = NULL;
}
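/*
 * Illustrative sketch: the canonical iteration pattern over a ZAP
 * object; each successful retrieve fills in one attribute.
 *
 *	zap_cursor_t zc;
 *	zap_attribute_t za;
 *
 *	for (zap_cursor_init(&zc, os, zapobj);
 *	    zap_cursor_retrieve(&zc, &za) == 0;
 *	    zap_cursor_advance(&zc)) {
 *		dprintf("%s = %llu\n", za.za_name,
 *		    (u_longlong_t)za.za_first_integer);
 *	}
 *	zap_cursor_fini(&zc);
 */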
uint64_t
zap_cursor_serialize(zap_cursor_t *zc)
{
	if (zc->zc_hash == -1ULL)
		return (-1ULL);
	if (zc->zc_zap == NULL)
		return (zc->zc_serialized);
	ASSERT((zc->zc_hash & zap_maxcd(zc->zc_zap)) == 0);
	ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap));

	/*
	 * We want to keep the high 32 bits of the cursor zero if we can, so
	 * that 32-bit programs can access this.  So usually use a small
	 * (28-bit) hash value so we can fit 4 bits of cd into the low 32-bits
	 * of the cursor.
	 *
	 * [ collision differentiator | zap_hashbits()-bit hash value ]
	 */
	return ((zc->zc_hash >> (64 - zap_hashbits(zc->zc_zap))) |
	    ((uint64_t)zc->zc_cd << zap_hashbits(zc->zc_zap)));
}
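/*
 * Illustrative sketch (user-space): packing and unpacking the cookie
 * with the default 28 hash bits, mirroring the expression above and
 * the rehydration in zap_cursor_retrieve():
 *
 *	#include <assert.h>
 *	#include <stdint.h>
 *
 *	int
 *	main(void)
 *	{
 *		const int hb = 28;
 *		uint64_t hash = 0xabcdef1ULL << (64 - hb);
 *		uint64_t cd = 3;
 *		uint64_t cookie = (hash >> (64 - hb)) | (cd << hb);
 *		assert((cookie << (64 - hb)) == hash);
 *		assert((cookie >> hb) == cd);
 *		return (0);
 *	}
 */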
int
zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za)
{
	int err;

	if (zc->zc_hash == -1ULL)
		return (SET_ERROR(ENOENT));

	if (zc->zc_zap == NULL) {
		int hb;
		err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL,
		    RW_READER, TRUE, FALSE, NULL, &zc->zc_zap);
		if (err != 0)
			return (err);

		/*
		 * To support zap_cursor_init_serialized, advance, retrieve,
		 * we must add to the existing zc_cd, which may already
		 * be 1 due to the zap_cursor_advance.
		 */
		ASSERT(zc->zc_hash == 0);
		hb = zap_hashbits(zc->zc_zap);
		zc->zc_hash = zc->zc_serialized << (64 - hb);
		zc->zc_cd += zc->zc_serialized >> hb;
		if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */
			zc->zc_cd = 0;
	} else {
		rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
	}
	if (!zc->zc_zap->zap_ismicro) {
		err = fzap_cursor_retrieve(zc->zc_zap, zc, za);
	} else {
		zfs_btree_index_t idx;
		mzap_ent_t mze_tofind;

		mze_tofind.mze_hash = zc->zc_hash >> 32;
		mze_tofind.mze_cd = zc->zc_cd;

		mzap_ent_t *mze = zfs_btree_find(&zc->zc_zap->zap_m.zap_tree,
		    &mze_tofind, &idx);
		if (mze == NULL) {
			mze = zfs_btree_next(&zc->zc_zap->zap_m.zap_tree,
			    &idx, &idx);
		}
		if (mze) {
			mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze);
			ASSERT3U(mze->mze_cd, ==, mzep->mze_cd);
			za->za_normalization_conflict =
			    mzap_normalization_conflict(zc->zc_zap, NULL,
			    mze, &idx);
			za->za_integer_length = 8;
			za->za_num_integers = 1;
			za->za_first_integer = mzep->mze_value;
			(void) strlcpy(za->za_name, mzep->mze_name,
			    sizeof (za->za_name));
			zc->zc_hash = (uint64_t)mze->mze_hash << 32;
			zc->zc_cd = mze->mze_cd;
			err = 0;
		} else {
			zc->zc_hash = -1ULL;
			err = SET_ERROR(ENOENT);
		}
	}
	rw_exit(&zc->zc_zap->zap_rwlock);
	return (err);
}
void
zap_cursor_advance(zap_cursor_t *zc)
{
	if (zc->zc_hash == -1ULL)
		return;
	zc->zc_cd++;
}
int
zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs)
{
	zap_t *zap;

	int err =
	    zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
	if (err != 0)
		return (err);

	memset(zs, 0, sizeof (zap_stats_t));

	if (zap->zap_ismicro) {
		zs->zs_blocksize = zap->zap_dbuf->db_size;
		zs->zs_num_entries = zap->zap_m.zap_num_entries;
		zs->zs_num_blocks = 1;
	} else {
		fzap_get_stats(zap, zs);
	}
	zap_unlockdir(zap, FTAG);
	return (0);
}
#if defined(_KERNEL)
EXPORT_SYMBOL(zap_create);
EXPORT_SYMBOL(zap_create_dnsize);
EXPORT_SYMBOL(zap_create_norm);
EXPORT_SYMBOL(zap_create_norm_dnsize);
EXPORT_SYMBOL(zap_create_flags);
EXPORT_SYMBOL(zap_create_flags_dnsize);
EXPORT_SYMBOL(zap_create_claim);
EXPORT_SYMBOL(zap_create_claim_norm);
EXPORT_SYMBOL(zap_create_claim_norm_dnsize);
EXPORT_SYMBOL(zap_create_hold);
EXPORT_SYMBOL(zap_destroy);
EXPORT_SYMBOL(zap_lookup);
EXPORT_SYMBOL(zap_lookup_by_dnode);
EXPORT_SYMBOL(zap_lookup_norm);
EXPORT_SYMBOL(zap_lookup_uint64);
EXPORT_SYMBOL(zap_contains);
EXPORT_SYMBOL(zap_prefetch);
EXPORT_SYMBOL(zap_prefetch_uint64);
EXPORT_SYMBOL(zap_add);
EXPORT_SYMBOL(zap_add_by_dnode);
EXPORT_SYMBOL(zap_add_uint64);
EXPORT_SYMBOL(zap_update);
EXPORT_SYMBOL(zap_update_uint64);
EXPORT_SYMBOL(zap_length);
EXPORT_SYMBOL(zap_length_uint64);
EXPORT_SYMBOL(zap_remove);
EXPORT_SYMBOL(zap_remove_by_dnode);
EXPORT_SYMBOL(zap_remove_norm);
EXPORT_SYMBOL(zap_remove_uint64);
EXPORT_SYMBOL(zap_count);
EXPORT_SYMBOL(zap_value_search);
EXPORT_SYMBOL(zap_join);
EXPORT_SYMBOL(zap_join_increment);
EXPORT_SYMBOL(zap_add_int);
EXPORT_SYMBOL(zap_remove_int);
EXPORT_SYMBOL(zap_lookup_int);
EXPORT_SYMBOL(zap_increment_int);
EXPORT_SYMBOL(zap_add_int_key);
EXPORT_SYMBOL(zap_lookup_int_key);
EXPORT_SYMBOL(zap_increment);
EXPORT_SYMBOL(zap_cursor_init);
EXPORT_SYMBOL(zap_cursor_fini);
EXPORT_SYMBOL(zap_cursor_retrieve);
EXPORT_SYMBOL(zap_cursor_advance);
EXPORT_SYMBOL(zap_cursor_serialize);
EXPORT_SYMBOL(zap_cursor_init_serialized);
EXPORT_SYMBOL(zap_get_stats);
#endif
ZFS_MODULE_PARAM(zfs, , zap_micro_max_size, INT, ZMOD_RW,
	"Maximum micro ZAP size, before converting to a fat ZAP, in bytes");