4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2018 by Delphix. All rights reserved.
25 * Copyright (c) 2023, Klara Inc.
28 #include <sys/zfs_context.h>
32 #include <sys/ddt_impl.h>
34 #include <sys/dmu_tx.h>
35 #include <sys/zio_compress.h>
/*
 * Default block shifts handed to zap_create_flags() when a DDT ZAP object
 * is created: the leaf blockshift (bs) and the indirect blockshift (ibs).
 * Both start at 15 and are exported as zfs_dedup module parameters.
 */
37 static unsigned int ddt_zap_default_bs
= 15;
38 static unsigned int ddt_zap_default_ibs
= 15;
/*
 * Layout of the version byte that prefixes every stored DDT ZAP value:
 * the top bit (0x80) is the byteorder flag and the low seven bits (0x7f)
 * select the compression function.
 */
40 #define DDT_ZAP_COMPRESS_BYTEORDER_MASK 0x80
41 #define DDT_ZAP_COMPRESS_FUNCTION_MASK 0x7f
/* Number of uint64_t words in a ddt_key_t; used as the ZAP key length. */
43 #define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t))
/*
 * Encode a DDT value for storage in the ZAP.
 *
 * src/s_len describe the uncompressed input; dst/d_len describe the output
 * buffer, which must hold at least s_len + 1 bytes. The first byte of dst
 * is reserved as a version byte and the ZLE compressor writes into the
 * remainder. The version byte gets the byteorder flag when
 * ZFS_HOST_BYTEORDER is set.
 *
 * NOTE(review): ddt_zap_update() stores the return value as the ZAP value
 * length, so it presumably counts the version byte too -- confirm.
 */
46 ddt_zap_compress(const void *src
, uchar_t
*dst
, size_t s_len
, size_t d_len
)
/* Reserve dst[0] for the version byte; dst now points at the payload. */
48 uchar_t
*version
= dst
++;
49 int cpfunc
= ZIO_COMPRESS_ZLE
;
50 zio_compress_info_t
*ci
= &zio_compress_table
[cpfunc
];
53 ASSERT3U(d_len
, >=, s_len
+ 1); /* no compression plus version byte */
55 /* Call compress function directly to avoid hole detection. */
/* Wrap the raw source and destination buffers in ABDs for ci_compress. */
57 abd_get_from_buf_struct(&sabd
, (void *)src
, s_len
);
58 abd_get_from_buf_struct(&dabd
, dst
, d_len
);
/* Only d_len - 1 bytes are offered: one byte went to the version. */
59 c_len
= ci
->ci_compress(&sabd
, &dabd
, s_len
, d_len
- 1, ci
->ci_level
);
/*
 * Fallback: mark the value as uncompressed and copy it verbatim.
 * NOTE(review): presumably taken only when ZLE failed to shrink the
 * data -- confirm the guarding condition.
 */
64 cpfunc
= ZIO_COMPRESS_OFF
;
65 memcpy(dst
, src
, s_len
);
/* Record the writer's byteorder so readers can byteswap if needed. */
69 if (ZFS_HOST_BYTEORDER
)
70 *version
|= DDT_ZAP_COMPRESS_BYTEORDER_MASK
;
/*
 * Decode a stored DDT value back into dst.
 *
 * src points at the stored bytes: a version byte followed by the payload.
 * s_len and d_len are passed on as the source and destination lengths.
 * The compression function is taken from the low bits of the version byte.
 * If that function has no decompressor the payload is copied as-is,
 * otherwise zio_decompress_data() must succeed (VERIFY0). The result is
 * byteswapped as an array of uint64_t when the stored byteorder flag
 * differs from the host's.
 */
76 ddt_zap_decompress(uchar_t
*src
, void *dst
, size_t s_len
, size_t d_len
)
/* Consume the version byte; src now points at the payload. */
78 uchar_t version
= *src
++;
79 int cpfunc
= version
& DDT_ZAP_COMPRESS_FUNCTION_MASK
;
/* No decompressor for this function: the payload is stored raw. */
81 if (zio_compress_table
[cpfunc
].ci_decompress
== NULL
) {
82 memcpy(dst
, src
, d_len
);
/* Wrap both buffers in ABDs for zio_decompress_data(). */
87 abd_get_from_buf_struct(&sabd
, src
, s_len
);
88 abd_get_from_buf_struct(&dabd
, dst
, d_len
);
89 VERIFY0(zio_decompress_data(cpfunc
, &sabd
, &dabd
, s_len
, d_len
, NULL
));
/* Swap only when the writer's byteorder differs from this host's. */
93 if (((version
& DDT_ZAP_COMPRESS_BYTEORDER_MASK
) != 0) !=
94 (ZFS_HOST_BYTEORDER
!= 0))
95 byteswap_uint64_array(dst
, d_len
);
/*
 * Create the ZAP object that backs a DDT and return its object number
 * through *objectp.
 *
 * The ZAP is created with 64-bit hashes and uint64 keys, type
 * DMU_OT_DDT_ZAP, and the default leaf and indirect blockshifts.
 * ENOTSUP is returned on a failure path.
 *
 * NOTE(review): ZAP_FLAG_PRE_HASHED_KEY is presumably added only when
 * prehash is set, and ENOTSUP presumably signals that no object was
 * created -- confirm both guards.
 */
99 ddt_zap_create(objset_t
*os
, uint64_t *objectp
, dmu_tx_t
*tx
, boolean_t prehash
)
101 zap_flags_t flags
= ZAP_FLAG_HASH64
| ZAP_FLAG_UINT64_KEY
;
104 flags
|= ZAP_FLAG_PRE_HASHED_KEY
;
106 *objectp
= zap_create_flags(os
, 0, flags
, DMU_OT_DDT_ZAP
,
107 ddt_zap_default_bs
, ddt_zap_default_ibs
,
110 return (SET_ERROR(ENOTSUP
));
/*
 * Destroy the DDT ZAP object `object` in objset `os` under transaction
 * `tx`. Returns whatever zap_destroy() returns.
 */
116 ddt_zap_destroy(objset_t
*os
, uint64_t object
, dmu_tx_t
*tx
)
118 return (zap_destroy(os
, object
, tx
));
/*
 * Look up the entry for key `ddk` and decode it into `phys` (psize bytes).
 *
 * The stored length is queried first, then a temporary buffer of exactly
 * that size is allocated. The stored bytes are read into it, decoded via
 * ddt_zap_decompress(), and the buffer is freed.
 */
122 ddt_zap_lookup(objset_t
*os
, uint64_t object
,
123 const ddt_key_t
*ddk
, void *phys
, size_t psize
)
/* Fetch the integer size (`one`) and the count (`csize`) of the value. */
129 error
= zap_length_uint64(os
, object
, (uint64_t *)ddk
,
130 DDT_KEY_WORDS
, &one
, &csize
);
/* Values are byte arrays no larger than the payload plus version byte. */
134 ASSERT3U(one
, ==, 1);
135 ASSERT3U(csize
, <=, psize
+ 1);
137 cbuf
= kmem_alloc(csize
, KM_SLEEP
);
139 error
= zap_lookup_uint64(os
, object
, (uint64_t *)ddk
,
140 DDT_KEY_WORDS
, 1, csize
, cbuf
);
142 ddt_zap_decompress(cbuf
, phys
, csize
, psize
);
144 kmem_free(cbuf
, csize
);
/*
 * Test for the presence of key `ddk` by asking the ZAP for the length of
 * its value; the result of zap_length_uint64() is returned directly.
 */
150 ddt_zap_contains(objset_t
*os
, uint64_t object
, const ddt_key_t
*ddk
)
152 return (zap_length_uint64(os
, object
, (uint64_t *)ddk
, DDT_KEY_WORDS
,
/*
 * Issue a prefetch for the ZAP entry keyed by `ddk`. The result of
 * zap_prefetch_uint64() is deliberately discarded.
 */
157 ddt_zap_prefetch(objset_t
*os
, uint64_t object
, const ddt_key_t
*ddk
)
159 (void) zap_prefetch_uint64(os
, object
, (uint64_t *)ddk
, DDT_KEY_WORDS
);
/*
 * Issue a prefetch for the whole ZAP object. The result of
 * zap_prefetch_object() is deliberately discarded.
 */
163 ddt_zap_prefetch_all(objset_t
*os
, uint64_t object
)
165 (void) zap_prefetch_object(os
, object
);
/*
 * Store `phys` (psize bytes) as the value for key `ddk` under `tx`.
 *
 * The value is encoded by ddt_zap_compress() into a temporary buffer of
 * psize + 1 bytes (payload plus version byte). The encoded length is
 * written as an array of 1-byte integers, then the buffer is freed.
 */
169 ddt_zap_update(objset_t
*os
, uint64_t object
, const ddt_key_t
*ddk
,
170 const void *phys
, size_t psize
, dmu_tx_t
*tx
)
/* Worst case: uncompressed payload plus the leading version byte. */
172 const size_t cbuf_size
= psize
+ 1;
174 uchar_t
*cbuf
= kmem_alloc(cbuf_size
, KM_SLEEP
);
176 uint64_t csize
= ddt_zap_compress(phys
, cbuf
, psize
, cbuf_size
);
178 int error
= zap_update_uint64(os
, object
, (uint64_t *)ddk
,
179 DDT_KEY_WORDS
, 1, csize
, cbuf
, tx
);
/* Free with the allocated size, not the (possibly smaller) csize. */
181 kmem_free(cbuf
, cbuf_size
);
/*
 * Remove the entry keyed by `ddk` from the DDT ZAP object; the result of
 * zap_remove_uint64() is returned directly.
 */
187 ddt_zap_remove(objset_t
*os
, uint64_t object
, const ddt_key_t
*ddk
,
190 return (zap_remove_uint64(os
, object
, (uint64_t *)ddk
,
/*
 * Retrieve one entry of a DDT ZAP walk and advance the walk position.
 *
 * `*walk` is a serialized ZAP cursor position. When an entry is retrieved,
 * its key is copied to `*ddk` and its value is decoded into `phys`
 * (psize bytes). The cursor is then advanced and re-serialized into
 * `*walk`.
 *
 * NOTE(review): which of the two cursor initializers runs is decided by a
 * guard not shown here -- presumably *walk == 0 selects the noprefetch
 * variant; confirm.
 */
195 ddt_zap_walk(objset_t
*os
, uint64_t object
, uint64_t *walk
, ddt_key_t
*ddk
,
196 void *phys
, size_t psize
)
202 za
= zap_attribute_alloc();
205 * We don't want to prefetch the entire ZAP object, because
206 * it can be enormous. Also the primary use of DDT iteration
207 * is for scrubbing, in which case we will be issuing many
208 * scrub I/Os for each ZAP block that we read in, so
209 * reading the ZAP is unlikely to be the bottleneck.
211 zap_cursor_init_noprefetch(&zc
, os
, object
);
213 zap_cursor_init_serialized(&zc
, os
, object
, *walk
);
215 if ((error
= zap_cursor_retrieve(&zc
, za
)) == 0) {
/* The attribute's integer count is the stored (encoded) byte length. */
216 uint64_t csize
= za
->za_num_integers
;
218 ASSERT3U(za
->za_integer_length
, ==, 1);
219 ASSERT3U(csize
, <=, psize
+ 1);
221 uchar_t
*cbuf
= kmem_alloc(csize
, KM_SLEEP
);
/* za_name holds the binary key; reuse it to fetch the value bytes. */
223 error
= zap_lookup_uint64(os
, object
, (uint64_t *)za
->za_name
,
224 DDT_KEY_WORDS
, 1, csize
, cbuf
);
227 ddt_zap_decompress(cbuf
, phys
, csize
, psize
);
228 *ddk
= *(ddt_key_t
*)za
->za_name
;
231 kmem_free(cbuf
, csize
);
/* Step past this entry and save the position for the next call. */
233 zap_cursor_advance(&zc
);
234 *walk
= zap_cursor_serialize(&zc
);
236 zap_cursor_fini(&zc
);
237 zap_attribute_free(za
);
/*
 * Report the number of entries in the DDT ZAP object through *count.
 * Returns whatever zap_count() returns.
 */
242 ddt_zap_count(objset_t
*os
, uint64_t object
, uint64_t *count
)
244 return (zap_count(os
, object
, count
));
/*
 * ddt_ops_t table for the ZAP-backed DDT implementation.
 * NOTE(review): presumably each slot is filled with the matching
 * ddt_zap_* function defined above -- confirm slot order against the
 * ddt_ops_t definition.
 */
247 const ddt_ops_t ddt_zap_ops
= {
254 ddt_zap_prefetch_all
,
/*
 * Export the default leaf and indirect blockshifts as read-write
 * (ZMOD_RW) unsigned-integer module parameters in the zfs_dedup scope.
 */
262 ZFS_MODULE_PARAM(zfs_dedup
, , ddt_zap_default_bs
, UINT
, ZMOD_RW
,
263 "DDT ZAP leaf blockshift");
264 ZFS_MODULE_PARAM(zfs_dedup
, , ddt_zap_default_ibs
, UINT
, ZMOD_RW
,
265 "DDT ZAP indirect blockshift");