/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/bplist.h>
#include <sys/zfs_context.h>
/*
 * Hold the bplist object's bonus buffer and cache a pointer to its
 * on-disk header.  The caller must already hold bpl_lock.
 */
static int
bplist_hold(bplist_t *bpl)
{
        ASSERT(MUTEX_HELD(&bpl->bpl_lock));
        if (bpl->bpl_dbuf == NULL) {
                int err = dmu_bonus_hold(bpl->bpl_mos,
                    bpl->bpl_object, bpl, &bpl->bpl_dbuf);
                if (err)
                        return (err);
                bpl->bpl_phys = bpl->bpl_dbuf->db_data;
        }
        return (0);
}
uint64_t
bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx)
{
        int size;

        size = spa_version(dmu_objset_spa(mos)) < SPA_VERSION_BPLIST_ACCOUNT ?
            BPLIST_SIZE_V0 : sizeof (bplist_phys_t);

        return (dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize,
            DMU_OT_BPLIST_HDR, size, tx));
}
void
bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx)
{
        VERIFY(dmu_object_free(mos, object, tx) == 0);
}
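
/*
 * Illustrative sketch of the create/open/close/destroy lifecycle above,
 * assuming the caller already has an open MOS objset and an assigned
 * transaction.  The helper name and the use of SPA_MAXBLOCKSIZE as the
 * data block size are hypothetical, not taken from this file.
 */
static void
bplist_lifecycle_example(objset_t *mos, dmu_tx_t *tx)
{
        bplist_t bpl;
        uint64_t object;

        bzero(&bpl, sizeof (bpl));
        mutex_init(&bpl.bpl_lock, NULL, MUTEX_DEFAULT, NULL);

        /* Allocate the on-disk object, then bind the in-core bplist to it. */
        object = bplist_create(mos, SPA_MAXBLOCKSIZE, tx);
        VERIFY(0 == bplist_open(&bpl, mos, object));

        /* ... enqueue, iterate and sync block pointers here ... */

        bplist_close(&bpl);
        bplist_destroy(mos, object, tx);
        mutex_destroy(&bpl.bpl_lock);
}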
int
bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object)
{
        dmu_object_info_t doi;
        int err;

        err = dmu_object_info(mos, object, &doi);
        if (err)
                return (err);

        mutex_enter(&bpl->bpl_lock);

        ASSERT(bpl->bpl_dbuf == NULL);
        ASSERT(bpl->bpl_phys == NULL);
        ASSERT(bpl->bpl_cached_dbuf == NULL);
        ASSERT(bpl->bpl_queue == NULL);
        ASSERT3U(doi.doi_type, ==, DMU_OT_BPLIST);
        ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPLIST_HDR);

        bpl->bpl_mos = mos;
        bpl->bpl_object = object;
        bpl->bpl_blockshift = highbit(doi.doi_data_block_size - 1);
        bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT;
        bpl->bpl_havecomp = (doi.doi_bonus_size == sizeof (bplist_phys_t));

        mutex_exit(&bpl->bpl_lock);
        return (0);
}
void
bplist_close(bplist_t *bpl)
{
        mutex_enter(&bpl->bpl_lock);

        ASSERT(bpl->bpl_queue == NULL);

        if (bpl->bpl_cached_dbuf) {
                dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
                bpl->bpl_cached_dbuf = NULL;
        }
        if (bpl->bpl_dbuf) {
                dmu_buf_rele(bpl->bpl_dbuf, bpl);
                bpl->bpl_dbuf = NULL;
                bpl->bpl_phys = NULL;
        }

        mutex_exit(&bpl->bpl_lock);
}
boolean_t
bplist_empty(bplist_t *bpl)
{
        boolean_t rv;

        if (bpl->bpl_object == 0)
                return (B_TRUE);

        mutex_enter(&bpl->bpl_lock);
        VERIFY(0 == bplist_hold(bpl)); /* XXX */
        rv = (bpl->bpl_phys->bpl_entries == 0);
        mutex_exit(&bpl->bpl_lock);

        return (rv);
}
/*
 * Called with bpl_lock held; make sure the cached dbuf covers the
 * block containing blkid.
 */
static int
bplist_cache(bplist_t *bpl, uint64_t blkid)
{
        int err = 0;

        if (bpl->bpl_cached_dbuf == NULL ||
            bpl->bpl_cached_dbuf->db_offset != (blkid << bpl->bpl_blockshift)) {
                if (bpl->bpl_cached_dbuf != NULL)
                        dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
                err = dmu_buf_hold(bpl->bpl_mos,
                    bpl->bpl_object, blkid << bpl->bpl_blockshift,
                    bpl, &bpl->bpl_cached_dbuf);
                ASSERT(err || bpl->bpl_cached_dbuf->db_size ==
                    1ULL << bpl->bpl_blockshift);
        }
        return (err);
}
int
bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp)
{
        uint64_t blk, off;
        blkptr_t *bparray;
        int err;

        mutex_enter(&bpl->bpl_lock);

        err = bplist_hold(bpl);
        if (err) {
                mutex_exit(&bpl->bpl_lock);
                return (err);
        }

        if (*itorp >= bpl->bpl_phys->bpl_entries) {
                mutex_exit(&bpl->bpl_lock);
                return (ENOENT);
        }

        blk = *itorp >> bpl->bpl_bpshift;
        off = P2PHASE(*itorp, 1ULL << bpl->bpl_bpshift);

        err = bplist_cache(bpl, blk);
        if (err) {
                mutex_exit(&bpl->bpl_lock);
                return (err);
        }

        bparray = bpl->bpl_cached_dbuf->db_data;
        *bp = bparray[off];
        (*itorp)++;
        mutex_exit(&bpl->bpl_lock);
        return (0);
}
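
/*
 * Illustrative sketch of the iterator protocol above: the caller owns a
 * cursor starting at 0 and calls bplist_iterate() until it returns
 * ENOENT.  The helper name "bplist_count_example" is hypothetical.
 */
static int
bplist_count_example(bplist_t *bpl, uint64_t *countp)
{
        uint64_t itor = 0;
        blkptr_t bp;
        int err;

        *countp = 0;
        while ((err = bplist_iterate(bpl, &itor, &bp)) == 0)
                (*countp)++;

        /* ENOENT marks the end of the list, not a failure. */
        return (err == ENOENT ? 0 : err);
}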
int
bplist_enqueue(bplist_t *bpl, const blkptr_t *bp, dmu_tx_t *tx)
{
        uint64_t blk, off;
        blkptr_t *bparray;
        int err;

        ASSERT(!BP_IS_HOLE(bp));
        mutex_enter(&bpl->bpl_lock);
        err = bplist_hold(bpl);
        if (err) {
                mutex_exit(&bpl->bpl_lock);
                return (err);
        }

        blk = bpl->bpl_phys->bpl_entries >> bpl->bpl_bpshift;
        off = P2PHASE(bpl->bpl_phys->bpl_entries, 1ULL << bpl->bpl_bpshift);

        err = bplist_cache(bpl, blk);
        if (err) {
                mutex_exit(&bpl->bpl_lock);
                return (err);
        }

        dmu_buf_will_dirty(bpl->bpl_cached_dbuf, tx);
        bparray = bpl->bpl_cached_dbuf->db_data;
        bparray[off] = *bp;

        /* We never need the fill count. */
        bparray[off].blk_fill = 0;

        /* The bplist will compress better if we can leave off the checksum */
        bzero(&bparray[off].blk_cksum, sizeof (bparray[off].blk_cksum));

        dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
        bpl->bpl_phys->bpl_entries++;
        bpl->bpl_phys->bpl_bytes +=
            bp_get_dasize(dmu_objset_spa(bpl->bpl_mos), bp);
        if (bpl->bpl_havecomp) {
                bpl->bpl_phys->bpl_comp += BP_GET_PSIZE(bp);
                bpl->bpl_phys->bpl_uncomp += BP_GET_UCSIZE(bp);
        }
        mutex_exit(&bpl->bpl_lock);

        return (0);
}
/*
 * Deferred entry; will be written later by bplist_sync().
 */
void
bplist_enqueue_deferred(bplist_t *bpl, const blkptr_t *bp)
{
        bplist_q_t *bpq = kmem_alloc(sizeof (*bpq), KM_SLEEP);

        ASSERT(!BP_IS_HOLE(bp));
        mutex_enter(&bpl->bpl_lock);
        bpq->bpq_blk = *bp;
        bpq->bpq_next = bpl->bpl_queue;
        bpl->bpl_queue = bpq;
        mutex_exit(&bpl->bpl_lock);
}
void
bplist_sync(bplist_t *bpl, dmu_tx_t *tx)
{
        bplist_q_t *bpq;

        mutex_enter(&bpl->bpl_lock);
        while ((bpq = bpl->bpl_queue) != NULL) {
                bpl->bpl_queue = bpq->bpq_next;
                mutex_exit(&bpl->bpl_lock);
                VERIFY(0 == bplist_enqueue(bpl, &bpq->bpq_blk, tx));
                kmem_free(bpq, sizeof (*bpq));
                mutex_enter(&bpl->bpl_lock);
        }
        mutex_exit(&bpl->bpl_lock);
}
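
/*
 * Illustrative sketch of the deferred path above, assuming a context
 * where no transaction is at hand: the entry is queued in memory now
 * and flushed to the on-disk list later, from syncing context, by
 * bplist_sync().  The helper names are hypothetical.
 */
static void
bplist_defer_example(bplist_t *bpl, const blkptr_t *bp)
{
        /* No dmu_tx_t is needed here; only bpl_lock is taken. */
        bplist_enqueue_deferred(bpl, bp);
}

static void
bplist_flush_example(bplist_t *bpl, dmu_tx_t *tx)
{
        /* Later, with an assigned tx, write the queued entries out. */
        bplist_sync(bpl, tx);
}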
void
bplist_vacate(bplist_t *bpl, dmu_tx_t *tx)
{
        mutex_enter(&bpl->bpl_lock);
        ASSERT3P(bpl->bpl_queue, ==, NULL);
        VERIFY(0 == bplist_hold(bpl));
        dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
        VERIFY(0 == dmu_free_range(bpl->bpl_mos,
            bpl->bpl_object, 0, -1ULL, tx));
        bpl->bpl_phys->bpl_entries = 0;
        bpl->bpl_phys->bpl_bytes = 0;
        if (bpl->bpl_havecomp) {
                bpl->bpl_phys->bpl_comp = 0;
                bpl->bpl_phys->bpl_uncomp = 0;
        }
        mutex_exit(&bpl->bpl_lock);
}
int
bplist_space(bplist_t *bpl, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
{
        int err;

        mutex_enter(&bpl->bpl_lock);

        err = bplist_hold(bpl);
        if (err) {
                mutex_exit(&bpl->bpl_lock);
                return (err);
        }

        *usedp = bpl->bpl_phys->bpl_bytes;
        if (bpl->bpl_havecomp) {
                *compp = bpl->bpl_phys->bpl_comp;
                *uncompp = bpl->bpl_phys->bpl_uncomp;
        }
        mutex_exit(&bpl->bpl_lock);

        if (!bpl->bpl_havecomp) {
                uint64_t itor = 0, comp = 0, uncomp = 0;
                blkptr_t bp;

                while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) {
                        comp += BP_GET_PSIZE(&bp);
                        uncomp += BP_GET_UCSIZE(&bp);
                }
                if (err == ENOENT)
                        err = 0;
                *compp = comp;
                *uncompp = uncomp;
        }

        return (err);
}
/*
 * Return (in *dasizep) the amount of space on the deadlist which is:
 * mintxg < blk_birth <= maxtxg
 */
int
bplist_space_birthrange(bplist_t *bpl, uint64_t mintxg, uint64_t maxtxg,
    uint64_t *dasizep)
{
        uint64_t size = 0;
        uint64_t itor = 0;
        blkptr_t bp;
        int err;

        /*
         * As an optimization, if they want the whole txg range, just
         * get bpl_bytes rather than iterating over the bps.
         */
        if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX) {
                mutex_enter(&bpl->bpl_lock);
                err = bplist_hold(bpl);
                if (err == 0)
                        *dasizep = bpl->bpl_phys->bpl_bytes;
                mutex_exit(&bpl->bpl_lock);
                return (err);
        }

        while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) {
                if (bp.blk_birth > mintxg && bp.blk_birth <= maxtxg) {
                        size +=
                            bp_get_dasize(dmu_objset_spa(bpl->bpl_mos), &bp);
                }
        }
        if (err == ENOENT)
                err = 0;
        *dasizep = size;
        return (err);
}
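
/*
 * Illustrative sketch of the birth-range query above: asking how much
 * deadlist space was born in exactly one txg, i.e. the entries with
 * txg - 1 < blk_birth <= txg.  The helper name is hypothetical.
 */
static int
bplist_space_in_txg_example(bplist_t *bpl, uint64_t txg, uint64_t *dasizep)
{
        return (bplist_space_birthrange(bpl, txg - 1, txg, dasizep));
}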