4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 * Copyright (c) 2013, 2019 by Delphix. All rights reserved.
30 #ifndef _SYS_RANGE_TREE_H
31 #define _SYS_RANGE_TREE_H
33 #include <sys/btree.h>
/* Number of buckets in the rt_histogram array of a range tree. */
#define	RANGE_TREE_HISTOGRAM_SIZE	64

/* Forward declaration so range_tree_t can hold a pointer to its ops table. */
typedef struct range_tree_ops range_tree_ops_t;
/*
 * Enumeration naming the segment layout a range tree stores; range_tree_t
 * keeps a value of this type in rt_type and the rs_*_raw accessors switch
 * on it.
 *
 * NOTE(review): the enumerator list and the closing typedef name are
 * missing from this copy of the header.  RANGE_SEG_NUM_TYPES, which the
 * accessors assert against, is presumably declared here -- restore the
 * body from the original file and confirm.
 */
44 typedef enum range_seg_type
{
/*
 * In-core range tree: a b-tree of segments (rt_root) plus running totals
 * and per-size statistics about those segments.
 *
 * NOTE(review): this copy of the definition is incomplete.  The comment
 * delimiters are gone, the closing "} range_tree_t;" is missing, and the
 * members rt_shift, rt_start and rt_arg -- which the text below and the
 * rs_get_*()/rs_set_*() helpers refer to -- are not declared here.
 * Restore them from the original header before building.
 */
52 * Note: the range_tree may not be accessed concurrently; consumers
53 * must provide external locking if required.
55 typedef struct range_tree
{
56 zfs_btree_t rt_root
; /* offset-ordered segment b-tree */
57 uint64_t rt_space
; /* sum of all segments in the map */
58 range_seg_type_t rt_type
; /* type of range_seg_t in use */
60 * All data that is stored in the range tree must have a start higher
61 * than or equal to rt_start, and all sizes and offsets must be
62 * multiples of 1 << rt_shift.
/* Callback table; see struct range_tree_ops for the available hooks. */
66 range_tree_ops_t
*rt_ops
;
68 /* rt_btree_compare should only be set if rt_arg is a b-tree */
/* Comparator over two const void * elements, returning an int. */
70 int (*rt_btree_compare
)(const void *, const void *);
72 uint64_t rt_gap
; /* allowable inter-segment gap */
75 * The rt_histogram maintains a histogram of ranges. Each bucket,
76 * rt_histogram[i], contains the number of ranges whose size is:
77 * 2^i <= size of range in bytes < 2^(i+1)
/* One counter per power-of-two size class, RANGE_TREE_HISTOGRAM_SIZE total. */
79 uint64_t rt_histogram
[RANGE_TREE_HISTOGRAM_SIZE
];
/*
 * Segment layout whose bounds are held in 32-bit fields.  Accessed through
 * the range_seg32_t casts in the rs_*_raw helpers.
 */
typedef struct range_seg32 {
	uint32_t	rs_start;	/* starting offset of this segment */
	uint32_t	rs_end;		/* ending offset (non-inclusive) */
} range_seg32_t;
/*
 * Extremely large metaslabs, vdev-wide trees, and dnode-wide trees may
 * require 64-bit integers for ranges.
 */
typedef struct range_seg64 {
	uint64_t	rs_start;	/* starting offset of this segment */
	uint64_t	rs_end;		/* ending offset (non-inclusive) */
} range_seg64_t;
/*
 * Segment layout with 64-bit bounds plus an explicit fill counter, which
 * rs_get_fill_raw() reads directly instead of deriving it from the bounds.
 */
typedef struct range_seg_gap {
	uint64_t	rs_start;	/* starting offset of this segment */
	uint64_t	rs_end;		/* ending offset (non-inclusive) */
	uint64_t	rs_fill;	/* actual fill if gap mode is on */
} range_seg_gap_t;
103 * This type needs to be the largest of the range segs, since it will be stack
104 * allocated and then cast the actual type to do tree operations.
106 typedef range_seg_gap_t range_seg_max_t
;
/*
 * This is just for clarity of code purposes, so we can make it clear that a
 * pointer is to a range seg of some type; when we need to do the actual math,
 * we'll figure out the real type.
 */
typedef void range_seg_t;
115 struct range_tree_ops
{
116 void (*rtop_create
)(range_tree_t
*rt
, void *arg
);
117 void (*rtop_destroy
)(range_tree_t
*rt
, void *arg
);
118 void (*rtop_add
)(range_tree_t
*rt
, void *rs
, void *arg
);
119 void (*rtop_remove
)(range_tree_t
*rt
, void *rs
, void *arg
);
120 void (*rtop_vacate
)(range_tree_t
*rt
, void *arg
);
/*
 * Return the rs_start field of segment rs exactly as stored, reading it
 * through the segment layout (range_seg32_t, range_seg64_t or
 * range_seg_gap_t) picked by the switch on rt->rt_type.
 *
 * NOTE(review): this copy has lost the function's braces, the case labels
 * that pair each return with a segment type, and whatever follows the last
 * return; restore them from the original header before building.
 * NOTE(review): the assertion bound is inclusive (<=).  If
 * RANGE_SEG_NUM_TYPES is a count sentinel rather than a usable type, "<"
 * would be the tighter check -- confirm against the enum definition.
 */
123 static inline uint64_t
124 rs_get_start_raw(const range_seg_t
*rs
, const range_tree_t
*rt
)
126 ASSERT3U(rt
->rt_type
, <=, RANGE_SEG_NUM_TYPES
);
127 switch (rt
->rt_type
) {
/* 32-bit layout: the uint32_t field widens to the uint64_t return type. */
129 return (((const range_seg32_t
*)rs
)->rs_start
);
/* 64-bit layout. */
131 return (((const range_seg64_t
*)rs
)->rs_start
);
/* Gap layout. */
133 return (((const range_seg_gap_t
*)rs
)->rs_start
);
/*
 * Return the rs_end field of segment rs exactly as stored, reading it
 * through the segment layout (range_seg32_t, range_seg64_t or
 * range_seg_gap_t) picked by the switch on rt->rt_type.
 *
 * NOTE(review): this copy has lost the function's braces, the case labels
 * that pair each return with a segment type, and whatever follows the last
 * return; restore them from the original header before building.
 * NOTE(review): the assertion bound is inclusive (<=).  If
 * RANGE_SEG_NUM_TYPES is a count sentinel rather than a usable type, "<"
 * would be the tighter check -- confirm against the enum definition.
 */
140 static inline uint64_t
141 rs_get_end_raw(const range_seg_t
*rs
, const range_tree_t
*rt
)
143 ASSERT3U(rt
->rt_type
, <=, RANGE_SEG_NUM_TYPES
);
144 switch (rt
->rt_type
) {
/* 32-bit layout: the uint32_t field widens to the uint64_t return type. */
146 return (((const range_seg32_t
*)rs
)->rs_end
);
/* 64-bit layout. */
148 return (((const range_seg64_t
*)rs
)->rs_end
);
/* Gap layout. */
150 return (((const range_seg_gap_t
*)rs
)->rs_end
);
/*
 * Return the fill of segment rs in stored (raw) units.  The 32-bit and
 * 64-bit layouts have no fill field, so the value is computed as
 * rs_end - rs_start; the gap layout returns its rs_fill field.
 *
 * NOTE(review): this copy has lost the function's braces, the case labels
 * (and any block braces around the local declarations), and whatever
 * follows the last return; restore them from the original header before
 * building.
 * NOTE(review): the assertion bound is inclusive (<=).  If
 * RANGE_SEG_NUM_TYPES is a count sentinel rather than a usable type, "<"
 * would be the tighter check -- confirm against the enum definition.
 */
157 static inline uint64_t
158 rs_get_fill_raw(const range_seg_t
*rs
, const range_tree_t
*rt
)
160 ASSERT3U(rt
->rt_type
, <=, RANGE_SEG_NUM_TYPES
);
161 switch (rt
->rt_type
) {
/* 32-bit layout: fill is the span between the two stored bounds. */
163 const range_seg32_t
*r32
= (const range_seg32_t
*)rs
;
164 return (r32
->rs_end
- r32
->rs_start
);
/* 64-bit layout: same derivation with 64-bit fields. */
167 const range_seg64_t
*r64
= (const range_seg64_t
*)rs
;
168 return (r64
->rs_end
- r64
->rs_start
);
/* Gap layout: fill is tracked explicitly. */
171 return (((const range_seg_gap_t
*)rs
)->rs_fill
);
179 static inline uint64_t
180 rs_get_start(const range_seg_t
*rs
, const range_tree_t
*rt
)
182 return ((rs_get_start_raw(rs
, rt
) << rt
->rt_shift
) + rt
->rt_start
);
185 static inline uint64_t
186 rs_get_end(const range_seg_t
*rs
, const range_tree_t
*rt
)
188 return ((rs_get_end_raw(rs
, rt
) << rt
->rt_shift
) + rt
->rt_start
);
191 static inline uint64_t
192 rs_get_fill(const range_seg_t
*rs
, const range_tree_t
*rt
)
194 return (rs_get_fill_raw(rs
, rt
) << rt
->rt_shift
);
/*
 * Store start, already in raw units, into the rs_start field of segment
 * rs, writing through the segment layout picked by the switch on
 * rt->rt_type.  The 32-bit layout asserts that start fits in a uint32_t
 * before narrowing it.
 *
 * NOTE(review): this copy has lost the return-type line, the function's
 * braces, the case labels, the statements that end each case, and the
 * tail of the switch; restore them from the original header before
 * building.
 * NOTE(review): the assertion bound is inclusive (<=).  If
 * RANGE_SEG_NUM_TYPES is a count sentinel rather than a usable type, "<"
 * would be the tighter check -- confirm against the enum definition.
 */
198 rs_set_start_raw(range_seg_t
*rs
, range_tree_t
*rt
, uint64_t start
)
200 ASSERT3U(rt
->rt_type
, <=, RANGE_SEG_NUM_TYPES
);
201 switch (rt
->rt_type
) {
/* 32-bit layout: check the range first, the cast below would truncate. */
203 ASSERT3U(start
, <=, UINT32_MAX
);
204 ((range_seg32_t
*)rs
)->rs_start
= (uint32_t)start
;
/* 64-bit layout: stored as is. */
207 ((range_seg64_t
*)rs
)->rs_start
= start
;
/* Gap layout: stored as is. */
210 ((range_seg_gap_t
*)rs
)->rs_start
= start
;
/*
 * Store end, already in raw units, into the rs_end field of segment rs,
 * writing through the segment layout picked by the switch on rt->rt_type.
 * The 32-bit layout asserts that end fits in a uint32_t before narrowing
 * it.
 *
 * NOTE(review): this copy has lost the return-type line, the function's
 * braces, the case labels, the statements that end each case, and the
 * tail of the switch; restore them from the original header before
 * building.
 * NOTE(review): the assertion bound is inclusive (<=).  If
 * RANGE_SEG_NUM_TYPES is a count sentinel rather than a usable type, "<"
 * would be the tighter check -- confirm against the enum definition.
 */
218 rs_set_end_raw(range_seg_t
*rs
, range_tree_t
*rt
, uint64_t end
)
220 ASSERT3U(rt
->rt_type
, <=, RANGE_SEG_NUM_TYPES
);
221 switch (rt
->rt_type
) {
/* 32-bit layout: check the range first, the cast below would truncate. */
223 ASSERT3U(end
, <=, UINT32_MAX
);
224 ((range_seg32_t
*)rs
)->rs_end
= (uint32_t)end
;
/* 64-bit layout: stored as is. */
227 ((range_seg64_t
*)rs
)->rs_end
= end
;
/* Gap layout: stored as is. */
230 ((range_seg_gap_t
*)rs
)->rs_end
= end
;
/*
 * Record fill, already in raw units, for segment rs.  Two behaviours are
 * visible: an assertion that fill equals raw end minus raw start, and a
 * store into the rs_fill field of the gap layout.
 *
 * NOTE(review): this copy has lost the return-type line, the function's
 * braces, every case label, the end of the assertion's argument list, and
 * the tail of the switch.  Presumably the assertion guards the layouts
 * that have no rs_fill field -- confirm against the original header and
 * restore the missing lines before building.
 * NOTE(review): the type-bound assertion is inclusive (<=).  If
 * RANGE_SEG_NUM_TYPES is a count sentinel rather than a usable type, "<"
 * would be the tighter check -- confirm against the enum definition.
 */
238 rs_set_fill_raw(range_seg_t
*rs
, range_tree_t
*rt
, uint64_t fill
)
240 ASSERT3U(rt
->rt_type
, <=, RANGE_SEG_NUM_TYPES
);
241 switch (rt
->rt_type
) {
/* Nothing is stored on this path; fill must match the segment's span. */
245 ASSERT3U(fill
, ==, rs_get_end_raw(rs
, rt
) - rs_get_start_raw(rs
,
/* Gap layout: fill is kept in its own field. */
249 ((range_seg_gap_t
*)rs
)->rs_fill
= fill
;
257 rs_set_start(range_seg_t
*rs
, range_tree_t
*rt
, uint64_t start
)
259 ASSERT3U(start
, >=, rt
->rt_start
);
260 ASSERT(IS_P2ALIGNED(start
, 1ULL << rt
->rt_shift
));
261 rs_set_start_raw(rs
, rt
, (start
- rt
->rt_start
) >> rt
->rt_shift
);
265 rs_set_end(range_seg_t
*rs
, range_tree_t
*rt
, uint64_t end
)
267 ASSERT3U(end
, >=, rt
->rt_start
);
268 ASSERT(IS_P2ALIGNED(end
, 1ULL << rt
->rt_shift
));
269 rs_set_end_raw(rs
, rt
, (end
- rt
->rt_start
) >> rt
->rt_shift
);
273 rs_set_fill(range_seg_t
*rs
, range_tree_t
*rt
, uint64_t fill
)
275 ASSERT(IS_P2ALIGNED(fill
, 1ULL << rt
->rt_shift
));
276 rs_set_fill_raw(rs
, rt
, fill
>> rt
->rt_shift
);
/*
 * Callback type accepted by range_tree_vacate() and range_tree_walk():
 * an opaque arg followed by a start and a size.
 */
typedef void range_tree_func_t(void *arg, uint64_t start, uint64_t size);
281 range_tree_t
*range_tree_create_impl(range_tree_ops_t
*ops
,
282 range_seg_type_t type
, void *arg
, uint64_t start
, uint64_t shift
,
283 int (*zfs_btree_compare
) (const void *, const void *), uint64_t gap
);
284 range_tree_t
*range_tree_create(range_tree_ops_t
*ops
, range_seg_type_t type
,
285 void *arg
, uint64_t start
, uint64_t shift
);
286 void range_tree_destroy(range_tree_t
*rt
);
287 boolean_t
range_tree_contains(range_tree_t
*rt
, uint64_t start
, uint64_t size
);
288 range_seg_t
*range_tree_find(range_tree_t
*rt
, uint64_t start
, uint64_t size
);
289 boolean_t
range_tree_find_in(range_tree_t
*rt
, uint64_t start
, uint64_t size
,
290 uint64_t *ostart
, uint64_t *osize
);
291 void range_tree_verify_not_present(range_tree_t
*rt
,
292 uint64_t start
, uint64_t size
);
293 void range_tree_resize_segment(range_tree_t
*rt
, range_seg_t
*rs
,
294 uint64_t newstart
, uint64_t newsize
);
295 uint64_t range_tree_space(range_tree_t
*rt
);
296 uint64_t range_tree_numsegs(range_tree_t
*rt
);
297 boolean_t
range_tree_is_empty(range_tree_t
*rt
);
298 void range_tree_swap(range_tree_t
**rtsrc
, range_tree_t
**rtdst
);
299 void range_tree_stat_verify(range_tree_t
*rt
);
300 uint64_t range_tree_min(range_tree_t
*rt
);
301 uint64_t range_tree_max(range_tree_t
*rt
);
302 uint64_t range_tree_span(range_tree_t
*rt
);
304 void range_tree_add(void *arg
, uint64_t start
, uint64_t size
);
305 void range_tree_remove(void *arg
, uint64_t start
, uint64_t size
);
306 void range_tree_remove_fill(range_tree_t
*rt
, uint64_t start
, uint64_t size
);
307 void range_tree_adjust_fill(range_tree_t
*rt
, range_seg_t
*rs
, int64_t delta
);
308 void range_tree_clear(range_tree_t
*rt
, uint64_t start
, uint64_t size
);
310 void range_tree_vacate(range_tree_t
*rt
, range_tree_func_t
*func
, void *arg
);
311 void range_tree_walk(range_tree_t
*rt
, range_tree_func_t
*func
, void *arg
);
312 range_seg_t
*range_tree_first(range_tree_t
*rt
);
314 void range_tree_remove_xor_add_segment(uint64_t start
, uint64_t end
,
315 range_tree_t
*removefrom
, range_tree_t
*addto
);
316 void range_tree_remove_xor_add(range_tree_t
*rt
, range_tree_t
*removefrom
,
317 range_tree_t
*addto
);
319 void rt_btree_create(range_tree_t
*rt
, void *arg
);
320 void rt_btree_destroy(range_tree_t
*rt
, void *arg
);
321 void rt_btree_add(range_tree_t
*rt
, range_seg_t
*rs
, void *arg
);
322 void rt_btree_remove(range_tree_t
*rt
, range_seg_t
*rs
, void *arg
);
323 void rt_btree_vacate(range_tree_t
*rt
, void *arg
);
324 extern range_tree_ops_t rt_btree_ops
;
330 #endif /* _SYS_RANGE_TREE_H */