FreeBSD: Fix ZFS so that snapshots under .zfs/snapshot are NFS visible
[zfs.git] / include / sys / range_tree.h
blobd6f60e7952882a95959b848d9376e11dbb3bb0b1
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2013, 2019 by Delphix. All rights reserved.
 */

30 #ifndef _SYS_RANGE_TREE_H
31 #define _SYS_RANGE_TREE_H
33 #include <sys/btree.h>
34 #include <sys/dmu.h>
36 #ifdef __cplusplus
37 extern "C" {
38 #endif
/* Number of buckets in range_tree_t's rt_histogram array. */
#define	RANGE_TREE_HISTOGRAM_SIZE	64

/* Forward declaration; struct range_tree_ops is defined later in this file. */
typedef struct range_tree_ops range_tree_ops_t;

/*
 * Identifies which concrete segment layout a range tree stores:
 * range_seg32_t, range_seg64_t or range_seg_gap_t.
 * RANGE_SEG_NUM_TYPES is the number of layouts, not a layout itself.
 */
typedef enum range_seg_type {
	RANGE_SEG32,
	RANGE_SEG64,
	RANGE_SEG_GAP,
	RANGE_SEG_NUM_TYPES,
} range_seg_type_t;

52 * Note: the range_tree may not be accessed concurrently; consumers
53 * must provide external locking if required.
55 typedef struct range_tree {
56 zfs_btree_t rt_root; /* offset-ordered segment b-tree */
57 uint64_t rt_space; /* sum of all segments in the map */
58 range_seg_type_t rt_type; /* type of range_seg_t in use */
60 * All data that is stored in the range tree must have a start higher
61 * than or equal to rt_start, and all sizes and offsets must be
62 * multiples of 1 << rt_shift.
64 uint8_t rt_shift;
65 uint64_t rt_start;
66 const range_tree_ops_t *rt_ops;
67 void *rt_arg;
68 uint64_t rt_gap; /* allowable inter-segment gap */
71 * The rt_histogram maintains a histogram of ranges. Each bucket,
72 * rt_histogram[i], contains the number of ranges whose size is:
73 * 2^i <= size of range in bytes < 2^(i+1)
75 uint64_t rt_histogram[RANGE_TREE_HISTOGRAM_SIZE];
76 } range_tree_t;
/* Segment layout with 32-bit bounds; selected by RANGE_SEG32. */
typedef struct range_seg32 {
	uint32_t	rs_start;	/* starting offset of this segment */
	uint32_t	rs_end;		/* ending offset (non-inclusive) */
} range_seg32_t;

/*
 * Extremely large metaslabs, vdev-wide trees, and dnode-wide trees may
 * require 64-bit integers for ranges.
 */
typedef struct range_seg64 {
	uint64_t	rs_start;	/* starting offset of this segment */
	uint64_t	rs_end;		/* ending offset (non-inclusive) */
} range_seg64_t;

/*
 * Segment layout selected by RANGE_SEG_GAP: 64-bit bounds plus an
 * explicit fill count, which the other layouts derive as end - start.
 */
typedef struct range_seg_gap {
	uint64_t	rs_start;	/* starting offset of this segment */
	uint64_t	rs_end;		/* ending offset (non-inclusive) */
	uint64_t	rs_fill;	/* actual fill if gap mode is on */
} range_seg_gap_t;

99 * This type needs to be the largest of the range segs, since it will be stack
100 * allocated and then cast the actual type to do tree operations.
102 typedef range_seg_gap_t range_seg_max_t;
/*
 * This is just for clarity of code purposes, so we can make it clear that a
 * pointer is to a range seg of some type; when we need to do the actual math,
 * we'll figure out the real type.
 */
typedef void range_seg_t;

111 struct range_tree_ops {
112 void (*rtop_create)(range_tree_t *rt, void *arg);
113 void (*rtop_destroy)(range_tree_t *rt, void *arg);
114 void (*rtop_add)(range_tree_t *rt, void *rs, void *arg);
115 void (*rtop_remove)(range_tree_t *rt, void *rs, void *arg);
116 void (*rtop_vacate)(range_tree_t *rt, void *arg);
119 static inline uint64_t
120 rs_get_start_raw(const range_seg_t *rs, const range_tree_t *rt)
122 ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
123 switch (rt->rt_type) {
124 case RANGE_SEG32:
125 return (((const range_seg32_t *)rs)->rs_start);
126 case RANGE_SEG64:
127 return (((const range_seg64_t *)rs)->rs_start);
128 case RANGE_SEG_GAP:
129 return (((const range_seg_gap_t *)rs)->rs_start);
130 default:
131 VERIFY(0);
132 return (0);
136 static inline uint64_t
137 rs_get_end_raw(const range_seg_t *rs, const range_tree_t *rt)
139 ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
140 switch (rt->rt_type) {
141 case RANGE_SEG32:
142 return (((const range_seg32_t *)rs)->rs_end);
143 case RANGE_SEG64:
144 return (((const range_seg64_t *)rs)->rs_end);
145 case RANGE_SEG_GAP:
146 return (((const range_seg_gap_t *)rs)->rs_end);
147 default:
148 VERIFY(0);
149 return (0);
153 static inline uint64_t
154 rs_get_fill_raw(const range_seg_t *rs, const range_tree_t *rt)
156 ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
157 switch (rt->rt_type) {
158 case RANGE_SEG32: {
159 const range_seg32_t *r32 = (const range_seg32_t *)rs;
160 return (r32->rs_end - r32->rs_start);
162 case RANGE_SEG64: {
163 const range_seg64_t *r64 = (const range_seg64_t *)rs;
164 return (r64->rs_end - r64->rs_start);
166 case RANGE_SEG_GAP:
167 return (((const range_seg_gap_t *)rs)->rs_fill);
168 default:
169 VERIFY(0);
170 return (0);
175 static inline uint64_t
176 rs_get_start(const range_seg_t *rs, const range_tree_t *rt)
178 return ((rs_get_start_raw(rs, rt) << rt->rt_shift) + rt->rt_start);
181 static inline uint64_t
182 rs_get_end(const range_seg_t *rs, const range_tree_t *rt)
184 return ((rs_get_end_raw(rs, rt) << rt->rt_shift) + rt->rt_start);
187 static inline uint64_t
188 rs_get_fill(const range_seg_t *rs, const range_tree_t *rt)
190 return (rs_get_fill_raw(rs, rt) << rt->rt_shift);
193 static inline void
194 rs_set_start_raw(range_seg_t *rs, range_tree_t *rt, uint64_t start)
196 ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
197 switch (rt->rt_type) {
198 case RANGE_SEG32:
199 ASSERT3U(start, <=, UINT32_MAX);
200 ((range_seg32_t *)rs)->rs_start = (uint32_t)start;
201 break;
202 case RANGE_SEG64:
203 ((range_seg64_t *)rs)->rs_start = start;
204 break;
205 case RANGE_SEG_GAP:
206 ((range_seg_gap_t *)rs)->rs_start = start;
207 break;
208 default:
209 VERIFY(0);
213 static inline void
214 rs_set_end_raw(range_seg_t *rs, range_tree_t *rt, uint64_t end)
216 ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
217 switch (rt->rt_type) {
218 case RANGE_SEG32:
219 ASSERT3U(end, <=, UINT32_MAX);
220 ((range_seg32_t *)rs)->rs_end = (uint32_t)end;
221 break;
222 case RANGE_SEG64:
223 ((range_seg64_t *)rs)->rs_end = end;
224 break;
225 case RANGE_SEG_GAP:
226 ((range_seg_gap_t *)rs)->rs_end = end;
227 break;
228 default:
229 VERIFY(0);
233 static inline void
234 rs_set_fill_raw(range_seg_t *rs, range_tree_t *rt, uint64_t fill)
236 ASSERT3U(rt->rt_type, <=, RANGE_SEG_NUM_TYPES);
237 switch (rt->rt_type) {
238 case RANGE_SEG32:
239 /* fall through */
240 case RANGE_SEG64:
241 ASSERT3U(fill, ==, rs_get_end_raw(rs, rt) - rs_get_start_raw(rs,
242 rt));
243 break;
244 case RANGE_SEG_GAP:
245 ((range_seg_gap_t *)rs)->rs_fill = fill;
246 break;
247 default:
248 VERIFY(0);
252 static inline void
253 rs_set_start(range_seg_t *rs, range_tree_t *rt, uint64_t start)
255 ASSERT3U(start, >=, rt->rt_start);
256 ASSERT(IS_P2ALIGNED(start, 1ULL << rt->rt_shift));
257 rs_set_start_raw(rs, rt, (start - rt->rt_start) >> rt->rt_shift);
260 static inline void
261 rs_set_end(range_seg_t *rs, range_tree_t *rt, uint64_t end)
263 ASSERT3U(end, >=, rt->rt_start);
264 ASSERT(IS_P2ALIGNED(end, 1ULL << rt->rt_shift));
265 rs_set_end_raw(rs, rt, (end - rt->rt_start) >> rt->rt_shift);
268 static inline void
269 rs_set_fill(range_seg_t *rs, range_tree_t *rt, uint64_t fill)
271 ASSERT(IS_P2ALIGNED(fill, 1ULL << rt->rt_shift));
272 rs_set_fill_raw(rs, rt, fill >> rt->rt_shift);
/*
 * Signature of the per-segment callback accepted by range_tree_vacate()
 * and range_tree_walk(): an opaque argument plus a start and a size.
 */
typedef void range_tree_func_t(void *arg, uint64_t start, uint64_t size);

277 range_tree_t *range_tree_create_gap(const range_tree_ops_t *ops,
278 range_seg_type_t type, void *arg, uint64_t start, uint64_t shift,
279 uint64_t gap);
280 range_tree_t *range_tree_create(const range_tree_ops_t *ops,
281 range_seg_type_t type, void *arg, uint64_t start, uint64_t shift);
282 void range_tree_destroy(range_tree_t *rt);
283 boolean_t range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size);
284 range_seg_t *range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size);
285 boolean_t range_tree_find_in(range_tree_t *rt, uint64_t start, uint64_t size,
286 uint64_t *ostart, uint64_t *osize);
287 void range_tree_verify_not_present(range_tree_t *rt,
288 uint64_t start, uint64_t size);
289 void range_tree_resize_segment(range_tree_t *rt, range_seg_t *rs,
290 uint64_t newstart, uint64_t newsize);
291 uint64_t range_tree_space(range_tree_t *rt);
292 uint64_t range_tree_numsegs(range_tree_t *rt);
293 boolean_t range_tree_is_empty(range_tree_t *rt);
294 void range_tree_swap(range_tree_t **rtsrc, range_tree_t **rtdst);
295 void range_tree_stat_verify(range_tree_t *rt);
296 uint64_t range_tree_min(range_tree_t *rt);
297 uint64_t range_tree_max(range_tree_t *rt);
298 uint64_t range_tree_span(range_tree_t *rt);
300 void range_tree_add(void *arg, uint64_t start, uint64_t size);
301 void range_tree_remove(void *arg, uint64_t start, uint64_t size);
302 void range_tree_remove_fill(range_tree_t *rt, uint64_t start, uint64_t size);
303 void range_tree_adjust_fill(range_tree_t *rt, range_seg_t *rs, int64_t delta);
304 void range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size);
306 void range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg);
307 void range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg);
308 range_seg_t *range_tree_first(range_tree_t *rt);
310 void range_tree_remove_xor_add_segment(uint64_t start, uint64_t end,
311 range_tree_t *removefrom, range_tree_t *addto);
312 void range_tree_remove_xor_add(range_tree_t *rt, range_tree_t *removefrom,
313 range_tree_t *addto);
315 #ifdef __cplusplus
317 #endif
319 #endif /* _SYS_RANGE_TREE_H */