config/dracut/90zfs: handle cases where hostid(1) returns all zeros
[zfs.git] / include / sys / btree.h
blob3b53476c7c68a471bc65b1e28dd6b8122d594d1f
1 /*
2 * CDDL HEADER START
4 * This file and its contents are supplied under the terms of the
5 * Common Development and Distribution License ("CDDL"), version 1.0.
6 * You may only use this file in accordance with the terms of version
7 * 1.0 of the CDDL.
9 * A full copy of the text of the CDDL should have accompanied this
10 * source. A copy of the CDDL is also available via the Internet at
11 * http://www.illumos.org/license/CDDL.
13 * CDDL HEADER END
16 * Copyright (c) 2019 by Delphix. All rights reserved.
19 #ifndef _BTREE_H
20 #define _BTREE_H
22 #ifdef __cplusplus
23 extern "C" {
24 #endif
26 #include <sys/zfs_context.h>
/*
 * This file defines the interface for a B-Tree implementation for ZFS. The
 * tree can be used to store arbitrary sortable data types with low overhead
 * and good operation performance. In addition the tree intelligently
 * optimizes bulk in-order insertions to improve memory use and performance.
 *
 * Note that for all B-Tree functions, the values returned are pointers to the
 * internal copies of the data in the tree. The internal data can only be
 * safely mutated if the changes cannot change the ordering of the element
 * with respect to any other elements in the tree.
 *
 * The major drawback of the B-Tree is that any returned elements or indexes
 * are only valid until a side-effectful operation occurs, since these can
 * result in reallocation or relocation of data. Side-effectful operations are
 * defined as insertion, removal, and zfs_btree_destroy_nodes().
 *
 * The B-Tree has two types of nodes: core nodes, and leaf nodes. Core
 * nodes have an array of children pointing to other nodes, and an array of
 * elements that act as separators between the elements of the subtrees rooted
 * at its children. Leaf nodes only contain data elements, and form the bottom
 * layer of the tree. Unlike B+ Trees, in this B-Tree implementation the
 * elements in the core nodes are not copies of or references to leaf node
 * elements. Each element occurs only once in the tree, no matter what kind
 * of node it is in.
 *
 * The tree's height is the same throughout, unlike many other forms of search
 * tree. Each node (except for the root) must be between half minus one and
 * completely full of elements (and children) at all times. Any operation that
 * would put the node outside of that range results in a rebalancing operation
 * (taking, merging, or splitting).
 *
 * This tree was implemented using descriptions from Wikipedia's articles on
 * B-Trees and B+ Trees.
 */
/*
 * Node sizing parameters. BTREE_CORE_ELEMS bounds the number of elements held
 * by a core node (a core node has room for BTREE_CORE_ELEMS + 1 children).
 * NOTE(review): BTREE_LEAF_SIZE is presumably the size of a leaf node in
 * bytes -- confirm against the implementation.
 *
 * Decreasing these values results in smaller memmove operations, but more of
 * them, and increased memory overhead. Increasing these values results in
 * higher variance in operation time, and reduces memory overhead.
 */
#define	BTREE_CORE_ELEMS	128
#define	BTREE_LEAF_SIZE		4096
71 extern kmem_cache_t *zfs_btree_leaf_cache;
73 typedef struct zfs_btree_hdr {
74 struct zfs_btree_core *bth_parent;
75 boolean_t bth_core;
77 * For both leaf and core nodes, represents the number of elements in
78 * the node. For core nodes, they will have bth_count + 1 children.
80 uint32_t bth_count;
81 } zfs_btree_hdr_t;
83 typedef struct zfs_btree_core {
84 zfs_btree_hdr_t btc_hdr;
85 zfs_btree_hdr_t *btc_children[BTREE_CORE_ELEMS + 1];
86 uint8_t btc_elems[];
87 } zfs_btree_core_t;
89 typedef struct zfs_btree_leaf {
90 zfs_btree_hdr_t btl_hdr;
91 uint8_t btl_elems[];
92 } zfs_btree_leaf_t;
94 typedef struct zfs_btree_index {
95 zfs_btree_hdr_t *bti_node;
96 uint64_t bti_offset;
98 * True if the location is before the list offset, false if it's at
99 * the listed offset.
101 boolean_t bti_before;
102 } zfs_btree_index_t;
104 typedef struct btree {
105 zfs_btree_hdr_t *bt_root;
106 int64_t bt_height;
107 size_t bt_elem_size;
108 uint64_t bt_num_elems;
109 uint64_t bt_num_nodes;
110 zfs_btree_leaf_t *bt_bulk; // non-null if bulk loading
111 int (*bt_compar) (const void *, const void *);
112 } zfs_btree_t;
/*
 * Allocate and deallocate caches for btree nodes.
 */
void zfs_btree_init(void);
void zfs_btree_fini(void);
121 * Initialize an B-Tree. Arguments are:
123 * tree - the tree to be initialized
124 * compar - function to compare two nodes, it must return exactly: -1, 0, or +1
125 * -1 for <, 0 for ==, and +1 for >
126 * size - the value of sizeof(struct my_type)
128 void zfs_btree_create(zfs_btree_t *, int (*) (const void *, const void *),
129 size_t);
132 * Find a node with a matching value in the tree. Returns the matching node
133 * found. If not found, it returns NULL and then if "where" is not NULL it sets
134 * "where" for use with zfs_btree_add_idx() or zfs_btree_nearest().
136 * node - node that has the value being looked for
137 * where - position for use with zfs_btree_nearest() or zfs_btree_add_idx(),
138 * may be NULL
140 void *zfs_btree_find(zfs_btree_t *, const void *, zfs_btree_index_t *);
143 * Insert a node into the tree.
145 * node - the node to insert
146 * where - position as returned from zfs_btree_find()
148 void zfs_btree_add_idx(zfs_btree_t *, const void *, const zfs_btree_index_t *);
151 * Return the first or last valued node in the tree. Will return NULL if the
152 * tree is empty. The index can be NULL if the location of the first or last
153 * element isn't required.
155 void *zfs_btree_first(zfs_btree_t *, zfs_btree_index_t *);
156 void *zfs_btree_last(zfs_btree_t *, zfs_btree_index_t *);
159 * Return the next or previous valued node in the tree. The second index can
160 * safely be NULL, if the location of the next or previous value isn't
161 * required.
163 void *zfs_btree_next(zfs_btree_t *, const zfs_btree_index_t *,
164 zfs_btree_index_t *);
165 void *zfs_btree_prev(zfs_btree_t *, const zfs_btree_index_t *,
166 zfs_btree_index_t *);
169 * Get a value from a tree and an index.
171 void *zfs_btree_get(zfs_btree_t *, zfs_btree_index_t *);
174 * Add a single value to the tree. The value must not compare equal to any
175 * other node already in the tree. Note that the value will be copied out, not
176 * inserted directly. It is safe to free or destroy the value once this
177 * function returns.
179 void zfs_btree_add(zfs_btree_t *, const void *);
182 * Remove a single value from the tree. The value must be in the tree. The
183 * pointer passed in may be a pointer into a tree-controlled buffer, but it
184 * need not be.
186 void zfs_btree_remove(zfs_btree_t *, const void *);
189 * Remove the value at the given location from the tree.
191 void zfs_btree_remove_idx(zfs_btree_t *, zfs_btree_index_t *);
194 * Return the number of nodes in the tree
196 ulong_t zfs_btree_numnodes(zfs_btree_t *);
199 * Used to destroy any remaining nodes in a tree. The cookie argument should
200 * be initialized to NULL before the first call. Returns a node that has been
201 * removed from the tree and may be free()'d. Returns NULL when the tree is
202 * empty.
204 * Once you call zfs_btree_destroy_nodes(), you can only continuing calling it
205 * and finally zfs_btree_destroy(). No other B-Tree routines will be valid.
207 * cookie - an index used to save state between calls to
208 * zfs_btree_destroy_nodes()
210 * EXAMPLE:
211 * zfs_btree_t *tree;
212 * struct my_data *node;
213 * zfs_btree_index_t *cookie;
215 * cookie = NULL;
216 * while ((node = zfs_btree_destroy_nodes(tree, &cookie)) != NULL)
217 * data_destroy(node);
218 * zfs_btree_destroy(tree);
220 void *zfs_btree_destroy_nodes(zfs_btree_t *, zfs_btree_index_t **);
223 * Destroys all nodes in the tree quickly. This doesn't give the caller an
224 * opportunity to iterate over each node and do its own cleanup; for that, use
225 * zfs_btree_destroy_nodes().
227 void zfs_btree_clear(zfs_btree_t *);
230 * Final destroy of an B-Tree. Arguments are:
232 * tree - the empty tree to destroy
234 void zfs_btree_destroy(zfs_btree_t *tree);
236 /* Runs a variety of self-checks on the btree to verify integrity. */
237 void zfs_btree_verify(zfs_btree_t *tree);
239 #ifdef __cplusplus
241 #endif
243 #endif /* _BTREE_H */