On Tue, Nov 06, 2007 at 02:33:53AM -0800, akpm@linux-foundation.org wrote:
[mmotm.git] / fs / reiser4 / plugin / node / node.h
blobaf0c6416ac9bbb54e7e2747ea0fd69c20f10c2d6
1 /* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
3 /* We need a definition of the default node layout here. */
5 /* Generally speaking, it is best to have free space in the middle of the
6 node so that two sets of things can grow towards it, and to have the
7 item bodies on the left so that the last one of them grows into free
8 space. We optimize for the case where we append new items to the end
9 of the node, or grow the last item, because it hurts nothing to so
10 optimize and it is a common special case to do massive insertions in
11 increasing key order (and one of cases more likely to have a real user
12 notice the delay time for).
14 formatted leaf default layout: (leaf1)
16 |node header:item bodies:free space:key + pluginid + item offset|
18 We grow towards the middle, optimizing layout for the case where we
19 append new items to the end of the node. The node header is fixed
20 length. Keys, and item offsets plus pluginids for the items
21 corresponding to them are in increasing key order, and are fixed
22 length. Item offsets are relative to start of node (16 bits creating
23 a node size limit of 64k, 12 bits might be a better choice....). Item
24 bodies are in decreasing key order. Item bodies have a variable size.
25 There is a one to one to one mapping of keys to item offsets to item
26 bodies. Item offsets consist of pointers to the zeroth byte of the
27 item body. Item length equals the start of the next item minus the
28 start of this item, except the zeroth item whose length equals the end
29 of the node minus the start of that item (plus a byte). In other
30 words, the item length is not recorded anywhere, and it does not need
31 to be since it is computable.
33 Leaf variable length items and keys layout : (lvar)
35 |node header:key offset + item offset + pluginid triplets:free space:key bodies:item bodies|
37 We grow towards the middle, optimizing layout for the case where we
38 append new items to the end of the node. The node header is fixed
39 length. Keys and item offsets for the items corresponding to them are
40 in increasing key order, and keys are variable length. Item offsets
41 are relative to start of node (16 bits). Item bodies are in
42 decreasing key order. Item bodies have a variable size. There is a
43 one to one to one mapping of keys to item offsets to item bodies.
44 Item offsets consist of pointers to the zeroth byte of the item body.
45 Item length equals the start of the next item's key minus the start of
46 this item, except the zeroth item whose length equals the end of the
47 node minus the start of that item (plus a byte).
49 leaf compressed keys layout: (lcomp)
51 |node header:key offset + key inherit + item offset pairs:free space:key bodies:item bodies|
53 We grow towards the middle, optimizing layout for the case where we
54 append new items to the end of the node. The node header is fixed
55 length. Keys and item offsets for the items corresponding to them are
56 in increasing key order, and keys are variable length. The "key
57 inherit" field indicates how much of the key prefix is identical to
58 the previous key (stem compression as described in "Managing
59 Gigabytes" is used). key_inherit is a one byte integer. The
60 intra-node searches performed through this layout are linear searches,
61 and this is theorized to not hurt performance much due to the high
62 cost of processor stalls on modern CPUs, and the small number of keys
63 in a single node. Item offsets are relative to start of node (16
64 bits). Item bodies are in decreasing key order. Item bodies have a
65 variable size. There is a one to one to one mapping of keys to item
66 offsets to item bodies. Item offsets consist of pointers to the
67 zeroth byte of the item body. Item length equals the start of the
68 next item minus the start of this item, except the zeroth item whose
69 length equals the end of the node minus the start of that item (plus a
70 byte). In other words, item length and key length is not recorded
71 anywhere, and it does not need to be since it is computable.
73 internal node default layout: (idef1)
75 just like ldef1 except that item bodies are either blocknrs of
76 children or extents, and moving them may require updating parent
77 pointers in the nodes that they point to.
80 /* There is an inherent 3-way tradeoff between optimizing and
81 exchanging disks between different architectures and code
82 complexity. This is optimal and simple and inexchangeable.
83 Someone else can do the code for exchanging disks and make it
84 complex. It would not be that hard. Using other than the PAGE_SIZE
85 might be suboptimal.
88 #if !defined( __REISER4_NODE_H__ )
89 #define __REISER4_NODE_H__
91 #define LEAF40_NODE_SIZE PAGE_CACHE_SIZE
93 #include "../../dformat.h"
94 #include "../plugin_header.h"
96 #include <linux/types.h>
98 typedef enum {
99 NS_FOUND = 0,
100 NS_NOT_FOUND = -ENOENT
101 } node_search_result;
103 /* Maximal possible space overhead for creation of new item in a node */
104 #define REISER4_NODE_MAX_OVERHEAD ( sizeof( reiser4_key ) + 32 )
106 typedef enum {
107 REISER4_NODE_DKEYS = (1 << 0),
108 REISER4_NODE_TREE_STABLE = (1 << 1)
109 } reiser4_node_check_flag;
111 /* cut and cut_and_kill have too long list of parameters. This structure is just to safe some space on stack */
112 struct cut_list {
113 coord_t *from;
114 coord_t *to;
115 const reiser4_key *from_key;
116 const reiser4_key *to_key;
117 reiser4_key *smallest_removed;
118 carry_plugin_info *info;
119 __u32 flags;
120 struct inode *inode; /* this is to pass list of eflushed jnodes down to extent_kill_hook */
121 lock_handle *left;
122 lock_handle *right;
125 struct carry_cut_data;
126 struct carry_kill_data;
128 /* The responsibility of the node plugin is to store and give access
129 to the sequence of items within the node. */
130 typedef struct node_plugin {
131 /* generic plugin fields */
132 plugin_header h;
134 /* calculates the amount of space that will be required to store an
135 item which is in addition to the space consumed by the item body.
136 (the space consumed by the item body can be gotten by calling
137 item->estimate) */
138 size_t(*item_overhead) (const znode * node, flow_t * f);
140 /* returns free space by looking into node (i.e., without using
141 znode->free_space). */
142 size_t(*free_space) (znode * node);
143 /* search within the node for the one item which might
144 contain the key, invoking item->search_within to search within
145 that item to see if it is in there */
146 node_search_result(*lookup) (znode * node, const reiser4_key * key,
147 lookup_bias bias, coord_t * coord);
148 /* number of items in node */
149 int (*num_of_items) (const znode * node);
151 /* store information about item in @coord in @data */
152 /* break into several node ops, don't add any more uses of this before doing so */
153 /*int ( *item_at )( const coord_t *coord, reiser4_item_data *data ); */
154 char *(*item_by_coord) (const coord_t * coord);
155 int (*length_by_coord) (const coord_t * coord);
156 item_plugin *(*plugin_by_coord) (const coord_t * coord);
158 /* store item key in @key */
159 reiser4_key *(*key_at) (const coord_t * coord, reiser4_key * key);
160 /* conservatively estimate whether unit of what size can fit
161 into node. This estimation should be performed without
162 actually looking into the node's content (free space is saved in
163 znode). */
164 size_t(*estimate) (znode * node);
166 /* performs every consistency check the node plugin author could
167 imagine. Optional. */
168 int (*check) (const znode * node, __u32 flags, const char **error);
170 /* Called when node is read into memory and node plugin is
171 already detected. This should read some data into znode (like free
172 space counter) and, optionally, check data consistency.
174 int (*parse) (znode * node);
175 /* This method is called on a new node to initialise plugin specific
176 data (header, etc.) */
177 int (*init) (znode * node);
178 /* Check whether @node content conforms to this plugin format.
179 Probably only useful after support for old V3.x formats is added.
180 Uncomment after 4.0 only.
182 /* int ( *guess )( const znode *node ); */
183 #if REISER4_DEBUG
184 void (*print) (const char *prefix, const znode * node, __u32 flags);
185 #endif
186 /* change size of @item by @by bytes. @item->node has enough free
187 space. When @by > 0 - free space is appended to end of item. When
188 @by < 0 - item is truncated - it is assumed that last @by bytes if
189 the item are freed already */
190 void (*change_item_size) (coord_t * item, int by);
192 /* create new item @length bytes long in coord @target */
193 int (*create_item) (coord_t * target, const reiser4_key * key,
194 reiser4_item_data * data, carry_plugin_info * info);
196 /* update key of item. */
197 void (*update_item_key) (coord_t * target, const reiser4_key * key,
198 carry_plugin_info * info);
200 int (*cut_and_kill) (struct carry_kill_data *, carry_plugin_info *);
201 int (*cut) (struct carry_cut_data *, carry_plugin_info *);
204 * shrink item pointed to by @coord by @delta bytes.
206 int (*shrink_item) (coord_t * coord, int delta);
208 /* copy as much as possible but not more than up to @stop from
209 @stop->node to @target. If (pend == append) then data from beginning of
210 @stop->node are copied to the end of @target. If (pend == prepend) then
211 data from the end of @stop->node are copied to the beginning of
212 @target. Copied data are removed from @stop->node. Information
213 about what to do on upper level is stored in @todo */
214 int (*shift) (coord_t * stop, znode * target, shift_direction pend,
215 int delete_node, int including_insert_coord,
216 carry_plugin_info * info);
217 /* return true if this node allows skip carry() in some situations
218 (see fs/reiser4/tree.c:insert_by_coord()). Reiser3.x format
219 emulation doesn't.
221 This will speedup insertions that doesn't require updates to the
222 parent, by bypassing initialisation of carry() structures. It's
223 believed that majority of insertions will fit there.
226 int (*fast_insert) (const coord_t * coord);
227 int (*fast_paste) (const coord_t * coord);
228 int (*fast_cut) (const coord_t * coord);
229 /* this limits max size of item which can be inserted into a node and
230 number of bytes item in a node may be appended with */
231 int (*max_item_size) (void);
232 int (*prepare_removal) (znode * empty, carry_plugin_info * info);
233 /* change plugin id of items which are in a node already. Currently it is Used in tail conversion for regular
234 * files */
235 int (*set_item_plugin) (coord_t * coord, item_id);
236 } node_plugin;
238 typedef enum {
239 /* standard unified node layout used for both leaf and internal
240 nodes */
241 NODE40_ID,
242 LAST_NODE_ID
243 } reiser4_node_id;
245 extern reiser4_key *leftmost_key_in_node(const znode * node, reiser4_key * key);
246 #if REISER4_DEBUG
247 extern void print_node_content(const char *prefix, const znode * node,
248 __u32 flags);
249 #endif
251 extern void indent_znode(const znode * node);
253 typedef struct common_node_header {
255 * identifier of node plugin. Must be located at the very beginning of
256 * a node.
258 __le16 plugin_id;
259 } common_node_header;
261 /* __REISER4_NODE_H__ */
262 #endif
264 * Local variables:
265 * c-indentation-style: "K&R"
266 * mode-name: "LC"
267 * c-basic-offset: 8
268 * tab-width: 8
269 * fill-column: 79
270 * scroll-step: 1
271 * End: