On Tue, Nov 06, 2007 at 02:33:53AM -0800, akpm@linux-foundation.org wrote:
[mmotm.git] / fs / reiser4 / tree_mod.c
blobbcc6548dfa6d02d1a38c6cc98bfabaf56f787663
1 /* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
2 * reiser4/README */
4 /*
5 * Functions to add/delete new nodes to/from the tree.
7 * Functions from this file are used by carry (see carry*) to handle:
9 * . insertion of new formatted node into tree
11 * . addition of new tree root, increasing tree height
13 * . removing tree root, decreasing tree height
17 #include "forward.h"
18 #include "debug.h"
19 #include "dformat.h"
20 #include "key.h"
21 #include "coord.h"
22 #include "plugin/plugin.h"
23 #include "jnode.h"
24 #include "znode.h"
25 #include "tree_mod.h"
26 #include "block_alloc.h"
27 #include "tree_walk.h"
28 #include "tree.h"
29 #include "super.h"
31 #include <linux/err.h>
33 static int add_child_ptr(znode * parent, znode * child);
34 /* warning only issued if error is not -E_REPEAT */
35 #define ewarning( error, ... ) \
36 if( ( error ) != -E_REPEAT ) \
37 warning( __VA_ARGS__ )
39 /* allocate new node on the @level and immediately on the right of @brother. */
40 znode * reiser4_new_node(znode * brother /* existing left neighbor
41 * of new node */,
42 tree_level level /* tree level at which new node is to
43 * be allocated */)
45 znode *result;
46 int retcode;
47 reiser4_block_nr blocknr;
49 assert("nikita-930", brother != NULL);
50 assert("umka-264", level < REAL_MAX_ZTREE_HEIGHT);
52 retcode = assign_fake_blocknr_formatted(&blocknr);
53 if (retcode == 0) {
54 result =
55 zget(znode_get_tree(brother), &blocknr, NULL, level,
56 reiser4_ctx_gfp_mask_get());
57 if (IS_ERR(result)) {
58 ewarning(PTR_ERR(result), "nikita-929",
59 "Cannot allocate znode for carry: %li",
60 PTR_ERR(result));
61 return result;
63 /* cheap test, can be executed even when debugging is off */
64 if (!znode_just_created(result)) {
65 warning("nikita-2213",
66 "Allocated already existing block: %llu",
67 (unsigned long long)blocknr);
68 zput(result);
69 return ERR_PTR(RETERR(-EIO));
72 assert("nikita-931", result != NULL);
73 result->nplug = znode_get_tree(brother)->nplug;
74 assert("nikita-933", result->nplug != NULL);
76 retcode = zinit_new(result, reiser4_ctx_gfp_mask_get());
77 if (retcode == 0) {
78 ZF_SET(result, JNODE_CREATED);
79 zrelse(result);
80 } else {
81 zput(result);
82 result = ERR_PTR(retcode);
84 } else {
85 /* failure to allocate new node during balancing.
86 This should never happen. Ever. Returning -E_REPEAT
87 is not viable solution, because "out of disk space"
88 is not transient error that will go away by itself.
90 ewarning(retcode, "nikita-928",
91 "Cannot allocate block for carry: %i", retcode);
92 result = ERR_PTR(retcode);
94 assert("nikita-1071", result != NULL);
95 return result;
98 /* allocate new root and add it to the tree
100 This helper function is called by add_new_root().
103 znode *reiser4_add_tree_root(znode * old_root /* existing tree root */ ,
104 znode * fake /* "fake" znode */ )
106 reiser4_tree *tree = znode_get_tree(old_root);
107 znode *new_root = NULL; /* to shut gcc up */
108 int result;
110 assert("nikita-1069", old_root != NULL);
111 assert("umka-262", fake != NULL);
112 assert("umka-263", tree != NULL);
114 /* "fake" znode---one always hanging just above current root. This
115 node is locked when new root is created or existing root is
116 deleted. Downward tree traversal takes lock on it before taking
117 lock on a root node. This avoids race conditions with root
118 manipulations.
121 assert("nikita-1348", znode_above_root(fake));
122 assert("nikita-1211", znode_is_root(old_root));
124 result = 0;
125 if (tree->height >= REAL_MAX_ZTREE_HEIGHT) {
126 warning("nikita-1344", "Tree is too tall: %i", tree->height);
127 /* ext2 returns -ENOSPC when it runs out of free inodes with a
128 following comment (fs/ext2/ialloc.c:441): Is it really
129 ENOSPC?
131 -EXFULL? -EINVAL?
133 result = RETERR(-ENOSPC);
134 } else {
135 /* Allocate block for new root. It's not that
136 important where it will be allocated, as root is
137 almost always in memory. Moreover, allocate on
138 flush can be going here.
140 assert("nikita-1448", znode_is_root(old_root));
141 new_root = reiser4_new_node(fake, tree->height + 1);
142 if (!IS_ERR(new_root) && (result = zload(new_root)) == 0) {
143 lock_handle rlh;
145 init_lh(&rlh);
146 result =
147 longterm_lock_znode(&rlh, new_root,
148 ZNODE_WRITE_LOCK,
149 ZNODE_LOCK_LOPRI);
150 if (result == 0) {
151 parent_coord_t *in_parent;
153 znode_make_dirty(fake);
155 /* new root is a child of "fake" node */
156 write_lock_tree(tree);
158 ++tree->height;
160 /* recalculate max balance overhead */
161 tree->estimate_one_insert =
162 estimate_one_insert_item(tree);
164 tree->root_block = *znode_get_block(new_root);
165 in_parent = &new_root->in_parent;
166 init_parent_coord(in_parent, fake);
167 /* manually insert new root into sibling
168 * list. With this all nodes involved into
169 * balancing are connected after balancing is
170 * done---useful invariant to check. */
171 sibling_list_insert_nolock(new_root, NULL);
172 write_unlock_tree(tree);
174 /* insert into new root pointer to the
175 @old_root. */
176 assert("nikita-1110",
177 WITH_DATA(new_root,
178 node_is_empty(new_root)));
179 write_lock_dk(tree);
180 znode_set_ld_key(new_root, reiser4_min_key());
181 znode_set_rd_key(new_root, reiser4_max_key());
182 write_unlock_dk(tree);
183 if (REISER4_DEBUG) {
184 ZF_CLR(old_root, JNODE_LEFT_CONNECTED);
185 ZF_CLR(old_root, JNODE_RIGHT_CONNECTED);
186 ZF_SET(old_root, JNODE_ORPHAN);
188 result = add_child_ptr(new_root, old_root);
189 done_lh(&rlh);
191 zrelse(new_root);
194 if (result != 0)
195 new_root = ERR_PTR(result);
196 return new_root;
199 /* build &reiser4_item_data for inserting child pointer
201 Build &reiser4_item_data that can be later used to insert pointer to @child
202 in its parent.
205 void build_child_ptr_data(znode * child /* node pointer to which will be
206 * inserted */ ,
207 reiser4_item_data * data /* where to store result */ )
209 assert("nikita-1116", child != NULL);
210 assert("nikita-1117", data != NULL);
213 * NOTE: use address of child's blocknr as address of data to be
214 * inserted. As result of this data gets into on-disk structure in cpu
215 * byte order. internal's create_hook converts it to little endian byte
216 * order.
218 data->data = (char *)znode_get_block(child);
219 /* data -> data is kernel space */
220 data->user = 0;
221 data->length = sizeof(reiser4_block_nr);
222 /* FIXME-VS: hardcoded internal item? */
224 /* AUDIT: Is it possible that "item_plugin_by_id" may find nothing? */
225 data->iplug = item_plugin_by_id(NODE_POINTER_ID);
228 /* add pointer to @child into empty @parent.
230 This is used when pointer to old root is inserted into new root which is
231 empty.
233 static int add_child_ptr(znode * parent, znode * child)
235 coord_t coord;
236 reiser4_item_data data;
237 int result;
238 reiser4_key key;
240 assert("nikita-1111", parent != NULL);
241 assert("nikita-1112", child != NULL);
242 assert("nikita-1115",
243 znode_get_level(parent) == znode_get_level(child) + 1);
245 result = zload(parent);
246 if (result != 0)
247 return result;
248 assert("nikita-1113", node_is_empty(parent));
249 coord_init_first_unit(&coord, parent);
251 build_child_ptr_data(child, &data);
252 data.arg = NULL;
254 read_lock_dk(znode_get_tree(parent));
255 key = *znode_get_ld_key(child);
256 read_unlock_dk(znode_get_tree(parent));
258 result = node_plugin_by_node(parent)->create_item(&coord, &key, &data,
259 NULL);
260 znode_make_dirty(parent);
261 zrelse(parent);
262 return result;
265 /* actually remove tree root */
266 static int reiser4_kill_root(reiser4_tree * tree /* tree from which root is
267 * being removed */,
268 znode * old_root /* root node that is being
269 * removed */ ,
270 znode * new_root /* new root---sole child of
271 * @old_root */,
272 const reiser4_block_nr * new_root_blk /* disk address of
273 * @new_root */)
275 znode *uber;
276 int result;
277 lock_handle handle_for_uber;
279 assert("umka-265", tree != NULL);
280 assert("nikita-1198", new_root != NULL);
281 assert("nikita-1199",
282 znode_get_level(new_root) + 1 == znode_get_level(old_root));
284 assert("nikita-1201", znode_is_write_locked(old_root));
286 assert("nikita-1203",
287 disk_addr_eq(new_root_blk, znode_get_block(new_root)));
289 init_lh(&handle_for_uber);
290 /* obtain and lock "fake" znode protecting changes in tree height. */
291 result = get_uber_znode(tree, ZNODE_WRITE_LOCK, ZNODE_LOCK_HIPRI,
292 &handle_for_uber);
293 if (result == 0) {
294 uber = handle_for_uber.node;
296 znode_make_dirty(uber);
298 /* don't take long term lock a @new_root. Take spinlock. */
300 write_lock_tree(tree);
302 tree->root_block = *new_root_blk;
303 --tree->height;
305 /* recalculate max balance overhead */
306 tree->estimate_one_insert = estimate_one_insert_item(tree);
308 assert("nikita-1202",
309 tree->height == znode_get_level(new_root));
311 /* new root is child on "fake" node */
312 init_parent_coord(&new_root->in_parent, uber);
313 ++uber->c_count;
315 /* sibling_list_insert_nolock(new_root, NULL); */
316 write_unlock_tree(tree);
318 /* reinitialise old root. */
319 result = node_plugin_by_node(old_root)->init(old_root);
320 znode_make_dirty(old_root);
321 if (result == 0) {
322 assert("nikita-1279", node_is_empty(old_root));
323 ZF_SET(old_root, JNODE_HEARD_BANSHEE);
324 old_root->c_count = 0;
327 done_lh(&handle_for_uber);
329 return result;
332 /* remove tree root
334 This function removes tree root, decreasing tree height by one. Tree root
335 and its only child (that is going to become new tree root) are write locked
336 at the entry.
338 To remove tree root we need to take lock on special "fake" znode that
339 protects changes of tree height. See comments in reiser4_add_tree_root() for
340 more on this.
342 Also parent pointers have to be updated in
343 old and new root. To simplify code, function is split into two parts: outer
344 reiser4_kill_tree_root() collects all necessary arguments and calls
345 reiser4_kill_root() to do the actual job.
348 int reiser4_kill_tree_root(znode * old_root /* tree root that we are
349 removing*/)
351 int result;
352 coord_t down_link;
353 znode *new_root;
354 reiser4_tree *tree;
356 assert("umka-266", current_tree != NULL);
357 assert("nikita-1194", old_root != NULL);
358 assert("nikita-1196", znode_is_root(old_root));
359 assert("nikita-1200", node_num_items(old_root) == 1);
360 assert("nikita-1401", znode_is_write_locked(old_root));
362 coord_init_first_unit(&down_link, old_root);
364 tree = znode_get_tree(old_root);
365 new_root = child_znode(&down_link, old_root, 0, 1);
366 if (!IS_ERR(new_root)) {
367 result =
368 reiser4_kill_root(tree, old_root, new_root,
369 znode_get_block(new_root));
370 zput(new_root);
371 } else
372 result = PTR_ERR(new_root);
374 return result;
377 /* Make Linus happy.
378 Local variables:
379 c-indentation-style: "K&R"
380 mode-name: "LC"
381 c-basic-offset: 8
382 tab-width: 8
383 fill-column: 120
384 scroll-step: 1
385 End: