On Tue, Nov 06, 2007 at 02:33:53AM -0800, akpm@linux-foundation.org wrote:
[mmotm.git] / fs / reiser4 / search.c
blob0fa6bdb3645e27b7ae5618a1dbf81a633d66715d
1 /* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
2 * reiser4/README */
4 #include "forward.h"
5 #include "debug.h"
6 #include "dformat.h"
7 #include "key.h"
8 #include "coord.h"
9 #include "seal.h"
10 #include "plugin/item/item.h"
11 #include "plugin/node/node.h"
12 #include "plugin/plugin.h"
13 #include "jnode.h"
14 #include "znode.h"
15 #include "block_alloc.h"
16 #include "tree_walk.h"
17 #include "tree.h"
18 #include "reiser4.h"
19 #include "super.h"
20 #include "inode.h"
22 #include <linux/slab.h>
24 static const char *bias_name(lookup_bias bias);
26 /* tree searching algorithm, intranode searching algorithms are in
27 plugin/node/ */
29 /* tree lookup cache
31 * The coord by key cache consists of small list of recently accessed nodes
32 * maintained according to the LRU discipline. Before doing real top-to-down
33 * tree traversal this cache is scanned for nodes that can contain key
34 * requested.
36 * The efficiency of coord cache depends heavily on locality of reference for
37 * tree accesses. Our user level simulations show reasonably good hit ratios
38 * for coord cache under most loads so far.
41 /* Initialise coord cache slot */
42 static void cbk_cache_init_slot(cbk_cache_slot *slot)
44 assert("nikita-345", slot != NULL);
46 INIT_LIST_HEAD(&slot->lru);
47 slot->node = NULL;
50 /* Initialize coord cache */
51 int cbk_cache_init(cbk_cache * cache/* cache to init */)
53 int i;
55 assert("nikita-346", cache != NULL);
57 cache->slot =
58 kmalloc(sizeof(cbk_cache_slot) * cache->nr_slots,
59 reiser4_ctx_gfp_mask_get());
60 if (cache->slot == NULL)
61 return RETERR(-ENOMEM);
63 INIT_LIST_HEAD(&cache->lru);
64 for (i = 0; i < cache->nr_slots; ++i) {
65 cbk_cache_init_slot(cache->slot + i);
66 list_add_tail(&((cache->slot + i)->lru), &cache->lru);
68 rwlock_init(&cache->guard);
69 return 0;
72 /* free cbk cache data */
73 void cbk_cache_done(cbk_cache * cache/* cache to release */)
75 assert("nikita-2493", cache != NULL);
76 if (cache->slot != NULL) {
77 kfree(cache->slot);
78 cache->slot = NULL;
82 /* macro to iterate over all cbk cache slots */
83 #define for_all_slots(cache, slot) \
84 for ((slot) = list_entry((cache)->lru.next, cbk_cache_slot, lru); \
85 &(cache)->lru != &(slot)->lru; \
86 (slot) = list_entry(slot->lru.next, cbk_cache_slot, lru))
88 #if REISER4_DEBUG
89 /* this function assures that [cbk-cache-invariant] invariant holds */
90 static int cbk_cache_invariant(const cbk_cache * cache)
92 cbk_cache_slot *slot;
93 int result;
94 int unused;
96 if (cache->nr_slots == 0)
97 return 1;
99 assert("nikita-2469", cache != NULL);
100 unused = 0;
101 result = 1;
102 read_lock(&((cbk_cache *)cache)->guard);
103 for_all_slots(cache, slot) {
104 /* in LRU first go all `used' slots followed by `unused' */
105 if (unused && (slot->node != NULL))
106 result = 0;
107 if (slot->node == NULL)
108 unused = 1;
109 else {
110 cbk_cache_slot *scan;
112 /* all cached nodes are different */
113 scan = slot;
114 while (result) {
115 scan = list_entry(scan->lru.next,
116 cbk_cache_slot, lru);
117 if (&cache->lru == &scan->lru)
118 break;
119 if (slot->node == scan->node)
120 result = 0;
123 if (!result)
124 break;
126 read_unlock(&((cbk_cache *)cache)->guard);
127 return result;
130 #endif
132 /* Remove references, if any, to @node from coord cache */
133 void cbk_cache_invalidate(const znode * node /* node to remove from cache */ ,
134 reiser4_tree * tree/* tree to remove node from */)
136 cbk_cache_slot *slot;
137 cbk_cache *cache;
138 int i;
140 assert("nikita-350", node != NULL);
141 assert("nikita-1479", LOCK_CNT_GTZ(rw_locked_tree));
143 cache = &tree->cbk_cache;
144 assert("nikita-2470", cbk_cache_invariant(cache));
146 write_lock(&(cache->guard));
147 for (i = 0, slot = cache->slot; i < cache->nr_slots; ++i, ++slot) {
148 if (slot->node == node) {
149 list_move_tail(&slot->lru, &cache->lru);
150 slot->node = NULL;
151 break;
154 write_unlock(&(cache->guard));
155 assert("nikita-2471", cbk_cache_invariant(cache));
158 /* add to the cbk-cache in the "tree" information about "node". This
159 can actually be update of existing slot in a cache. */
160 static void cbk_cache_add(const znode * node/* node to add to the cache */)
162 cbk_cache *cache;
164 cbk_cache_slot *slot;
165 int i;
167 assert("nikita-352", node != NULL);
169 cache = &znode_get_tree(node)->cbk_cache;
170 assert("nikita-2472", cbk_cache_invariant(cache));
172 if (cache->nr_slots == 0)
173 return;
175 write_lock(&(cache->guard));
176 /* find slot to update/add */
177 for (i = 0, slot = cache->slot; i < cache->nr_slots; ++i, ++slot) {
178 /* oops, this node is already in a cache */
179 if (slot->node == node)
180 break;
182 /* if all slots are used, reuse least recently used one */
183 if (i == cache->nr_slots) {
184 slot = list_entry(cache->lru.prev, cbk_cache_slot, lru);
185 slot->node = (znode *) node;
187 list_move(&slot->lru, &cache->lru);
188 write_unlock(&(cache->guard));
189 assert("nikita-2473", cbk_cache_invariant(cache));
192 static int setup_delimiting_keys(cbk_handle * h);
193 static lookup_result coord_by_handle(cbk_handle * handle);
194 static lookup_result traverse_tree(cbk_handle * h);
195 static int cbk_cache_search(cbk_handle * h);
197 static level_lookup_result cbk_level_lookup(cbk_handle * h);
198 static level_lookup_result cbk_node_lookup(cbk_handle * h);
200 /* helper functions */
202 static void update_stale_dk(reiser4_tree * tree, znode * node);
204 /* release parent node during traversal */
205 static void put_parent(cbk_handle * h);
206 /* check consistency of fields */
207 static int sanity_check(cbk_handle * h);
208 /* release resources in handle */
209 static void hput(cbk_handle * h);
211 static level_lookup_result search_to_left(cbk_handle * h);
213 /* pack numerous (numberous I should say) arguments of coord_by_key() into
214 * cbk_handle */
215 static cbk_handle *cbk_pack(cbk_handle * handle,
216 reiser4_tree * tree,
217 const reiser4_key * key,
218 coord_t *coord,
219 lock_handle * active_lh,
220 lock_handle * parent_lh,
221 znode_lock_mode lock_mode,
222 lookup_bias bias,
223 tree_level lock_level,
224 tree_level stop_level,
225 __u32 flags, ra_info_t *info)
227 memset(handle, 0, sizeof *handle);
229 handle->tree = tree;
230 handle->key = key;
231 handle->lock_mode = lock_mode;
232 handle->bias = bias;
233 handle->lock_level = lock_level;
234 handle->stop_level = stop_level;
235 handle->coord = coord;
236 /* set flags. See comment in tree.h:cbk_flags */
237 handle->flags = flags | CBK_TRUST_DK | CBK_USE_CRABLOCK;
239 handle->active_lh = active_lh;
240 handle->parent_lh = parent_lh;
241 handle->ra_info = info;
242 return handle;
245 /* main tree lookup procedure
247 Check coord cache. If key we are looking for is not found there, call cbk()
248 to do real tree traversal.
250 As we have extents on the twig level, @lock_level and @stop_level can
251 be different from LEAF_LEVEL and each other.
253 Thread cannot keep any reiser4 locks (tree, znode, dk spin-locks, or znode
254 long term locks) while calling this.
256 lookup_result coord_by_key(reiser4_tree * tree /* tree to perform search
257 * in. Usually this tree is
258 * part of file-system
259 * super-block */ ,
260 const reiser4_key * key /* key to look for */ ,
261 coord_t *coord /* where to store found
262 * position in a tree. Fields
263 * in "coord" are only valid if
264 * coord_by_key() returned
265 * "CBK_COORD_FOUND" */ ,
266 lock_handle * lh, /* resulting lock handle */
267 znode_lock_mode lock_mode /* type of lookup we
268 * want on node. Pass
269 * ZNODE_READ_LOCK here
270 * if you only want to
271 * read item found and
272 * ZNODE_WRITE_LOCK if
273 * you want to modify
274 * it */ ,
275 lookup_bias bias /* what to return if coord
276 * with exactly the @key is
277 * not in the tree */ ,
278 tree_level lock_level/* tree level where to start
279 * taking @lock type of
280 * locks */ ,
281 tree_level stop_level/* tree level to stop. Pass
282 * LEAF_LEVEL or TWIG_LEVEL
283 * here Item being looked
284 * for has to be between
285 * @lock_level and
286 * @stop_level, inclusive */ ,
287 __u32 flags /* search flags */ ,
288 ra_info_t *
289 info
290 /* information about desired tree traversal
291 * readahead */
294 cbk_handle handle;
295 lock_handle parent_lh;
296 lookup_result result;
298 init_lh(lh);
299 init_lh(&parent_lh);
301 assert("nikita-3023", reiser4_schedulable());
303 assert("nikita-353", tree != NULL);
304 assert("nikita-354", key != NULL);
305 assert("nikita-355", coord != NULL);
306 assert("nikita-356", (bias == FIND_EXACT)
307 || (bias == FIND_MAX_NOT_MORE_THAN));
308 assert("nikita-357", stop_level >= LEAF_LEVEL);
309 /* no locks can be held during tree traversal */
310 assert("nikita-2104", lock_stack_isclean(get_current_lock_stack()));
312 cbk_pack(&handle,
313 tree,
314 key,
315 coord,
317 &parent_lh,
318 lock_mode, bias, lock_level, stop_level, flags, info);
320 result = coord_by_handle(&handle);
321 assert("nikita-3247",
322 ergo(!IS_CBKERR(result), coord->node == lh->node));
323 return result;
326 /* like coord_by_key(), but starts traversal from vroot of @object rather than
327 * from tree root. */
328 lookup_result reiser4_object_lookup(struct inode *object,
329 const reiser4_key * key,
330 coord_t *coord,
331 lock_handle * lh,
332 znode_lock_mode lock_mode,
333 lookup_bias bias,
334 tree_level lock_level,
335 tree_level stop_level, __u32 flags,
336 ra_info_t *info)
338 cbk_handle handle;
339 lock_handle parent_lh;
340 lookup_result result;
342 init_lh(lh);
343 init_lh(&parent_lh);
345 assert("nikita-3023", reiser4_schedulable());
347 assert("nikita-354", key != NULL);
348 assert("nikita-355", coord != NULL);
349 assert("nikita-356", (bias == FIND_EXACT)
350 || (bias == FIND_MAX_NOT_MORE_THAN));
351 assert("nikita-357", stop_level >= LEAF_LEVEL);
352 /* no locks can be held during tree search by key */
353 assert("nikita-2104", lock_stack_isclean(get_current_lock_stack()));
355 cbk_pack(&handle,
356 object != NULL ? reiser4_tree_by_inode(object) : current_tree,
357 key,
358 coord,
360 &parent_lh,
361 lock_mode, bias, lock_level, stop_level, flags, info);
362 handle.object = object;
364 result = coord_by_handle(&handle);
365 assert("nikita-3247",
366 ergo(!IS_CBKERR(result), coord->node == lh->node));
367 return result;
370 /* lookup by cbk_handle. Common part of coord_by_key() and
371 reiser4_object_lookup(). */
372 static lookup_result coord_by_handle(cbk_handle * handle)
375 * first check cbk_cache (which is look-aside cache for our tree) and
376 * of this fails, start traversal.
378 /* first check whether "key" is in cache of recent lookups. */
379 if (cbk_cache_search(handle) == 0)
380 return handle->result;
381 else
382 return traverse_tree(handle);
385 /* Execute actor for each item (or unit, depending on @through_units_p),
386 starting from @coord, right-ward, until either:
388 - end of the tree is reached
389 - unformatted node is met
390 - error occurred
391 - @actor returns 0 or less
393 Error code, or last actor return value is returned.
395 This is used by plugin/dir/hashe_dir.c:reiser4_find_entry() to move through
396 sequence of entries with identical keys and alikes.
398 int reiser4_iterate_tree(reiser4_tree * tree /* tree to scan */ ,
399 coord_t *coord /* coord to start from */ ,
400 lock_handle * lh /* lock handle to start with and to
401 * update along the way */ ,
402 tree_iterate_actor_t actor /* function to call on each
403 * item/unit */ ,
404 void *arg /* argument to pass to @actor */ ,
405 znode_lock_mode mode /* lock mode on scanned nodes */ ,
406 int through_units_p /* call @actor on each item or on
407 * each unit */ )
409 int result;
411 assert("nikita-1143", tree != NULL);
412 assert("nikita-1145", coord != NULL);
413 assert("nikita-1146", lh != NULL);
414 assert("nikita-1147", actor != NULL);
416 result = zload(coord->node);
417 coord_clear_iplug(coord);
418 if (result != 0)
419 return result;
420 if (!coord_is_existing_unit(coord)) {
421 zrelse(coord->node);
422 return -ENOENT;
424 while ((result = actor(tree, coord, lh, arg)) > 0) {
425 /* move further */
426 if ((through_units_p && coord_next_unit(coord)) ||
427 (!through_units_p && coord_next_item(coord))) {
428 do {
429 lock_handle couple;
431 /* move to the next node */
432 init_lh(&couple);
433 result =
434 reiser4_get_right_neighbor(&couple,
435 coord->node,
436 (int)mode,
437 GN_CAN_USE_UPPER_LEVELS);
438 zrelse(coord->node);
439 if (result == 0) {
441 result = zload(couple.node);
442 if (result != 0) {
443 done_lh(&couple);
444 return result;
447 coord_init_first_unit(coord,
448 couple.node);
449 done_lh(lh);
450 move_lh(lh, &couple);
451 } else
452 return result;
453 } while (node_is_empty(coord->node));
456 assert("nikita-1149", coord_is_existing_unit(coord));
458 zrelse(coord->node);
459 return result;
462 /* return locked uber znode for @tree */
463 int get_uber_znode(reiser4_tree * tree, znode_lock_mode mode,
464 znode_lock_request pri, lock_handle * lh)
466 int result;
468 result = longterm_lock_znode(lh, tree->uber, mode, pri);
469 return result;
472 /* true if @key is strictly within @node
474 we are looking for possibly non-unique key and it is item is at the edge of
475 @node. May be it is in the neighbor.
477 static int znode_contains_key_strict(znode * node /* node to check key
478 * against */ ,
479 const reiser4_key *
480 key /* key to check */ ,
481 int isunique)
483 int answer;
485 assert("nikita-1760", node != NULL);
486 assert("nikita-1722", key != NULL);
488 if (keyge(key, &node->rd_key))
489 return 0;
491 answer = keycmp(&node->ld_key, key);
493 if (isunique)
494 return answer != GREATER_THAN;
495 else
496 return answer == LESS_THAN;
500 * Virtual Root (vroot) code.
502 * For given file system object (e.g., regular file or directory) let's
503 * define its "virtual root" as lowest in the tree (that is, furtherest
504 * from the tree root) node such that all body items of said object are
505 * located in a tree rooted at this node.
507 * Once vroot of object is found all tree lookups for items within body of
508 * this object ("object lookups") can be started from its vroot rather
509 * than from real root. This has following advantages:
511 * 1. amount of nodes traversed during lookup (and, hence, amount of
512 * key comparisons made) decreases, and
514 * 2. contention on tree root is decreased. This latter was actually
515 * motivating reason behind vroot, because spin lock of root node,
516 * which is taken when acquiring long-term lock on root node is the
517 * hottest lock in the reiser4.
519 * How to find vroot.
521 * When vroot of object F is not yet determined, all object lookups start
522 * from the root of the tree. At each tree level during traversal we have
523 * a node N such that a key we are looking for (which is the key inside
524 * object's body) is located within N. In function handle_vroot() called
525 * from cbk_level_lookup() we check whether N is possible vroot for
526 * F. Check is trivial---if neither leftmost nor rightmost item of N
527 * belongs to F (and we already have helpful ->owns_item() method of
528 * object plugin for this), then N is possible vroot of F. This, of
529 * course, relies on the assumption that each object occupies contiguous
530 * range of keys in the tree.
532 * Thus, traversing tree downward and checking each node as we go, we can
533 * find lowest such node, which, by definition, is vroot.
535 * How to track vroot.
537 * Nohow. If actual vroot changes, next object lookup will just restart
538 * from the actual tree root, refreshing object's vroot along the way.
543 * Check whether @node is possible vroot of @object.
545 static void handle_vroot(struct inode *object, znode * node)
547 file_plugin *fplug;
548 coord_t coord;
550 fplug = inode_file_plugin(object);
551 assert("nikita-3353", fplug != NULL);
552 assert("nikita-3354", fplug->owns_item != NULL);
554 if (unlikely(node_is_empty(node)))
555 return;
557 coord_init_first_unit(&coord, node);
559 * if leftmost item of @node belongs to @object, we cannot be sure
560 * that @node is vroot of @object, because, some items of @object are
561 * probably in the sub-tree rooted at the left neighbor of @node.
563 if (fplug->owns_item(object, &coord))
564 return;
565 coord_init_last_unit(&coord, node);
566 /* mutatis mutandis for the rightmost item */
567 if (fplug->owns_item(object, &coord))
568 return;
569 /* otherwise, @node is possible vroot of @object */
570 inode_set_vroot(object, node);
574 * helper function used by traverse tree to start tree traversal not from the
575 * tree root, but from @h->object's vroot, if possible.
577 static int prepare_object_lookup(cbk_handle * h)
579 znode *vroot;
580 int result;
582 vroot = inode_get_vroot(h->object);
583 if (vroot == NULL) {
585 * object doesn't have known vroot, start from real tree root.
587 return LOOKUP_CONT;
590 h->level = znode_get_level(vroot);
591 /* take a long-term lock on vroot */
592 h->result = longterm_lock_znode(h->active_lh, vroot,
593 cbk_lock_mode(h->level, h),
594 ZNODE_LOCK_LOPRI);
595 result = LOOKUP_REST;
596 if (h->result == 0) {
597 int isunique;
598 int inside;
600 isunique = h->flags & CBK_UNIQUE;
601 /* check that key is inside vroot */
602 read_lock_dk(h->tree);
603 inside = (znode_contains_key_strict(vroot, h->key, isunique) &&
604 !ZF_ISSET(vroot, JNODE_HEARD_BANSHEE));
605 read_unlock_dk(h->tree);
606 if (inside) {
607 h->result = zload(vroot);
608 if (h->result == 0) {
609 /* search for key in vroot. */
610 result = cbk_node_lookup(h);
611 zrelse(vroot); /*h->active_lh->node); */
612 if (h->active_lh->node != vroot) {
613 result = LOOKUP_REST;
614 } else if (result == LOOKUP_CONT) {
615 move_lh(h->parent_lh, h->active_lh);
616 h->flags &= ~CBK_DKSET;
622 zput(vroot);
624 if (IS_CBKERR(h->result) || result == LOOKUP_REST)
625 hput(h);
626 return result;
629 /* main function that handles common parts of tree traversal: starting
630 (fake znode handling), restarts, error handling, completion */
631 static lookup_result traverse_tree(cbk_handle * h/* search handle */)
633 int done;
634 int iterations;
635 int vroot_used;
637 assert("nikita-365", h != NULL);
638 assert("nikita-366", h->tree != NULL);
639 assert("nikita-367", h->key != NULL);
640 assert("nikita-368", h->coord != NULL);
641 assert("nikita-369", (h->bias == FIND_EXACT)
642 || (h->bias == FIND_MAX_NOT_MORE_THAN));
643 assert("nikita-370", h->stop_level >= LEAF_LEVEL);
644 assert("nikita-2949", !(h->flags & CBK_DKSET));
645 assert("zam-355", lock_stack_isclean(get_current_lock_stack()));
647 done = 0;
648 iterations = 0;
649 vroot_used = 0;
651 /* loop for restarts */
652 restart:
654 assert("nikita-3024", reiser4_schedulable());
656 h->result = CBK_COORD_FOUND;
657 /* connect_znode() needs it */
658 h->ld_key = *reiser4_min_key();
659 h->rd_key = *reiser4_max_key();
660 h->flags |= CBK_DKSET;
661 h->error = NULL;
663 if (!vroot_used && h->object != NULL) {
664 vroot_used = 1;
665 done = prepare_object_lookup(h);
666 if (done == LOOKUP_REST)
667 goto restart;
668 else if (done == LOOKUP_DONE)
669 return h->result;
671 if (h->parent_lh->node == NULL) {
672 done =
673 get_uber_znode(h->tree, ZNODE_READ_LOCK, ZNODE_LOCK_LOPRI,
674 h->parent_lh);
676 assert("nikita-1637", done != -E_DEADLOCK);
678 h->block = h->tree->root_block;
679 h->level = h->tree->height;
680 h->coord->node = h->parent_lh->node;
682 if (done != 0)
683 return done;
686 /* loop descending a tree */
687 while (!done) {
689 if (unlikely((iterations > REISER4_CBK_ITERATIONS_LIMIT) &&
690 IS_POW(iterations))) {
691 warning("nikita-1481", "Too many iterations: %i",
692 iterations);
693 reiser4_print_key("key", h->key);
694 ++iterations;
695 } else if (unlikely(iterations > REISER4_MAX_CBK_ITERATIONS)) {
696 h->error =
697 "reiser-2018: Too many iterations. Tree corrupted, or (less likely) starvation occurring.";
698 h->result = RETERR(-EIO);
699 break;
701 switch (cbk_level_lookup(h)) {
702 case LOOKUP_CONT:
703 move_lh(h->parent_lh, h->active_lh);
704 continue;
705 default:
706 wrong_return_value("nikita-372", "cbk_level");
707 case LOOKUP_DONE:
708 done = 1;
709 break;
710 case LOOKUP_REST:
711 hput(h);
712 /* deadlock avoidance is normal case. */
713 if (h->result != -E_DEADLOCK)
714 ++iterations;
715 reiser4_preempt_point();
716 goto restart;
719 /* that's all. The rest is error handling */
720 if (unlikely(h->error != NULL)) {
721 warning("nikita-373", "%s: level: %i, "
722 "lock_level: %i, stop_level: %i "
723 "lock_mode: %s, bias: %s",
724 h->error, h->level, h->lock_level, h->stop_level,
725 lock_mode_name(h->lock_mode), bias_name(h->bias));
726 reiser4_print_address("block", &h->block);
727 reiser4_print_key("key", h->key);
728 print_coord_content("coord", h->coord);
730 /* `unlikely' error case */
731 if (unlikely(IS_CBKERR(h->result))) {
732 /* failure. do cleanup */
733 hput(h);
734 } else {
735 assert("nikita-1605", WITH_DATA_RET
736 (h->coord->node, 1,
737 ergo((h->result == CBK_COORD_FOUND) &&
738 (h->bias == FIND_EXACT) &&
739 (!node_is_empty(h->coord->node)),
740 coord_is_existing_item(h->coord))));
742 return h->result;
745 /* find delimiting keys of child
747 Determine left and right delimiting keys for child pointed to by
748 @parent_coord.
751 static void find_child_delimiting_keys(znode * parent /* parent znode, passed
752 * locked */ ,
753 const coord_t *parent_coord
754 /* coord where pointer
755 * to child is stored
756 */ ,
757 reiser4_key * ld /* where to store left
758 * delimiting key */ ,
759 reiser4_key * rd /* where to store right
760 * delimiting key */ )
762 coord_t neighbor;
764 assert("nikita-1484", parent != NULL);
765 assert_rw_locked(&(znode_get_tree(parent)->dk_lock));
767 coord_dup(&neighbor, parent_coord);
769 if (neighbor.between == AT_UNIT)
770 /* imitate item ->lookup() behavior. */
771 neighbor.between = AFTER_UNIT;
773 if (coord_set_to_left(&neighbor) == 0)
774 unit_key_by_coord(&neighbor, ld);
775 else {
776 assert("nikita-14851", 0);
777 *ld = *znode_get_ld_key(parent);
780 coord_dup(&neighbor, parent_coord);
781 if (neighbor.between == AT_UNIT)
782 neighbor.between = AFTER_UNIT;
783 if (coord_set_to_right(&neighbor) == 0)
784 unit_key_by_coord(&neighbor, rd);
785 else
786 *rd = *znode_get_rd_key(parent);
790 * setup delimiting keys for a child
792 * @parent parent node
794 * @coord location in @parent where pointer to @child is
796 * @child child node
799 set_child_delimiting_keys(znode * parent, const coord_t *coord, znode * child)
801 reiser4_tree *tree;
803 assert("nikita-2952",
804 znode_get_level(parent) == znode_get_level(coord->node));
806 /* fast check without taking dk lock. This is safe, because
807 * JNODE_DKSET is never cleared once set. */
808 if (!ZF_ISSET(child, JNODE_DKSET)) {
809 tree = znode_get_tree(parent);
810 write_lock_dk(tree);
811 if (likely(!ZF_ISSET(child, JNODE_DKSET))) {
812 find_child_delimiting_keys(parent, coord,
813 &child->ld_key,
814 &child->rd_key);
815 ON_DEBUG(child->ld_key_version =
816 atomic_inc_return(&delim_key_version);
817 child->rd_key_version =
818 atomic_inc_return(&delim_key_version););
819 ZF_SET(child, JNODE_DKSET);
821 write_unlock_dk(tree);
822 return 1;
824 return 0;
827 /* Perform tree lookup at one level. This is called from cbk_traverse()
828 function that drives lookup through tree and calls cbk_node_lookup() to
829 perform lookup within one node.
831 See comments in a code.
833 static level_lookup_result cbk_level_lookup(cbk_handle * h/* search handle */)
835 int ret;
836 int setdk;
837 int ldkeyset = 0;
838 reiser4_key ldkey;
839 reiser4_key key;
840 znode *active;
842 assert("nikita-3025", reiser4_schedulable());
844 /* acquire reference to @active node */
845 active =
846 zget(h->tree, &h->block, h->parent_lh->node, h->level,
847 reiser4_ctx_gfp_mask_get());
849 if (IS_ERR(active)) {
850 h->result = PTR_ERR(active);
851 return LOOKUP_DONE;
854 /* lock @active */
855 h->result = longterm_lock_znode(h->active_lh,
856 active,
857 cbk_lock_mode(h->level, h),
858 ZNODE_LOCK_LOPRI);
859 /* longterm_lock_znode() acquires additional reference to znode (which
860 will be later released by longterm_unlock_znode()). Release
861 reference acquired by zget().
863 zput(active);
864 if (unlikely(h->result != 0))
865 goto fail_or_restart;
867 setdk = 0;
868 /* if @active is accessed for the first time, setup delimiting keys on
869 it. Delimiting keys are taken from the parent node. See
870 setup_delimiting_keys() for details.
872 if (h->flags & CBK_DKSET) {
873 setdk = setup_delimiting_keys(h);
874 h->flags &= ~CBK_DKSET;
875 } else {
876 znode *parent;
878 parent = h->parent_lh->node;
879 h->result = zload(parent);
880 if (unlikely(h->result != 0))
881 goto fail_or_restart;
883 if (!ZF_ISSET(active, JNODE_DKSET))
884 setdk = set_child_delimiting_keys(parent,
885 h->coord, active);
886 else {
887 read_lock_dk(h->tree);
888 find_child_delimiting_keys(parent, h->coord, &ldkey,
889 &key);
890 read_unlock_dk(h->tree);
891 ldkeyset = 1;
893 zrelse(parent);
896 /* this is ugly kludge. Reminder: this is necessary, because
897 ->lookup() method returns coord with ->between field probably set
898 to something different from AT_UNIT.
900 h->coord->between = AT_UNIT;
902 if (znode_just_created(active) && (h->coord->node != NULL)) {
903 write_lock_tree(h->tree);
904 /* if we are going to load znode right now, setup
905 ->in_parent: coord where pointer to this node is stored in
906 parent.
908 coord_to_parent_coord(h->coord, &active->in_parent);
909 write_unlock_tree(h->tree);
912 /* check connectedness without holding tree lock---false negatives
913 * will be re-checked by connect_znode(), and false positives are
914 * impossible---@active cannot suddenly turn into unconnected
915 * state. */
916 if (!znode_is_connected(active)) {
917 h->result = connect_znode(h->coord, active);
918 if (unlikely(h->result != 0)) {
919 put_parent(h);
920 goto fail_or_restart;
924 jload_prefetch(ZJNODE(active));
926 if (setdk)
927 update_stale_dk(h->tree, active);
929 /* put_parent() cannot be called earlier, because connect_znode()
930 assumes parent node is referenced; */
931 put_parent(h);
933 if ((!znode_contains_key_lock(active, h->key) &&
934 (h->flags & CBK_TRUST_DK))
935 || ZF_ISSET(active, JNODE_HEARD_BANSHEE)) {
936 /* 1. key was moved out of this node while this thread was
937 waiting for the lock. Restart. More elaborate solution is
938 to determine where key moved (to the left, or to the right)
939 and try to follow it through sibling pointers.
941 2. or, node itself is going to be removed from the
942 tree. Release lock and restart.
944 h->result = -E_REPEAT;
946 if (h->result == -E_REPEAT)
947 return LOOKUP_REST;
949 h->result = zload_ra(active, h->ra_info);
950 if (h->result)
951 return LOOKUP_DONE;
953 /* sanity checks */
954 if (sanity_check(h)) {
955 zrelse(active);
956 return LOOKUP_DONE;
959 /* check that key of leftmost item in the @active is the same as in
960 * its parent */
961 if (ldkeyset && !node_is_empty(active) &&
962 !keyeq(leftmost_key_in_node(active, &key), &ldkey)) {
963 warning("vs-3533", "Keys are inconsistent. Fsck?");
964 reiser4_print_key("inparent", &ldkey);
965 reiser4_print_key("inchild", &key);
966 h->result = RETERR(-EIO);
967 zrelse(active);
968 return LOOKUP_DONE;
971 if (h->object != NULL)
972 handle_vroot(h->object, active);
974 ret = cbk_node_lookup(h);
976 /* h->active_lh->node might change, but active is yet to be zrelsed */
977 zrelse(active);
979 return ret;
981 fail_or_restart:
982 if (h->result == -E_DEADLOCK)
983 return LOOKUP_REST;
984 return LOOKUP_DONE;
987 #if REISER4_DEBUG
988 /* check left and right delimiting keys of a znode */
989 void check_dkeys(znode * node)
991 znode *left;
992 znode *right;
994 read_lock_tree(current_tree);
995 read_lock_dk(current_tree);
997 assert("vs-1710", znode_is_any_locked(node));
998 assert("vs-1197",
999 !keygt(znode_get_ld_key(node), znode_get_rd_key(node)));
1001 left = node->left;
1002 right = node->right;
1004 if (ZF_ISSET(node, JNODE_LEFT_CONNECTED) && ZF_ISSET(node, JNODE_DKSET)
1005 && left != NULL && ZF_ISSET(left, JNODE_DKSET))
1006 /* check left neighbor. Note that left neighbor is not locked,
1007 so it might get wrong delimiting keys therefore */
1008 assert("vs-1198",
1009 (keyeq(znode_get_rd_key(left), znode_get_ld_key(node))
1010 || ZF_ISSET(left, JNODE_HEARD_BANSHEE)));
1012 if (ZF_ISSET(node, JNODE_RIGHT_CONNECTED) && ZF_ISSET(node, JNODE_DKSET)
1013 && right != NULL && ZF_ISSET(right, JNODE_DKSET))
1014 /* check right neighbor. Note that right neighbor is not
1015 locked, so it might get wrong delimiting keys therefore */
1016 assert("vs-1199",
1017 (keyeq(znode_get_rd_key(node), znode_get_ld_key(right))
1018 || ZF_ISSET(right, JNODE_HEARD_BANSHEE)));
1020 read_unlock_dk(current_tree);
1021 read_unlock_tree(current_tree);
1023 #endif
1025 /* true if @key is left delimiting key of @node */
1026 static int key_is_ld(znode * node, const reiser4_key * key)
1028 int ld;
1030 assert("nikita-1716", node != NULL);
1031 assert("nikita-1758", key != NULL);
1033 read_lock_dk(znode_get_tree(node));
1034 assert("nikita-1759", znode_contains_key(node, key));
1035 ld = keyeq(znode_get_ld_key(node), key);
1036 read_unlock_dk(znode_get_tree(node));
1037 return ld;
1040 /* Process one node during tree traversal.
1042 This is called by cbk_level_lookup(). */
1043 static level_lookup_result cbk_node_lookup(cbk_handle * h/* search handle */)
1045 /* node plugin of @active */
1046 node_plugin *nplug;
1047 /* item plugin of item that was found */
1048 item_plugin *iplug;
1049 /* search bias */
1050 lookup_bias node_bias;
1051 /* node we are operating upon */
1052 znode *active;
1053 /* tree we are searching in */
1054 reiser4_tree *tree;
1055 /* result */
1056 int result;
1058 assert("nikita-379", h != NULL);
1060 active = h->active_lh->node;
1061 tree = h->tree;
1063 nplug = active->nplug;
1064 assert("nikita-380", nplug != NULL);
1066 ON_DEBUG(check_dkeys(active));
1068 /* return item from "active" node with maximal key not greater than
1069 "key" */
1070 node_bias = h->bias;
1071 result = nplug->lookup(active, h->key, node_bias, h->coord);
1072 if (unlikely(result != NS_FOUND && result != NS_NOT_FOUND)) {
1073 /* error occurred */
1074 h->result = result;
1075 return LOOKUP_DONE;
1077 if (h->level == h->stop_level) {
1078 /* welcome to the stop level */
1079 assert("nikita-381", h->coord->node == active);
1080 if (result == NS_FOUND) {
1081 /* success of tree lookup */
1082 if (!(h->flags & CBK_UNIQUE)
1083 && key_is_ld(active, h->key))
1084 return search_to_left(h);
1085 else
1086 h->result = CBK_COORD_FOUND;
1087 } else {
1088 h->result = CBK_COORD_NOTFOUND;
1090 if (!(h->flags & CBK_IN_CACHE))
1091 cbk_cache_add(active);
1092 return LOOKUP_DONE;
1095 if (h->level > TWIG_LEVEL && result == NS_NOT_FOUND) {
1096 h->error = "not found on internal node";
1097 h->result = result;
1098 return LOOKUP_DONE;
1101 assert("vs-361", h->level > h->stop_level);
1103 if (handle_eottl(h, &result)) {
1104 assert("vs-1674", (result == LOOKUP_DONE ||
1105 result == LOOKUP_REST));
1106 return result;
1109 /* go down to next level */
1110 check_me("vs-12", zload(h->coord->node) == 0);
1111 assert("nikita-2116", item_is_internal(h->coord));
1112 iplug = item_plugin_by_coord(h->coord);
1113 iplug->s.internal.down_link(h->coord, h->key, &h->block);
1114 zrelse(h->coord->node);
1115 --h->level;
1116 return LOOKUP_CONT; /* continue */
1119 /* scan cbk_cache slots looking for a match for @h */
1120 static int cbk_cache_scan_slots(cbk_handle * h/* cbk handle */)
1122 level_lookup_result llr;
1123 znode *node;
1124 reiser4_tree *tree;
1125 cbk_cache_slot *slot;
1126 cbk_cache *cache;
1127 tree_level level;
1128 int isunique;
1129 const reiser4_key *key;
1130 int result;
1132 assert("nikita-1317", h != NULL);
1133 assert("nikita-1315", h->tree != NULL);
1134 assert("nikita-1316", h->key != NULL);
1136 tree = h->tree;
1137 cache = &tree->cbk_cache;
1138 if (cache->nr_slots == 0)
1139 /* size of cbk cache was set to 0 by mount time option. */
1140 return RETERR(-ENOENT);
1142 assert("nikita-2474", cbk_cache_invariant(cache));
1143 node = NULL; /* to keep gcc happy */
1144 level = h->level;
1145 key = h->key;
1146 isunique = h->flags & CBK_UNIQUE;
1147 result = RETERR(-ENOENT);
1150 * this is time-critical function and dragons had, hence, been settled
1151 * here.
1153 * Loop below scans cbk cache slots trying to find matching node with
1154 * suitable range of delimiting keys and located at the h->level.
1156 * Scan is done under cbk cache spin lock that protects slot->node
1157 * pointers. If suitable node is found we want to pin it in
1158 * memory. But slot->node can point to the node with x_count 0
1159 * (unreferenced). Such node can be recycled at any moment, or can
1160 * already be in the process of being recycled (within jput()).
1162 * As we found node in the cbk cache, it means that jput() hasn't yet
1163 * called cbk_cache_invalidate().
1165 * We acquire reference to the node without holding tree lock, and
1166 * later, check node's RIP bit. This avoids races with jput().
1169 rcu_read_lock();
1170 read_lock(&((cbk_cache *)cache)->guard);
1172 slot = list_entry(cache->lru.next, cbk_cache_slot, lru);
1173 slot = list_entry(slot->lru.prev, cbk_cache_slot, lru);
1174 BUG_ON(&slot->lru != &cache->lru);/*????*/
1175 while (1) {
1177 slot = list_entry(slot->lru.next, cbk_cache_slot, lru);
1179 if (&cache->lru != &slot->lru)
1180 node = slot->node;
1181 else
1182 node = NULL;
1184 if (unlikely(node == NULL))
1185 break;
1188 * this is (hopefully) the only place in the code where we are
1189 * working with delimiting keys without holding dk lock. This
1190 * is fine here, because this is only "guess" anyway---keys
1191 * are rechecked under dk lock below.
1193 if (znode_get_level(node) == level &&
1194 /* reiser4_min_key < key < reiser4_max_key */
1195 znode_contains_key_strict(node, key, isunique)) {
1196 zref(node);
1197 result = 0;
1198 spin_lock_prefetch(&tree->tree_lock);
1199 break;
1202 read_unlock(&((cbk_cache *)cache)->guard);
1204 assert("nikita-2475", cbk_cache_invariant(cache));
1206 if (unlikely(result == 0 && ZF_ISSET(node, JNODE_RIP)))
1207 result = -ENOENT;
1209 rcu_read_unlock();
1211 if (result != 0) {
1212 h->result = CBK_COORD_NOTFOUND;
1213 return RETERR(-ENOENT);
1216 result =
1217 longterm_lock_znode(h->active_lh, node, cbk_lock_mode(level, h),
1218 ZNODE_LOCK_LOPRI);
1219 zput(node);
1220 if (result != 0)
1221 return result;
1222 result = zload(node);
1223 if (result != 0)
1224 return result;
1226 /* recheck keys */
1227 read_lock_dk(tree);
1228 result = (znode_contains_key_strict(node, key, isunique) &&
1229 !ZF_ISSET(node, JNODE_HEARD_BANSHEE));
1230 read_unlock_dk(tree);
1231 if (result) {
1232 /* do lookup inside node */
1233 llr = cbk_node_lookup(h);
1234 /* if cbk_node_lookup() wandered to another node (due to eottl
1235 or non-unique keys), adjust @node */
1236 /*node = h->active_lh->node; */
1238 if (llr != LOOKUP_DONE) {
1239 /* restart or continue on the next level */
1240 result = RETERR(-ENOENT);
1241 } else if (IS_CBKERR(h->result))
1242 /* io or oom */
1243 result = RETERR(-ENOENT);
1244 else {
1245 /* good. Either item found or definitely not found. */
1246 result = 0;
1248 write_lock(&(cache->guard));
1249 if (slot->node == h->active_lh->node) {
1250 /* if this node is still in cbk cache---move
1251 its slot to the head of the LRU list. */
1252 list_move(&slot->lru, &cache->lru);
1254 write_unlock(&(cache->guard));
1256 } else {
1257 /* race. While this thread was waiting for the lock, node was
1258 rebalanced and item we are looking for, shifted out of it
1259 (if it ever was here).
1261 Continuing scanning is almost hopeless: node key range was
1262 moved to, is almost certainly at the beginning of the LRU
1263 list at this time, because it's hot, but restarting
1264 scanning from the very beginning is complex. Just return,
1265 so that cbk() will be performed. This is not that
1266 important, because such races should be rare. Are they?
1268 result = RETERR(-ENOENT); /* -ERAUGHT */
1270 zrelse(node);
1271 assert("nikita-2476", cbk_cache_invariant(cache));
1272 return result;
1275 /* look for item with given key in the coord cache
1277 This function, called by coord_by_key(), scans "coord cache" (&cbk_cache)
1278 which is a small LRU list of znodes accessed lately. For each znode in
1279 znode in this list, it checks whether key we are looking for fits into key
1280 range covered by this node. If so, and in addition, node lies at allowed
1281 level (this is to handle extents on a twig level), node is locked, and
1282 lookup inside it is performed.
1284 we need a measurement of the cost of this cache search compared to the cost
1285 of coord_by_key.
1288 static int cbk_cache_search(cbk_handle * h/* cbk handle */)
1290 int result = 0;
1291 tree_level level;
1293 /* add CBK_IN_CACHE to the handle flags. This means that
1294 * cbk_node_lookup() assumes that cbk_cache is scanned and would add
1295 * found node to the cache. */
1296 h->flags |= CBK_IN_CACHE;
1297 for (level = h->stop_level; level <= h->lock_level; ++level) {
1298 h->level = level;
1299 result = cbk_cache_scan_slots(h);
1300 if (result != 0) {
1301 done_lh(h->active_lh);
1302 done_lh(h->parent_lh);
1303 } else {
1304 assert("nikita-1319", !IS_CBKERR(h->result));
1305 break;
1308 h->flags &= ~CBK_IN_CACHE;
1309 return result;
1312 /* type of lock we want to obtain during tree traversal. On stop level
1313 we want type of lock user asked for, on upper levels: read lock. */
1314 znode_lock_mode cbk_lock_mode(tree_level level, cbk_handle * h)
1316 assert("nikita-382", h != NULL);
1318 return (level <= h->lock_level) ? h->lock_mode : ZNODE_READ_LOCK;
1321 /* update outdated delimiting keys */
1322 static void stale_dk(reiser4_tree * tree, znode * node)
1324 znode *right;
1326 read_lock_tree(tree);
1327 write_lock_dk(tree);
1328 right = node->right;
1330 if (ZF_ISSET(node, JNODE_RIGHT_CONNECTED) &&
1331 right && ZF_ISSET(right, JNODE_DKSET) &&
1332 !keyeq(znode_get_rd_key(node), znode_get_ld_key(right)))
1333 znode_set_rd_key(node, znode_get_ld_key(right));
1335 write_unlock_dk(tree);
1336 read_unlock_tree(tree);
1339 /* check for possibly outdated delimiting keys, and update them if
1340 * necessary. */
1341 static void update_stale_dk(reiser4_tree * tree, znode * node)
1343 znode *right;
1344 reiser4_key rd;
1346 read_lock_tree(tree);
1347 read_lock_dk(tree);
1348 rd = *znode_get_rd_key(node);
1349 right = node->right;
1350 if (unlikely(ZF_ISSET(node, JNODE_RIGHT_CONNECTED) &&
1351 right && ZF_ISSET(right, JNODE_DKSET) &&
1352 !keyeq(&rd, znode_get_ld_key(right)))) {
1353 assert("nikita-38211", ZF_ISSET(node, JNODE_DKSET));
1354 read_unlock_dk(tree);
1355 read_unlock_tree(tree);
1356 stale_dk(tree, node);
1357 return;
1359 read_unlock_dk(tree);
1360 read_unlock_tree(tree);
1364 * handle searches a the non-unique key.
1366 * Suppose that we are looking for an item with possibly non-unique key 100.
1368 * Root node contains two pointers: one to a node with left delimiting key 0,
1369 * and another to a node with left delimiting key 100. Item we interested in
1370 * may well happen in the sub-tree rooted at the first pointer.
1372 * To handle this search_to_left() is called when search reaches stop
1373 * level. This function checks it is _possible_ that item we are looking for
1374 * is in the left neighbor (this can be done by comparing delimiting keys) and
1375 * if so, tries to lock left neighbor (this is low priority lock, so it can
1376 * deadlock, tree traversal is just restarted if it did) and then checks
1377 * whether left neighbor actually contains items with our key.
1379 * Note that this is done on the stop level only. It is possible to try such
1380 * left-check on each level, but as duplicate keys are supposed to be rare
1381 * (very unlikely that more than one node is completely filled with items with
1382 * duplicate keys), it sis cheaper to scan to the left on the stop level once.
1385 static level_lookup_result search_to_left(cbk_handle * h/* search handle */)
1387 level_lookup_result result;
1388 coord_t *coord;
1389 znode *node;
1390 znode *neighbor;
1392 lock_handle lh;
1394 assert("nikita-1761", h != NULL);
1395 assert("nikita-1762", h->level == h->stop_level);
1397 init_lh(&lh);
1398 coord = h->coord;
1399 node = h->active_lh->node;
1400 assert("nikita-1763", coord_is_leftmost_unit(coord));
1402 h->result =
1403 reiser4_get_left_neighbor(&lh, node, (int)h->lock_mode,
1404 GN_CAN_USE_UPPER_LEVELS);
1405 neighbor = NULL;
1406 switch (h->result) {
1407 case -E_DEADLOCK:
1408 result = LOOKUP_REST;
1409 break;
1410 case 0:{
1411 node_plugin *nplug;
1412 coord_t crd;
1413 lookup_bias bias;
1415 neighbor = lh.node;
1416 h->result = zload(neighbor);
1417 if (h->result != 0) {
1418 result = LOOKUP_DONE;
1419 break;
1422 nplug = neighbor->nplug;
1424 coord_init_zero(&crd);
1425 bias = h->bias;
1426 h->bias = FIND_EXACT;
1427 h->result =
1428 nplug->lookup(neighbor, h->key, h->bias, &crd);
1429 h->bias = bias;
1431 if (h->result == NS_NOT_FOUND) {
1432 case -E_NO_NEIGHBOR:
1433 h->result = CBK_COORD_FOUND;
1434 if (!(h->flags & CBK_IN_CACHE))
1435 cbk_cache_add(node);
1436 default: /* some other error */
1437 result = LOOKUP_DONE;
1438 } else if (h->result == NS_FOUND) {
1439 read_lock_dk(znode_get_tree(neighbor));
1440 h->rd_key = *znode_get_ld_key(node);
1441 leftmost_key_in_node(neighbor, &h->ld_key);
1442 read_unlock_dk(znode_get_tree(neighbor));
1443 h->flags |= CBK_DKSET;
1445 h->block = *znode_get_block(neighbor);
1446 /* clear coord->node so that cbk_level_lookup()
1447 wouldn't overwrite parent hint in neighbor.
1449 Parent hint was set up by
1450 reiser4_get_left_neighbor()
1452 /* FIXME: why do we have to spinlock here? */
1453 write_lock_tree(znode_get_tree(neighbor));
1454 h->coord->node = NULL;
1455 write_unlock_tree(znode_get_tree(neighbor));
1456 result = LOOKUP_CONT;
1457 } else {
1458 result = LOOKUP_DONE;
1460 if (neighbor != NULL)
1461 zrelse(neighbor);
1464 done_lh(&lh);
1465 return result;
1468 /* debugging aid: return symbolic name of search bias */
1469 static const char *bias_name(lookup_bias bias/* bias to get name of */)
1471 if (bias == FIND_EXACT)
1472 return "exact";
1473 else if (bias == FIND_MAX_NOT_MORE_THAN)
1474 return "left-slant";
1475 /* else if( bias == RIGHT_SLANT_BIAS ) */
1476 /* return "right-bias"; */
1477 else {
1478 static char buf[30];
1480 sprintf(buf, "unknown: %i", bias);
1481 return buf;
1485 #if REISER4_DEBUG
1486 /* debugging aid: print human readable information about @p */
1487 void print_coord_content(const char *prefix /* prefix to print */ ,
1488 coord_t *p/* coord to print */)
1490 reiser4_key key;
1492 if (p == NULL) {
1493 printk("%s: null\n", prefix);
1494 return;
1496 if ((p->node != NULL) && znode_is_loaded(p->node)
1497 && coord_is_existing_item(p))
1498 printk("%s: data: %p, length: %i\n", prefix,
1499 item_body_by_coord(p), item_length_by_coord(p));
1500 if (znode_is_loaded(p->node)) {
1501 item_key_by_coord(p, &key);
1502 reiser4_print_key(prefix, &key);
1506 /* debugging aid: print human readable information about @block */
1507 void reiser4_print_address(const char *prefix /* prefix to print */ ,
1508 const reiser4_block_nr * block/* block number to print */)
1510 printk("%s: %s\n", prefix, sprint_address(block));
1512 #endif
1514 /* return string containing human readable representation of @block */
1515 char *sprint_address(const reiser4_block_nr *
1516 block/* block number to print */)
1518 static char address[30];
1520 if (block == NULL)
1521 sprintf(address, "null");
1522 else if (reiser4_blocknr_is_fake(block))
1523 sprintf(address, "%llx", (unsigned long long)(*block));
1524 else
1525 sprintf(address, "%llu", (unsigned long long)(*block));
1526 return address;
1529 /* release parent node during traversal */
1530 static void put_parent(cbk_handle * h/* search handle */)
1532 assert("nikita-383", h != NULL);
1533 if (h->parent_lh->node != NULL)
1534 longterm_unlock_znode(h->parent_lh);
1537 /* helper function used by coord_by_key(): release reference to parent znode
1538 stored in handle before processing its child. */
1539 static void hput(cbk_handle * h/* search handle */)
1541 assert("nikita-385", h != NULL);
1542 done_lh(h->parent_lh);
1543 done_lh(h->active_lh);
1546 /* Helper function used by cbk(): update delimiting keys of child node (stored
1547 in h->active_lh->node) using key taken from parent on the parent level. */
1548 static int setup_delimiting_keys(cbk_handle * h/* search handle */)
1550 znode *active;
1551 reiser4_tree *tree;
1553 assert("nikita-1088", h != NULL);
1555 active = h->active_lh->node;
1557 /* fast check without taking dk lock. This is safe, because
1558 * JNODE_DKSET is never cleared once set. */
1559 if (!ZF_ISSET(active, JNODE_DKSET)) {
1560 tree = znode_get_tree(active);
1561 write_lock_dk(tree);
1562 if (!ZF_ISSET(active, JNODE_DKSET)) {
1563 znode_set_ld_key(active, &h->ld_key);
1564 znode_set_rd_key(active, &h->rd_key);
1565 ZF_SET(active, JNODE_DKSET);
1567 write_unlock_dk(tree);
1568 return 1;
1570 return 0;
1573 /* true if @block makes sense for the @tree. Used to detect corrupted node
1574 * pointers */
1575 static int
1576 block_nr_is_correct(reiser4_block_nr * block /* block number to check */ ,
1577 reiser4_tree * tree/* tree to check against */)
1579 assert("nikita-757", block != NULL);
1580 assert("nikita-758", tree != NULL);
1582 /* check to see if it exceeds the size of the device. */
1583 return reiser4_blocknr_is_sane_for(tree->super, block);
1586 /* check consistency of fields */
1587 static int sanity_check(cbk_handle * h/* search handle */)
1589 assert("nikita-384", h != NULL);
1591 if (h->level < h->stop_level) {
1592 h->error = "Buried under leaves";
1593 h->result = RETERR(-EIO);
1594 return LOOKUP_DONE;
1595 } else if (!block_nr_is_correct(&h->block, h->tree)) {
1596 h->error = "bad block number";
1597 h->result = RETERR(-EIO);
1598 return LOOKUP_DONE;
1599 } else
1600 return 0;
1603 /* Make Linus happy.
1604 Local variables:
1605 c-indentation-style: "K&R"
1606 mode-name: "LC"
1607 c-basic-offset: 8
1608 tab-width: 8
1609 fill-column: 120
1610 scroll-step: 1
1611 End: