revert-mm-fix-blkdev-size-calculation-in-generic_write_checks
[linux-2.6/linux-trees-mm.git] / fs / reiser4 / search.c
blob9d35e11dfca21c12f7639ac26609a4e7a7545bdf
1 /* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
2 * reiser4/README */
4 #include "forward.h"
5 #include "debug.h"
6 #include "dformat.h"
7 #include "key.h"
8 #include "coord.h"
9 #include "seal.h"
10 #include "plugin/item/item.h"
11 #include "plugin/node/node.h"
12 #include "plugin/plugin.h"
13 #include "jnode.h"
14 #include "znode.h"
15 #include "block_alloc.h"
16 #include "tree_walk.h"
17 #include "tree.h"
18 #include "reiser4.h"
19 #include "super.h"
20 #include "inode.h"
22 #include <linux/slab.h>
24 static const char *bias_name(lookup_bias bias);
26 /* tree searching algorithm, intranode searching algorithms are in
27 plugin/node/ */
29 /* tree lookup cache
31 * The coord by key cache consists of small list of recently accessed nodes
32 * maintained according to the LRU discipline. Before doing real top-to-down
33 * tree traversal this cache is scanned for nodes that can contain key
34 * requested.
36 * The efficiency of coord cache depends heavily on locality of reference for
37 * tree accesses. Our user level simulations show reasonably good hit ratios
38 * for coord cache under most loads so far.
41 /* Initialise coord cache slot */
42 static void cbk_cache_init_slot(cbk_cache_slot *slot)
44 assert("nikita-345", slot != NULL);
46 INIT_LIST_HEAD(&slot->lru);
47 slot->node = NULL;
50 /* Initialize coord cache */
51 int cbk_cache_init(cbk_cache *cache /* cache to init */ )
53 int i;
55 assert("nikita-346", cache != NULL);
57 cache->slot =
58 kmalloc(sizeof(cbk_cache_slot) * cache->nr_slots,
59 reiser4_ctx_gfp_mask_get());
60 if (cache->slot == NULL)
61 return RETERR(-ENOMEM);
63 INIT_LIST_HEAD(&cache->lru);
64 for (i = 0; i < cache->nr_slots; ++i) {
65 cbk_cache_init_slot(cache->slot + i);
66 list_add_tail(&((cache->slot + i)->lru), &cache->lru);
68 rwlock_init(&cache->guard);
69 return 0;
72 /* free cbk cache data */
73 void cbk_cache_done(cbk_cache * cache /* cache to release */ )
75 assert("nikita-2493", cache != NULL);
76 if (cache->slot != NULL) {
77 kfree(cache->slot);
78 cache->slot = NULL;
82 /* macro to iterate over all cbk cache slots */
83 #define for_all_slots(cache, slot) \
84 for ((slot) = list_entry((cache)->lru.next, cbk_cache_slot, lru); \
85 &(cache)->lru != &(slot)->lru; \
86 (slot) = list_entry(slot->lru.next, cbk_cache_slot, lru))
88 #if REISER4_DEBUG
89 /* this function assures that [cbk-cache-invariant] invariant holds */
90 static int cbk_cache_invariant(const cbk_cache *cache)
92 cbk_cache_slot *slot;
93 int result;
94 int unused;
96 if (cache->nr_slots == 0)
97 return 1;
99 assert("nikita-2469", cache != NULL);
100 unused = 0;
101 result = 1;
102 read_lock(&((cbk_cache *)cache)->guard);
103 for_all_slots(cache, slot) {
104 /* in LRU first go all `used' slots followed by `unused' */
105 if (unused && (slot->node != NULL))
106 result = 0;
107 if (slot->node == NULL)
108 unused = 1;
109 else {
110 cbk_cache_slot *scan;
112 /* all cached nodes are different */
113 scan = slot;
114 while (result) {
115 scan = list_entry(scan->lru.next, cbk_cache_slot, lru);
116 if (&cache->lru == &scan->lru)
117 break;
118 if (slot->node == scan->node)
119 result = 0;
122 if (!result)
123 break;
125 read_unlock(&((cbk_cache *)cache)->guard);
126 return result;
129 #endif
131 /* Remove references, if any, to @node from coord cache */
132 void cbk_cache_invalidate(const znode * node /* node to remove from cache */ ,
133 reiser4_tree * tree /* tree to remove node from */ )
135 cbk_cache_slot *slot;
136 cbk_cache *cache;
137 int i;
139 assert("nikita-350", node != NULL);
140 assert("nikita-1479", LOCK_CNT_GTZ(rw_locked_tree));
142 cache = &tree->cbk_cache;
143 assert("nikita-2470", cbk_cache_invariant(cache));
145 write_lock(&(cache->guard));
146 for (i = 0, slot = cache->slot; i < cache->nr_slots; ++i, ++slot) {
147 if (slot->node == node) {
148 list_move_tail(&slot->lru, &cache->lru);
149 slot->node = NULL;
150 break;
153 write_unlock(&(cache->guard));
154 assert("nikita-2471", cbk_cache_invariant(cache));
157 /* add to the cbk-cache in the "tree" information about "node". This
158 can actually be update of existing slot in a cache. */
159 static void cbk_cache_add(const znode *node /* node to add to the cache */ )
161 cbk_cache *cache;
162 cbk_cache_slot *slot;
163 int i;
165 assert("nikita-352", node != NULL);
167 cache = &znode_get_tree(node)->cbk_cache;
168 assert("nikita-2472", cbk_cache_invariant(cache));
170 if (cache->nr_slots == 0)
171 return;
173 write_lock(&(cache->guard));
174 /* find slot to update/add */
175 for (i = 0, slot = cache->slot; i < cache->nr_slots; ++i, ++slot) {
176 /* oops, this node is already in a cache */
177 if (slot->node == node)
178 break;
180 /* if all slots are used, reuse least recently used one */
181 if (i == cache->nr_slots) {
182 slot = list_entry(cache->lru.prev, cbk_cache_slot, lru);
183 slot->node = (znode *) node;
185 list_move(&slot->lru, &cache->lru);
186 write_unlock(&(cache->guard));
187 assert("nikita-2473", cbk_cache_invariant(cache));
190 static int setup_delimiting_keys(cbk_handle * h);
191 static lookup_result coord_by_handle(cbk_handle * handle);
192 static lookup_result traverse_tree(cbk_handle * h);
193 static int cbk_cache_search(cbk_handle * h);
195 static level_lookup_result cbk_level_lookup(cbk_handle * h);
196 static level_lookup_result cbk_node_lookup(cbk_handle * h);
198 /* helper functions */
200 static void update_stale_dk(reiser4_tree * tree, znode * node);
202 /* release parent node during traversal */
203 static void put_parent(cbk_handle * h);
204 /* check consistency of fields */
205 static int sanity_check(cbk_handle * h);
206 /* release resources in handle */
207 static void hput(cbk_handle * h);
209 static level_lookup_result search_to_left(cbk_handle * h);
211 /* pack numerous (numberous I should say) arguments of coord_by_key() into
212 * cbk_handle */
213 static cbk_handle *cbk_pack(cbk_handle * handle,
214 reiser4_tree * tree,
215 const reiser4_key * key,
216 coord_t * coord,
217 lock_handle * active_lh,
218 lock_handle * parent_lh,
219 znode_lock_mode lock_mode,
220 lookup_bias bias,
221 tree_level lock_level,
222 tree_level stop_level,
223 __u32 flags, ra_info_t * info)
225 memset(handle, 0, sizeof *handle);
227 handle->tree = tree;
228 handle->key = key;
229 handle->lock_mode = lock_mode;
230 handle->bias = bias;
231 handle->lock_level = lock_level;
232 handle->stop_level = stop_level;
233 handle->coord = coord;
234 /* set flags. See comment in tree.h:cbk_flags */
235 handle->flags = flags | CBK_TRUST_DK | CBK_USE_CRABLOCK;
237 handle->active_lh = active_lh;
238 handle->parent_lh = parent_lh;
239 handle->ra_info = info;
240 return handle;
243 /* main tree lookup procedure
245 Check coord cache. If key we are looking for is not found there, call cbk()
246 to do real tree traversal.
248 As we have extents on the twig level, @lock_level and @stop_level can
249 be different from LEAF_LEVEL and each other.
251 Thread cannot keep any reiser4 locks (tree, znode, dk spin-locks, or znode
252 long term locks) while calling this.
254 lookup_result coord_by_key(reiser4_tree * tree /* tree to perform search
255 * in. Usually this tree is
256 * part of file-system
257 * super-block */ ,
258 const reiser4_key * key /* key to look for */ ,
259 coord_t * coord /* where to store found
260 * position in a tree. Fields
261 * in "coord" are only valid if
262 * coord_by_key() returned
263 * "CBK_COORD_FOUND" */ ,
264 lock_handle * lh, /* resulting lock handle */
265 znode_lock_mode lock_mode /* type of lookup we
266 * want on node. Pass
267 * ZNODE_READ_LOCK here
268 * if you only want to
269 * read item found and
270 * ZNODE_WRITE_LOCK if
271 * you want to modify
272 * it */ ,
273 lookup_bias bias /* what to return if coord
274 * with exactly the @key is
275 * not in the tree */ ,
276 tree_level lock_level /* tree level where to start
277 * taking @lock type of
278 * locks */ ,
279 tree_level stop_level /* tree level to stop. Pass
280 * LEAF_LEVEL or TWIG_LEVEL
281 * here Item being looked
282 * for has to be between
283 * @lock_level and
284 * @stop_level, inclusive */ ,
285 __u32 flags /* search flags */ ,
286 ra_info_t *
287 info
288 /* information about desired tree traversal readahead */
291 cbk_handle handle;
292 lock_handle parent_lh;
293 lookup_result result;
295 init_lh(lh);
296 init_lh(&parent_lh);
298 assert("nikita-3023", reiser4_schedulable());
300 assert("nikita-353", tree != NULL);
301 assert("nikita-354", key != NULL);
302 assert("nikita-355", coord != NULL);
303 assert("nikita-356", (bias == FIND_EXACT)
304 || (bias == FIND_MAX_NOT_MORE_THAN));
305 assert("nikita-357", stop_level >= LEAF_LEVEL);
306 /* no locks can be held during tree traversal */
307 assert("nikita-2104", lock_stack_isclean(get_current_lock_stack()));
309 cbk_pack(&handle,
310 tree,
311 key,
312 coord,
314 &parent_lh,
315 lock_mode, bias, lock_level, stop_level, flags, info);
317 result = coord_by_handle(&handle);
318 assert("nikita-3247",
319 ergo(!IS_CBKERR(result), coord->node == lh->node));
320 return result;
323 /* like coord_by_key(), but starts traversal from vroot of @object rather than
324 * from tree root. */
325 lookup_result reiser4_object_lookup(struct inode * object,
326 const reiser4_key * key,
327 coord_t * coord,
328 lock_handle * lh,
329 znode_lock_mode lock_mode,
330 lookup_bias bias,
331 tree_level lock_level,
332 tree_level stop_level, __u32 flags,
333 ra_info_t * info)
335 cbk_handle handle;
336 lock_handle parent_lh;
337 lookup_result result;
339 init_lh(lh);
340 init_lh(&parent_lh);
342 assert("nikita-3023", reiser4_schedulable());
344 assert("nikita-354", key != NULL);
345 assert("nikita-355", coord != NULL);
346 assert("nikita-356", (bias == FIND_EXACT)
347 || (bias == FIND_MAX_NOT_MORE_THAN));
348 assert("nikita-357", stop_level >= LEAF_LEVEL);
349 /* no locks can be held during tree search by key */
350 assert("nikita-2104", lock_stack_isclean(get_current_lock_stack()));
352 cbk_pack(&handle,
353 object != NULL ? reiser4_tree_by_inode(object) : current_tree,
354 key,
355 coord,
357 &parent_lh,
358 lock_mode, bias, lock_level, stop_level, flags, info);
359 handle.object = object;
361 result = coord_by_handle(&handle);
362 assert("nikita-3247",
363 ergo(!IS_CBKERR(result), coord->node == lh->node));
364 return result;
367 /* lookup by cbk_handle. Common part of coord_by_key() and
368 reiser4_object_lookup(). */
369 static lookup_result coord_by_handle(cbk_handle * handle)
372 * first check cbk_cache (which is look-aside cache for our tree) and
373 * of this fails, start traversal.
375 /* first check whether "key" is in cache of recent lookups. */
376 if (cbk_cache_search(handle) == 0)
377 return handle->result;
378 else
379 return traverse_tree(handle);
382 /* Execute actor for each item (or unit, depending on @through_units_p),
383 starting from @coord, right-ward, until either:
385 - end of the tree is reached
386 - unformatted node is met
387 - error occurred
388 - @actor returns 0 or less
390 Error code, or last actor return value is returned.
392 This is used by plugin/dir/hashe_dir.c:reiser4_find_entry() to move through
393 sequence of entries with identical keys and alikes.
395 int reiser4_iterate_tree(reiser4_tree * tree /* tree to scan */ ,
396 coord_t * coord /* coord to start from */ ,
397 lock_handle * lh /* lock handle to start with and to
398 * update along the way */ ,
399 tree_iterate_actor_t actor /* function to call on each
400 * item/unit */ ,
401 void *arg /* argument to pass to @actor */ ,
402 znode_lock_mode mode /* lock mode on scanned nodes */ ,
403 int through_units_p /* call @actor on each item or on
404 * each unit */ )
406 int result;
408 assert("nikita-1143", tree != NULL);
409 assert("nikita-1145", coord != NULL);
410 assert("nikita-1146", lh != NULL);
411 assert("nikita-1147", actor != NULL);
413 result = zload(coord->node);
414 coord_clear_iplug(coord);
415 if (result != 0)
416 return result;
417 if (!coord_is_existing_unit(coord)) {
418 zrelse(coord->node);
419 return -ENOENT;
421 while ((result = actor(tree, coord, lh, arg)) > 0) {
422 /* move further */
423 if ((through_units_p && coord_next_unit(coord)) ||
424 (!through_units_p && coord_next_item(coord))) {
425 do {
426 lock_handle couple;
428 /* move to the next node */
429 init_lh(&couple);
430 result =
431 reiser4_get_right_neighbor(&couple,
432 coord->node,
433 (int)mode,
434 GN_CAN_USE_UPPER_LEVELS);
435 zrelse(coord->node);
436 if (result == 0) {
438 result = zload(couple.node);
439 if (result != 0) {
440 done_lh(&couple);
441 return result;
444 coord_init_first_unit(coord,
445 couple.node);
446 done_lh(lh);
447 move_lh(lh, &couple);
448 } else
449 return result;
450 } while (node_is_empty(coord->node));
453 assert("nikita-1149", coord_is_existing_unit(coord));
455 zrelse(coord->node);
456 return result;
459 /* return locked uber znode for @tree */
460 int get_uber_znode(reiser4_tree * tree, znode_lock_mode mode,
461 znode_lock_request pri, lock_handle * lh)
463 int result;
465 result = longterm_lock_znode(lh, tree->uber, mode, pri);
466 return result;
469 /* true if @key is strictly within @node
471 we are looking for possibly non-unique key and it is item is at the edge of
472 @node. May be it is in the neighbor.
474 static int znode_contains_key_strict(znode * node /* node to check key
475 * against */ ,
476 const reiser4_key *
477 key /* key to check */ ,
478 int isunique)
480 int answer;
482 assert("nikita-1760", node != NULL);
483 assert("nikita-1722", key != NULL);
485 if (keyge(key, &node->rd_key))
486 return 0;
488 answer = keycmp(&node->ld_key, key);
490 if (isunique)
491 return answer != GREATER_THAN;
492 else
493 return answer == LESS_THAN;
497 * Virtual Root (vroot) code.
499 * For given file system object (e.g., regular file or directory) let's
500 * define its "virtual root" as lowest in the tree (that is, furtherest
501 * from the tree root) node such that all body items of said object are
502 * located in a tree rooted at this node.
504 * Once vroot of object is found all tree lookups for items within body of
505 * this object ("object lookups") can be started from its vroot rather
506 * than from real root. This has following advantages:
508 * 1. amount of nodes traversed during lookup (and, hence, amount of
509 * key comparisons made) decreases, and
511 * 2. contention on tree root is decreased. This latter was actually
512 * motivating reason behind vroot, because spin lock of root node,
513 * which is taken when acquiring long-term lock on root node is the
514 * hottest lock in the reiser4.
516 * How to find vroot.
518 * When vroot of object F is not yet determined, all object lookups start
519 * from the root of the tree. At each tree level during traversal we have
520 * a node N such that a key we are looking for (which is the key inside
521 * object's body) is located within N. In function handle_vroot() called
522 * from cbk_level_lookup() we check whether N is possible vroot for
523 * F. Check is trivial---if neither leftmost nor rightmost item of N
524 * belongs to F (and we already have helpful ->owns_item() method of
525 * object plugin for this), then N is possible vroot of F. This, of
526 * course, relies on the assumption that each object occupies contiguous
527 * range of keys in the tree.
529 * Thus, traversing tree downward and checking each node as we go, we can
530 * find lowest such node, which, by definition, is vroot.
532 * How to track vroot.
534 * Nohow. If actual vroot changes, next object lookup will just restart
535 * from the actual tree root, refreshing object's vroot along the way.
540 * Check whether @node is possible vroot of @object.
542 static void handle_vroot(struct inode *object, znode * node)
544 file_plugin *fplug;
545 coord_t coord;
547 fplug = inode_file_plugin(object);
548 assert("nikita-3353", fplug != NULL);
549 assert("nikita-3354", fplug->owns_item != NULL);
551 if (unlikely(node_is_empty(node)))
552 return;
554 coord_init_first_unit(&coord, node);
556 * if leftmost item of @node belongs to @object, we cannot be sure
557 * that @node is vroot of @object, because, some items of @object are
558 * probably in the sub-tree rooted at the left neighbor of @node.
560 if (fplug->owns_item(object, &coord))
561 return;
562 coord_init_last_unit(&coord, node);
563 /* mutatis mutandis for the rightmost item */
564 if (fplug->owns_item(object, &coord))
565 return;
566 /* otherwise, @node is possible vroot of @object */
567 inode_set_vroot(object, node);
571 * helper function used by traverse tree to start tree traversal not from the
572 * tree root, but from @h->object's vroot, if possible.
574 static int prepare_object_lookup(cbk_handle * h)
576 znode *vroot;
577 int result;
579 vroot = inode_get_vroot(h->object);
580 if (vroot == NULL) {
582 * object doesn't have known vroot, start from real tree root.
584 return LOOKUP_CONT;
587 h->level = znode_get_level(vroot);
588 /* take a long-term lock on vroot */
589 h->result = longterm_lock_znode(h->active_lh, vroot,
590 cbk_lock_mode(h->level, h),
591 ZNODE_LOCK_LOPRI);
592 result = LOOKUP_REST;
593 if (h->result == 0) {
594 int isunique;
595 int inside;
597 isunique = h->flags & CBK_UNIQUE;
598 /* check that key is inside vroot */
599 read_lock_dk(h->tree);
600 inside = (znode_contains_key_strict(vroot, h->key, isunique) &&
601 !ZF_ISSET(vroot, JNODE_HEARD_BANSHEE));
602 read_unlock_dk(h->tree);
603 if (inside) {
604 h->result = zload(vroot);
605 if (h->result == 0) {
606 /* search for key in vroot. */
607 result = cbk_node_lookup(h);
608 zrelse(vroot); /*h->active_lh->node); */
609 if (h->active_lh->node != vroot) {
610 result = LOOKUP_REST;
611 } else if (result == LOOKUP_CONT) {
612 move_lh(h->parent_lh, h->active_lh);
613 h->flags &= ~CBK_DKSET;
619 zput(vroot);
621 if (IS_CBKERR(h->result) || result == LOOKUP_REST)
622 hput(h);
623 return result;
626 /* main function that handles common parts of tree traversal: starting
627 (fake znode handling), restarts, error handling, completion */
628 static lookup_result traverse_tree(cbk_handle * h /* search handle */ )
630 int done;
631 int iterations;
632 int vroot_used;
634 assert("nikita-365", h != NULL);
635 assert("nikita-366", h->tree != NULL);
636 assert("nikita-367", h->key != NULL);
637 assert("nikita-368", h->coord != NULL);
638 assert("nikita-369", (h->bias == FIND_EXACT)
639 || (h->bias == FIND_MAX_NOT_MORE_THAN));
640 assert("nikita-370", h->stop_level >= LEAF_LEVEL);
641 assert("nikita-2949", !(h->flags & CBK_DKSET));
642 assert("zam-355", lock_stack_isclean(get_current_lock_stack()));
644 done = 0;
645 iterations = 0;
646 vroot_used = 0;
648 /* loop for restarts */
649 restart:
651 assert("nikita-3024", reiser4_schedulable());
653 h->result = CBK_COORD_FOUND;
654 /* connect_znode() needs it */
655 h->ld_key = *reiser4_min_key();
656 h->rd_key = *reiser4_max_key();
657 h->flags |= CBK_DKSET;
658 h->error = NULL;
660 if (!vroot_used && h->object != NULL) {
661 vroot_used = 1;
662 done = prepare_object_lookup(h);
663 if (done == LOOKUP_REST) {
664 goto restart;
665 } else if (done == LOOKUP_DONE)
666 return h->result;
668 if (h->parent_lh->node == NULL) {
669 done =
670 get_uber_znode(h->tree, ZNODE_READ_LOCK, ZNODE_LOCK_LOPRI,
671 h->parent_lh);
673 assert("nikita-1637", done != -E_DEADLOCK);
675 h->block = h->tree->root_block;
676 h->level = h->tree->height;
677 h->coord->node = h->parent_lh->node;
679 if (done != 0)
680 return done;
683 /* loop descending a tree */
684 while (!done) {
686 if (unlikely((iterations > REISER4_CBK_ITERATIONS_LIMIT) &&
687 IS_POW(iterations))) {
688 warning("nikita-1481", "Too many iterations: %i",
689 iterations);
690 reiser4_print_key("key", h->key);
691 ++iterations;
692 } else if (unlikely(iterations > REISER4_MAX_CBK_ITERATIONS)) {
693 h->error =
694 "reiser-2018: Too many iterations. Tree corrupted, or (less likely) starvation occurring.";
695 h->result = RETERR(-EIO);
696 break;
698 switch (cbk_level_lookup(h)) {
699 case LOOKUP_CONT:
700 move_lh(h->parent_lh, h->active_lh);
701 continue;
702 default:
703 wrong_return_value("nikita-372", "cbk_level");
704 case LOOKUP_DONE:
705 done = 1;
706 break;
707 case LOOKUP_REST:
708 hput(h);
709 /* deadlock avoidance is normal case. */
710 if (h->result != -E_DEADLOCK)
711 ++iterations;
712 reiser4_preempt_point();
713 goto restart;
716 /* that's all. The rest is error handling */
717 if (unlikely(h->error != NULL)) {
718 warning("nikita-373", "%s: level: %i, "
719 "lock_level: %i, stop_level: %i "
720 "lock_mode: %s, bias: %s",
721 h->error, h->level, h->lock_level, h->stop_level,
722 lock_mode_name(h->lock_mode), bias_name(h->bias));
723 reiser4_print_address("block", &h->block);
724 reiser4_print_key("key", h->key);
725 print_coord_content("coord", h->coord);
727 /* `unlikely' error case */
728 if (unlikely(IS_CBKERR(h->result))) {
729 /* failure. do cleanup */
730 hput(h);
731 } else {
732 assert("nikita-1605", WITH_DATA_RET
733 (h->coord->node, 1,
734 ergo((h->result == CBK_COORD_FOUND) &&
735 (h->bias == FIND_EXACT) &&
736 (!node_is_empty(h->coord->node)),
737 coord_is_existing_item(h->coord))));
739 return h->result;
742 /* find delimiting keys of child
744 Determine left and right delimiting keys for child pointed to by
745 @parent_coord.
748 static void find_child_delimiting_keys(znode * parent /* parent znode, passed
749 * locked */ ,
750 const coord_t * parent_coord /* coord where
751 * pointer to
752 * child is
753 * stored */ ,
754 reiser4_key * ld /* where to store left
755 * delimiting key */ ,
756 reiser4_key * rd /* where to store right
757 * delimiting key */ )
759 coord_t neighbor;
761 assert("nikita-1484", parent != NULL);
762 assert_rw_locked(&(znode_get_tree(parent)->dk_lock));
764 coord_dup(&neighbor, parent_coord);
766 if (neighbor.between == AT_UNIT)
767 /* imitate item ->lookup() behavior. */
768 neighbor.between = AFTER_UNIT;
770 if (coord_set_to_left(&neighbor) == 0)
771 unit_key_by_coord(&neighbor, ld);
772 else {
773 assert("nikita-14851", 0);
774 *ld = *znode_get_ld_key(parent);
777 coord_dup(&neighbor, parent_coord);
778 if (neighbor.between == AT_UNIT)
779 neighbor.between = AFTER_UNIT;
780 if (coord_set_to_right(&neighbor) == 0)
781 unit_key_by_coord(&neighbor, rd);
782 else
783 *rd = *znode_get_rd_key(parent);
787 * setup delimiting keys for a child
789 * @parent parent node
791 * @coord location in @parent where pointer to @child is
793 * @child child node
796 set_child_delimiting_keys(znode * parent, const coord_t * coord, znode * child)
798 reiser4_tree *tree;
800 assert("nikita-2952",
801 znode_get_level(parent) == znode_get_level(coord->node));
803 /* fast check without taking dk lock. This is safe, because
804 * JNODE_DKSET is never cleared once set. */
805 if (!ZF_ISSET(child, JNODE_DKSET)) {
806 tree = znode_get_tree(parent);
807 write_lock_dk(tree);
808 if (likely(!ZF_ISSET(child, JNODE_DKSET))) {
809 find_child_delimiting_keys(parent, coord,
810 &child->ld_key,
811 &child->rd_key);
812 ON_DEBUG(child->ld_key_version =
813 atomic_inc_return(&delim_key_version);
814 child->rd_key_version =
815 atomic_inc_return(&delim_key_version););
816 ZF_SET(child, JNODE_DKSET);
818 write_unlock_dk(tree);
819 return 1;
821 return 0;
824 /* Perform tree lookup at one level. This is called from cbk_traverse()
825 function that drives lookup through tree and calls cbk_node_lookup() to
826 perform lookup within one node.
828 See comments in a code.
830 static level_lookup_result cbk_level_lookup(cbk_handle * h /* search handle */ )
832 int ret;
833 int setdk;
834 int ldkeyset = 0;
835 reiser4_key ldkey;
836 reiser4_key key;
837 znode *active;
839 assert("nikita-3025", reiser4_schedulable());
841 /* acquire reference to @active node */
842 active =
843 zget(h->tree, &h->block, h->parent_lh->node, h->level,
844 reiser4_ctx_gfp_mask_get());
846 if (IS_ERR(active)) {
847 h->result = PTR_ERR(active);
848 return LOOKUP_DONE;
851 /* lock @active */
852 h->result = longterm_lock_znode(h->active_lh,
853 active,
854 cbk_lock_mode(h->level, h),
855 ZNODE_LOCK_LOPRI);
856 /* longterm_lock_znode() acquires additional reference to znode (which
857 will be later released by longterm_unlock_znode()). Release
858 reference acquired by zget().
860 zput(active);
861 if (unlikely(h->result != 0))
862 goto fail_or_restart;
864 setdk = 0;
865 /* if @active is accessed for the first time, setup delimiting keys on
866 it. Delimiting keys are taken from the parent node. See
867 setup_delimiting_keys() for details.
869 if (h->flags & CBK_DKSET) {
870 setdk = setup_delimiting_keys(h);
871 h->flags &= ~CBK_DKSET;
872 } else {
873 znode *parent;
875 parent = h->parent_lh->node;
876 h->result = zload(parent);
877 if (unlikely(h->result != 0))
878 goto fail_or_restart;
880 if (!ZF_ISSET(active, JNODE_DKSET))
881 setdk = set_child_delimiting_keys(parent,
882 h->coord, active);
883 else {
884 read_lock_dk(h->tree);
885 find_child_delimiting_keys(parent, h->coord, &ldkey,
886 &key);
887 read_unlock_dk(h->tree);
888 ldkeyset = 1;
890 zrelse(parent);
893 /* this is ugly kludge. Reminder: this is necessary, because
894 ->lookup() method returns coord with ->between field probably set
895 to something different from AT_UNIT.
897 h->coord->between = AT_UNIT;
899 if (znode_just_created(active) && (h->coord->node != NULL)) {
900 write_lock_tree(h->tree);
901 /* if we are going to load znode right now, setup
902 ->in_parent: coord where pointer to this node is stored in
903 parent.
905 coord_to_parent_coord(h->coord, &active->in_parent);
906 write_unlock_tree(h->tree);
909 /* check connectedness without holding tree lock---false negatives
910 * will be re-checked by connect_znode(), and false positives are
911 * impossible---@active cannot suddenly turn into unconnected
912 * state. */
913 if (!znode_is_connected(active)) {
914 h->result = connect_znode(h->coord, active);
915 if (unlikely(h->result != 0)) {
916 put_parent(h);
917 goto fail_or_restart;
921 jload_prefetch(ZJNODE(active));
923 if (setdk)
924 update_stale_dk(h->tree, active);
926 /* put_parent() cannot be called earlier, because connect_znode()
927 assumes parent node is referenced; */
928 put_parent(h);
930 if ((!znode_contains_key_lock(active, h->key) &&
931 (h->flags & CBK_TRUST_DK))
932 || ZF_ISSET(active, JNODE_HEARD_BANSHEE)) {
933 /* 1. key was moved out of this node while this thread was
934 waiting for the lock. Restart. More elaborate solution is
935 to determine where key moved (to the left, or to the right)
936 and try to follow it through sibling pointers.
938 2. or, node itself is going to be removed from the
939 tree. Release lock and restart.
941 h->result = -E_REPEAT;
943 if (h->result == -E_REPEAT)
944 return LOOKUP_REST;
946 h->result = zload_ra(active, h->ra_info);
947 if (h->result) {
948 return LOOKUP_DONE;
951 /* sanity checks */
952 if (sanity_check(h)) {
953 zrelse(active);
954 return LOOKUP_DONE;
957 /* check that key of leftmost item in the @active is the same as in
958 * its parent */
959 if (ldkeyset && !node_is_empty(active) &&
960 !keyeq(leftmost_key_in_node(active, &key), &ldkey)) {
961 warning("vs-3533", "Keys are inconsistent. Fsck?");
962 reiser4_print_key("inparent", &ldkey);
963 reiser4_print_key("inchild", &key);
964 h->result = RETERR(-EIO);
965 zrelse(active);
966 return LOOKUP_DONE;
969 if (h->object != NULL)
970 handle_vroot(h->object, active);
972 ret = cbk_node_lookup(h);
974 /* h->active_lh->node might change, but active is yet to be zrelsed */
975 zrelse(active);
977 return ret;
979 fail_or_restart:
980 if (h->result == -E_DEADLOCK)
981 return LOOKUP_REST;
982 return LOOKUP_DONE;
985 #if REISER4_DEBUG
986 /* check left and right delimiting keys of a znode */
987 void check_dkeys(znode * node)
989 znode *left;
990 znode *right;
992 read_lock_tree(current_tree);
993 read_lock_dk(current_tree);
995 assert("vs-1710", znode_is_any_locked(node));
996 assert("vs-1197",
997 !keygt(znode_get_ld_key(node), znode_get_rd_key(node)));
999 left = node->left;
1000 right = node->right;
1002 if (ZF_ISSET(node, JNODE_LEFT_CONNECTED) && ZF_ISSET(node, JNODE_DKSET)
1003 && left != NULL && ZF_ISSET(left, JNODE_DKSET))
1004 /* check left neighbor. Note that left neighbor is not locked,
1005 so it might get wrong delimiting keys therefore */
1006 assert("vs-1198",
1007 (keyeq(znode_get_rd_key(left), znode_get_ld_key(node))
1008 || ZF_ISSET(left, JNODE_HEARD_BANSHEE)));
1010 if (ZF_ISSET(node, JNODE_RIGHT_CONNECTED) && ZF_ISSET(node, JNODE_DKSET)
1011 && right != NULL && ZF_ISSET(right, JNODE_DKSET))
1012 /* check right neighbor. Note that right neighbor is not
1013 locked, so it might get wrong delimiting keys therefore */
1014 assert("vs-1199",
1015 (keyeq(znode_get_rd_key(node), znode_get_ld_key(right))
1016 || ZF_ISSET(right, JNODE_HEARD_BANSHEE)));
1018 read_unlock_dk(current_tree);
1019 read_unlock_tree(current_tree);
1021 #endif
1023 /* true if @key is left delimiting key of @node */
1024 static int key_is_ld(znode * node, const reiser4_key * key)
1026 int ld;
1028 assert("nikita-1716", node != NULL);
1029 assert("nikita-1758", key != NULL);
1031 read_lock_dk(znode_get_tree(node));
1032 assert("nikita-1759", znode_contains_key(node, key));
1033 ld = keyeq(znode_get_ld_key(node), key);
1034 read_unlock_dk(znode_get_tree(node));
1035 return ld;
1038 /* Process one node during tree traversal.
1040 This is called by cbk_level_lookup(). */
1041 static level_lookup_result cbk_node_lookup(cbk_handle * h /* search handle */ )
1043 /* node plugin of @active */
1044 node_plugin *nplug;
1045 /* item plugin of item that was found */
1046 item_plugin *iplug;
1047 /* search bias */
1048 lookup_bias node_bias;
1049 /* node we are operating upon */
1050 znode *active;
1051 /* tree we are searching in */
1052 reiser4_tree *tree;
1053 /* result */
1054 int result;
1056 assert("nikita-379", h != NULL);
1058 active = h->active_lh->node;
1059 tree = h->tree;
1061 nplug = active->nplug;
1062 assert("nikita-380", nplug != NULL);
1064 ON_DEBUG(check_dkeys(active));
1066 /* return item from "active" node with maximal key not greater than
1067 "key" */
1068 node_bias = h->bias;
1069 result = nplug->lookup(active, h->key, node_bias, h->coord);
1070 if (unlikely(result != NS_FOUND && result != NS_NOT_FOUND)) {
1071 /* error occurred */
1072 h->result = result;
1073 return LOOKUP_DONE;
1075 if (h->level == h->stop_level) {
1076 /* welcome to the stop level */
1077 assert("nikita-381", h->coord->node == active);
1078 if (result == NS_FOUND) {
1079 /* success of tree lookup */
1080 if (!(h->flags & CBK_UNIQUE)
1081 && key_is_ld(active, h->key)) {
1082 return search_to_left(h);
1083 } else
1084 h->result = CBK_COORD_FOUND;
1085 } else {
1086 h->result = CBK_COORD_NOTFOUND;
1088 if (!(h->flags & CBK_IN_CACHE))
1089 cbk_cache_add(active);
1090 return LOOKUP_DONE;
1093 if (h->level > TWIG_LEVEL && result == NS_NOT_FOUND) {
1094 h->error = "not found on internal node";
1095 h->result = result;
1096 return LOOKUP_DONE;
1099 assert("vs-361", h->level > h->stop_level);
1101 if (handle_eottl(h, &result)) {
1102 assert("vs-1674", (result == LOOKUP_DONE ||
1103 result == LOOKUP_REST));
1104 return result;
1107 /* go down to next level */
1108 check_me("vs-12", zload(h->coord->node) == 0);
1109 assert("nikita-2116", item_is_internal(h->coord));
1110 iplug = item_plugin_by_coord(h->coord);
1111 iplug->s.internal.down_link(h->coord, h->key, &h->block);
1112 zrelse(h->coord->node);
1113 --h->level;
1114 return LOOKUP_CONT; /* continue */
1117 /* scan cbk_cache slots looking for a match for @h */
1118 static int cbk_cache_scan_slots(cbk_handle * h /* cbk handle */ )
1120 level_lookup_result llr;
1121 znode *node;
1122 reiser4_tree *tree;
1123 cbk_cache_slot *slot;
1124 cbk_cache *cache;
1125 tree_level level;
1126 int isunique;
1127 const reiser4_key *key;
1128 int result;
1130 assert("nikita-1317", h != NULL);
1131 assert("nikita-1315", h->tree != NULL);
1132 assert("nikita-1316", h->key != NULL);
1134 tree = h->tree;
1135 cache = &tree->cbk_cache;
1136 if (cache->nr_slots == 0)
1137 /* size of cbk cache was set to 0 by mount time option. */
1138 return RETERR(-ENOENT);
1140 assert("nikita-2474", cbk_cache_invariant(cache));
1141 node = NULL; /* to keep gcc happy */
1142 level = h->level;
1143 key = h->key;
1144 isunique = h->flags & CBK_UNIQUE;
1145 result = RETERR(-ENOENT);
1148 * this is time-critical function and dragons had, hence, been settled
1149 * here.
1151 * Loop below scans cbk cache slots trying to find matching node with
1152 * suitable range of delimiting keys and located at the h->level.
1154 * Scan is done under cbk cache spin lock that protects slot->node
1155 * pointers. If suitable node is found we want to pin it in
1156 * memory. But slot->node can point to the node with x_count 0
1157 * (unreferenced). Such node can be recycled at any moment, or can
1158 * already be in the process of being recycled (within jput()).
1160 * As we found node in the cbk cache, it means that jput() hasn't yet
1161 * called cbk_cache_invalidate().
1163 * We acquire reference to the node without holding tree lock, and
1164 * later, check node's RIP bit. This avoids races with jput().
1167 rcu_read_lock();
1168 read_lock(&((cbk_cache *)cache)->guard);
1170 slot = list_entry(cache->lru.next, cbk_cache_slot, lru);
1171 slot = list_entry(slot->lru.prev, cbk_cache_slot, lru);
1172 BUG_ON(&slot->lru != &cache->lru);/*????*/
1173 while (1) {
1175 slot = list_entry(slot->lru.next, cbk_cache_slot, lru);
1177 if (&cache->lru != &slot->lru)
1178 node = slot->node;
1179 else
1180 node = NULL;
1182 if (unlikely(node == NULL))
1183 break;
1186 * this is (hopefully) the only place in the code where we are
1187 * working with delimiting keys without holding dk lock. This
1188 * is fine here, because this is only "guess" anyway---keys
1189 * are rechecked under dk lock below.
1191 if (znode_get_level(node) == level &&
1192 /* reiser4_min_key < key < reiser4_max_key */
1193 znode_contains_key_strict(node, key, isunique)) {
1194 zref(node);
1195 result = 0;
1196 spin_lock_prefetch(&tree->tree_lock);
1197 break;
1200 read_unlock(&((cbk_cache *)cache)->guard);
1202 assert("nikita-2475", cbk_cache_invariant(cache));
1204 if (unlikely(result == 0 && ZF_ISSET(node, JNODE_RIP)))
1205 result = -ENOENT;
1207 rcu_read_unlock();
1209 if (result != 0) {
1210 h->result = CBK_COORD_NOTFOUND;
1211 return RETERR(-ENOENT);
1214 result =
1215 longterm_lock_znode(h->active_lh, node, cbk_lock_mode(level, h),
1216 ZNODE_LOCK_LOPRI);
1217 zput(node);
1218 if (result != 0)
1219 return result;
1220 result = zload(node);
1221 if (result != 0)
1222 return result;
1224 /* recheck keys */
1225 read_lock_dk(tree);
1226 result = (znode_contains_key_strict(node, key, isunique) &&
1227 !ZF_ISSET(node, JNODE_HEARD_BANSHEE));
1228 read_unlock_dk(tree);
1229 if (result) {
1230 /* do lookup inside node */
1231 llr = cbk_node_lookup(h);
1232 /* if cbk_node_lookup() wandered to another node (due to eottl
1233 or non-unique keys), adjust @node */
1234 /*node = h->active_lh->node; */
1236 if (llr != LOOKUP_DONE) {
1237 /* restart or continue on the next level */
1238 result = RETERR(-ENOENT);
1239 } else if (IS_CBKERR(h->result))
1240 /* io or oom */
1241 result = RETERR(-ENOENT);
1242 else {
1243 /* good. Either item found or definitely not found. */
1244 result = 0;
1246 write_lock(&(cache->guard));
1247 if (slot->node == h->active_lh->node /*node */ ) {
1248 /* if this node is still in cbk cache---move
1249 its slot to the head of the LRU list. */
1250 list_move(&slot->lru, &cache->lru);
1252 write_unlock(&(cache->guard));
1254 } else {
1255 /* race. While this thread was waiting for the lock, node was
1256 rebalanced and item we are looking for, shifted out of it
1257 (if it ever was here).
1259 Continuing scanning is almost hopeless: node key range was
1260 moved to, is almost certainly at the beginning of the LRU
1261 list at this time, because it's hot, but restarting
1262 scanning from the very beginning is complex. Just return,
1263 so that cbk() will be performed. This is not that
1264 important, because such races should be rare. Are they?
1266 result = RETERR(-ENOENT); /* -ERAUGHT */
1268 zrelse(node);
1269 assert("nikita-2476", cbk_cache_invariant(cache));
1270 return result;
1273 /* look for item with given key in the coord cache
1275 This function, called by coord_by_key(), scans "coord cache" (&cbk_cache)
1276 which is a small LRU list of znodes accessed lately. For each znode in
1277 znode in this list, it checks whether key we are looking for fits into key
1278 range covered by this node. If so, and in addition, node lies at allowed
1279 level (this is to handle extents on a twig level), node is locked, and
1280 lookup inside it is performed.
1282 we need a measurement of the cost of this cache search compared to the cost
1283 of coord_by_key.
1286 static int cbk_cache_search(cbk_handle * h /* cbk handle */ )
1288 int result = 0;
1289 tree_level level;
1291 /* add CBK_IN_CACHE to the handle flags. This means that
1292 * cbk_node_lookup() assumes that cbk_cache is scanned and would add
1293 * found node to the cache. */
1294 h->flags |= CBK_IN_CACHE;
1295 for (level = h->stop_level; level <= h->lock_level; ++level) {
1296 h->level = level;
1297 result = cbk_cache_scan_slots(h);
1298 if (result != 0) {
1299 done_lh(h->active_lh);
1300 done_lh(h->parent_lh);
1301 } else {
1302 assert("nikita-1319", !IS_CBKERR(h->result));
1303 break;
1306 h->flags &= ~CBK_IN_CACHE;
1307 return result;
1310 /* type of lock we want to obtain during tree traversal. On stop level
1311 we want type of lock user asked for, on upper levels: read lock. */
1312 znode_lock_mode cbk_lock_mode(tree_level level, cbk_handle * h)
1314 assert("nikita-382", h != NULL);
1316 return (level <= h->lock_level) ? h->lock_mode : ZNODE_READ_LOCK;
1319 /* update outdated delimiting keys */
1320 static void stale_dk(reiser4_tree * tree, znode * node)
1322 znode *right;
1324 read_lock_tree(tree);
1325 write_lock_dk(tree);
1326 right = node->right;
1328 if (ZF_ISSET(node, JNODE_RIGHT_CONNECTED) &&
1329 right && ZF_ISSET(right, JNODE_DKSET) &&
1330 !keyeq(znode_get_rd_key(node), znode_get_ld_key(right)))
1331 znode_set_rd_key(node, znode_get_ld_key(right));
1333 write_unlock_dk(tree);
1334 read_unlock_tree(tree);
1337 /* check for possibly outdated delimiting keys, and update them if
1338 * necessary. */
1339 static void update_stale_dk(reiser4_tree * tree, znode * node)
1341 znode *right;
1342 reiser4_key rd;
1344 read_lock_tree(tree);
1345 read_lock_dk(tree);
1346 rd = *znode_get_rd_key(node);
1347 right = node->right;
1348 if (unlikely(ZF_ISSET(node, JNODE_RIGHT_CONNECTED) &&
1349 right && ZF_ISSET(right, JNODE_DKSET) &&
1350 !keyeq(&rd, znode_get_ld_key(right)))) {
1351 assert("nikita-38211", ZF_ISSET(node, JNODE_DKSET));
1352 read_unlock_dk(tree);
1353 read_unlock_tree(tree);
1354 stale_dk(tree, node);
1355 return;
1357 read_unlock_dk(tree);
1358 read_unlock_tree(tree);
1362 * handle searches a the non-unique key.
1364 * Suppose that we are looking for an item with possibly non-unique key 100.
1366 * Root node contains two pointers: one to a node with left delimiting key 0,
1367 * and another to a node with left delimiting key 100. Item we interested in
1368 * may well happen in the sub-tree rooted at the first pointer.
1370 * To handle this search_to_left() is called when search reaches stop
1371 * level. This function checks it is _possible_ that item we are looking for
1372 * is in the left neighbor (this can be done by comparing delimiting keys) and
1373 * if so, tries to lock left neighbor (this is low priority lock, so it can
1374 * deadlock, tree traversal is just restarted if it did) and then checks
1375 * whether left neighbor actually contains items with our key.
1377 * Note that this is done on the stop level only. It is possible to try such
1378 * left-check on each level, but as duplicate keys are supposed to be rare
1379 * (very unlikely that more than one node is completely filled with items with
1380 * duplicate keys), it sis cheaper to scan to the left on the stop level once.
1383 static level_lookup_result search_to_left(cbk_handle * h /* search handle */ )
1385 level_lookup_result result;
1386 coord_t *coord;
1387 znode *node;
1388 znode *neighbor;
1390 lock_handle lh;
1392 assert("nikita-1761", h != NULL);
1393 assert("nikita-1762", h->level == h->stop_level);
1395 init_lh(&lh);
1396 coord = h->coord;
1397 node = h->active_lh->node;
1398 assert("nikita-1763", coord_is_leftmost_unit(coord));
1400 h->result =
1401 reiser4_get_left_neighbor(&lh, node, (int)h->lock_mode,
1402 GN_CAN_USE_UPPER_LEVELS);
1403 neighbor = NULL;
1404 switch (h->result) {
1405 case -E_DEADLOCK:
1406 result = LOOKUP_REST;
1407 break;
1408 case 0:{
1409 node_plugin *nplug;
1410 coord_t crd;
1411 lookup_bias bias;
1413 neighbor = lh.node;
1414 h->result = zload(neighbor);
1415 if (h->result != 0) {
1416 result = LOOKUP_DONE;
1417 break;
1420 nplug = neighbor->nplug;
1422 coord_init_zero(&crd);
1423 bias = h->bias;
1424 h->bias = FIND_EXACT;
1425 h->result =
1426 nplug->lookup(neighbor, h->key, h->bias, &crd);
1427 h->bias = bias;
1429 if (h->result == NS_NOT_FOUND) {
1430 case -E_NO_NEIGHBOR:
1431 h->result = CBK_COORD_FOUND;
1432 if (!(h->flags & CBK_IN_CACHE))
1433 cbk_cache_add(node);
1434 default: /* some other error */
1435 result = LOOKUP_DONE;
1436 } else if (h->result == NS_FOUND) {
1437 read_lock_dk(znode_get_tree(neighbor));
1438 h->rd_key = *znode_get_ld_key(node);
1439 leftmost_key_in_node(neighbor, &h->ld_key);
1440 read_unlock_dk(znode_get_tree(neighbor));
1441 h->flags |= CBK_DKSET;
1443 h->block = *znode_get_block(neighbor);
1444 /* clear coord -> node so that cbk_level_lookup()
1445 wouldn't overwrite parent hint in neighbor.
1447 Parent hint was set up by
1448 reiser4_get_left_neighbor()
1450 /* FIXME: why do we have to spinlock here? */
1451 write_lock_tree(znode_get_tree(neighbor));
1452 h->coord->node = NULL;
1453 write_unlock_tree(znode_get_tree(neighbor));
1454 result = LOOKUP_CONT;
1455 } else {
1456 result = LOOKUP_DONE;
1458 if (neighbor != NULL)
1459 zrelse(neighbor);
1462 done_lh(&lh);
1463 return result;
1466 /* debugging aid: return symbolic name of search bias */
1467 static const char *bias_name(lookup_bias bias /* bias to get name of */ )
1469 if (bias == FIND_EXACT)
1470 return "exact";
1471 else if (bias == FIND_MAX_NOT_MORE_THAN)
1472 return "left-slant";
1473 /* else if( bias == RIGHT_SLANT_BIAS ) */
1474 /* return "right-bias"; */
1475 else {
1476 static char buf[30];
1478 sprintf(buf, "unknown: %i", bias);
1479 return buf;
1483 #if REISER4_DEBUG
1484 /* debugging aid: print human readable information about @p */
1485 void print_coord_content(const char *prefix /* prefix to print */ ,
1486 coord_t * p /* coord to print */ )
1488 reiser4_key key;
1490 if (p == NULL) {
1491 printk("%s: null\n", prefix);
1492 return;
1494 if ((p->node != NULL) && znode_is_loaded(p->node)
1495 && coord_is_existing_item(p))
1496 printk("%s: data: %p, length: %i\n", prefix,
1497 item_body_by_coord(p), item_length_by_coord(p));
1498 if (znode_is_loaded(p->node)) {
1499 item_key_by_coord(p, &key);
1500 reiser4_print_key(prefix, &key);
1504 /* debugging aid: print human readable information about @block */
1505 void reiser4_print_address(const char *prefix /* prefix to print */ ,
1506 const reiser4_block_nr * block /* block number to print */ )
1508 printk("%s: %s\n", prefix, sprint_address(block));
1510 #endif
1512 /* return string containing human readable representation of @block */
1513 char *sprint_address(const reiser4_block_nr *
1514 block /* block number to print */ )
1516 static char address[30];
1518 if (block == NULL)
1519 sprintf(address, "null");
1520 else if (reiser4_blocknr_is_fake(block))
1521 sprintf(address, "%llx", (unsigned long long)(*block));
1522 else
1523 sprintf(address, "%llu", (unsigned long long)(*block));
1524 return address;
1527 /* release parent node during traversal */
1528 static void put_parent(cbk_handle * h /* search handle */ )
1530 assert("nikita-383", h != NULL);
1531 if (h->parent_lh->node != NULL) {
1532 longterm_unlock_znode(h->parent_lh);
1536 /* helper function used by coord_by_key(): release reference to parent znode
1537 stored in handle before processing its child. */
1538 static void hput(cbk_handle * h /* search handle */ )
1540 assert("nikita-385", h != NULL);
1541 done_lh(h->parent_lh);
1542 done_lh(h->active_lh);
1545 /* Helper function used by cbk(): update delimiting keys of child node (stored
1546 in h->active_lh->node) using key taken from parent on the parent level. */
1547 static int setup_delimiting_keys(cbk_handle * h /* search handle */ )
1549 znode *active;
1550 reiser4_tree *tree;
1552 assert("nikita-1088", h != NULL);
1554 active = h->active_lh->node;
1556 /* fast check without taking dk lock. This is safe, because
1557 * JNODE_DKSET is never cleared once set. */
1558 if (!ZF_ISSET(active, JNODE_DKSET)) {
1559 tree = znode_get_tree(active);
1560 write_lock_dk(tree);
1561 if (!ZF_ISSET(active, JNODE_DKSET)) {
1562 znode_set_ld_key(active, &h->ld_key);
1563 znode_set_rd_key(active, &h->rd_key);
1564 ZF_SET(active, JNODE_DKSET);
1566 write_unlock_dk(tree);
1567 return 1;
1569 return 0;
1572 /* true if @block makes sense for the @tree. Used to detect corrupted node
1573 * pointers */
1574 static int
1575 block_nr_is_correct(reiser4_block_nr * block /* block number to check */ ,
1576 reiser4_tree * tree /* tree to check against */ )
1578 assert("nikita-757", block != NULL);
1579 assert("nikita-758", tree != NULL);
1581 /* check to see if it exceeds the size of the device. */
1582 return reiser4_blocknr_is_sane_for(tree->super, block);
1585 /* check consistency of fields */
1586 static int sanity_check(cbk_handle * h /* search handle */ )
1588 assert("nikita-384", h != NULL);
1590 if (h->level < h->stop_level) {
1591 h->error = "Buried under leaves";
1592 h->result = RETERR(-EIO);
1593 return LOOKUP_DONE;
1594 } else if (!block_nr_is_correct(&h->block, h->tree)) {
1595 h->error = "bad block number";
1596 h->result = RETERR(-EIO);
1597 return LOOKUP_DONE;
1598 } else
1599 return 0;
1602 /* Make Linus happy.
1603 Local variables:
1604 c-indentation-style: "K&R"
1605 mode-name: "LC"
1606 c-basic-offset: 8
1607 tab-width: 8
1608 fill-column: 120
1609 scroll-step: 1
1610 End: