mm-only debug patch...
[mmotm.git] / fs / reiser4 / jnode.c
blob150bcd49ea48594216f4188b4d90d65978e787f6
1 /* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
2 * reiser4/README */
3 /* Jnode manipulation functions. */
4 /* Jnode is entity used to track blocks with data and meta-data in reiser4.
6 In particular, jnodes are used to track transactional information
7 associated with each block. Each znode contains jnode as ->zjnode field.
9 Jnode stands for either Josh or Journal node.
13 * Taxonomy.
15 * Jnode represents block containing data or meta-data. There are jnodes
16 * for:
18 * unformatted blocks (jnodes proper). There are plans, however to
19 * have a handle per extent unit rather than per each unformatted
20 * block, because there are so many of them.
22 * For bitmaps. Each bitmap is actually represented by two jnodes--one
23 * for working and another for "commit" data, together forming bnode.
25 * For io-heads. These are used by log writer.
27 * For formatted nodes (znode). See comment at the top of znode.c for
28 * details specific to the formatted nodes (znodes).
30 * Node data.
32 * Jnode provides access to the data of node it represents. Data are
33 * stored in a page. Page is kept in a page cache. This means, that jnodes
34 * are highly interconnected with page cache and VM internals.
36 * jnode has a pointer to page (->pg) containing its data. Pointer to data
37 * themselves is cached in ->data field to avoid frequent calls to
38 * page_address().
40 * jnode and page are attached to each other by jnode_attach_page(). This
41 * function places pointer to jnode in set_page_private(), sets PG_private
42 * flag and increments page counter.
44 * Opposite operation is performed by page_clear_jnode().
46 * jnode->pg is protected by jnode spin lock, and page->private is
47 * protected by page lock. See comment at the top of page_cache.c for
48 * more.
50 * page can be detached from jnode for two reasons:
52 * . jnode is removed from a tree (file is truncated, or formatted
53 * node is removed by balancing).
55 * . during memory pressure, VM calls ->releasepage() method
56 * (reiser4_releasepage()) to evict page from memory.
58 * (there, of course, is also umount, but this is special case we are not
59 * concerned with here).
61 * To protect jnode page from eviction, one calls jload() function that
62 * "pins" page in memory (loading it if necessary), increments
63 * jnode->d_count, and kmap()s page. Page is unpinned through call to
64 * jrelse().
66 * Jnode life cycle.
68 * jnode is created, placed in hash table, and, optionally, in per-inode
69 * radix tree. Page can be attached to jnode, pinned, released, etc.
71 * When jnode is captured into atom its reference counter is
72 * increased. While being part of an atom, jnode can be "early
73 * flushed". This means that as part of flush procedure, jnode is placed
74 * into "relocate set", and its page is submitted to the disk. After io
75 * completes, page can be detached, then loaded again, re-dirtied, etc.
77 * A thread acquires a reference to a jnode by calling jref() and releases it by
78 * jput(). When last reference is removed, jnode is still retained in
79 * memory (cached) if it has page attached, _unless_ it is scheduled for
80 * destruction (has JNODE_HEARD_BANSHEE bit set).
82 * Tree read-write lock was used as "existential" lock for jnodes. That is,
83 * jnode->x_count could be changed from 0 to 1 only under tree write lock,
84 * that is, tree lock protected unreferenced jnodes stored in the hash
85 * table, from recycling.
87 * This resulted in high contention on tree lock, because jref()/jput() is
88 * frequent operation. To ameliorate this problem, RCU is used: when jput()
89 * is just about to release last reference on jnode it sets JNODE_RIP bit
90 * on it, and then proceed with jnode destruction (removing jnode from hash
91 * table, cbk_cache, detaching page, etc.). All places that change jnode
92 * reference counter from 0 to 1 (jlookup(), zlook(), zget(), and
93 * cbk_cache_scan_slots()) check for JNODE_RIP bit (this is done by
94 * jnode_rip_check() function), and pretend that nothing was found in hash
95 * table if bit is set.
97 * jput defers actual return of jnode into slab cache to some later time
98 * (by call_rcu()), this guarantees that other threads can safely continue
99 * working with JNODE_RIP-ped jnode.
103 #include "reiser4.h"
104 #include "debug.h"
105 #include "dformat.h"
106 #include "jnode.h"
107 #include "plugin/plugin_header.h"
108 #include "plugin/plugin.h"
109 #include "txnmgr.h"
110 /*#include "jnode.h"*/
111 #include "znode.h"
112 #include "tree.h"
113 #include "tree_walk.h"
114 #include "super.h"
115 #include "inode.h"
116 #include "page_cache.h"
118 #include <asm/uaccess.h> /* UML needs this for PAGE_OFFSET */
119 #include <linux/types.h>
120 #include <linux/slab.h>
121 #include <linux/pagemap.h>
122 #include <linux/swap.h>
123 #include <linux/fs.h> /* for struct address_space */
124 #include <linux/writeback.h> /* for inode_lock */
126 static struct kmem_cache *_jnode_slab = NULL;
128 static void jnode_set_type(jnode * node, jnode_type type);
129 static int jdelete(jnode * node);
130 static int jnode_try_drop(jnode * node);
132 #if REISER4_DEBUG
133 static int jnode_invariant(const jnode * node, int tlocked, int jlocked);
134 #endif
136 /* true if valid page is attached to jnode */
137 static inline int jnode_is_parsed(jnode * node)
139 return JF_ISSET(node, JNODE_PARSED);
142 /* hash table support */
144 /* compare two jnode keys for equality. Used by hash-table macros */
145 static inline int jnode_key_eq(const struct jnode_key *k1,
146 const struct jnode_key *k2)
148 assert("nikita-2350", k1 != NULL);
149 assert("nikita-2351", k2 != NULL);
151 return (k1->index == k2->index && k1->objectid == k2->objectid);
154 /* Hash jnode by its key (inode plus offset). Used by hash-table macros */
155 static inline __u32 jnode_key_hashfn(j_hash_table * table,
156 const struct jnode_key *key)
158 assert("nikita-2352", key != NULL);
159 assert("nikita-3346", IS_POW(table->_buckets));
161 /* yes, this is remarkable simply (where not stupid) hash function. */
162 return (key->objectid + key->index) & (table->_buckets - 1);
165 /* The hash table definition */
166 #define KMALLOC(size) reiser4_vmalloc(size)
167 #define KFREE(ptr, size) vfree(ptr)
168 TYPE_SAFE_HASH_DEFINE(j, jnode, struct jnode_key, key.j, link.j,
169 jnode_key_hashfn, jnode_key_eq);
170 #undef KFREE
171 #undef KMALLOC
173 /* call this to initialise jnode hash table */
174 int jnodes_tree_init(reiser4_tree * tree/* tree to initialise jnodes for */)
176 assert("nikita-2359", tree != NULL);
177 return j_hash_init(&tree->jhash_table, 16384);
180 /* call this to destroy jnode hash table. This is called during umount. */
181 int jnodes_tree_done(reiser4_tree * tree/* tree to destroy jnodes for */)
183 j_hash_table *jtable;
184 jnode *node;
185 jnode *next;
187 assert("nikita-2360", tree != NULL);
190 * Scan hash table and free all jnodes.
192 jtable = &tree->jhash_table;
193 if (jtable->_table) {
194 for_all_in_htable(jtable, j, node, next) {
195 assert("nikita-2361", !atomic_read(&node->x_count));
196 jdrop(node);
199 j_hash_done(&tree->jhash_table);
201 return 0;
205 * init_jnodes - create jnode cache
207 * Initializes slab cache jnodes. It is part of reiser4 module initialization.
209 int init_jnodes(void)
211 assert("umka-168", _jnode_slab == NULL);
213 _jnode_slab = kmem_cache_create("jnode", sizeof(jnode), 0,
214 SLAB_HWCACHE_ALIGN |
215 SLAB_RECLAIM_ACCOUNT, NULL);
216 if (_jnode_slab == NULL)
217 return RETERR(-ENOMEM);
219 return 0;
223 * done_znodes - delete znode cache
225 * This is called on reiser4 module unloading or system shutdown.
227 void done_jnodes(void)
229 destroy_reiser4_cache(&_jnode_slab);
232 /* Initialize a jnode. */
233 void jnode_init(jnode * node, reiser4_tree * tree, jnode_type type)
235 assert("umka-175", node != NULL);
237 memset(node, 0, sizeof(jnode));
238 ON_DEBUG(node->magic = JMAGIC);
239 jnode_set_type(node, type);
240 atomic_set(&node->d_count, 0);
241 atomic_set(&node->x_count, 0);
242 spin_lock_init(&node->guard);
243 spin_lock_init(&node->load);
244 node->atom = NULL;
245 node->tree = tree;
246 INIT_LIST_HEAD(&node->capture_link);
248 ASSIGN_NODE_LIST(node, NOT_CAPTURED);
250 INIT_RCU_HEAD(&node->rcu);
252 #if REISER4_DEBUG
254 reiser4_super_info_data *sbinfo;
256 sbinfo = get_super_private(tree->super);
257 spin_lock_irq(&sbinfo->all_guard);
258 list_add(&node->jnodes, &sbinfo->all_jnodes);
259 spin_unlock_irq(&sbinfo->all_guard);
261 #endif
#if REISER4_DEBUG
/*
 * Debugging-only counterpart of the list insertion done in jnode_init():
 * unlink @node from the ->all_jnodes list of the super block of @tree.
 */
static void jnode_done(jnode * node, reiser4_tree * tree)
{
	reiser4_super_info_data *sbinfo = get_super_private(tree->super);

	spin_lock_irq(&sbinfo->all_guard);
	assert("nikita-2422", !list_empty(&node->jnodes));
	list_del_init(&node->jnodes);
	spin_unlock_irq(&sbinfo->all_guard);
}
#endif
281 /* return already existing jnode of page */
282 jnode *jnode_by_page(struct page *pg)
284 assert("nikita-2066", pg != NULL);
285 assert("nikita-2400", PageLocked(pg));
286 assert("nikita-2068", PagePrivate(pg));
287 assert("nikita-2067", jprivate(pg) != NULL);
288 return jprivate(pg);
291 /* exported functions to allocate/free jnode objects outside this file */
292 jnode *jalloc(void)
294 jnode *jal = kmem_cache_alloc(_jnode_slab, reiser4_ctx_gfp_mask_get());
295 return jal;
298 /* return jnode back to the slab allocator */
299 inline void jfree(jnode * node)
301 assert("zam-449", node != NULL);
303 assert("nikita-2663", (list_empty_careful(&node->capture_link) &&
304 NODE_LIST(node) == NOT_CAPTURED));
305 assert("nikita-3222", list_empty(&node->jnodes));
306 assert("nikita-3221", jnode_page(node) == NULL);
308 /* not yet phash_jnode_destroy(node); */
310 kmem_cache_free(_jnode_slab, node);
314 * This function is supplied as RCU callback. It actually frees jnode when
315 * last reference to it is gone.
317 static void jnode_free_actor(struct rcu_head *head)
319 jnode *node;
320 jnode_type jtype;
322 node = container_of(head, jnode, rcu);
323 jtype = jnode_get_type(node);
325 ON_DEBUG(jnode_done(node, jnode_get_tree(node)));
327 switch (jtype) {
328 case JNODE_IO_HEAD:
329 case JNODE_BITMAP:
330 case JNODE_UNFORMATTED_BLOCK:
331 jfree(node);
332 break;
333 case JNODE_FORMATTED_BLOCK:
334 zfree(JZNODE(node));
335 break;
336 case JNODE_INODE:
337 default:
338 wrong_return_value("nikita-3197", "Wrong jnode type");
343 * Free a jnode. Post a callback to be executed later through RCU when all
344 * references to @node are released.
346 static inline void jnode_free(jnode * node, jnode_type jtype)
348 if (jtype != JNODE_INODE) {
349 /*assert("nikita-3219", list_empty(&node->rcu.list)); */
350 call_rcu(&node->rcu, jnode_free_actor);
351 } else
352 jnode_list_remove(node);
355 /* allocate new unformatted jnode */
356 static jnode *jnew_unformatted(void)
358 jnode *jal;
360 jal = jalloc();
361 if (jal == NULL)
362 return NULL;
364 jnode_init(jal, current_tree, JNODE_UNFORMATTED_BLOCK);
365 jal->key.j.mapping = NULL;
366 jal->key.j.index = (unsigned long)-1;
367 jal->key.j.objectid = 0;
368 return jal;
371 /* look for jnode with given mapping and offset within hash table */
372 jnode *jlookup(reiser4_tree * tree, oid_t objectid, unsigned long index)
374 struct jnode_key jkey;
375 jnode *node;
377 assert("nikita-2353", tree != NULL);
379 jkey.objectid = objectid;
380 jkey.index = index;
383 * hash table is _not_ protected by any lock during lookups. All we
384 * have to do is to disable preemption to keep RCU happy.
387 rcu_read_lock();
388 node = j_hash_find(&tree->jhash_table, &jkey);
389 if (node != NULL) {
390 /* protect @node from recycling */
391 jref(node);
392 assert("nikita-2955", jnode_invariant(node, 0, 0));
393 node = jnode_rip_check(tree, node);
395 rcu_read_unlock();
396 return node;
399 /* per inode radix tree of jnodes is protected by tree's read write spin lock */
400 static jnode *jfind_nolock(struct address_space *mapping, unsigned long index)
402 assert("vs-1694", mapping->host != NULL);
404 return radix_tree_lookup(jnode_tree_by_inode(mapping->host), index);
407 jnode *jfind(struct address_space *mapping, unsigned long index)
409 reiser4_tree *tree;
410 jnode *node;
412 assert("vs-1694", mapping->host != NULL);
413 tree = reiser4_tree_by_inode(mapping->host);
415 read_lock_tree(tree);
416 node = jfind_nolock(mapping, index);
417 if (node != NULL)
418 jref(node);
419 read_unlock_tree(tree);
420 return node;
423 static void inode_attach_jnode(jnode * node)
425 struct inode *inode;
426 reiser4_inode *info;
427 struct radix_tree_root *rtree;
429 assert_rw_write_locked(&(jnode_get_tree(node)->tree_lock));
430 assert("zam-1043", node->key.j.mapping != NULL);
431 inode = node->key.j.mapping->host;
432 info = reiser4_inode_data(inode);
433 rtree = jnode_tree_by_reiser4_inode(info);
434 if (rtree->rnode == NULL) {
435 /* prevent inode from being pruned when it has jnodes attached
436 to it */
437 spin_lock_irq(&inode->i_data.tree_lock);
438 inode->i_data.nrpages++;
439 spin_unlock_irq(&inode->i_data.tree_lock);
441 assert("zam-1049", equi(rtree->rnode != NULL, info->nr_jnodes != 0));
442 check_me("zam-1045",
443 !radix_tree_insert(rtree, node->key.j.index, node));
444 ON_DEBUG(info->nr_jnodes++);
447 static void inode_detach_jnode(jnode * node)
449 struct inode *inode;
450 reiser4_inode *info;
451 struct radix_tree_root *rtree;
453 assert_rw_write_locked(&(jnode_get_tree(node)->tree_lock));
454 assert("zam-1044", node->key.j.mapping != NULL);
455 inode = node->key.j.mapping->host;
456 info = reiser4_inode_data(inode);
457 rtree = jnode_tree_by_reiser4_inode(info);
459 assert("zam-1051", info->nr_jnodes != 0);
460 assert("zam-1052", rtree->rnode != NULL);
461 ON_DEBUG(info->nr_jnodes--);
463 /* delete jnode from inode's radix tree of jnodes */
464 check_me("zam-1046", radix_tree_delete(rtree, node->key.j.index));
465 if (rtree->rnode == NULL) {
466 /* inode can be pruned now */
467 spin_lock_irq(&inode->i_data.tree_lock);
468 inode->i_data.nrpages--;
469 spin_unlock_irq(&inode->i_data.tree_lock);
473 /* put jnode into hash table (where they can be found by flush who does not know
474 mapping) and to inode's tree of jnodes (where they can be found (hopefully
475 faster) in places where mapping is known). Currently it is used by
476 fs/reiser4/plugin/item/extent_file_ops.c:index_extent_jnode when new jnode is
477 created */
478 static void
479 hash_unformatted_jnode(jnode * node, struct address_space *mapping,
480 unsigned long index)
482 j_hash_table *jtable;
484 assert("vs-1446", jnode_is_unformatted(node));
485 assert("vs-1442", node->key.j.mapping == 0);
486 assert("vs-1443", node->key.j.objectid == 0);
487 assert("vs-1444", node->key.j.index == (unsigned long)-1);
488 assert_rw_write_locked(&(jnode_get_tree(node)->tree_lock));
490 node->key.j.mapping = mapping;
491 node->key.j.objectid = get_inode_oid(mapping->host);
492 node->key.j.index = index;
494 jtable = &jnode_get_tree(node)->jhash_table;
496 /* race with some other thread inserting jnode into the hash table is
497 * impossible, because we keep the page lock. */
499 * following assertion no longer holds because of RCU: it is possible
500 * jnode is in the hash table, but with JNODE_RIP bit set.
502 /* assert("nikita-3211", j_hash_find(jtable, &node->key.j) == NULL); */
503 j_hash_insert_rcu(jtable, node);
504 inode_attach_jnode(node);
507 static void unhash_unformatted_node_nolock(jnode * node)
509 assert("vs-1683", node->key.j.mapping != NULL);
510 assert("vs-1684",
511 node->key.j.objectid ==
512 get_inode_oid(node->key.j.mapping->host));
514 /* remove jnode from hash-table */
515 j_hash_remove_rcu(&node->tree->jhash_table, node);
516 inode_detach_jnode(node);
517 node->key.j.mapping = NULL;
518 node->key.j.index = (unsigned long)-1;
519 node->key.j.objectid = 0;
523 /* remove jnode from hash table and from inode's tree of jnodes. This is used in
524 reiser4_invalidatepage and in kill_hook_extent -> truncate_inode_jnodes ->
525 reiser4_uncapture_jnode */
526 void unhash_unformatted_jnode(jnode * node)
528 assert("vs-1445", jnode_is_unformatted(node));
530 write_lock_tree(node->tree);
531 unhash_unformatted_node_nolock(node);
532 write_unlock_tree(node->tree);
536 * search hash table for a jnode with given oid and index. If not found,
537 * allocate new jnode, insert it, and also insert into radix tree for the
538 * given inode/mapping.
540 static jnode *find_get_jnode(reiser4_tree * tree,
541 struct address_space *mapping,
542 oid_t oid, unsigned long index)
544 jnode *result;
545 jnode *shadow;
546 int preload;
548 result = jnew_unformatted();
550 if (unlikely(result == NULL))
551 return ERR_PTR(RETERR(-ENOMEM));
553 preload = radix_tree_preload(reiser4_ctx_gfp_mask_get());
554 if (preload != 0)
555 return ERR_PTR(preload);
557 write_lock_tree(tree);
558 shadow = jfind_nolock(mapping, index);
559 if (likely(shadow == NULL)) {
560 /* add new jnode to hash table and inode's radix tree of
561 * jnodes */
562 jref(result);
563 hash_unformatted_jnode(result, mapping, index);
564 } else {
565 /* jnode is found in inode's radix tree of jnodes */
566 jref(shadow);
567 jnode_free(result, JNODE_UNFORMATTED_BLOCK);
568 assert("vs-1498", shadow->key.j.mapping == mapping);
569 result = shadow;
571 write_unlock_tree(tree);
573 assert("nikita-2955",
574 ergo(result != NULL, jnode_invariant(result, 0, 0)));
575 radix_tree_preload_end();
576 return result;
579 /* jget() (a la zget() but for unformatted nodes). Returns (and possibly
580 creates) jnode corresponding to page @pg. jnode is attached to page and
581 inserted into jnode hash-table. */
582 static jnode *do_jget(reiser4_tree * tree, struct page *pg)
585 * There are two ways to create jnode: starting with pre-existing page
586 * and without page.
588 * When page already exists, jnode is created
589 * (jnode_of_page()->do_jget()) under page lock. This is done in
590 * ->writepage(), or when capturing anonymous page dirtied through
591 * mmap.
593 * Jnode without page is created by index_extent_jnode().
597 jnode *result;
598 oid_t oid = get_inode_oid(pg->mapping->host);
600 assert("umka-176", pg != NULL);
601 assert("nikita-2394", PageLocked(pg));
603 result = jprivate(pg);
604 if (likely(result != NULL))
605 return jref(result);
607 tree = reiser4_tree_by_page(pg);
609 /* check hash-table first */
610 result = jfind(pg->mapping, pg->index);
611 if (unlikely(result != NULL)) {
612 spin_lock_jnode(result);
613 jnode_attach_page(result, pg);
614 spin_unlock_jnode(result);
615 result->key.j.mapping = pg->mapping;
616 return result;
619 /* since page is locked, jnode should be allocated with GFP_NOFS flag */
620 reiser4_ctx_gfp_mask_force(GFP_NOFS);
621 result = find_get_jnode(tree, pg->mapping, oid, pg->index);
622 if (unlikely(IS_ERR(result)))
623 return result;
624 /* attach jnode to page */
625 spin_lock_jnode(result);
626 jnode_attach_page(result, pg);
627 spin_unlock_jnode(result);
628 return result;
632 * return jnode for @pg, creating it if necessary.
634 jnode *jnode_of_page(struct page *pg)
636 jnode *result;
638 assert("umka-176", pg != NULL);
639 assert("nikita-2394", PageLocked(pg));
641 result = do_jget(reiser4_tree_by_page(pg), pg);
643 if (REISER4_DEBUG && !IS_ERR(result)) {
644 assert("nikita-3210", result == jprivate(pg));
645 assert("nikita-2046", jnode_page(jprivate(pg)) == pg);
646 if (jnode_is_unformatted(jprivate(pg))) {
647 assert("nikita-2364",
648 jprivate(pg)->key.j.index == pg->index);
649 assert("nikita-2367",
650 jprivate(pg)->key.j.mapping == pg->mapping);
651 assert("nikita-2365",
652 jprivate(pg)->key.j.objectid ==
653 get_inode_oid(pg->mapping->host));
654 assert("vs-1200",
655 jprivate(pg)->key.j.objectid ==
656 pg->mapping->host->i_ino);
657 assert("nikita-2356",
658 jnode_is_unformatted(jnode_by_page(pg)));
660 assert("nikita-2956", jnode_invariant(jprivate(pg), 0, 0));
662 return result;
665 /* attach page to jnode: set ->pg pointer in jnode, and ->private one in the
666 * page.*/
667 void jnode_attach_page(jnode * node, struct page *pg)
669 assert("nikita-2060", node != NULL);
670 assert("nikita-2061", pg != NULL);
672 assert("nikita-2050", jprivate(pg) == 0ul);
673 assert("nikita-2393", !PagePrivate(pg));
674 assert("vs-1741", node->pg == NULL);
676 assert("nikita-2396", PageLocked(pg));
677 assert_spin_locked(&(node->guard));
679 page_cache_get(pg);
680 set_page_private(pg, (unsigned long)node);
681 node->pg = pg;
682 SetPagePrivate(pg);
685 /* Dual to jnode_attach_page: break a binding between page and jnode */
686 void page_clear_jnode(struct page *page, jnode * node)
688 assert("nikita-2424", page != NULL);
689 assert("nikita-2425", PageLocked(page));
690 assert("nikita-2426", node != NULL);
691 assert_spin_locked(&(node->guard));
692 assert("nikita-2428", PagePrivate(page));
694 assert("nikita-3551", !PageWriteback(page));
696 JF_CLR(node, JNODE_PARSED);
697 set_page_private(page, 0ul);
698 ClearPagePrivate(page);
699 node->pg = NULL;
700 page_cache_release(page);
#if 0
/*
 * Compiled out. Detaches the jnode from @page, provided the page still
 * belongs to @mapping at @index and has PG_private set. It was only used in
 * one place, to handle an error.
 */
void
page_detach_jnode(struct page *page, struct address_space *mapping,
		  unsigned long index)
{
	assert("nikita-2395", page != NULL);

	lock_page(page);
	if ((page->mapping == mapping) && (page->index == index)
	    && PagePrivate(page)) {
		jnode *attached = jprivate(page);

		spin_lock_jnode(attached);
		page_clear_jnode(page, attached);
		spin_unlock_jnode(attached);
	}
	unlock_page(page);
}
#endif /* 0 */
725 /* return @node page locked.
727 Locking ordering requires that one first takes page lock and afterwards
728 spin lock on node attached to this page. Sometimes it is necessary to go in
729 the opposite direction. This is done through standard trylock-and-release
730 loop.
732 static struct page *jnode_lock_page(jnode * node)
734 struct page *page;
736 assert("nikita-2052", node != NULL);
737 assert("nikita-2401", LOCK_CNT_NIL(spin_locked_jnode));
739 while (1) {
741 spin_lock_jnode(node);
742 page = jnode_page(node);
743 if (page == NULL)
744 break;
746 /* no need to page_cache_get( page ) here, because page cannot
747 be evicted from memory without detaching it from jnode and
748 this requires spin lock on jnode that we already hold.
750 if (trylock_page(page)) {
751 /* We won a lock on jnode page, proceed. */
752 break;
755 /* Page is locked by someone else. */
756 page_cache_get(page);
757 spin_unlock_jnode(node);
758 wait_on_page_locked(page);
759 /* it is possible that page was detached from jnode and
760 returned to the free pool, or re-assigned while we were
761 waiting on locked bit. This will be rechecked on the next
762 loop iteration.
764 page_cache_release(page);
766 /* try again */
768 return page;
772 * is JNODE_PARSED bit is not set, call ->parse() method of jnode, to verify
773 * validness of jnode content.
775 static inline int jparse(jnode * node)
777 int result;
779 assert("nikita-2466", node != NULL);
781 spin_lock_jnode(node);
782 if (likely(!jnode_is_parsed(node))) {
783 result = jnode_ops(node)->parse(node);
784 if (likely(result == 0))
785 JF_SET(node, JNODE_PARSED);
786 } else
787 result = 0;
788 spin_unlock_jnode(node);
789 return result;
792 /* Lock a page attached to jnode, create and attach page to jnode if it had no
793 * one. */
794 static struct page *jnode_get_page_locked(jnode * node, gfp_t gfp_flags)
796 struct page *page;
798 spin_lock_jnode(node);
799 page = jnode_page(node);
801 if (page == NULL) {
802 spin_unlock_jnode(node);
803 page = find_or_create_page(jnode_get_mapping(node),
804 jnode_get_index(node), gfp_flags);
805 if (page == NULL)
806 return ERR_PTR(RETERR(-ENOMEM));
807 } else {
808 if (trylock_page(page)) {
809 spin_unlock_jnode(node);
810 return page;
812 page_cache_get(page);
813 spin_unlock_jnode(node);
814 lock_page(page);
815 assert("nikita-3134", page->mapping == jnode_get_mapping(node));
818 spin_lock_jnode(node);
819 if (!jnode_page(node))
820 jnode_attach_page(node, page);
821 spin_unlock_jnode(node);
823 page_cache_release(page);
824 assert("zam-894", jnode_page(node) == page);
825 return page;
828 /* Start read operation for jnode's page if page is not up-to-date. */
829 static int jnode_start_read(jnode * node, struct page *page)
831 assert("zam-893", PageLocked(page));
833 if (PageUptodate(page)) {
834 unlock_page(page);
835 return 0;
837 return reiser4_page_io(page, node, READ, reiser4_ctx_gfp_mask_get());
#if REISER4_DEBUG
/*
 * Debugging check run by jload_gfp(). For a znode that is locked in any
 * mode, the in-memory item count is compared with the one stored in the
 * node40 header found in @page; the znode invariant is checked for every
 * znode. Jnodes that are not znodes are ignored.
 */
static void check_jload(jnode * node, struct page *page)
{
	node40_header *nh;
	znode *z;

	if (!jnode_is_znode(node))
		return;

	z = JZNODE(node);
	if (znode_is_any_locked(z)) {
		nh = (node40_header *) kmap(page);
		/* this only works for node40-only file systems. For
		 * debugging. */
		assert("nikita-3253",
		       z->nr_items == le16_to_cpu(get_unaligned(&nh->nr_items)));
		kunmap(page);
	}
	assert("nikita-3565", znode_invariant(z));
}
#else
#define check_jload(node, page) noop
#endif
863 /* prefetch jnode to speed up next call to jload. Call this when you are going
864 * to call jload() shortly. This will bring appropriate portion of jnode into
865 * CPU cache. */
866 void jload_prefetch(jnode * node)
868 prefetchw(&node->x_count);
871 /* load jnode's data into memory */
872 int jload_gfp(jnode * node /* node to load */ ,
873 gfp_t gfp_flags /* allocation flags */ ,
874 int do_kmap/* true if page should be kmapped */)
876 struct page *page;
877 int result = 0;
878 int parsed;
880 assert("nikita-3010", reiser4_schedulable());
882 prefetchw(&node->pg);
884 /* taking d-reference implies taking x-reference. */
885 jref(node);
888 * acquiring d-reference to @jnode and check for JNODE_PARSED bit
889 * should be atomic, otherwise there is a race against
890 * reiser4_releasepage().
892 spin_lock(&(node->load));
893 add_d_ref(node);
894 parsed = jnode_is_parsed(node);
895 spin_unlock(&(node->load));
897 if (unlikely(!parsed)) {
898 page = jnode_get_page_locked(node, gfp_flags);
899 if (unlikely(IS_ERR(page))) {
900 result = PTR_ERR(page);
901 goto failed;
904 result = jnode_start_read(node, page);
905 if (unlikely(result != 0))
906 goto failed;
908 wait_on_page_locked(page);
909 if (unlikely(!PageUptodate(page))) {
910 result = RETERR(-EIO);
911 goto failed;
914 if (do_kmap)
915 node->data = kmap(page);
917 result = jparse(node);
918 if (unlikely(result != 0)) {
919 if (do_kmap)
920 kunmap(page);
921 goto failed;
923 check_jload(node, page);
924 } else {
925 page = jnode_page(node);
926 check_jload(node, page);
927 if (do_kmap)
928 node->data = kmap(page);
931 if (!is_writeout_mode())
932 /* We do not mark pages active if jload is called as a part of
933 * jnode_flush() or reiser4_write_logs(). Both jnode_flush()
934 * and write_logs() add no value to cached data, there is no
935 * sense to mark pages as active when they go to disk, it just
936 * confuses vm scanning routines because clean page could be
937 * moved out from inactive list as a result of this
938 * mark_page_accessed() call. */
939 mark_page_accessed(page);
941 return 0;
943 failed:
944 jrelse_tail(node);
945 return result;
949 /* start asynchronous reading for given jnode's page. */
950 int jstartio(jnode * node)
952 struct page *page;
954 page = jnode_get_page_locked(node, reiser4_ctx_gfp_mask_get());
955 if (IS_ERR(page))
956 return PTR_ERR(page);
958 return jnode_start_read(node, page);
961 /* Initialize a node by calling appropriate plugin instead of reading
962 * node from disk as in jload(). */
963 int jinit_new(jnode * node, gfp_t gfp_flags)
965 struct page *page;
966 int result;
968 jref(node);
969 add_d_ref(node);
971 page = jnode_get_page_locked(node, gfp_flags);
972 if (IS_ERR(page)) {
973 result = PTR_ERR(page);
974 goto failed;
977 SetPageUptodate(page);
978 unlock_page(page);
980 node->data = kmap(page);
982 if (!jnode_is_parsed(node)) {
983 jnode_plugin *jplug = jnode_ops(node);
984 spin_lock_jnode(node);
985 result = jplug->init(node);
986 spin_unlock_jnode(node);
987 if (result) {
988 kunmap(page);
989 goto failed;
991 JF_SET(node, JNODE_PARSED);
994 return 0;
996 failed:
997 jrelse(node);
998 return result;
1001 /* release a reference to jnode acquired by jload(), decrement ->d_count */
1002 void jrelse_tail(jnode * node/* jnode to release references to */)
1004 assert("nikita-489", atomic_read(&node->d_count) > 0);
1005 atomic_dec(&node->d_count);
1006 /* release reference acquired in jload_gfp() or jinit_new() */
1007 jput(node);
1008 if (jnode_is_unformatted(node) || jnode_is_znode(node))
1009 LOCK_CNT_DEC(d_refs);
1012 /* drop reference to node data. When last reference is dropped, data are
1013 unloaded. */
1014 void jrelse(jnode * node/* jnode to release references to */)
1016 struct page *page;
1018 assert("nikita-487", node != NULL);
1019 assert_spin_not_locked(&(node->guard));
1021 page = jnode_page(node);
1022 if (likely(page != NULL)) {
1024 * it is safe not to lock jnode here, because at this point
1025 * @node->d_count is greater than zero (if jrelse() is used
1026 * correctly, that is). JNODE_PARSED may be not set yet, if,
1027 * for example, we got here as a result of error handling path
1028 * in jload(). Anyway, page cannot be detached by
1029 * reiser4_releasepage(). truncate will invalidate page
1030 * regardless, but this should not be a problem.
1032 kunmap(page);
1034 jrelse_tail(node);
1037 /* called from jput() to wait for io completion */
1038 static void jnode_finish_io(jnode * node)
1040 struct page *page;
1042 assert("nikita-2922", node != NULL);
1044 spin_lock_jnode(node);
1045 page = jnode_page(node);
1046 if (page != NULL) {
1047 page_cache_get(page);
1048 spin_unlock_jnode(node);
1049 wait_on_page_writeback(page);
1050 page_cache_release(page);
1051 } else
1052 spin_unlock_jnode(node);
1056 * This is called by jput() when last reference to jnode is released. This is
1057 * separate function, because we want fast path of jput() to be inline and,
1058 * therefore, small.
1060 void jput_final(jnode * node)
1062 int r_i_p;
1064 /* A fast check for keeping node in cache. We always keep node in cache
1065 * if its page is present and node was not marked for deletion */
1066 if (jnode_page(node) != NULL && !JF_ISSET(node, JNODE_HEARD_BANSHEE)) {
1067 rcu_read_unlock();
1068 return;
1070 r_i_p = !JF_TEST_AND_SET(node, JNODE_RIP);
1072 * if r_i_p is true, we were first to set JNODE_RIP on this node. In
1073 * this case it is safe to access node after unlock.
1075 rcu_read_unlock();
1076 if (r_i_p) {
1077 jnode_finish_io(node);
1078 if (JF_ISSET(node, JNODE_HEARD_BANSHEE))
1079 /* node is removed from the tree. */
1080 jdelete(node);
1081 else
1082 jnode_try_drop(node);
1084 /* if !r_i_p some other thread is already killing it */
1087 int jwait_io(jnode * node, int rw)
1089 struct page *page;
1090 int result;
1092 assert("zam-447", node != NULL);
1093 assert("zam-448", jnode_page(node) != NULL);
1095 page = jnode_page(node);
1097 result = 0;
1098 if (rw == READ) {
1099 wait_on_page_locked(page);
1100 } else {
1101 assert("nikita-2227", rw == WRITE);
1102 wait_on_page_writeback(page);
1104 if (PageError(page))
1105 result = RETERR(-EIO);
1107 return result;
1111 * jnode types and plugins.
1113 * jnode by itself is a "base type". There are several different jnode
1114 * flavors, called "jnode types" (see jnode_type for a list). Sometimes code
1115 * has to do different things based on jnode type. In the standard reiser4 way
1116 * this is done by having jnode plugin (see fs/reiser4/plugin.h:jnode_plugin).
1118 * Functions below deal with jnode types and define methods of jnode plugin.
1122 /* set jnode type. This is done during jnode initialization. */
1123 static void jnode_set_type(jnode * node, jnode_type type)
1125 static unsigned long type_to_mask[] = {
1126 [JNODE_UNFORMATTED_BLOCK] = 1,
1127 [JNODE_FORMATTED_BLOCK] = 0,
1128 [JNODE_BITMAP] = 2,
1129 [JNODE_IO_HEAD] = 6,
1130 [JNODE_INODE] = 4
1133 assert("zam-647", type < LAST_JNODE_TYPE);
1134 assert("nikita-2815", !jnode_is_loaded(node));
1135 assert("nikita-3386", node->state == 0);
1137 node->state |= (type_to_mask[type] << JNODE_TYPE_1);
1140 /* ->init() method of jnode plugin for jnodes that don't require plugin
1141 * specific initialization. */
1142 static int init_noinit(jnode * node UNUSED_ARG)
1144 return 0;
1147 /* ->parse() method of jnode plugin for jnodes that don't require plugin
1148 * specific pasring. */
1149 static int parse_noparse(jnode * node UNUSED_ARG)
1151 return 0;
1154 /* ->mapping() method for unformatted jnode */
1155 struct address_space *mapping_jnode(const jnode * node)
1157 struct address_space *map;
1159 assert("nikita-2713", node != NULL);
1161 /* mapping is stored in jnode */
1163 map = node->key.j.mapping;
1164 assert("nikita-2714", map != NULL);
1165 assert("nikita-2897", is_reiser4_inode(map->host));
1166 assert("nikita-2715", get_inode_oid(map->host) == node->key.j.objectid);
1167 return map;
1170 /* ->index() method for unformatted jnodes */
1171 unsigned long index_jnode(const jnode * node)
1173 /* index is stored in jnode */
1174 return node->key.j.index;
1177 /* ->remove() method for unformatted jnodes */
1178 static inline void remove_jnode(jnode * node, reiser4_tree * tree)
1180 /* remove jnode from hash table and radix tree */
1181 if (node->key.j.mapping)
1182 unhash_unformatted_node_nolock(node);
1185 /* ->mapping() method for znodes */
1186 static struct address_space *mapping_znode(const jnode * node)
1188 /* all znodes belong to fake inode */
1189 return reiser4_get_super_fake(jnode_get_tree(node)->super)->i_mapping;
1192 /* ->index() method for znodes */
1193 static unsigned long index_znode(const jnode * node)
1195 unsigned long addr;
1196 assert("nikita-3317", (1 << znode_shift_order) < sizeof(znode));
1198 /* index of znode is just its address (shifted) */
1199 addr = (unsigned long)node;
1200 return (addr - PAGE_OFFSET) >> znode_shift_order;
1203 /* ->mapping() method for bitmap jnode */
1204 static struct address_space *mapping_bitmap(const jnode * node)
1206 /* all bitmap blocks belong to special bitmap inode */
1207 return get_super_private(jnode_get_tree(node)->super)->bitmap->
1208 i_mapping;
1211 /* ->index() method for jnodes that are indexed by address */
1212 static unsigned long index_is_address(const jnode * node)
1214 unsigned long ind;
1216 ind = (unsigned long)node;
1217 return ind - PAGE_OFFSET;
1220 /* resolve race with jput */
1221 jnode *jnode_rip_sync(reiser4_tree *tree, jnode *node)
1224 * This is used as part of RCU-based jnode handling.
1226 * jlookup(), zlook(), zget(), and cbk_cache_scan_slots() have to work
1227 * with unreferenced jnodes (ones with ->x_count == 0). Hash table is
1228 * not protected during this, so concurrent thread may execute
1229 * zget-set-HEARD_BANSHEE-zput, or somehow else cause jnode to be
1230 * freed in jput_final(). To avoid such races, jput_final() sets
1231 * JNODE_RIP on jnode (under tree lock). All places that work with
1232 * unreferenced jnodes call this function. It checks for JNODE_RIP bit
1233 * (first without taking tree lock), and if this bit is set, released
1234 * reference acquired by the current thread and returns NULL.
1236 * As a result, if jnode is being concurrently freed, NULL is returned
1237 * and caller should pretend that jnode wasn't found in the first
1238 * place.
1240 * Otherwise it's safe to release "rcu-read-lock" and continue with
1241 * jnode.
1243 if (unlikely(JF_ISSET(node, JNODE_RIP))) {
1244 read_lock_tree(tree);
1245 if (JF_ISSET(node, JNODE_RIP)) {
1246 dec_x_ref(node);
1247 node = NULL;
1249 read_unlock_tree(tree);
1251 return node;
1254 reiser4_key *jnode_build_key(const jnode * node, reiser4_key * key)
1256 struct inode *inode;
1257 item_plugin *iplug;
1258 loff_t off;
1260 assert("nikita-3092", node != NULL);
1261 assert("nikita-3093", key != NULL);
1262 assert("nikita-3094", jnode_is_unformatted(node));
1264 off = ((loff_t) index_jnode(node)) << PAGE_CACHE_SHIFT;
1265 inode = mapping_jnode(node)->host;
1267 if (node->parent_item_id != 0)
1268 iplug = item_plugin_by_id(node->parent_item_id);
1269 else
1270 iplug = NULL;
1272 if (iplug != NULL && iplug->f.key_by_offset)
1273 iplug->f.key_by_offset(inode, off, key);
1274 else {
1275 file_plugin *fplug;
1277 fplug = inode_file_plugin(inode);
1278 assert("zam-1007", fplug != NULL);
1279 assert("zam-1008", fplug->key_by_inode != NULL);
1281 fplug->key_by_inode(inode, off, key);
1284 return key;
1287 /* ->parse() method for formatted nodes */
1288 static int parse_znode(jnode * node)
1290 return zparse(JZNODE(node));
1293 /* ->delete() method for formatted nodes */
1294 static void delete_znode(jnode * node, reiser4_tree * tree)
1296 znode *z;
1298 assert_rw_write_locked(&(tree->tree_lock));
1299 assert("vs-898", JF_ISSET(node, JNODE_HEARD_BANSHEE));
1301 z = JZNODE(node);
1302 assert("vs-899", z->c_count == 0);
1304 /* delete znode from sibling list. */
1305 sibling_list_remove(z);
1307 znode_remove(z, tree);
1310 /* ->remove() method for formatted nodes */
1311 static int remove_znode(jnode * node, reiser4_tree * tree)
1313 znode *z;
1315 assert_rw_write_locked(&(tree->tree_lock));
1316 z = JZNODE(node);
1318 if (z->c_count == 0) {
1319 /* detach znode from sibling list. */
1320 sibling_list_drop(z);
1321 /* this is called with tree spin-lock held, so call
1322 znode_remove() directly (rather than znode_lock_remove()). */
1323 znode_remove(z, tree);
1324 return 0;
1326 return RETERR(-EBUSY);
1329 /* ->init() method for formatted nodes */
1330 static int init_znode(jnode * node)
1332 znode *z;
1334 z = JZNODE(node);
1335 /* call node plugin to do actual initialization */
1336 return z->nplug->init(z);
1339 /* ->clone() method for formatted nodes */
1340 static jnode *clone_formatted(jnode * node)
1342 znode *clone;
1344 assert("vs-1430", jnode_is_znode(node));
1345 clone = zalloc(reiser4_ctx_gfp_mask_get());
1346 if (clone == NULL)
1347 return ERR_PTR(RETERR(-ENOMEM));
1348 zinit(clone, NULL, current_tree);
1349 jnode_set_block(ZJNODE(clone), jnode_get_block(node));
1350 /* ZJNODE(clone)->key.z is not initialized */
1351 clone->level = JZNODE(node)->level;
1353 return ZJNODE(clone);
1356 /* jplug->clone for unformatted nodes */
1357 static jnode *clone_unformatted(jnode * node)
1359 jnode *clone;
1361 assert("vs-1431", jnode_is_unformatted(node));
1362 clone = jalloc();
1363 if (clone == NULL)
1364 return ERR_PTR(RETERR(-ENOMEM));
1366 jnode_init(clone, current_tree, JNODE_UNFORMATTED_BLOCK);
1367 jnode_set_block(clone, jnode_get_block(node));
1369 return clone;
1374 * Setup jnode plugin methods for various jnode types.
1376 jnode_plugin jnode_plugins[LAST_JNODE_TYPE] = {
1377 [JNODE_UNFORMATTED_BLOCK] = {
1378 .h = {
1379 .type_id = REISER4_JNODE_PLUGIN_TYPE,
1380 .id = JNODE_UNFORMATTED_BLOCK,
1381 .pops = NULL,
1382 .label = "unformatted",
1383 .desc = "unformatted node",
1384 .linkage = {NULL, NULL}
1386 .init = init_noinit,
1387 .parse = parse_noparse,
1388 .mapping = mapping_jnode,
1389 .index = index_jnode,
1390 .clone = clone_unformatted
1392 [JNODE_FORMATTED_BLOCK] = {
1393 .h = {
1394 .type_id = REISER4_JNODE_PLUGIN_TYPE,
1395 .id = JNODE_FORMATTED_BLOCK,
1396 .pops = NULL,
1397 .label = "formatted",
1398 .desc = "formatted tree node",
1399 .linkage = {NULL, NULL}
1401 .init = init_znode,
1402 .parse = parse_znode,
1403 .mapping = mapping_znode,
1404 .index = index_znode,
1405 .clone = clone_formatted
1407 [JNODE_BITMAP] = {
1408 .h = {
1409 .type_id = REISER4_JNODE_PLUGIN_TYPE,
1410 .id = JNODE_BITMAP,
1411 .pops = NULL,
1412 .label = "bitmap",
1413 .desc = "bitmap node",
1414 .linkage = {NULL, NULL}
1416 .init = init_noinit,
1417 .parse = parse_noparse,
1418 .mapping = mapping_bitmap,
1419 .index = index_is_address,
1420 .clone = NULL
1422 [JNODE_IO_HEAD] = {
1423 .h = {
1424 .type_id = REISER4_JNODE_PLUGIN_TYPE,
1425 .id = JNODE_IO_HEAD,
1426 .pops = NULL,
1427 .label = "io head",
1428 .desc = "io head",
1429 .linkage = {NULL, NULL}
1431 .init = init_noinit,
1432 .parse = parse_noparse,
1433 .mapping = mapping_bitmap,
1434 .index = index_is_address,
1435 .clone = NULL
1437 [JNODE_INODE] = {
1438 .h = {
1439 .type_id = REISER4_JNODE_PLUGIN_TYPE,
1440 .id = JNODE_INODE,
1441 .pops = NULL,
1442 .label = "inode",
1443 .desc = "inode's builtin jnode",
1444 .linkage = {NULL, NULL}
1446 .init = NULL,
1447 .parse = NULL,
1448 .mapping = NULL,
1449 .index = NULL,
1450 .clone = NULL
1455 * jnode destruction.
1457 * Thread may use a jnode after it acquired a reference to it. References are
1458 * counted in ->x_count field. Reference protects jnode from being
1459 * recycled. This is different from protecting jnode data (that are stored in
1460 * jnode page) from being evicted from memory. Data are protected by jload()
1461 * and released by jrelse().
1463 * If thread already possesses a reference to the jnode it can acquire another
1464 * one through jref(). Initial reference is obtained (usually) by locating
1465 * jnode in some indexing structure that depends on jnode type: formatted
1466 * nodes are kept in global hash table, where they are indexed by block
1467 * number, and also in the cbk cache. Unformatted jnodes are also kept in hash
1468 * table, which is indexed by oid and offset within file, and in per-inode
1469 * radix tree.
1471 * Reference to jnode is released by jput(). If last reference is released,
1472 * jput_final() is called. This function determines whether jnode has to be
1473 * deleted (this happens when corresponding node is removed from the file
1474 * system, jnode is marked with JNODE_HEARD_BANSHEE bit in this case), or it
1475 * should be just "removed" (deleted from memory).
1477 * Jnode destruction is signally delicate dance because of locking and RCU.
1481 * Returns true if jnode cannot be removed right now. This check is called
1482 * under tree lock. If it returns true, jnode is irrevocably committed to be
1483 * deleted/removed.
1485 static inline int jnode_is_busy(const jnode * node, jnode_type jtype)
1487 /* if other thread managed to acquire a reference to this jnode, don't
1488 * free it. */
1489 if (atomic_read(&node->x_count) > 0)
1490 return 1;
1491 /* also, don't free znode that has children in memory */
1492 if (jtype == JNODE_FORMATTED_BLOCK && JZNODE(node)->c_count > 0)
1493 return 1;
1494 return 0;
1498 * this is called as part of removing jnode. Based on jnode type, call
1499 * corresponding function that removes jnode from indices and returns it back
1500 * to the appropriate slab (through RCU).
1502 static inline void
1503 jnode_remove(jnode * node, jnode_type jtype, reiser4_tree * tree)
1505 switch (jtype) {
1506 case JNODE_UNFORMATTED_BLOCK:
1507 remove_jnode(node, tree);
1508 break;
1509 case JNODE_IO_HEAD:
1510 case JNODE_BITMAP:
1511 break;
1512 case JNODE_INODE:
1513 break;
1514 case JNODE_FORMATTED_BLOCK:
1515 remove_znode(node, tree);
1516 break;
1517 default:
1518 wrong_return_value("nikita-3196", "Wrong jnode type");
1523 * this is called as part of deleting jnode. Based on jnode type, call
1524 * corresponding function that removes jnode from indices and returns it back
1525 * to the appropriate slab (through RCU).
1527 * This differs from jnode_remove() only for formatted nodes---for them
1528 * sibling list handling is different for removal and deletion.
1530 static inline void
1531 jnode_delete(jnode * node, jnode_type jtype, reiser4_tree * tree UNUSED_ARG)
1533 switch (jtype) {
1534 case JNODE_UNFORMATTED_BLOCK:
1535 remove_jnode(node, tree);
1536 break;
1537 case JNODE_IO_HEAD:
1538 case JNODE_BITMAP:
1539 break;
1540 case JNODE_FORMATTED_BLOCK:
1541 delete_znode(node, tree);
1542 break;
1543 case JNODE_INODE:
1544 default:
1545 wrong_return_value("nikita-3195", "Wrong jnode type");
#if REISER4_DEBUG
/*
 * Remove jnode from the debugging list of all jnodes hanging off the
 * super block. The list is protected by the ->all_guard spin lock of the
 * super block private data; @node must currently be on the list.
 */
void jnode_list_remove(jnode * node)
{
	reiser4_super_info_data *sbinfo =
	    get_super_private(jnode_get_tree(node)->super);

	spin_lock_irq(&sbinfo->all_guard);
	assert("nikita-2422", !list_empty(&node->jnodes));
	list_del_init(&node->jnodes);
	spin_unlock_irq(&sbinfo->all_guard);
}
#endif
1567 * this is called by jput_final() to remove jnode when last reference to it is
1568 * released.
1570 static int jnode_try_drop(jnode * node)
1572 int result;
1573 reiser4_tree *tree;
1574 jnode_type jtype;
1576 assert("nikita-2491", node != NULL);
1577 assert("nikita-2583", JF_ISSET(node, JNODE_RIP));
1579 tree = jnode_get_tree(node);
1580 jtype = jnode_get_type(node);
1582 spin_lock_jnode(node);
1583 write_lock_tree(tree);
1585 * if jnode has a page---leave it alone. Memory pressure will
1586 * eventually kill page and jnode.
1588 if (jnode_page(node) != NULL) {
1589 write_unlock_tree(tree);
1590 spin_unlock_jnode(node);
1591 JF_CLR(node, JNODE_RIP);
1592 return RETERR(-EBUSY);
1595 /* re-check ->x_count under tree lock. */
1596 result = jnode_is_busy(node, jtype);
1597 if (result == 0) {
1598 assert("nikita-2582", !JF_ISSET(node, JNODE_HEARD_BANSHEE));
1599 assert("jmacd-511/b", atomic_read(&node->d_count) == 0);
1601 spin_unlock_jnode(node);
1602 /* no page and no references---despatch him. */
1603 jnode_remove(node, jtype, tree);
1604 write_unlock_tree(tree);
1605 jnode_free(node, jtype);
1606 } else {
1607 /* busy check failed: reference was acquired by concurrent
1608 * thread. */
1609 write_unlock_tree(tree);
1610 spin_unlock_jnode(node);
1611 JF_CLR(node, JNODE_RIP);
1613 return result;
1616 /* jdelete() -- Delete jnode from the tree and file system */
1617 static int jdelete(jnode * node/* jnode to finish with */)
1619 struct page *page;
1620 int result;
1621 reiser4_tree *tree;
1622 jnode_type jtype;
1624 assert("nikita-467", node != NULL);
1625 assert("nikita-2531", JF_ISSET(node, JNODE_RIP));
1627 jtype = jnode_get_type(node);
1629 page = jnode_lock_page(node);
1630 assert_spin_locked(&(node->guard));
1632 tree = jnode_get_tree(node);
1634 write_lock_tree(tree);
1635 /* re-check ->x_count under tree lock. */
1636 result = jnode_is_busy(node, jtype);
1637 if (likely(!result)) {
1638 assert("nikita-2123", JF_ISSET(node, JNODE_HEARD_BANSHEE));
1639 assert("jmacd-511", atomic_read(&node->d_count) == 0);
1641 /* detach page */
1642 if (page != NULL) {
1644 * FIXME this is racy against jnode_extent_write().
1646 page_clear_jnode(page, node);
1648 spin_unlock_jnode(node);
1649 /* goodbye */
1650 jnode_delete(node, jtype, tree);
1651 write_unlock_tree(tree);
1652 jnode_free(node, jtype);
1653 /* @node is no longer valid pointer */
1654 if (page != NULL)
1655 reiser4_drop_page(page);
1656 } else {
1657 /* busy check failed: reference was acquired by concurrent
1658 * thread. */
1659 JF_CLR(node, JNODE_RIP);
1660 write_unlock_tree(tree);
1661 spin_unlock_jnode(node);
1662 if (page != NULL)
1663 unlock_page(page);
1665 return result;
1668 /* drop jnode on the floor.
1670 Return value:
1672 -EBUSY: failed to drop jnode, because there are still references to it
1674 0: successfully dropped jnode
1677 static int jdrop_in_tree(jnode * node, reiser4_tree * tree)
1679 struct page *page;
1680 jnode_type jtype;
1681 int result;
1683 assert("zam-602", node != NULL);
1684 assert_rw_not_read_locked(&(tree->tree_lock));
1685 assert_rw_not_write_locked(&(tree->tree_lock));
1686 assert("nikita-2403", !JF_ISSET(node, JNODE_HEARD_BANSHEE));
1688 jtype = jnode_get_type(node);
1690 page = jnode_lock_page(node);
1691 assert_spin_locked(&(node->guard));
1693 write_lock_tree(tree);
1695 /* re-check ->x_count under tree lock. */
1696 result = jnode_is_busy(node, jtype);
1697 if (!result) {
1698 assert("nikita-2488", page == jnode_page(node));
1699 assert("nikita-2533", atomic_read(&node->d_count) == 0);
1700 if (page != NULL) {
1701 assert("nikita-2126", !PageDirty(page));
1702 assert("nikita-2127", PageUptodate(page));
1703 assert("nikita-2181", PageLocked(page));
1704 page_clear_jnode(page, node);
1706 spin_unlock_jnode(node);
1707 jnode_remove(node, jtype, tree);
1708 write_unlock_tree(tree);
1709 jnode_free(node, jtype);
1710 if (page != NULL)
1711 reiser4_drop_page(page);
1712 } else {
1713 /* busy check failed: reference was acquired by concurrent
1714 * thread. */
1715 JF_CLR(node, JNODE_RIP);
1716 write_unlock_tree(tree);
1717 spin_unlock_jnode(node);
1718 if (page != NULL)
1719 unlock_page(page);
1721 return result;
1724 /* This function frees jnode "if possible". In particular, [dcx]_count has to
1725 be 0 (where applicable). */
1726 void jdrop(jnode * node)
1728 jdrop_in_tree(node, jnode_get_tree(node));
1731 /* IO head jnode implementation; The io heads are simple j-nodes with limited
1732 functionality (these j-nodes are not in any hash table) just for reading
1733 from and writing to disk. */
1735 jnode *reiser4_alloc_io_head(const reiser4_block_nr * block)
1737 jnode *jal = jalloc();
1739 if (jal != NULL) {
1740 jnode_init(jal, current_tree, JNODE_IO_HEAD);
1741 jnode_set_block(jal, block);
1744 jref(jal);
1746 return jal;
1749 void reiser4_drop_io_head(jnode * node)
1751 assert("zam-648", jnode_get_type(node) == JNODE_IO_HEAD);
1753 jput(node);
1754 jdrop(node);
1757 /* protect keep jnode data from reiser4_releasepage() */
1758 void pin_jnode_data(jnode * node)
1760 assert("zam-671", jnode_page(node) != NULL);
1761 page_cache_get(jnode_page(node));
1764 /* make jnode data free-able again */
1765 void unpin_jnode_data(jnode * node)
1767 assert("zam-672", jnode_page(node) != NULL);
1768 page_cache_release(jnode_page(node));
1771 struct address_space *jnode_get_mapping(const jnode * node)
1773 assert("nikita-3162", node != NULL);
1774 return jnode_ops(node)->mapping(node);
1777 #if REISER4_DEBUG
1778 /* debugging aid: jnode invariant */
1779 int jnode_invariant_f(const jnode * node, char const **msg)
1781 #define _ergo(ant, con) \
1782 ((*msg) = "{" #ant "} ergo {" #con "}", ergo((ant), (con)))
1783 #define _check(exp) ((*msg) = #exp, (exp))
1785 return _check(node != NULL) &&
1786 /* [jnode-queued] */
1787 /* only relocated node can be queued, except that when znode
1788 * is being deleted, its JNODE_RELOC bit is cleared */
1789 _ergo(JF_ISSET(node, JNODE_FLUSH_QUEUED),
1790 JF_ISSET(node, JNODE_RELOC) ||
1791 JF_ISSET(node, JNODE_HEARD_BANSHEE)) &&
1792 _check(node->jnodes.prev != NULL) &&
1793 _check(node->jnodes.next != NULL) &&
1794 /* [jnode-dirty] invariant */
1795 /* dirty inode is part of atom */
1796 _ergo(JF_ISSET(node, JNODE_DIRTY), node->atom != NULL) &&
1797 /* [jnode-oid] invariant */
1798 /* for unformatted node ->objectid and ->mapping fields are
1799 * consistent */
1800 _ergo(jnode_is_unformatted(node) && node->key.j.mapping != NULL,
1801 node->key.j.objectid ==
1802 get_inode_oid(node->key.j.mapping->host)) &&
1803 /* [jnode-atom-valid] invariant */
1804 /* node atom has valid state */
1805 _ergo(node->atom != NULL, node->atom->stage != ASTAGE_INVALID) &&
1806 /* [jnode-page-binding] invariant */
1807 /* if node points to page, it points back to node */
1808 _ergo(node->pg != NULL, jprivate(node->pg) == node) &&
1809 /* [jnode-refs] invariant */
1810 /* only referenced jnode can be loaded */
1811 _check(atomic_read(&node->x_count) >= atomic_read(&node->d_count));
1815 static const char *jnode_type_name(jnode_type type)
1817 switch (type) {
1818 case JNODE_UNFORMATTED_BLOCK:
1819 return "unformatted";
1820 case JNODE_FORMATTED_BLOCK:
1821 return "formatted";
1822 case JNODE_BITMAP:
1823 return "bitmap";
1824 case JNODE_IO_HEAD:
1825 return "io head";
1826 case JNODE_INODE:
1827 return "inode";
1828 case LAST_JNODE_TYPE:
1829 return "last";
1830 default:{
1831 static char unknown[30];
1833 sprintf(unknown, "unknown %i", type);
1834 return unknown;
1839 #define jnode_state_name(node, flag) \
1840 (JF_ISSET((node), (flag)) ? ((#flag "|")+6) : "")
1842 /* debugging aid: output human readable information about @node */
1843 static void info_jnode(const char *prefix /* prefix to print */ ,
1844 const jnode * node/* node to print */)
1846 assert("umka-068", prefix != NULL);
1848 if (node == NULL) {
1849 printk("%s: null\n", prefix);
1850 return;
1853 printk
1854 ("%s: %p: state: %lx: [%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s], level: %i,"
1855 " block: %s, d_count: %d, x_count: %d, "
1856 "pg: %p, atom: %p, lock: %i:%i, type: %s, ", prefix, node,
1857 node->state,
1858 jnode_state_name(node, JNODE_PARSED),
1859 jnode_state_name(node, JNODE_HEARD_BANSHEE),
1860 jnode_state_name(node, JNODE_LEFT_CONNECTED),
1861 jnode_state_name(node, JNODE_RIGHT_CONNECTED),
1862 jnode_state_name(node, JNODE_ORPHAN),
1863 jnode_state_name(node, JNODE_CREATED),
1864 jnode_state_name(node, JNODE_RELOC),
1865 jnode_state_name(node, JNODE_OVRWR),
1866 jnode_state_name(node, JNODE_DIRTY),
1867 jnode_state_name(node, JNODE_IS_DYING),
1868 jnode_state_name(node, JNODE_RIP),
1869 jnode_state_name(node, JNODE_MISSED_IN_CAPTURE),
1870 jnode_state_name(node, JNODE_WRITEBACK),
1871 jnode_state_name(node, JNODE_NEW),
1872 jnode_state_name(node, JNODE_DKSET),
1873 jnode_state_name(node, JNODE_REPACK),
1874 jnode_state_name(node, JNODE_CLUSTER_PAGE),
1875 jnode_get_level(node), sprint_address(jnode_get_block(node)),
1876 atomic_read(&node->d_count), atomic_read(&node->x_count),
1877 jnode_page(node), node->atom, 0, 0,
1878 jnode_type_name(jnode_get_type(node)));
1879 if (jnode_is_unformatted(node)) {
1880 printk("inode: %llu, index: %lu, ",
1881 node->key.j.objectid, node->key.j.index);
1885 /* debugging aid: check znode invariant and panic if it doesn't hold */
1886 static int jnode_invariant(const jnode * node, int tlocked, int jlocked)
1888 char const *failed_msg;
1889 int result;
1890 reiser4_tree *tree;
1892 tree = jnode_get_tree(node);
1894 assert("umka-063312", node != NULL);
1895 assert("umka-064321", tree != NULL);
1897 if (!jlocked && !tlocked)
1898 spin_lock_jnode((jnode *) node);
1899 if (!tlocked)
1900 read_lock_tree(jnode_get_tree(node));
1901 result = jnode_invariant_f(node, &failed_msg);
1902 if (!result) {
1903 info_jnode("corrupted node", node);
1904 warning("jmacd-555", "Condition %s failed", failed_msg);
1906 if (!tlocked)
1907 read_unlock_tree(jnode_get_tree(node));
1908 if (!jlocked && !tlocked)
1909 spin_unlock_jnode((jnode *) node);
1910 return result;
1913 #endif /* REISER4_DEBUG */
1915 /* Make Linus happy.
1916 Local variables:
1917 c-indentation-style: "K&R"
1918 mode-name: "LC"
1919 c-basic-offset: 8
1920 tab-width: 8
1921 fill-column: 80
1922 End: