1 /* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
3 /* Jnode manipulation functions. */
4 /* Jnode is entity used to track blocks with data and meta-data in reiser4.
6 In particular, jnodes are used to track transactional information
7 associated with each block. Each znode contains jnode as ->zjnode field.
9 Jnode stands for either Josh or Journal node.
15 * Jnode represents block containing data or meta-data. There are jnodes
18 * unformatted blocks (jnodes proper). There are plans, however to
19 * have a handle per extent unit rather than per each unformatted
20 * block, because there are so many of them.
22 * For bitmaps. Each bitmap is actually represented by two jnodes--one
23 * for working and another for "commit" data, together forming bnode.
25 * For io-heads. These are used by log writer.
27 * For formatted nodes (znode). See comment at the top of znode.c for
28 * details specific to the formatted nodes (znodes).
32 * Jnode provides access to the data of node it represents. Data are
33 * stored in a page. Page is kept in a page cache. This means, that jnodes
34 * are highly interconnected with page cache and VM internals.
36 * jnode has a pointer to page (->pg) containing its data. Pointer to data
37 * themselves is cached in ->data field to avoid frequent calls to
40 * jnode and page are attached to each other by jnode_attach_page(). This
41 * function places pointer to jnode in set_page_private(), sets PG_private
42 * flag and increments page counter.
44 * Opposite operation is performed by page_clear_jnode().
46 * jnode->pg is protected by jnode spin lock, and page->private is
47 * protected by page lock. See comment at the top of page_cache.c for
50 * page can be detached from jnode for two reasons:
52 * . jnode is removed from a tree (file is truncated, or formatted
53 * node is removed by balancing).
55 * . during memory pressure, VM calls ->releasepage() method
56 * (reiser4_releasepage()) to evict page from memory.
58 * (there, of course, is also umount, but this is special case we are not
59 * concerned with here).
61 * To protect jnode page from eviction, one calls jload() function that
62 * "pins" page in memory (loading it if necessary), increments
63 * jnode->d_count, and kmap()s page. Page is unpinned through call to
68 * jnode is created, placed in hash table, and, optionally, in per-inode
69 * radix tree. Page can be attached to jnode, pinned, released, etc.
71 * When jnode is captured into atom its reference counter is
72 * increased. While being part of an atom, jnode can be "early
73 * flushed". This means that as part of flush procedure, jnode is placed
74 * into "relocate set", and its page is submitted to the disk. After io
75 * completes, page can be detached, then loaded again, re-dirtied, etc.
77 * A thread acquires a reference to jnode by calling jref() and releases it by
78 * jput(). When last reference is removed, jnode is still retained in
79 * memory (cached) if it has page attached, _unless_ it is scheduled for
80 * destruction (has JNODE_HEARD_BANSHEE bit set).
82 * Tree read-write lock was used as "existential" lock for jnodes. That is,
83 * jnode->x_count could be changed from 0 to 1 only under tree write lock,
84 * that is, tree lock protected unreferenced jnodes stored in the hash
85 * table, from recycling.
87 * This resulted in high contention on tree lock, because jref()/jput() is
88 * frequent operation. To ameliorate this problem, RCU is used: when jput()
89 * is just about to release last reference on jnode it sets JNODE_RIP bit
90 * on it, and then proceeds with jnode destruction (removing jnode from hash
91 * table, cbk_cache, detaching page, etc.). All places that change jnode
92 * reference counter from 0 to 1 (jlookup(), zlook(), zget(), and
93 * cbk_cache_scan_slots()) check for JNODE_RIP bit (this is done by
94 * jnode_rip_check() function), and pretend that nothing was found in hash
95 * table if bit is set.
97 * jput defers actual return of jnode into slab cache to some later time
98 * (by call_rcu()), this guarantees that other threads can safely continue
99 * working with JNODE_RIP-ped jnode.
107 #include "plugin/plugin_header.h"
108 #include "plugin/plugin.h"
110 /*#include "jnode.h"*/
113 #include "tree_walk.h"
116 #include "page_cache.h"
118 #include <asm/uaccess.h> /* UML needs this for PAGE_OFFSET */
119 #include <linux/types.h>
120 #include <linux/slab.h>
121 #include <linux/pagemap.h>
122 #include <linux/swap.h>
123 #include <linux/fs.h> /* for struct address_space */
124 #include <linux/writeback.h> /* for inode_lock */
126 static struct kmem_cache
*_jnode_slab
= NULL
;
128 static void jnode_set_type(jnode
* node
, jnode_type type
);
129 static int jdelete(jnode
* node
);
130 static int jnode_try_drop(jnode
* node
);
133 static int jnode_invariant(const jnode
* node
, int tlocked
, int jlocked
);
136 /* true if valid page is attached to jnode */
137 static inline int jnode_is_parsed(jnode
* node
)
139 return JF_ISSET(node
, JNODE_PARSED
);
142 /* hash table support */
144 /* compare two jnode keys for equality. Used by hash-table macros */
145 static inline int jnode_key_eq(const struct jnode_key
*k1
,
146 const struct jnode_key
*k2
)
148 assert("nikita-2350", k1
!= NULL
);
149 assert("nikita-2351", k2
!= NULL
);
151 return (k1
->index
== k2
->index
&& k1
->objectid
== k2
->objectid
);
154 /* Hash jnode by its key (inode plus offset). Used by hash-table macros */
155 static inline __u32
jnode_key_hashfn(j_hash_table
* table
,
156 const struct jnode_key
*key
)
158 assert("nikita-2352", key
!= NULL
);
159 assert("nikita-3346", IS_POW(table
->_buckets
));
161 /* yes, this is remarkable simply (where not stupid) hash function. */
162 return (key
->objectid
+ key
->index
) & (table
->_buckets
- 1);
165 /* The hash table definition */
166 #define KMALLOC(size) reiser4_vmalloc(size)
167 #define KFREE(ptr, size) vfree(ptr)
168 TYPE_SAFE_HASH_DEFINE(j
, jnode
, struct jnode_key
, key
.j
, link
.j
,
169 jnode_key_hashfn
, jnode_key_eq
);
173 /* call this to initialise jnode hash table */
174 int jnodes_tree_init(reiser4_tree
* tree
/* tree to initialise jnodes for */)
176 assert("nikita-2359", tree
!= NULL
);
177 return j_hash_init(&tree
->jhash_table
, 16384);
180 /* call this to destroy jnode hash table. This is called during umount. */
181 int jnodes_tree_done(reiser4_tree
* tree
/* tree to destroy jnodes for */)
183 j_hash_table
*jtable
;
187 assert("nikita-2360", tree
!= NULL
);
190 * Scan hash table and free all jnodes.
192 jtable
= &tree
->jhash_table
;
193 if (jtable
->_table
) {
194 for_all_in_htable(jtable
, j
, node
, next
) {
195 assert("nikita-2361", !atomic_read(&node
->x_count
));
199 j_hash_done(&tree
->jhash_table
);
205 * init_jnodes - create jnode cache
207 * Initializes slab cache jnodes. It is part of reiser4 module initialization.
209 int init_jnodes(void)
211 assert("umka-168", _jnode_slab
== NULL
);
213 _jnode_slab
= kmem_cache_create("jnode", sizeof(jnode
), 0,
215 SLAB_RECLAIM_ACCOUNT
, NULL
);
216 if (_jnode_slab
== NULL
)
217 return RETERR(-ENOMEM
);
223 * done_jnodes - delete jnode cache
225 * This is called on reiser4 module unloading or system shutdown.
227 void done_jnodes(void)
229 destroy_reiser4_cache(&_jnode_slab
);
232 /* Initialize a jnode. */
233 void jnode_init(jnode
* node
, reiser4_tree
* tree
, jnode_type type
)
235 assert("umka-175", node
!= NULL
);
237 memset(node
, 0, sizeof(jnode
));
238 ON_DEBUG(node
->magic
= JMAGIC
);
239 jnode_set_type(node
, type
);
240 atomic_set(&node
->d_count
, 0);
241 atomic_set(&node
->x_count
, 0);
242 spin_lock_init(&node
->guard
);
243 spin_lock_init(&node
->load
);
246 INIT_LIST_HEAD(&node
->capture_link
);
248 ASSIGN_NODE_LIST(node
, NOT_CAPTURED
);
250 INIT_RCU_HEAD(&node
->rcu
);
254 reiser4_super_info_data
*sbinfo
;
256 sbinfo
= get_super_private(tree
->super
);
257 spin_lock_irq(&sbinfo
->all_guard
);
258 list_add(&node
->jnodes
, &sbinfo
->all_jnodes
);
259 spin_unlock_irq(&sbinfo
->all_guard
);
266 * Remove jnode from ->all_jnodes list.
268 static void jnode_done(jnode
* node
, reiser4_tree
* tree
)
270 reiser4_super_info_data
*sbinfo
;
272 sbinfo
= get_super_private(tree
->super
);
274 spin_lock_irq(&sbinfo
->all_guard
);
275 assert("nikita-2422", !list_empty(&node
->jnodes
));
276 list_del_init(&node
->jnodes
);
277 spin_unlock_irq(&sbinfo
->all_guard
);
281 /* return already existing jnode of page */
282 jnode
*jnode_by_page(struct page
*pg
)
284 assert("nikita-2066", pg
!= NULL
);
285 assert("nikita-2400", PageLocked(pg
));
286 assert("nikita-2068", PagePrivate(pg
));
287 assert("nikita-2067", jprivate(pg
) != NULL
);
291 /* exported functions to allocate/free jnode objects outside this file */
294 jnode
*jal
= kmem_cache_alloc(_jnode_slab
, reiser4_ctx_gfp_mask_get());
298 /* return jnode back to the slab allocator */
299 inline void jfree(jnode
* node
)
301 assert("zam-449", node
!= NULL
);
303 assert("nikita-2663", (list_empty_careful(&node
->capture_link
) &&
304 NODE_LIST(node
) == NOT_CAPTURED
));
305 assert("nikita-3222", list_empty(&node
->jnodes
));
306 assert("nikita-3221", jnode_page(node
) == NULL
);
308 /* not yet phash_jnode_destroy(node); */
310 kmem_cache_free(_jnode_slab
, node
);
314 * This function is supplied as RCU callback. It actually frees jnode when
315 * last reference to it is gone.
317 static void jnode_free_actor(struct rcu_head
*head
)
322 node
= container_of(head
, jnode
, rcu
);
323 jtype
= jnode_get_type(node
);
325 ON_DEBUG(jnode_done(node
, jnode_get_tree(node
)));
330 case JNODE_UNFORMATTED_BLOCK
:
333 case JNODE_FORMATTED_BLOCK
:
338 wrong_return_value("nikita-3197", "Wrong jnode type");
343 * Free a jnode. Post a callback to be executed later through RCU when all
344 * references to @node are released.
346 static inline void jnode_free(jnode
* node
, jnode_type jtype
)
348 if (jtype
!= JNODE_INODE
) {
349 /*assert("nikita-3219", list_empty(&node->rcu.list)); */
350 call_rcu(&node
->rcu
, jnode_free_actor
);
352 jnode_list_remove(node
);
355 /* allocate new unformatted jnode */
356 static jnode
*jnew_unformatted(void)
364 jnode_init(jal
, current_tree
, JNODE_UNFORMATTED_BLOCK
);
365 jal
->key
.j
.mapping
= NULL
;
366 jal
->key
.j
.index
= (unsigned long)-1;
367 jal
->key
.j
.objectid
= 0;
371 /* look for jnode with given mapping and offset within hash table */
372 jnode
*jlookup(reiser4_tree
* tree
, oid_t objectid
, unsigned long index
)
374 struct jnode_key jkey
;
377 assert("nikita-2353", tree
!= NULL
);
379 jkey
.objectid
= objectid
;
383 * hash table is _not_ protected by any lock during lookups. All we
384 * have to do is to disable preemption to keep RCU happy.
388 node
= j_hash_find(&tree
->jhash_table
, &jkey
);
390 /* protect @node from recycling */
392 assert("nikita-2955", jnode_invariant(node
, 0, 0));
393 node
= jnode_rip_check(tree
, node
);
399 /* per inode radix tree of jnodes is protected by tree's read write spin lock */
400 static jnode
*jfind_nolock(struct address_space
*mapping
, unsigned long index
)
402 assert("vs-1694", mapping
->host
!= NULL
);
404 return radix_tree_lookup(jnode_tree_by_inode(mapping
->host
), index
);
407 jnode
*jfind(struct address_space
*mapping
, unsigned long index
)
412 assert("vs-1694", mapping
->host
!= NULL
);
413 tree
= reiser4_tree_by_inode(mapping
->host
);
415 read_lock_tree(tree
);
416 node
= jfind_nolock(mapping
, index
);
419 read_unlock_tree(tree
);
423 static void inode_attach_jnode(jnode
* node
)
427 struct radix_tree_root
*rtree
;
429 assert_rw_write_locked(&(jnode_get_tree(node
)->tree_lock
));
430 assert("zam-1043", node
->key
.j
.mapping
!= NULL
);
431 inode
= node
->key
.j
.mapping
->host
;
432 info
= reiser4_inode_data(inode
);
433 rtree
= jnode_tree_by_reiser4_inode(info
);
434 if (rtree
->rnode
== NULL
) {
435 /* prevent inode from being pruned when it has jnodes attached
437 spin_lock_irq(&inode
->i_data
.tree_lock
);
438 inode
->i_data
.nrpages
++;
439 spin_unlock_irq(&inode
->i_data
.tree_lock
);
441 assert("zam-1049", equi(rtree
->rnode
!= NULL
, info
->nr_jnodes
!= 0));
443 !radix_tree_insert(rtree
, node
->key
.j
.index
, node
));
444 ON_DEBUG(info
->nr_jnodes
++);
447 static void inode_detach_jnode(jnode
* node
)
451 struct radix_tree_root
*rtree
;
453 assert_rw_write_locked(&(jnode_get_tree(node
)->tree_lock
));
454 assert("zam-1044", node
->key
.j
.mapping
!= NULL
);
455 inode
= node
->key
.j
.mapping
->host
;
456 info
= reiser4_inode_data(inode
);
457 rtree
= jnode_tree_by_reiser4_inode(info
);
459 assert("zam-1051", info
->nr_jnodes
!= 0);
460 assert("zam-1052", rtree
->rnode
!= NULL
);
461 ON_DEBUG(info
->nr_jnodes
--);
463 /* delete jnode from inode's radix tree of jnodes */
464 check_me("zam-1046", radix_tree_delete(rtree
, node
->key
.j
.index
));
465 if (rtree
->rnode
== NULL
) {
466 /* inode can be pruned now */
467 spin_lock_irq(&inode
->i_data
.tree_lock
);
468 inode
->i_data
.nrpages
--;
469 spin_unlock_irq(&inode
->i_data
.tree_lock
);
473 /* put jnode into hash table (where they can be found by flush who does not know
474 mapping) and to inode's tree of jnodes (where they can be found (hopefully
475 faster) in places where mapping is known). Currently it is used by
476 fs/reiser4/plugin/item/extent_file_ops.c:index_extent_jnode when new jnode is
479 hash_unformatted_jnode(jnode
* node
, struct address_space
*mapping
,
482 j_hash_table
*jtable
;
484 assert("vs-1446", jnode_is_unformatted(node
));
485 assert("vs-1442", node
->key
.j
.mapping
== 0);
486 assert("vs-1443", node
->key
.j
.objectid
== 0);
487 assert("vs-1444", node
->key
.j
.index
== (unsigned long)-1);
488 assert_rw_write_locked(&(jnode_get_tree(node
)->tree_lock
));
490 node
->key
.j
.mapping
= mapping
;
491 node
->key
.j
.objectid
= get_inode_oid(mapping
->host
);
492 node
->key
.j
.index
= index
;
494 jtable
= &jnode_get_tree(node
)->jhash_table
;
496 /* race with some other thread inserting jnode into the hash table is
497 * impossible, because we keep the page lock. */
499 * following assertion no longer holds because of RCU: it is possible
500 * jnode is in the hash table, but with JNODE_RIP bit set.
502 /* assert("nikita-3211", j_hash_find(jtable, &node->key.j) == NULL); */
503 j_hash_insert_rcu(jtable
, node
);
504 inode_attach_jnode(node
);
507 static void unhash_unformatted_node_nolock(jnode
* node
)
509 assert("vs-1683", node
->key
.j
.mapping
!= NULL
);
511 node
->key
.j
.objectid
==
512 get_inode_oid(node
->key
.j
.mapping
->host
));
514 /* remove jnode from hash-table */
515 j_hash_remove_rcu(&node
->tree
->jhash_table
, node
);
516 inode_detach_jnode(node
);
517 node
->key
.j
.mapping
= NULL
;
518 node
->key
.j
.index
= (unsigned long)-1;
519 node
->key
.j
.objectid
= 0;
523 /* remove jnode from hash table and from inode's tree of jnodes. This is used in
524 reiser4_invalidatepage and in kill_hook_extent -> truncate_inode_jnodes ->
525 reiser4_uncapture_jnode */
526 void unhash_unformatted_jnode(jnode
* node
)
528 assert("vs-1445", jnode_is_unformatted(node
));
530 write_lock_tree(node
->tree
);
531 unhash_unformatted_node_nolock(node
);
532 write_unlock_tree(node
->tree
);
536 * search hash table for a jnode with given oid and index. If not found,
537 * allocate new jnode, insert it, and also insert into radix tree for the
538 * given inode/mapping.
540 static jnode
*find_get_jnode(reiser4_tree
* tree
,
541 struct address_space
*mapping
,
542 oid_t oid
, unsigned long index
)
548 result
= jnew_unformatted();
550 if (unlikely(result
== NULL
))
551 return ERR_PTR(RETERR(-ENOMEM
));
553 preload
= radix_tree_preload(reiser4_ctx_gfp_mask_get());
555 return ERR_PTR(preload
);
557 write_lock_tree(tree
);
558 shadow
= jfind_nolock(mapping
, index
);
559 if (likely(shadow
== NULL
)) {
560 /* add new jnode to hash table and inode's radix tree of
563 hash_unformatted_jnode(result
, mapping
, index
);
565 /* jnode is found in inode's radix tree of jnodes */
567 jnode_free(result
, JNODE_UNFORMATTED_BLOCK
);
568 assert("vs-1498", shadow
->key
.j
.mapping
== mapping
);
571 write_unlock_tree(tree
);
573 assert("nikita-2955",
574 ergo(result
!= NULL
, jnode_invariant(result
, 0, 0)));
575 radix_tree_preload_end();
579 /* jget() (a la zget() but for unformatted nodes). Returns (and possibly
580 creates) jnode corresponding to page @pg. jnode is attached to page and
581 inserted into jnode hash-table. */
582 static jnode
*do_jget(reiser4_tree
* tree
, struct page
*pg
)
585 * There are two ways to create jnode: starting with pre-existing page
588 * When page already exists, jnode is created
589 * (jnode_of_page()->do_jget()) under page lock. This is done in
590 * ->writepage(), or when capturing anonymous page dirtied through
593 * Jnode without page is created by index_extent_jnode().
598 oid_t oid
= get_inode_oid(pg
->mapping
->host
);
600 assert("umka-176", pg
!= NULL
);
601 assert("nikita-2394", PageLocked(pg
));
603 result
= jprivate(pg
);
604 if (likely(result
!= NULL
))
607 tree
= reiser4_tree_by_page(pg
);
609 /* check hash-table first */
610 result
= jfind(pg
->mapping
, pg
->index
);
611 if (unlikely(result
!= NULL
)) {
612 spin_lock_jnode(result
);
613 jnode_attach_page(result
, pg
);
614 spin_unlock_jnode(result
);
615 result
->key
.j
.mapping
= pg
->mapping
;
619 /* since page is locked, jnode should be allocated with GFP_NOFS flag */
620 reiser4_ctx_gfp_mask_force(GFP_NOFS
);
621 result
= find_get_jnode(tree
, pg
->mapping
, oid
, pg
->index
);
622 if (unlikely(IS_ERR(result
)))
624 /* attach jnode to page */
625 spin_lock_jnode(result
);
626 jnode_attach_page(result
, pg
);
627 spin_unlock_jnode(result
);
632 * return jnode for @pg, creating it if necessary.
634 jnode
*jnode_of_page(struct page
*pg
)
638 assert("umka-176", pg
!= NULL
);
639 assert("nikita-2394", PageLocked(pg
));
641 result
= do_jget(reiser4_tree_by_page(pg
), pg
);
643 if (REISER4_DEBUG
&& !IS_ERR(result
)) {
644 assert("nikita-3210", result
== jprivate(pg
));
645 assert("nikita-2046", jnode_page(jprivate(pg
)) == pg
);
646 if (jnode_is_unformatted(jprivate(pg
))) {
647 assert("nikita-2364",
648 jprivate(pg
)->key
.j
.index
== pg
->index
);
649 assert("nikita-2367",
650 jprivate(pg
)->key
.j
.mapping
== pg
->mapping
);
651 assert("nikita-2365",
652 jprivate(pg
)->key
.j
.objectid
==
653 get_inode_oid(pg
->mapping
->host
));
655 jprivate(pg
)->key
.j
.objectid
==
656 pg
->mapping
->host
->i_ino
);
657 assert("nikita-2356",
658 jnode_is_unformatted(jnode_by_page(pg
)));
660 assert("nikita-2956", jnode_invariant(jprivate(pg
), 0, 0));
665 /* attach page to jnode: set ->pg pointer in jnode, and ->private one in the
667 void jnode_attach_page(jnode
* node
, struct page
*pg
)
669 assert("nikita-2060", node
!= NULL
);
670 assert("nikita-2061", pg
!= NULL
);
672 assert("nikita-2050", jprivate(pg
) == 0ul);
673 assert("nikita-2393", !PagePrivate(pg
));
674 assert("vs-1741", node
->pg
== NULL
);
676 assert("nikita-2396", PageLocked(pg
));
677 assert_spin_locked(&(node
->guard
));
680 set_page_private(pg
, (unsigned long)node
);
685 /* Dual to jnode_attach_page: break a binding between page and jnode */
686 void page_clear_jnode(struct page
*page
, jnode
* node
)
688 assert("nikita-2424", page
!= NULL
);
689 assert("nikita-2425", PageLocked(page
));
690 assert("nikita-2426", node
!= NULL
);
691 assert_spin_locked(&(node
->guard
));
692 assert("nikita-2428", PagePrivate(page
));
694 assert("nikita-3551", !PageWriteback(page
));
696 JF_CLR(node
, JNODE_PARSED
);
697 set_page_private(page
, 0ul);
698 ClearPagePrivate(page
);
700 page_cache_release(page
);
704 /* it is only used in one place to handle error */
706 page_detach_jnode(struct page
*page
, struct address_space
*mapping
,
709 assert("nikita-2395", page
!= NULL
);
712 if ((page
->mapping
== mapping
) && (page
->index
== index
)
713 && PagePrivate(page
)) {
716 node
= jprivate(page
);
717 spin_lock_jnode(node
);
718 page_clear_jnode(page
, node
);
719 spin_unlock_jnode(node
);
725 /* return @node page locked.
727 Locking ordering requires that one first takes page lock and afterwards
728 spin lock on node attached to this page. Sometimes it is necessary to go in
729 the opposite direction. This is done through standard trylock-and-release
732 static struct page
*jnode_lock_page(jnode
* node
)
736 assert("nikita-2052", node
!= NULL
);
737 assert("nikita-2401", LOCK_CNT_NIL(spin_locked_jnode
));
741 spin_lock_jnode(node
);
742 page
= jnode_page(node
);
746 /* no need to page_cache_get( page ) here, because page cannot
747 be evicted from memory without detaching it from jnode and
748 this requires spin lock on jnode that we already hold.
750 if (trylock_page(page
)) {
751 /* We won a lock on jnode page, proceed. */
755 /* Page is locked by someone else. */
756 page_cache_get(page
);
757 spin_unlock_jnode(node
);
758 wait_on_page_locked(page
);
759 /* it is possible that page was detached from jnode and
760 returned to the free pool, or re-assigned while we were
761 waiting on locked bit. This will be rechecked on the next
764 page_cache_release(page
);
772 * if JNODE_PARSED bit is not set, call ->parse() method of jnode, to verify
773 * validness of jnode content.
775 static inline int jparse(jnode
* node
)
779 assert("nikita-2466", node
!= NULL
);
781 spin_lock_jnode(node
);
782 if (likely(!jnode_is_parsed(node
))) {
783 result
= jnode_ops(node
)->parse(node
);
784 if (likely(result
== 0))
785 JF_SET(node
, JNODE_PARSED
);
788 spin_unlock_jnode(node
);
792 /* Lock a page attached to jnode, create and attach page to jnode if it had no
794 static struct page
*jnode_get_page_locked(jnode
* node
, gfp_t gfp_flags
)
798 spin_lock_jnode(node
);
799 page
= jnode_page(node
);
802 spin_unlock_jnode(node
);
803 page
= find_or_create_page(jnode_get_mapping(node
),
804 jnode_get_index(node
), gfp_flags
);
806 return ERR_PTR(RETERR(-ENOMEM
));
808 if (trylock_page(page
)) {
809 spin_unlock_jnode(node
);
812 page_cache_get(page
);
813 spin_unlock_jnode(node
);
815 assert("nikita-3134", page
->mapping
== jnode_get_mapping(node
));
818 spin_lock_jnode(node
);
819 if (!jnode_page(node
))
820 jnode_attach_page(node
, page
);
821 spin_unlock_jnode(node
);
823 page_cache_release(page
);
824 assert("zam-894", jnode_page(node
) == page
);
828 /* Start read operation for jnode's page if page is not up-to-date. */
829 static int jnode_start_read(jnode
* node
, struct page
*page
)
831 assert("zam-893", PageLocked(page
));
833 if (PageUptodate(page
)) {
837 return reiser4_page_io(page
, node
, READ
, reiser4_ctx_gfp_mask_get());
841 static void check_jload(jnode
* node
, struct page
*page
)
843 if (jnode_is_znode(node
)) {
848 if (znode_is_any_locked(z
)) {
849 nh
= (node40_header
*) kmap(page
);
850 /* this only works for node40-only file systems. For
852 assert("nikita-3253",
853 z
->nr_items
== le16_to_cpu(get_unaligned(&nh
->nr_items
)));
856 assert("nikita-3565", znode_invariant(z
));
860 #define check_jload(node, page) noop
863 /* prefetch jnode to speed up next call to jload. Call this when you are going
864 * to call jload() shortly. This will bring appropriate portion of jnode into
866 void jload_prefetch(jnode
* node
)
868 prefetchw(&node
->x_count
);
871 /* load jnode's data into memory */
872 int jload_gfp(jnode
* node
/* node to load */ ,
873 gfp_t gfp_flags
/* allocation flags */ ,
874 int do_kmap
/* true if page should be kmapped */)
880 assert("nikita-3010", reiser4_schedulable());
882 prefetchw(&node
->pg
);
884 /* taking d-reference implies taking x-reference. */
888 * acquiring d-reference to @jnode and check for JNODE_PARSED bit
889 * should be atomic, otherwise there is a race against
890 * reiser4_releasepage().
892 spin_lock(&(node
->load
));
894 parsed
= jnode_is_parsed(node
);
895 spin_unlock(&(node
->load
));
897 if (unlikely(!parsed
)) {
898 page
= jnode_get_page_locked(node
, gfp_flags
);
899 if (unlikely(IS_ERR(page
))) {
900 result
= PTR_ERR(page
);
904 result
= jnode_start_read(node
, page
);
905 if (unlikely(result
!= 0))
908 wait_on_page_locked(page
);
909 if (unlikely(!PageUptodate(page
))) {
910 result
= RETERR(-EIO
);
915 node
->data
= kmap(page
);
917 result
= jparse(node
);
918 if (unlikely(result
!= 0)) {
923 check_jload(node
, page
);
925 page
= jnode_page(node
);
926 check_jload(node
, page
);
928 node
->data
= kmap(page
);
931 if (!is_writeout_mode())
932 /* We do not mark pages active if jload is called as a part of
933 * jnode_flush() or reiser4_write_logs(). Both jnode_flush()
934 * and write_logs() add no value to cached data, there is no
935 * sense to mark pages as active when they go to disk, it just
936 * confuses vm scanning routines because clean page could be
937 * moved out from inactive list as a result of this
938 * mark_page_accessed() call. */
939 mark_page_accessed(page
);
949 /* start asynchronous reading for given jnode's page. */
950 int jstartio(jnode
* node
)
954 page
= jnode_get_page_locked(node
, reiser4_ctx_gfp_mask_get());
956 return PTR_ERR(page
);
958 return jnode_start_read(node
, page
);
961 /* Initialize a node by calling appropriate plugin instead of reading
962 * node from disk as in jload(). */
963 int jinit_new(jnode
* node
, gfp_t gfp_flags
)
971 page
= jnode_get_page_locked(node
, gfp_flags
);
973 result
= PTR_ERR(page
);
977 SetPageUptodate(page
);
980 node
->data
= kmap(page
);
982 if (!jnode_is_parsed(node
)) {
983 jnode_plugin
*jplug
= jnode_ops(node
);
984 spin_lock_jnode(node
);
985 result
= jplug
->init(node
);
986 spin_unlock_jnode(node
);
991 JF_SET(node
, JNODE_PARSED
);
1001 /* release a reference to jnode acquired by jload(), decrement ->d_count */
1002 void jrelse_tail(jnode
* node
/* jnode to release references to */)
1004 assert("nikita-489", atomic_read(&node
->d_count
) > 0);
1005 atomic_dec(&node
->d_count
);
1006 /* release reference acquired in jload_gfp() or jinit_new() */
1008 if (jnode_is_unformatted(node
) || jnode_is_znode(node
))
1009 LOCK_CNT_DEC(d_refs
);
1012 /* drop reference to node data. When last reference is dropped, data are
1014 void jrelse(jnode
* node
/* jnode to release references to */)
1018 assert("nikita-487", node
!= NULL
);
1019 assert_spin_not_locked(&(node
->guard
));
1021 page
= jnode_page(node
);
1022 if (likely(page
!= NULL
)) {
1024 * it is safe not to lock jnode here, because at this point
1025 * @node->d_count is greater than zero (if jrelse() is used
1026 * correctly, that is). JNODE_PARSED may be not set yet, if,
1027 * for example, we got here as a result of error handling path
1028 * in jload(). Anyway, page cannot be detached by
1029 * reiser4_releasepage(). truncate will invalidate page
1030 * regardless, but this should not be a problem.
1037 /* called from jput() to wait for io completion */
1038 static void jnode_finish_io(jnode
* node
)
1042 assert("nikita-2922", node
!= NULL
);
1044 spin_lock_jnode(node
);
1045 page
= jnode_page(node
);
1047 page_cache_get(page
);
1048 spin_unlock_jnode(node
);
1049 wait_on_page_writeback(page
);
1050 page_cache_release(page
);
1052 spin_unlock_jnode(node
);
1056 * This is called by jput() when last reference to jnode is released. This is
1057 * separate function, because we want fast path of jput() to be inline and,
1060 void jput_final(jnode
* node
)
1064 /* A fast check for keeping node in cache. We always keep node in cache
1065 * if its page is present and node was not marked for deletion */
1066 if (jnode_page(node
) != NULL
&& !JF_ISSET(node
, JNODE_HEARD_BANSHEE
)) {
1070 r_i_p
= !JF_TEST_AND_SET(node
, JNODE_RIP
);
1072 * if r_i_p is true, we were first to set JNODE_RIP on this node. In
1073 * this case it is safe to access node after unlock.
1077 jnode_finish_io(node
);
1078 if (JF_ISSET(node
, JNODE_HEARD_BANSHEE
))
1079 /* node is removed from the tree. */
1082 jnode_try_drop(node
);
1084 /* if !r_i_p some other thread is already killing it */
1087 int jwait_io(jnode
* node
, int rw
)
1092 assert("zam-447", node
!= NULL
);
1093 assert("zam-448", jnode_page(node
) != NULL
);
1095 page
= jnode_page(node
);
1099 wait_on_page_locked(page
);
1101 assert("nikita-2227", rw
== WRITE
);
1102 wait_on_page_writeback(page
);
1104 if (PageError(page
))
1105 result
= RETERR(-EIO
);
1111 * jnode types and plugins.
1113 * jnode by itself is a "base type". There are several different jnode
1114 * flavors, called "jnode types" (see jnode_type for a list). Sometimes code
1115 * has to do different things based on jnode type. In the standard reiser4 way
1116 * this is done by having jnode plugin (see fs/reiser4/plugin.h:jnode_plugin).
1118 * Functions below deal with jnode types and define methods of jnode plugin.
1122 /* set jnode type. This is done during jnode initialization. */
1123 static void jnode_set_type(jnode
* node
, jnode_type type
)
1125 static unsigned long type_to_mask
[] = {
1126 [JNODE_UNFORMATTED_BLOCK
] = 1,
1127 [JNODE_FORMATTED_BLOCK
] = 0,
1129 [JNODE_IO_HEAD
] = 6,
1133 assert("zam-647", type
< LAST_JNODE_TYPE
);
1134 assert("nikita-2815", !jnode_is_loaded(node
));
1135 assert("nikita-3386", node
->state
== 0);
1137 node
->state
|= (type_to_mask
[type
] << JNODE_TYPE_1
);
1140 /* ->init() method of jnode plugin for jnodes that don't require plugin
1141 * specific initialization. */
1142 static int init_noinit(jnode
* node UNUSED_ARG
)
1147 /* ->parse() method of jnode plugin for jnodes that don't require plugin
1148 * specific parsing. */
1149 static int parse_noparse(jnode
* node UNUSED_ARG
)
1154 /* ->mapping() method for unformatted jnode */
1155 struct address_space
*mapping_jnode(const jnode
* node
)
1157 struct address_space
*map
;
1159 assert("nikita-2713", node
!= NULL
);
1161 /* mapping is stored in jnode */
1163 map
= node
->key
.j
.mapping
;
1164 assert("nikita-2714", map
!= NULL
);
1165 assert("nikita-2897", is_reiser4_inode(map
->host
));
1166 assert("nikita-2715", get_inode_oid(map
->host
) == node
->key
.j
.objectid
);
1170 /* ->index() method for unformatted jnodes */
1171 unsigned long index_jnode(const jnode
* node
)
1173 /* index is stored in jnode */
1174 return node
->key
.j
.index
;
1177 /* ->remove() method for unformatted jnodes */
1178 static inline void remove_jnode(jnode
* node
, reiser4_tree
* tree
)
1180 /* remove jnode from hash table and radix tree */
1181 if (node
->key
.j
.mapping
)
1182 unhash_unformatted_node_nolock(node
);
1185 /* ->mapping() method for znodes */
1186 static struct address_space
*mapping_znode(const jnode
* node
)
1188 /* all znodes belong to fake inode */
1189 return reiser4_get_super_fake(jnode_get_tree(node
)->super
)->i_mapping
;
1192 /* ->index() method for znodes */
1193 static unsigned long index_znode(const jnode
* node
)
1196 assert("nikita-3317", (1 << znode_shift_order
) < sizeof(znode
));
1198 /* index of znode is just its address (shifted) */
1199 addr
= (unsigned long)node
;
1200 return (addr
- PAGE_OFFSET
) >> znode_shift_order
;
1203 /* ->mapping() method for bitmap jnode */
1204 static struct address_space
*mapping_bitmap(const jnode
* node
)
1206 /* all bitmap blocks belong to special bitmap inode */
1207 return get_super_private(jnode_get_tree(node
)->super
)->bitmap
->
1211 /* ->index() method for jnodes that are indexed by address */
1212 static unsigned long index_is_address(const jnode
* node
)
1216 ind
= (unsigned long)node
;
1217 return ind
- PAGE_OFFSET
;
1220 /* resolve race with jput */
1221 jnode
*jnode_rip_sync(reiser4_tree
*tree
, jnode
*node
)
1224 * This is used as part of RCU-based jnode handling.
1226 * jlookup(), zlook(), zget(), and cbk_cache_scan_slots() have to work
1227 * with unreferenced jnodes (ones with ->x_count == 0). Hash table is
1228 * not protected during this, so concurrent thread may execute
1229 * zget-set-HEARD_BANSHEE-zput, or somehow else cause jnode to be
1230 * freed in jput_final(). To avoid such races, jput_final() sets
1231 * JNODE_RIP on jnode (under tree lock). All places that work with
1232 * unreferenced jnodes call this function. It checks for JNODE_RIP bit
1233 * (first without taking tree lock), and if this bit is set, releases
1234 * reference acquired by the current thread and returns NULL.
1236 * As a result, if jnode is being concurrently freed, NULL is returned
1237 * and caller should pretend that jnode wasn't found in the first
1240 * Otherwise it's safe to release "rcu-read-lock" and continue with
1243 if (unlikely(JF_ISSET(node
, JNODE_RIP
))) {
1244 read_lock_tree(tree
);
1245 if (JF_ISSET(node
, JNODE_RIP
)) {
1249 read_unlock_tree(tree
);
1254 reiser4_key
*jnode_build_key(const jnode
* node
, reiser4_key
* key
)
1256 struct inode
*inode
;
1260 assert("nikita-3092", node
!= NULL
);
1261 assert("nikita-3093", key
!= NULL
);
1262 assert("nikita-3094", jnode_is_unformatted(node
));
1264 off
= ((loff_t
) index_jnode(node
)) << PAGE_CACHE_SHIFT
;
1265 inode
= mapping_jnode(node
)->host
;
1267 if (node
->parent_item_id
!= 0)
1268 iplug
= item_plugin_by_id(node
->parent_item_id
);
1272 if (iplug
!= NULL
&& iplug
->f
.key_by_offset
)
1273 iplug
->f
.key_by_offset(inode
, off
, key
);
1277 fplug
= inode_file_plugin(inode
);
1278 assert("zam-1007", fplug
!= NULL
);
1279 assert("zam-1008", fplug
->key_by_inode
!= NULL
);
1281 fplug
->key_by_inode(inode
, off
, key
);
1287 /* ->parse() method for formatted nodes */
1288 static int parse_znode(jnode
* node
)
1290 return zparse(JZNODE(node
));
1293 /* ->delete() method for formatted nodes */
1294 static void delete_znode(jnode
* node
, reiser4_tree
* tree
)
1298 assert_rw_write_locked(&(tree
->tree_lock
));
1299 assert("vs-898", JF_ISSET(node
, JNODE_HEARD_BANSHEE
));
1302 assert("vs-899", z
->c_count
== 0);
1304 /* delete znode from sibling list. */
1305 sibling_list_remove(z
);
1307 znode_remove(z
, tree
);
1310 /* ->remove() method for formatted nodes */
1311 static int remove_znode(jnode
* node
, reiser4_tree
* tree
)
1315 assert_rw_write_locked(&(tree
->tree_lock
));
1318 if (z
->c_count
== 0) {
1319 /* detach znode from sibling list. */
1320 sibling_list_drop(z
);
1321 /* this is called with tree spin-lock held, so call
1322 znode_remove() directly (rather than znode_lock_remove()). */
1323 znode_remove(z
, tree
);
1326 return RETERR(-EBUSY
);
1329 /* ->init() method for formatted nodes */
1330 static int init_znode(jnode
* node
)
1335 /* call node plugin to do actual initialization */
1336 return z
->nplug
->init(z
);
1339 /* ->clone() method for formatted nodes */
1340 static jnode
*clone_formatted(jnode
* node
)
1344 assert("vs-1430", jnode_is_znode(node
));
1345 clone
= zalloc(reiser4_ctx_gfp_mask_get());
1347 return ERR_PTR(RETERR(-ENOMEM
));
1348 zinit(clone
, NULL
, current_tree
);
1349 jnode_set_block(ZJNODE(clone
), jnode_get_block(node
));
1350 /* ZJNODE(clone)->key.z is not initialized */
1351 clone
->level
= JZNODE(node
)->level
;
1353 return ZJNODE(clone
);
1356 /* jplug->clone for unformatted nodes */
1357 static jnode
*clone_unformatted(jnode
* node
)
1361 assert("vs-1431", jnode_is_unformatted(node
));
1364 return ERR_PTR(RETERR(-ENOMEM
));
1366 jnode_init(clone
, current_tree
, JNODE_UNFORMATTED_BLOCK
);
1367 jnode_set_block(clone
, jnode_get_block(node
));
1374 * Setup jnode plugin methods for various jnode types.
1376 jnode_plugin jnode_plugins
[LAST_JNODE_TYPE
] = {
1377 [JNODE_UNFORMATTED_BLOCK
] = {
1379 .type_id
= REISER4_JNODE_PLUGIN_TYPE
,
1380 .id
= JNODE_UNFORMATTED_BLOCK
,
1382 .label
= "unformatted",
1383 .desc
= "unformatted node",
1384 .linkage
= {NULL
, NULL
}
1386 .init
= init_noinit
,
1387 .parse
= parse_noparse
,
1388 .mapping
= mapping_jnode
,
1389 .index
= index_jnode
,
1390 .clone
= clone_unformatted
1392 [JNODE_FORMATTED_BLOCK
] = {
1394 .type_id
= REISER4_JNODE_PLUGIN_TYPE
,
1395 .id
= JNODE_FORMATTED_BLOCK
,
1397 .label
= "formatted",
1398 .desc
= "formatted tree node",
1399 .linkage
= {NULL
, NULL
}
1402 .parse
= parse_znode
,
1403 .mapping
= mapping_znode
,
1404 .index
= index_znode
,
1405 .clone
= clone_formatted
1409 .type_id
= REISER4_JNODE_PLUGIN_TYPE
,
1413 .desc
= "bitmap node",
1414 .linkage
= {NULL
, NULL
}
1416 .init
= init_noinit
,
1417 .parse
= parse_noparse
,
1418 .mapping
= mapping_bitmap
,
1419 .index
= index_is_address
,
1424 .type_id
= REISER4_JNODE_PLUGIN_TYPE
,
1425 .id
= JNODE_IO_HEAD
,
1429 .linkage
= {NULL
, NULL
}
1431 .init
= init_noinit
,
1432 .parse
= parse_noparse
,
1433 .mapping
= mapping_bitmap
,
1434 .index
= index_is_address
,
1439 .type_id
= REISER4_JNODE_PLUGIN_TYPE
,
1443 .desc
= "inode's builtin jnode",
1444 .linkage
= {NULL
, NULL
}
1455 * jnode destruction.
1457 * Thread may use a jnode after it acquired a reference to it. References are
1458 * counted in ->x_count field. Reference protects jnode from being
1459 * recycled. This is different from protecting jnode data (that are stored in
1460 * jnode page) from being evicted from memory. Data are protected by jload()
1461 * and released by jrelse().
1463 * If thread already possesses a reference to the jnode it can acquire another
1464 * one through jref(). Initial reference is obtained (usually) by locating
1465 * jnode in some indexing structure that depends on jnode type: formatted
1466 * nodes are kept in global hash table, where they are indexed by block
1467 * number, and also in the cbk cache. Unformatted jnodes are also kept in hash
1468 * table, which is indexed by oid and offset within file, and in per-inode
1471 * Reference to jnode is released by jput(). If last reference is released,
1472 * jput_final() is called. This function determines whether jnode has to be
1473 * deleted (this happens when corresponding node is removed from the file
1474 * system, jnode is marked with JNODE_HEARD_BANSHEE bit in this case), or it
1475 * should be just "removed" (deleted from memory).
1477 * Jnode destruction is signally delicate dance because of locking and RCU.
1481 * Returns true if jnode cannot be removed right now. This check is called
1482 * under tree lock. If it returns true, jnode is irrevocably committed to be
1485 static inline int jnode_is_busy(const jnode
* node
, jnode_type jtype
)
1487 /* if other thread managed to acquire a reference to this jnode, don't
1489 if (atomic_read(&node
->x_count
) > 0)
1491 /* also, don't free znode that has children in memory */
1492 if (jtype
== JNODE_FORMATTED_BLOCK
&& JZNODE(node
)->c_count
> 0)
1498 * this is called as part of removing jnode. Based on jnode type, call
1499 * corresponding function that removes jnode from indices and returns it back
1500 * to the appropriate slab (through RCU).
1503 jnode_remove(jnode
* node
, jnode_type jtype
, reiser4_tree
* tree
)
1506 case JNODE_UNFORMATTED_BLOCK
:
1507 remove_jnode(node
, tree
);
1514 case JNODE_FORMATTED_BLOCK
:
1515 remove_znode(node
, tree
);
1518 wrong_return_value("nikita-3196", "Wrong jnode type");
1523 * this is called as part of deleting jnode. Based on jnode type, call
1524 * corresponding function that removes jnode from indices and returns it back
1525 * to the appropriate slab (through RCU).
1527 * This differs from jnode_remove() only for formatted nodes---for them
1528 * sibling list handling is different for removal and deletion.
1531 jnode_delete(jnode
* node
, jnode_type jtype
, reiser4_tree
* tree UNUSED_ARG
)
1534 case JNODE_UNFORMATTED_BLOCK
:
1535 remove_jnode(node
, tree
);
1540 case JNODE_FORMATTED_BLOCK
:
1541 delete_znode(node
, tree
);
1545 wrong_return_value("nikita-3195", "Wrong jnode type");
1551 * remove jnode from the debugging list of all jnodes hanging off super-block.
1553 void jnode_list_remove(jnode
* node
)
1555 reiser4_super_info_data
*sbinfo
;
1557 sbinfo
= get_super_private(jnode_get_tree(node
)->super
);
1559 spin_lock_irq(&sbinfo
->all_guard
);
1560 assert("nikita-2422", !list_empty(&node
->jnodes
));
1561 list_del_init(&node
->jnodes
);
1562 spin_unlock_irq(&sbinfo
->all_guard
);
1567 * this is called by jput_final() to remove jnode when last reference to it is
1570 static int jnode_try_drop(jnode
* node
)
1576 assert("nikita-2491", node
!= NULL
);
1577 assert("nikita-2583", JF_ISSET(node
, JNODE_RIP
));
1579 tree
= jnode_get_tree(node
);
1580 jtype
= jnode_get_type(node
);
1582 spin_lock_jnode(node
);
1583 write_lock_tree(tree
);
1585 * if jnode has a page---leave it alone. Memory pressure will
1586 * eventually kill page and jnode.
1588 if (jnode_page(node
) != NULL
) {
1589 write_unlock_tree(tree
);
1590 spin_unlock_jnode(node
);
1591 JF_CLR(node
, JNODE_RIP
);
1592 return RETERR(-EBUSY
);
1595 /* re-check ->x_count under tree lock. */
1596 result
= jnode_is_busy(node
, jtype
);
1598 assert("nikita-2582", !JF_ISSET(node
, JNODE_HEARD_BANSHEE
));
1599 assert("jmacd-511/b", atomic_read(&node
->d_count
) == 0);
1601 spin_unlock_jnode(node
);
1602 /* no page and no references---despatch him. */
1603 jnode_remove(node
, jtype
, tree
);
1604 write_unlock_tree(tree
);
1605 jnode_free(node
, jtype
);
1607 /* busy check failed: reference was acquired by concurrent
1609 write_unlock_tree(tree
);
1610 spin_unlock_jnode(node
);
1611 JF_CLR(node
, JNODE_RIP
);
1616 /* jdelete() -- Delete jnode from the tree and file system */
1617 static int jdelete(jnode
* node
/* jnode to finish with */)
1624 assert("nikita-467", node
!= NULL
);
1625 assert("nikita-2531", JF_ISSET(node
, JNODE_RIP
));
1627 jtype
= jnode_get_type(node
);
1629 page
= jnode_lock_page(node
);
1630 assert_spin_locked(&(node
->guard
));
1632 tree
= jnode_get_tree(node
);
1634 write_lock_tree(tree
);
1635 /* re-check ->x_count under tree lock. */
1636 result
= jnode_is_busy(node
, jtype
);
1637 if (likely(!result
)) {
1638 assert("nikita-2123", JF_ISSET(node
, JNODE_HEARD_BANSHEE
));
1639 assert("jmacd-511", atomic_read(&node
->d_count
) == 0);
1644 * FIXME this is racy against jnode_extent_write().
1646 page_clear_jnode(page
, node
);
1648 spin_unlock_jnode(node
);
1650 jnode_delete(node
, jtype
, tree
);
1651 write_unlock_tree(tree
);
1652 jnode_free(node
, jtype
);
1653 /* @node is no longer valid pointer */
1655 reiser4_drop_page(page
);
1657 /* busy check failed: reference was acquired by concurrent
1659 JF_CLR(node
, JNODE_RIP
);
1660 write_unlock_tree(tree
);
1661 spin_unlock_jnode(node
);
1668 /* drop jnode on the floor.
1672 -EBUSY: failed to drop jnode, because there are still references to it
1674 0: successfully dropped jnode
1677 static int jdrop_in_tree(jnode
* node
, reiser4_tree
* tree
)
1683 assert("zam-602", node
!= NULL
);
1684 assert_rw_not_read_locked(&(tree
->tree_lock
));
1685 assert_rw_not_write_locked(&(tree
->tree_lock
));
1686 assert("nikita-2403", !JF_ISSET(node
, JNODE_HEARD_BANSHEE
));
1688 jtype
= jnode_get_type(node
);
1690 page
= jnode_lock_page(node
);
1691 assert_spin_locked(&(node
->guard
));
1693 write_lock_tree(tree
);
1695 /* re-check ->x_count under tree lock. */
1696 result
= jnode_is_busy(node
, jtype
);
1698 assert("nikita-2488", page
== jnode_page(node
));
1699 assert("nikita-2533", atomic_read(&node
->d_count
) == 0);
1701 assert("nikita-2126", !PageDirty(page
));
1702 assert("nikita-2127", PageUptodate(page
));
1703 assert("nikita-2181", PageLocked(page
));
1704 page_clear_jnode(page
, node
);
1706 spin_unlock_jnode(node
);
1707 jnode_remove(node
, jtype
, tree
);
1708 write_unlock_tree(tree
);
1709 jnode_free(node
, jtype
);
1711 reiser4_drop_page(page
);
1713 /* busy check failed: reference was acquired by concurrent
1715 JF_CLR(node
, JNODE_RIP
);
1716 write_unlock_tree(tree
);
1717 spin_unlock_jnode(node
);
1724 /* This function frees jnode "if possible". In particular, [dcx]_count has to
1725 be 0 (where applicable). */
1726 void jdrop(jnode
* node
)
1728 jdrop_in_tree(node
, jnode_get_tree(node
));
1731 /* IO head jnode implementation; The io heads are simple j-nodes with limited
1732 functionality (these j-nodes are not in any hash table) just for reading
1733 from and writing to disk. */
1735 jnode
*reiser4_alloc_io_head(const reiser4_block_nr
* block
)
1737 jnode
*jal
= jalloc();
1740 jnode_init(jal
, current_tree
, JNODE_IO_HEAD
);
1741 jnode_set_block(jal
, block
);
1749 void reiser4_drop_io_head(jnode
* node
)
1751 assert("zam-648", jnode_get_type(node
) == JNODE_IO_HEAD
);
1757 /* protect keep jnode data from reiser4_releasepage() */
1758 void pin_jnode_data(jnode
* node
)
1760 assert("zam-671", jnode_page(node
) != NULL
);
1761 page_cache_get(jnode_page(node
));
1764 /* make jnode data free-able again */
1765 void unpin_jnode_data(jnode
* node
)
1767 assert("zam-672", jnode_page(node
) != NULL
);
1768 page_cache_release(jnode_page(node
));
1771 struct address_space
*jnode_get_mapping(const jnode
* node
)
1773 assert("nikita-3162", node
!= NULL
);
1774 return jnode_ops(node
)->mapping(node
);
1778 /* debugging aid: jnode invariant */
1779 int jnode_invariant_f(const jnode
* node
, char const **msg
)
1781 #define _ergo(ant, con) \
1782 ((*msg) = "{" #ant "} ergo {" #con "}", ergo((ant), (con)))
1783 #define _check(exp) ((*msg) = #exp, (exp))
1785 return _check(node
!= NULL
) &&
1786 /* [jnode-queued] */
1787 /* only relocated node can be queued, except that when znode
1788 * is being deleted, its JNODE_RELOC bit is cleared */
1789 _ergo(JF_ISSET(node
, JNODE_FLUSH_QUEUED
),
1790 JF_ISSET(node
, JNODE_RELOC
) ||
1791 JF_ISSET(node
, JNODE_HEARD_BANSHEE
)) &&
1792 _check(node
->jnodes
.prev
!= NULL
) &&
1793 _check(node
->jnodes
.next
!= NULL
) &&
1794 /* [jnode-dirty] invariant */
1795 /* dirty inode is part of atom */
1796 _ergo(JF_ISSET(node
, JNODE_DIRTY
), node
->atom
!= NULL
) &&
1797 /* [jnode-oid] invariant */
1798 /* for unformatted node ->objectid and ->mapping fields are
1800 _ergo(jnode_is_unformatted(node
) && node
->key
.j
.mapping
!= NULL
,
1801 node
->key
.j
.objectid
==
1802 get_inode_oid(node
->key
.j
.mapping
->host
)) &&
1803 /* [jnode-atom-valid] invariant */
1804 /* node atom has valid state */
1805 _ergo(node
->atom
!= NULL
, node
->atom
->stage
!= ASTAGE_INVALID
) &&
1806 /* [jnode-page-binding] invariant */
1807 /* if node points to page, it points back to node */
1808 _ergo(node
->pg
!= NULL
, jprivate(node
->pg
) == node
) &&
1809 /* [jnode-refs] invariant */
1810 /* only referenced jnode can be loaded */
1811 _check(atomic_read(&node
->x_count
) >= atomic_read(&node
->d_count
));
1815 static const char *jnode_type_name(jnode_type type
)
1818 case JNODE_UNFORMATTED_BLOCK
:
1819 return "unformatted";
1820 case JNODE_FORMATTED_BLOCK
:
1828 case LAST_JNODE_TYPE
:
1831 static char unknown
[30];
1833 sprintf(unknown
, "unknown %i", type
);
/*
 * Expands to the name of state bit @flag followed by "|" when the bit is set
 * on @node, and to "" otherwise. The name is the stringified @flag token
 * with its leading "JNODE_" prefix skipped, so @flag must be spelled as a
 * JNODE_* constant at the call site.
 */
#define jnode_state_name(node, flag)					\
	(JF_ISSET((node), (flag)) ? &(#flag "|")[sizeof("JNODE_") - 1] : "")
1842 /* debugging aid: output human readable information about @node */
1843 static void info_jnode(const char *prefix
/* prefix to print */ ,
1844 const jnode
* node
/* node to print */)
1846 assert("umka-068", prefix
!= NULL
);
1849 printk("%s: null\n", prefix
);
1854 ("%s: %p: state: %lx: [%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s], level: %i,"
1855 " block: %s, d_count: %d, x_count: %d, "
1856 "pg: %p, atom: %p, lock: %i:%i, type: %s, ", prefix
, node
,
1858 jnode_state_name(node
, JNODE_PARSED
),
1859 jnode_state_name(node
, JNODE_HEARD_BANSHEE
),
1860 jnode_state_name(node
, JNODE_LEFT_CONNECTED
),
1861 jnode_state_name(node
, JNODE_RIGHT_CONNECTED
),
1862 jnode_state_name(node
, JNODE_ORPHAN
),
1863 jnode_state_name(node
, JNODE_CREATED
),
1864 jnode_state_name(node
, JNODE_RELOC
),
1865 jnode_state_name(node
, JNODE_OVRWR
),
1866 jnode_state_name(node
, JNODE_DIRTY
),
1867 jnode_state_name(node
, JNODE_IS_DYING
),
1868 jnode_state_name(node
, JNODE_RIP
),
1869 jnode_state_name(node
, JNODE_MISSED_IN_CAPTURE
),
1870 jnode_state_name(node
, JNODE_WRITEBACK
),
1871 jnode_state_name(node
, JNODE_NEW
),
1872 jnode_state_name(node
, JNODE_DKSET
),
1873 jnode_state_name(node
, JNODE_REPACK
),
1874 jnode_state_name(node
, JNODE_CLUSTER_PAGE
),
1875 jnode_get_level(node
), sprint_address(jnode_get_block(node
)),
1876 atomic_read(&node
->d_count
), atomic_read(&node
->x_count
),
1877 jnode_page(node
), node
->atom
, 0, 0,
1878 jnode_type_name(jnode_get_type(node
)));
1879 if (jnode_is_unformatted(node
)) {
1880 printk("inode: %llu, index: %lu, ",
1881 node
->key
.j
.objectid
, node
->key
.j
.index
);
1885 /* debugging aid: check znode invariant and panic if it doesn't hold */
1886 static int jnode_invariant(const jnode
* node
, int tlocked
, int jlocked
)
1888 char const *failed_msg
;
1892 tree
= jnode_get_tree(node
);
1894 assert("umka-063312", node
!= NULL
);
1895 assert("umka-064321", tree
!= NULL
);
1897 if (!jlocked
&& !tlocked
)
1898 spin_lock_jnode((jnode
*) node
);
1900 read_lock_tree(jnode_get_tree(node
));
1901 result
= jnode_invariant_f(node
, &failed_msg
);
1903 info_jnode("corrupted node", node
);
1904 warning("jmacd-555", "Condition %s failed", failed_msg
);
1907 read_unlock_tree(jnode_get_tree(node
));
1908 if (!jlocked
&& !tlocked
)
1909 spin_unlock_jnode((jnode
*) node
);
1913 #endif /* REISER4_DEBUG */
1915 /* Make Linus happy.
1917 c-indentation-style: "K&R"