On Tue, Nov 06, 2007 at 02:33:53AM -0800, akpm@linux-foundation.org wrote:
[mmotm.git] / fs / reiser4 / jnode.h
blobc7ad1b720531f25202d0180f6262a6304de011d0
1 /* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
2 * reiser4/README */
4 /* Declaration of jnode. See jnode.c for details. */
6 #ifndef __JNODE_H__
7 #define __JNODE_H__
9 #include "forward.h"
10 #include "type_safe_hash.h"
11 #include "txnmgr.h"
12 #include "key.h"
13 #include "debug.h"
14 #include "dformat.h"
15 #include "page_cache.h"
16 #include "context.h"
18 #include "plugin/plugin.h"
20 #include <linux/fs.h>
21 #include <linux/mm.h>
22 #include <linux/spinlock.h>
23 #include <asm/atomic.h>
24 #include <linux/bitops.h>
25 #include <linux/list.h>
26 #include <linux/rcupdate.h>
/* declare hash table of jnodes (jnodes proper, that is, unformatted
   nodes) */
TYPE_SAFE_HASH_DECLARE(j, jnode);

/* declare hash table of znodes */
TYPE_SAFE_HASH_DECLARE(z, znode);
35 struct jnode_key {
36 __u64 objectid;
37 unsigned long index;
38 struct address_space *mapping;
/*
   Jnode is the "base class" of other nodes in reiser4. It also happens to
   be exactly the node we use for unformatted tree nodes.

   Jnode provides the following basic functionality:

   . reference counting and indexing.

   . integration with page cache. Jnode has ->pg reference to which page can
   be attached.

   . interface to transaction manager. It is jnode that is kept in transaction
   manager lists, attached to atoms, etc. (NOTE-NIKITA one may argue that this
   means, there should be special type of jnode for inode.)

   Locking:

   Spin lock: the following fields are protected by the per-jnode spin lock:

    ->state
    ->atom
    ->capture_link

   The following fields are protected by the global tree lock:

    ->link
    ->key.z (content of ->key.z is only changed in znode_rehash())
    ->key.j

   Atomic counters

    ->x_count
    ->d_count

   ->pg, and ->data are protected by spin lock for unused jnode and are
   immutable for used jnode (one for which fs/reiser4/vfs_ops.c:releasable()
   is false).

   ->tree is immutable after creation

   Unclear

   ->blocknr: should be under jnode spin-lock, but current interface is based
   on passing of block address.

   If you ever need to spin lock two nodes at once, do this in "natural"
   memory order: lock znode with lower address first. (See lock_two_nodes().)

   Invariants involving this data-type:

    [jnode-dirty]
    [jnode-refs]
    [jnode-oid]
    [jnode-queued]
    [jnode-atom-valid]
    [jnode-page-binding]
*/
99 struct jnode {
100 #if REISER4_DEBUG
101 #define JMAGIC 0x52654973 /* "ReIs" */
102 int magic;
103 #endif
104 /* FIRST CACHE LINE (16 bytes): data used by jload */
106 /* jnode's state: bitwise flags from the reiser4_jnode_state enum. */
107 /* 0 */ unsigned long state;
109 /* lock, protecting jnode's fields. */
110 /* 4 */ spinlock_t load;
112 /* counter of references to jnode itself. Increased on jref().
113 Decreased on jput().
115 /* 8 */ atomic_t x_count;
117 /* counter of references to jnode's data. Pin data page(s) in
118 memory while this is greater than 0. Increased on jload().
119 Decreased on jrelse().
121 /* 12 */ atomic_t d_count;
123 /* SECOND CACHE LINE: data used by hash table lookups */
125 /* 16 */ union {
126 /* znodes are hashed by block number */
127 reiser4_block_nr z;
128 /* unformatted nodes are hashed by mapping plus offset */
129 struct jnode_key j;
130 } key;
132 /* THIRD CACHE LINE */
134 /* 32 */ union {
135 /* pointers to maintain hash-table */
136 z_hash_link z;
137 j_hash_link j;
138 } link;
140 /* pointer to jnode page. */
141 /* 36 */ struct page *pg;
142 /* pointer to node itself. This is page_address(node->pg) when page is
143 attached to the jnode
145 /* 40 */ void *data;
147 /* 44 */ reiser4_tree *tree;
149 /* FOURTH CACHE LINE: atom related fields */
151 /* 48 */ spinlock_t guard;
153 /* atom the block is in, if any */
154 /* 52 */ txn_atom *atom;
156 /* capture list */
157 /* 56 */ struct list_head capture_link;
159 /* FIFTH CACHE LINE */
161 /* 64 */ struct rcu_head rcu;
162 /* crosses cache line */
164 /* SIXTH CACHE LINE */
166 /* the real blocknr (where io is going to/from) */
167 /* 80 */ reiser4_block_nr blocknr;
168 /* Parent item type, unformatted and CRC need it for
169 * offset => key conversion. */
170 /* NOTE: this parent_item_id looks like jnode type. */
171 /* 88 */ reiser4_plugin_id parent_item_id;
172 /* 92 */
173 #if REISER4_DEBUG
174 /* list of all jnodes for debugging purposes. */
175 struct list_head jnodes;
176 /* how many times this jnode was written in one transaction */
177 int written;
178 /* this indicates which atom's list the jnode is on */
179 atom_list list;
180 #endif
181 } __attribute__ ((aligned(16)));
/*
 * jnode types. Enumeration of existing jnode types.
 *
 * LAST_JNODE_TYPE is not a type; it is the number of real types and is also
 * used as the "invalid" entry in the jnode_get_type() lookup table.
 */
typedef enum {
	JNODE_UNFORMATTED_BLOCK,	/* unformatted block */
	JNODE_FORMATTED_BLOCK,		/* formatted block, znode */
	JNODE_BITMAP,			/* bitmap */
	JNODE_IO_HEAD,			/* jnode representing a block in the
					 * wandering log */
	JNODE_INODE,			/* jnode embedded into inode */
	LAST_JNODE_TYPE
} jnode_type;
/* jnode states: each enumerator is a bit number within jnode->state, used
   through JF_SET(), JF_CLR(), JF_ISSET() and JF_TEST_AND_SET(). */
typedef enum {
	/* jnode's page is loaded and data checked */
	JNODE_PARSED = 0,
	/* node was deleted, not all locks on it were released. This
	   node is empty and is going to be removed from the tree
	   shortly. */
	JNODE_HEARD_BANSHEE = 1,
	/* left sibling pointer is valid */
	JNODE_LEFT_CONNECTED = 2,
	/* right sibling pointer is valid */
	JNODE_RIGHT_CONNECTED = 3,

	/* znode was just created and doesn't yet have a pointer from
	   its parent */
	JNODE_ORPHAN = 4,

	/* this node was created by its transaction and has not been assigned
	   a block address. */
	JNODE_CREATED = 5,

	/* this node is currently relocated */
	JNODE_RELOC = 6,
	/* this node is currently wandered */
	JNODE_OVRWR = 7,

	/* this znode has been modified */
	JNODE_DIRTY = 8,

	/* znode lock is being invalidated */
	JNODE_IS_DYING = 9,

	/* THIS PLACE IS INTENTIONALLY LEFT BLANK */

	/* jnode is queued for flushing. */
	JNODE_FLUSH_QUEUED = 12,

	/* In the following bits jnode type is encoded. */
	JNODE_TYPE_1 = 13,
	JNODE_TYPE_2 = 14,
	JNODE_TYPE_3 = 15,

	/* jnode is being destroyed */
	JNODE_RIP = 16,

	/* znode was not captured during locking (it might so be because
	   ->level != LEAF_LEVEL and lock_mode == READ_LOCK) */
	JNODE_MISSED_IN_CAPTURE = 17,

	/* write is in progress */
	JNODE_WRITEBACK = 18,

	/* FIXME: now it is used by crypto-compress plugin only */
	JNODE_NEW = 19,

	/* delimiting keys are already set for this znode. */
	JNODE_DKSET = 20,

	/* when this bit is set page and jnode can not be disconnected */
	JNODE_WRITE_PREPARED = 21,

	JNODE_CLUSTER_PAGE = 22,
	/* Jnode is marked for repacking, that means the reiser4 flush and the
	 * block allocator should process this node special way  */
	JNODE_REPACK = 23,
	/* node should be converted by flush in squalloc phase */
	JNODE_CONVERTIBLE = 24,
	/*
	 * When jnode is dirtied for the first time in given transaction,
	 * do_jnode_make_dirty() checks whether this jnode can possibly become
	 * member of overwrite set. If so, this bit is set, and one block is
	 * reserved in the ->flush_reserved space of atom.
	 *
	 * This block is "used" (and JNODE_FLUSH_RESERVED bit is cleared) when
	 *
	 *     (1) flush decides that we want this block to go into relocate
	 *     set after all.
	 *
	 *     (2) wandering log is allocated (by log writer)
	 *
	 *     (3) extent is allocated
	 *
	 */
	JNODE_FLUSH_RESERVED = 29
} reiser4_jnode_state;
282 /* Macros for accessing the jnode state. */
284 static inline void JF_CLR(jnode * j, int f)
286 assert("unknown-1", j->magic == JMAGIC);
287 clear_bit(f, &j->state);
289 static inline int JF_ISSET(const jnode * j, int f)
291 assert("unknown-2", j->magic == JMAGIC);
292 return test_bit(f, &((jnode *) j)->state);
294 static inline void JF_SET(jnode * j, int f)
296 assert("unknown-3", j->magic == JMAGIC);
297 set_bit(f, &j->state);
300 static inline int JF_TEST_AND_SET(jnode * j, int f)
302 assert("unknown-4", j->magic == JMAGIC);
303 return test_and_set_bit(f, &j->state);
306 static inline void spin_lock_jnode(jnode *node)
308 /* check that spinlocks of lower priorities are not held */
309 assert("", (LOCK_CNT_NIL(rw_locked_tree) &&
310 LOCK_CNT_NIL(spin_locked_txnh) &&
311 LOCK_CNT_NIL(spin_locked_zlock) &&
312 LOCK_CNT_NIL(rw_locked_dk) &&
313 LOCK_CNT_LT(spin_locked_jnode, 2)));
315 spin_lock(&(node->guard));
317 LOCK_CNT_INC(spin_locked_jnode);
318 LOCK_CNT_INC(spin_locked);
321 static inline void spin_unlock_jnode(jnode *node)
323 assert_spin_locked(&(node->guard));
324 assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_jnode));
325 assert("nikita-1376", LOCK_CNT_GTZ(spin_locked));
327 LOCK_CNT_DEC(spin_locked_jnode);
328 LOCK_CNT_DEC(spin_locked);
330 spin_unlock(&(node->guard));
333 static inline int jnode_is_in_deleteset(const jnode * node)
335 return JF_ISSET(node, JNODE_RELOC);
338 extern int init_jnodes(void);
339 extern void done_jnodes(void);
341 /* Jnode routines */
342 extern jnode *jalloc(void);
343 extern void jfree(jnode * node) NONNULL;
344 extern jnode *jclone(jnode *);
345 extern jnode *jlookup(reiser4_tree * tree,
346 oid_t objectid, unsigned long ind) NONNULL;
347 extern jnode *jfind(struct address_space *, unsigned long index) NONNULL;
348 extern jnode *jnode_by_page(struct page *pg) NONNULL;
349 extern jnode *jnode_of_page(struct page *pg) NONNULL;
350 void jnode_attach_page(jnode * node, struct page *pg);
352 void unhash_unformatted_jnode(jnode *);
353 extern jnode *page_next_jnode(jnode * node) NONNULL;
354 extern void jnode_init(jnode * node, reiser4_tree * tree, jnode_type) NONNULL;
355 extern void jnode_make_dirty(jnode * node) NONNULL;
356 extern void jnode_make_clean(jnode * node) NONNULL;
357 extern void jnode_make_wander_nolock(jnode * node) NONNULL;
358 extern void jnode_make_wander(jnode *) NONNULL;
359 extern void znode_make_reloc(znode * , flush_queue_t *) NONNULL;
360 extern void unformatted_make_reloc(jnode *, flush_queue_t *) NONNULL;
361 extern struct address_space *jnode_get_mapping(const jnode * node) NONNULL;
364 * jnode_get_block
365 * @node: jnode to query
368 static inline const reiser4_block_nr *jnode_get_block(const jnode *node)
370 assert("nikita-528", node != NULL);
372 return &node->blocknr;
376 * jnode_set_block
377 * @node: jnode to update
378 * @blocknr: new block nr
380 static inline void jnode_set_block(jnode *node, const reiser4_block_nr *blocknr)
382 assert("nikita-2020", node != NULL);
383 assert("umka-055", blocknr != NULL);
384 node->blocknr = *blocknr;
388 /* block number for IO. Usually this is the same as jnode_get_block(), unless
389 * jnode was emergency flushed---then block number chosen by eflush is
390 * used. */
391 static inline const reiser4_block_nr *jnode_get_io_block(jnode * node)
393 assert("nikita-2768", node != NULL);
394 assert_spin_locked(&(node->guard));
396 return jnode_get_block(node);
399 /* Jnode flush interface. */
400 extern reiser4_blocknr_hint *reiser4_pos_hint(flush_pos_t *pos);
401 extern flush_queue_t *reiser4_pos_fq(flush_pos_t *pos);
/* FIXME-VS: these are used in plugin/item/extent.c */

/* does extent_get_block have to be called */
/* NOTE(review): JNODE_MAPPED is not among the reiser4_jnode_state values
   visible in this header, so these two macros would not compile if
   expanded -- confirm whether they are dead and can be removed. */
#define jnode_mapped(node)     JF_ISSET (node, JNODE_MAPPED)
#define jnode_set_mapped(node) JF_SET (node, JNODE_MAPPED)

/* the node should be converted during flush squalloc phase */
#define jnode_convertible(node)        JF_ISSET (node, JNODE_CONVERTIBLE)
#define jnode_set_convertible(node)    JF_SET (node, JNODE_CONVERTIBLE)
/* Macros to convert from jnode to znode, znode to jnode.  These are macros
   because C doesn't allow overloading of const prototypes.

   ZJNODE(x) yields the address of the ->zjnode member of x.
   JZNODE(x) evaluates x exactly once, asserts that it is a znode and
   yields it cast to znode *. */
#define ZJNODE(x) (&(x)->zjnode)
#define JZNODE(x)						\
({								\
	typeof(x) __tmp_x;					\
								\
	__tmp_x = (x);						\
	assert("jmacd-1300", jnode_is_znode(__tmp_x));		\
	(znode*) __tmp_x;					\
})
425 extern int jnodes_tree_init(reiser4_tree * tree);
426 extern int jnodes_tree_done(reiser4_tree * tree);
428 #if REISER4_DEBUG
430 extern int znode_is_any_locked(const znode * node);
431 extern void jnode_list_remove(jnode * node);
433 #else
435 #define jnode_list_remove(node) noop
437 #endif
439 int znode_is_root(const znode * node) NONNULL;
441 /* bump reference counter on @node */
442 static inline void add_x_ref(jnode * node/* node to increase x_count of */)
444 assert("nikita-1911", node != NULL);
446 atomic_inc(&node->x_count);
447 LOCK_CNT_INC(x_refs);
450 static inline void dec_x_ref(jnode * node)
452 assert("nikita-3215", node != NULL);
453 assert("nikita-3216", atomic_read(&node->x_count) > 0);
455 atomic_dec(&node->x_count);
456 assert("nikita-3217", LOCK_CNT_GTZ(x_refs));
457 LOCK_CNT_DEC(x_refs);
460 /* jref() - increase counter of references to jnode/znode (x_count) */
461 static inline jnode *jref(jnode * node)
463 assert("jmacd-508", (node != NULL) && !IS_ERR(node));
464 add_x_ref(node);
465 return node;
468 /* get the page of jnode */
469 static inline struct page *jnode_page(const jnode * node)
471 return node->pg;
474 /* return pointer to jnode data */
475 static inline char *jdata(const jnode * node)
477 assert("nikita-1415", node != NULL);
478 assert("nikita-3198", jnode_page(node) != NULL);
479 return node->data;
482 static inline int jnode_is_loaded(const jnode * node)
484 assert("zam-506", node != NULL);
485 return atomic_read(&node->d_count) > 0;
488 extern void page_clear_jnode(struct page *page, jnode * node) NONNULL;
490 static inline void jnode_set_reloc(jnode * node)
492 assert("nikita-2431", node != NULL);
493 assert("nikita-2432", !JF_ISSET(node, JNODE_OVRWR));
494 JF_SET(node, JNODE_RELOC);
497 /* jload/jwrite/junload give a bread/bwrite/brelse functionality for jnodes */
499 extern int jload_gfp(jnode *, gfp_t, int do_kmap) NONNULL;
501 static inline int jload(jnode *node)
503 return jload_gfp(node, reiser4_ctx_gfp_mask_get(), 1);
506 extern int jinit_new(jnode *, gfp_t) NONNULL;
507 extern int jstartio(jnode *) NONNULL;
509 extern void jdrop(jnode *) NONNULL;
510 extern int jwait_io(jnode *, int rw) NONNULL;
512 void jload_prefetch(jnode *);
514 extern jnode *reiser4_alloc_io_head(const reiser4_block_nr * block) NONNULL;
515 extern void reiser4_drop_io_head(jnode * node) NONNULL;
517 static inline reiser4_tree *jnode_get_tree(const jnode * node)
519 assert("nikita-2691", node != NULL);
520 return node->tree;
523 extern void pin_jnode_data(jnode *);
524 extern void unpin_jnode_data(jnode *);
526 static inline jnode_type jnode_get_type(const jnode * node)
528 static const unsigned long state_mask =
529 (1 << JNODE_TYPE_1) | (1 << JNODE_TYPE_2) | (1 << JNODE_TYPE_3);
531 static jnode_type mask_to_type[] = {
532 /* JNODE_TYPE_3 : JNODE_TYPE_2 : JNODE_TYPE_1 */
534 /* 000 */
535 [0] = JNODE_FORMATTED_BLOCK,
536 /* 001 */
537 [1] = JNODE_UNFORMATTED_BLOCK,
538 /* 010 */
539 [2] = JNODE_BITMAP,
540 /* 011 */
541 [3] = LAST_JNODE_TYPE, /*invalid */
542 /* 100 */
543 [4] = JNODE_INODE,
544 /* 101 */
545 [5] = LAST_JNODE_TYPE,
546 /* 110 */
547 [6] = JNODE_IO_HEAD,
548 /* 111 */
549 [7] = LAST_JNODE_TYPE, /* invalid */
552 return mask_to_type[(node->state & state_mask) >> JNODE_TYPE_1];
555 /* returns true if node is a znode */
556 static inline int jnode_is_znode(const jnode * node)
558 return jnode_get_type(node) == JNODE_FORMATTED_BLOCK;
561 static inline int jnode_is_flushprepped(jnode * node)
563 assert("jmacd-78212", node != NULL);
564 assert_spin_locked(&(node->guard));
565 return !JF_ISSET(node, JNODE_DIRTY) || JF_ISSET(node, JNODE_RELOC) ||
566 JF_ISSET(node, JNODE_OVRWR);
569 /* Return true if @node has already been processed by the squeeze and allocate
570 process. This implies the block address has been finalized for the
571 duration of this atom (or it is clean and will remain in place). If this
572 returns true you may use the block number as a hint. */
573 static inline int jnode_check_flushprepped(jnode * node)
575 int result;
577 /* It must be clean or relocated or wandered. New allocations are set
578 * to relocate. */
579 spin_lock_jnode(node);
580 result = jnode_is_flushprepped(node);
581 spin_unlock_jnode(node);
582 return result;
585 /* returns true if node is unformatted */
586 static inline int jnode_is_unformatted(const jnode * node)
588 assert("jmacd-0123", node != NULL);
589 return jnode_get_type(node) == JNODE_UNFORMATTED_BLOCK;
592 /* returns true if node represents a cluster cache page */
593 static inline int jnode_is_cluster_page(const jnode * node)
595 assert("edward-50", node != NULL);
596 return (JF_ISSET(node, JNODE_CLUSTER_PAGE));
599 /* returns true is node is builtin inode's jnode */
600 static inline int jnode_is_inode(const jnode * node)
602 assert("vs-1240", node != NULL);
603 return jnode_get_type(node) == JNODE_INODE;
606 static inline jnode_plugin *jnode_ops_of(const jnode_type type)
608 assert("nikita-2367", type < LAST_JNODE_TYPE);
609 return jnode_plugin_by_id((reiser4_plugin_id) type);
612 static inline jnode_plugin *jnode_ops(const jnode * node)
614 assert("nikita-2366", node != NULL);
616 return jnode_ops_of(jnode_get_type(node));
619 /* Get the index of a block. */
620 static inline unsigned long jnode_get_index(jnode * node)
622 return jnode_ops(node)->index(node);
625 /* return true if "node" is the root */
626 static inline int jnode_is_root(const jnode * node)
628 return jnode_is_znode(node) && znode_is_root(JZNODE(node));
631 extern struct address_space *mapping_jnode(const jnode * node);
632 extern unsigned long index_jnode(const jnode * node);
634 static inline void jput(jnode * node);
635 extern void jput_final(jnode * node);
637 /* bump data counter on @node */
638 static inline void add_d_ref(jnode * node/* node to increase d_count of */)
640 assert("nikita-1962", node != NULL);
642 atomic_inc(&node->d_count);
643 if (jnode_is_unformatted(node) || jnode_is_znode(node))
644 LOCK_CNT_INC(d_refs);
647 /* jput() - decrement x_count reference counter on znode.
649 Count may drop to 0, jnode stays in cache until memory pressure causes the
650 eviction of its page. The c_count variable also ensures that children are
651 pressured out of memory before the parent. The jnode remains hashed as
652 long as the VM allows its page to stay in memory.
654 static inline void jput(jnode * node)
656 assert("jmacd-509", node != NULL);
657 assert("jmacd-510", atomic_read(&node->x_count) > 0);
658 assert("zam-926", reiser4_schedulable());
659 LOCK_CNT_DEC(x_refs);
661 rcu_read_lock();
663 * we don't need any kind of lock here--jput_final() uses RCU.
665 if (unlikely(atomic_dec_and_test(&node->x_count)))
666 jput_final(node);
667 else
668 rcu_read_unlock();
669 assert("nikita-3473", reiser4_schedulable());
672 extern void jrelse(jnode * node);
673 extern void jrelse_tail(jnode * node);
675 extern jnode *jnode_rip_sync(reiser4_tree * t, jnode * node);
677 /* resolve race with jput */
678 static inline jnode *jnode_rip_check(reiser4_tree * tree, jnode * node)
680 if (unlikely(JF_ISSET(node, JNODE_RIP)))
681 node = jnode_rip_sync(tree, node);
682 return node;
685 extern reiser4_key *jnode_build_key(const jnode *node, reiser4_key * key);
687 #if REISER4_DEBUG
688 extern int jnode_invariant_f(const jnode *node, char const **msg);
689 #endif
691 extern jnode_plugin jnode_plugins[LAST_JNODE_TYPE];
693 /* __JNODE_H__ */
694 #endif
696 /* Make Linus happy.
697 Local variables:
698 c-indentation-style: "K&R"
699 mode-name: "LC"
700 c-basic-offset: 8
701 tab-width: 8
702 fill-column: 120
703 End: