1 /* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
4 /* Declaration of jnode. See jnode.c for details. */
10 #include "type_safe_hash.h"
15 #include "page_cache.h"
18 #include "plugin/plugin.h"
22 #include <linux/spinlock.h>
23 #include <asm/atomic.h>
24 #include <linux/bitops.h>
25 #include <linux/list.h>
26 #include <linux/rcupdate.h>
28 /* declare hash table of jnodes (jnodes proper, that is, unformatted
30 TYPE_SAFE_HASH_DECLARE(j
, jnode
);
32 /* declare hash table of znodes */
33 TYPE_SAFE_HASH_DECLARE(z
, znode
);
38 struct address_space
*mapping
;
42 Jnode is the "base class" of other nodes in reiser4. It also happens to
43 be exactly the node we use for unformatted tree nodes.
45 Jnode provides the following basic functionality:
47 . reference counting and indexing.
49 . integration with page cache. Jnode has ->pg reference to which page can
52 . interface to transaction manager. It is jnode that is kept in transaction
53 manager lists, attached to atoms, etc. (NOTE-NIKITA one may argue that this
54 means, there should be special type of jnode for inode.)
58 Spin lock: the following fields are protected by the per-jnode spin lock:
64 Following fields are protected by the global tree lock:
67 ->key.z (content of ->key.z is only changed in znode_rehash())
75 ->pg, and ->data are protected by spin lock for unused jnode and are
76 immutable for used jnode (one for which fs/reiser4/vfs_ops.c:releasable()
79 ->tree is immutable after creation
83 ->blocknr: should be under jnode spin-lock, but current interface is based
84 on passing of block address.
86 If you ever need to spin lock two nodes at once, do this in "natural"
87 memory order: lock znode with lower address first. (See lock_two_nodes().)
89 Invariants involving this data-type:
101 #define JMAGIC 0x52654973 /* "ReIs" */
104 /* FIRST CACHE LINE (16 bytes): data used by jload */
106 /* jnode's state: bitwise flags from the reiser4_jnode_state enum. */
107 /* 0 */ unsigned long state
;
109 /* lock, protecting jnode's fields. */
110 /* 4 */ spinlock_t load
;
112 /* counter of references to jnode itself. Increased on jref().
115 /* 8 */ atomic_t x_count
;
117 /* counter of references to jnode's data. Pin data page(s) in
118 memory while this is greater than 0. Increased on jload().
119 Decreased on jrelse().
121 /* 12 */ atomic_t d_count
;
123 /* SECOND CACHE LINE: data used by hash table lookups */
126 /* znodes are hashed by block number */
128 /* unformatted nodes are hashed by mapping plus offset */
132 /* THIRD CACHE LINE */
135 /* pointers to maintain hash-table */
140 /* pointer to jnode page. */
141 /* 36 */ struct page
*pg
;
142 /* pointer to node itself. This is page_address(node->pg) when page is
143 attached to the jnode
147 /* 44 */ reiser4_tree
*tree
;
149 /* FOURTH CACHE LINE: atom related fields */
151 /* 48 */ spinlock_t guard
;
153 /* atom the block is in, if any */
154 /* 52 */ txn_atom
*atom
;
157 /* 56 */ struct list_head capture_link
;
159 /* FIFTH CACHE LINE */
161 /* 64 */ struct rcu_head rcu
;
162 /* crosses cache line */
164 /* SIXTH CACHE LINE */
166 /* the real blocknr (where io is going to/from) */
167 /* 80 */ reiser4_block_nr blocknr
;
168 /* Parent item type, unformatted and CRC need it for offset => key conversion. */
169 /* NOTE: this parent_item_id looks like jnode type. */
170 /* 88 */ reiser4_plugin_id parent_item_id
;
173 /* list of all jnodes for debugging purposes. */
174 struct list_head jnodes
;
175 /* how many times this jnode was written in one transaction */
177 /* this indicates which atom's list the jnode is on */
180 } __attribute__ ((aligned(16)));
183 * jnode types. Enumeration of existing jnode types.
186 JNODE_UNFORMATTED_BLOCK
, /* unformatted block */
187 JNODE_FORMATTED_BLOCK
, /* formatted block, znode */
188 JNODE_BITMAP
, /* bitmap */
189 JNODE_IO_HEAD
, /* jnode representing a block in the
191 JNODE_INODE
, /* jnode embedded into inode */
197 /* jnode's page is loaded and data checked */
199 /* node was deleted, not all locks on it were released. This
200 node is empty and is going to be removed from the tree
202 JNODE_HEARD_BANSHEE
= 1,
203 /* left sibling pointer is valid */
204 JNODE_LEFT_CONNECTED
= 2,
205 /* right sibling pointer is valid */
206 JNODE_RIGHT_CONNECTED
= 3,
208 /* znode was just created and doesn't yet have a pointer from
212 /* this node was created by its transaction and has not been assigned
216 /* this node is currently relocated */
218 /* this node is currently wandered */
221 /* this znode has been modified */
224 /* znode lock is being invalidated */
227 /* THIS PLACE IS INTENTIONALLY LEFT BLANK */
229 /* jnode is queued for flushing. */
230 JNODE_FLUSH_QUEUED
= 12,
232 /* In the following bits jnode type is encoded. */
237 /* jnode is being destroyed */
240 /* znode was not captured during locking (it might so be because
241 ->level != LEAF_LEVEL and lock_mode == READ_LOCK) */
242 JNODE_MISSED_IN_CAPTURE
= 17,
244 /* write is in progress */
245 JNODE_WRITEBACK
= 18,
247 /* FIXME: now it is used by crypto-compress plugin only */
250 /* delimiting keys are already set for this znode. */
253 /* when this bit is set page and jnode can not be disconnected */
254 JNODE_WRITE_PREPARED
= 21,
256 JNODE_CLUSTER_PAGE
= 22,
257 /* Jnode is marked for repacking; that means the reiser4 flush and the
258 * block allocator should process this node in a special way */
260 /* node should be converted by flush in squalloc phase */
261 JNODE_CONVERTIBLE
= 24,
263 * When a jnode is dirtied for the first time in a given transaction,
264 * do_jnode_make_dirty() checks whether this jnode can possibly become a
265 * member of the overwrite set. If so, this bit is set, and one block is
266 * reserved in the ->flush_reserved space of the atom.
268 * This block is "used" (and JNODE_FLUSH_RESERVED bit is cleared) when
270 * (1) flush decides that we want this block to go into relocate
273 * (2) wandering log is allocated (by log writer)
275 * (3) extent is allocated
278 JNODE_FLUSH_RESERVED
= 29
279 } reiser4_jnode_state
;
281 /* Macros for accessing the jnode state. */
283 static inline void JF_CLR(jnode
* j
, int f
)
285 assert("unknown-1", j
->magic
== JMAGIC
);
286 clear_bit(f
, &j
->state
);
288 static inline int JF_ISSET(const jnode
* j
, int f
)
290 assert("unknown-2", j
->magic
== JMAGIC
);
291 return test_bit(f
, &((jnode
*) j
)->state
);
293 static inline void JF_SET(jnode
* j
, int f
)
295 assert("unknown-3", j
->magic
== JMAGIC
);
296 set_bit(f
, &j
->state
);
299 static inline int JF_TEST_AND_SET(jnode
* j
, int f
)
301 assert("unknown-4", j
->magic
== JMAGIC
);
302 return test_and_set_bit(f
, &j
->state
);
305 static inline void spin_lock_jnode(jnode
*node
)
307 /* check that spinlocks of lower priorities are not held */
308 assert("", (LOCK_CNT_NIL(rw_locked_tree
) &&
309 LOCK_CNT_NIL(spin_locked_txnh
) &&
310 LOCK_CNT_NIL(spin_locked_zlock
) &&
311 LOCK_CNT_NIL(rw_locked_dk
) &&
312 LOCK_CNT_LT(spin_locked_jnode
, 2)));
314 spin_lock(&(node
->guard
));
316 LOCK_CNT_INC(spin_locked_jnode
);
317 LOCK_CNT_INC(spin_locked
);
320 static inline void spin_unlock_jnode(jnode
*node
)
322 assert_spin_locked(&(node
->guard
));
323 assert("nikita-1375", LOCK_CNT_GTZ(spin_locked_jnode
));
324 assert("nikita-1376", LOCK_CNT_GTZ(spin_locked
));
326 LOCK_CNT_DEC(spin_locked_jnode
);
327 LOCK_CNT_DEC(spin_locked
);
329 spin_unlock(&(node
->guard
));
332 static inline int jnode_is_in_deleteset(const jnode
* node
)
334 return JF_ISSET(node
, JNODE_RELOC
);
337 extern int init_jnodes(void);
338 extern void done_jnodes(void);
341 extern jnode
*jalloc(void);
342 extern void jfree(jnode
* node
) NONNULL
;
343 extern jnode
*jclone(jnode
*);
344 extern jnode
*jlookup(reiser4_tree
* tree
,
345 oid_t objectid
, unsigned long ind
) NONNULL
;
346 extern jnode
*jfind(struct address_space
*, unsigned long index
) NONNULL
;
347 extern jnode
*jnode_by_page(struct page
*pg
) NONNULL
;
348 extern jnode
*jnode_of_page(struct page
*pg
) NONNULL
;
349 void jnode_attach_page(jnode
* node
, struct page
*pg
);
351 void unhash_unformatted_jnode(jnode
*);
352 extern jnode
*page_next_jnode(jnode
* node
) NONNULL
;
353 extern void jnode_init(jnode
* node
, reiser4_tree
* tree
, jnode_type
) NONNULL
;
354 extern void jnode_make_dirty(jnode
* node
) NONNULL
;
355 extern void jnode_make_clean(jnode
* node
) NONNULL
;
356 extern void jnode_make_wander_nolock(jnode
* node
) NONNULL
;
357 extern void jnode_make_wander(jnode
*) NONNULL
;
358 extern void znode_make_reloc(znode
*, flush_queue_t
*) NONNULL
;
359 extern void unformatted_make_reloc(jnode
*, flush_queue_t
*) NONNULL
;
360 extern struct address_space
*jnode_get_mapping(const jnode
* node
) NONNULL
;
364 * @node: jnode to query
367 static inline const reiser4_block_nr
*jnode_get_block(const jnode
*node
)
369 assert("nikita-528", node
!= NULL
);
371 return &node
->blocknr
;
376 * @node: jnode to update
377 * @blocknr: new block nr
379 static inline void jnode_set_block(jnode
*node
, const reiser4_block_nr
*blocknr
)
381 assert("nikita-2020", node
!= NULL
);
382 assert("umka-055", blocknr
!= NULL
);
383 node
->blocknr
= *blocknr
;
387 /* block number for IO. Usually this is the same as jnode_get_block(), unless
388 * jnode was emergency flushed---then block number chosen by eflush is
390 static inline const reiser4_block_nr
*jnode_get_io_block(jnode
* node
)
392 assert("nikita-2768", node
!= NULL
);
393 assert_spin_locked(&(node
->guard
));
395 return jnode_get_block(node
);
398 /* Jnode flush interface. */
399 extern reiser4_blocknr_hint
*reiser4_pos_hint(flush_pos_t
* pos
);
400 extern flush_queue_t
*reiser4_pos_fq(flush_pos_t
* pos
);
/* FIXME-VS: these are used in plugin/item/extent.c */

/* does extent_get_block have to be called */
#define jnode_mapped(node)		JF_ISSET((node), JNODE_MAPPED)
#define jnode_set_mapped(node)		JF_SET((node), JNODE_MAPPED)

/* the node should be converted during flush squalloc phase */
#define jnode_convertible(node)		JF_ISSET((node), JNODE_CONVERTIBLE)
#define jnode_set_convertible(node)	JF_SET((node), JNODE_CONVERTIBLE)
412 /* Macros to convert from jnode to znode, znode to jnode. These are macros
413 because C doesn't allow overloading of const prototypes. */
414 #define ZJNODE(x) (& (x) -> zjnode)
417 typeof (x) __tmp_x; \
420 assert ("jmacd-1300", jnode_is_znode (__tmp_x)); \
424 extern int jnodes_tree_init(reiser4_tree
* tree
);
425 extern int jnodes_tree_done(reiser4_tree
* tree
);
429 extern int znode_is_any_locked(const znode
* node
);
430 extern void jnode_list_remove(jnode
* node
);
434 #define jnode_list_remove(node) noop
438 int znode_is_root(const znode
* node
) NONNULL
;
440 /* bump reference counter on @node */
441 static inline void add_x_ref(jnode
* node
/* node to increase x_count of */ )
443 assert("nikita-1911", node
!= NULL
);
445 atomic_inc(&node
->x_count
);
446 LOCK_CNT_INC(x_refs
);
449 static inline void dec_x_ref(jnode
* node
)
451 assert("nikita-3215", node
!= NULL
);
452 assert("nikita-3216", atomic_read(&node
->x_count
) > 0);
454 atomic_dec(&node
->x_count
);
455 assert("nikita-3217", LOCK_CNT_GTZ(x_refs
));
456 LOCK_CNT_DEC(x_refs
);
459 /* jref() - increase counter of references to jnode/znode (x_count) */
460 static inline jnode
*jref(jnode
* node
)
462 assert("jmacd-508", (node
!= NULL
) && !IS_ERR(node
));
467 /* get the page of jnode */
468 static inline struct page
*jnode_page(const jnode
* node
)
473 /* return pointer to jnode data */
474 static inline char *jdata(const jnode
* node
)
476 assert("nikita-1415", node
!= NULL
);
477 assert("nikita-3198", jnode_page(node
) != NULL
);
481 static inline int jnode_is_loaded(const jnode
* node
)
483 assert("zam-506", node
!= NULL
);
484 return atomic_read(&node
->d_count
) > 0;
487 extern void page_clear_jnode(struct page
*page
, jnode
* node
) NONNULL
;
489 static inline void jnode_set_reloc(jnode
* node
)
491 assert("nikita-2431", node
!= NULL
);
492 assert("nikita-2432", !JF_ISSET(node
, JNODE_OVRWR
));
493 JF_SET(node
, JNODE_RELOC
);
496 /* jload/jwrite/junload give a bread/bwrite/brelse functionality for jnodes */
498 extern int jload_gfp(jnode
*, gfp_t
, int do_kmap
) NONNULL
;
500 static inline int jload(jnode
*node
)
502 return jload_gfp(node
, reiser4_ctx_gfp_mask_get(), 1);
505 extern int jinit_new(jnode
*, gfp_t
) NONNULL
;
506 extern int jstartio(jnode
*) NONNULL
;
508 extern void jdrop(jnode
*) NONNULL
;
509 extern int jwait_io(jnode
*, int rw
) NONNULL
;
511 void jload_prefetch(jnode
*);
513 extern jnode
*reiser4_alloc_io_head(const reiser4_block_nr
* block
) NONNULL
;
514 extern void reiser4_drop_io_head(jnode
* node
) NONNULL
;
516 static inline reiser4_tree
*jnode_get_tree(const jnode
* node
)
518 assert("nikita-2691", node
!= NULL
);
522 extern void pin_jnode_data(jnode
*);
523 extern void unpin_jnode_data(jnode
*);
525 static inline jnode_type
jnode_get_type(const jnode
* node
)
527 static const unsigned long state_mask
=
528 (1 << JNODE_TYPE_1
) | (1 << JNODE_TYPE_2
) | (1 << JNODE_TYPE_3
);
530 static jnode_type mask_to_type
[] = {
531 /* JNODE_TYPE_3 : JNODE_TYPE_2 : JNODE_TYPE_1 */
534 [0] = JNODE_FORMATTED_BLOCK
,
536 [1] = JNODE_UNFORMATTED_BLOCK
,
540 [3] = LAST_JNODE_TYPE
, /*invalid */
544 [5] = LAST_JNODE_TYPE
,
548 [7] = LAST_JNODE_TYPE
, /* invalid */
551 return mask_to_type
[(node
->state
& state_mask
) >> JNODE_TYPE_1
];
554 /* returns true if node is a znode */
555 static inline int jnode_is_znode(const jnode
* node
)
557 return jnode_get_type(node
) == JNODE_FORMATTED_BLOCK
;
560 static inline int jnode_is_flushprepped(jnode
* node
)
562 assert("jmacd-78212", node
!= NULL
);
563 assert_spin_locked(&(node
->guard
));
564 return !JF_ISSET(node
, JNODE_DIRTY
) || JF_ISSET(node
, JNODE_RELOC
) ||
565 JF_ISSET(node
, JNODE_OVRWR
);
568 /* Return true if @node has already been processed by the squeeze and allocate
569 process. This implies the block address has been finalized for the
570 duration of this atom (or it is clean and will remain in place). If this
571 returns true you may use the block number as a hint. */
572 static inline int jnode_check_flushprepped(jnode
* node
)
576 /* It must be clean or relocated or wandered. New allocations are set to relocate. */
577 spin_lock_jnode(node
);
578 result
= jnode_is_flushprepped(node
);
579 spin_unlock_jnode(node
);
583 /* returns true if node is unformatted */
584 static inline int jnode_is_unformatted(const jnode
* node
)
586 assert("jmacd-0123", node
!= NULL
);
587 return jnode_get_type(node
) == JNODE_UNFORMATTED_BLOCK
;
590 /* returns true if node represents a cluster cache page */
591 static inline int jnode_is_cluster_page(const jnode
* node
)
593 assert("edward-50", node
!= NULL
);
594 return (JF_ISSET(node
, JNODE_CLUSTER_PAGE
));
597 /* returns true is node is builtin inode's jnode */
598 static inline int jnode_is_inode(const jnode
* node
)
600 assert("vs-1240", node
!= NULL
);
601 return jnode_get_type(node
) == JNODE_INODE
;
604 static inline jnode_plugin
*jnode_ops_of(const jnode_type type
)
606 assert("nikita-2367", type
< LAST_JNODE_TYPE
);
607 return jnode_plugin_by_id((reiser4_plugin_id
) type
);
610 static inline jnode_plugin
*jnode_ops(const jnode
* node
)
612 assert("nikita-2366", node
!= NULL
);
614 return jnode_ops_of(jnode_get_type(node
));
617 /* Get the index of a block. */
618 static inline unsigned long jnode_get_index(jnode
* node
)
620 return jnode_ops(node
)->index(node
);
623 /* return true if "node" is the root */
624 static inline int jnode_is_root(const jnode
* node
)
626 return jnode_is_znode(node
) && znode_is_root(JZNODE(node
));
629 extern struct address_space
*mapping_jnode(const jnode
* node
);
630 extern unsigned long index_jnode(const jnode
* node
);
632 static inline void jput(jnode
* node
);
633 extern void jput_final(jnode
* node
);
635 /* bump data counter on @node */
636 static inline void add_d_ref(jnode
* node
/* node to increase d_count of */ )
638 assert("nikita-1962", node
!= NULL
);
640 atomic_inc(&node
->d_count
);
641 if (jnode_is_unformatted(node
) || jnode_is_znode(node
))
642 LOCK_CNT_INC(d_refs
);
645 /* jput() - decrement x_count reference counter on znode.
647 Count may drop to 0, jnode stays in cache until memory pressure causes the
648 eviction of its page. The c_count variable also ensures that children are
649 pressured out of memory before the parent. The jnode remains hashed as
650 long as the VM allows its page to stay in memory.
652 static inline void jput(jnode
* node
)
654 assert("jmacd-509", node
!= NULL
);
655 assert("jmacd-510", atomic_read(&node
->x_count
) > 0);
656 assert("zam-926", reiser4_schedulable());
657 LOCK_CNT_DEC(x_refs
);
661 * we don't need any kind of lock here--jput_final() uses RCU.
663 if (unlikely(atomic_dec_and_test(&node
->x_count
))) {
667 assert("nikita-3473", reiser4_schedulable());
670 extern void jrelse(jnode
* node
);
671 extern void jrelse_tail(jnode
* node
);
673 extern jnode
*jnode_rip_sync(reiser4_tree
* t
, jnode
* node
);
675 /* resolve race with jput */
676 static inline jnode
*jnode_rip_check(reiser4_tree
* tree
, jnode
* node
)
678 if (unlikely(JF_ISSET(node
, JNODE_RIP
)))
679 node
= jnode_rip_sync(tree
, node
);
683 extern reiser4_key
*jnode_build_key(const jnode
*node
, reiser4_key
* key
);
686 extern int jnode_invariant_f(const jnode
*node
, char const **msg
);
689 extern jnode_plugin jnode_plugins
[LAST_JNODE_TYPE
];
696 c-indentation-style: "K&R"