On Tue, Nov 06, 2007 at 02:33:53AM -0800, akpm@linux-foundation.org wrote:
[mmotm.git] / fs / reiser4 / znode.h
blob4894e928a277f10ea428ebf4b14d5483d2517668
1 /* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
2 * reiser4/README */
4 /* Declaration of znode (Zam's node). See znode.c for more details. */
6 #ifndef __ZNODE_H__
7 #define __ZNODE_H__
9 #include "forward.h"
10 #include "debug.h"
11 #include "dformat.h"
12 #include "key.h"
13 #include "coord.h"
14 #include "plugin/node/node.h"
15 #include "jnode.h"
16 #include "lock.h"
17 #include "readahead.h"
19 #include <linux/types.h>
20 #include <linux/spinlock.h>
21 #include <linux/semaphore.h>
22 #include <linux/pagemap.h> /* for PAGE_CACHE_SIZE */
23 #include <asm/atomic.h>
25 /* znode tracks its position within parent (internal item in a parent node,
26 * that contains znode's block number). */
27 typedef struct parent_coord {
28 znode *node;
29 pos_in_node_t item_pos;
30 } parent_coord_t;
32 /* &znode - node in a reiser4 tree.
34 NOTE-NIKITA fields in this struct have to be rearranged (later) to reduce
35 cacheline pressure.
37 Locking:
39 Long term: data in a disk node attached to this znode are protected
40 by long term, deadlock aware lock ->lock;
42 Spin lock: the following fields are protected by the spin lock:
44 ->lock
46 Following fields are protected by the global tree lock:
48 ->left
49 ->right
50 ->in_parent
51 ->c_count
53 Following fields are protected by the global delimiting key lock (dk_lock):
55 ->ld_key (to update ->ld_key long-term lock on the node is also required)
56 ->rd_key
58 Following fields are protected by the long term lock:
60 ->nr_items
62 ->node_plugin is never changed once set. This means that after code made
63 itself sure that field is valid it can be accessed without any additional
64 locking.
66 ->level is immutable.
68 Invariants involving this data-type:
70 [znode-fake]
71 [znode-level]
72 [znode-connected]
73 [znode-c_count]
74 [znode-refs]
75 [jnode-refs]
76 [jnode-queued]
77 [znode-modify]
79 For this to be made into a clustering or NUMA filesystem, we would want to eliminate all of the global locks.
80 Suggestions for how to do that are desired.*/
81 struct znode {
82 /* Embedded jnode. */
83 jnode zjnode;
85 /* contains three subfields, node, pos_in_node, and pos_in_unit.
87 pos_in_node and pos_in_unit are only hints that are cached to
88 speed up lookups during balancing. They are not required to be up to
89 date. Synched in find_child_ptr().
91 This value allows us to avoid expensive binary searches.
93 in_parent->node points to the parent of this node, and is NOT a
94 hint.
96 parent_coord_t in_parent;
99 * sibling list pointers
102 /* left-neighbor */
103 znode *left;
104 /* right-neighbor */
105 znode *right;
107 /* long term lock on node content. This lock supports deadlock
108 detection. See lock.c
110 zlock lock;
112 /* You cannot remove from memory a node that has children in
113 memory. This is because we rely on the fact that parent of given
114 node can always be reached without blocking for io. When reading a
115 node into memory you must increase the c_count of its parent, when
116 removing it from memory you must decrease the c_count. This makes
117 the code simpler, and the cases where it is suboptimal are truly
118 obscure.
120 int c_count;
122 /* plugin of node attached to this znode. NULL if znode is not
123 loaded. */
124 node_plugin *nplug;
126 /* version of znode data. This is increased on each modification. This
127 * is necessary to implement seals (see seal.[ch]) efficiently. */
128 __u64 version;
130 /* left delimiting key. Necessary to efficiently perform
131 balancing with node-level locking. Kept in memory only. */
132 reiser4_key ld_key;
133 /* right delimiting key. */
134 reiser4_key rd_key;
136 /* znode's tree level */
137 __u16 level;
138 /* number of items in this node. This field is modified by node
139 * plugin. */
140 __u16 nr_items;
142 #if REISER4_DEBUG
143 void *creator;
144 reiser4_key first_key;
145 unsigned long times_locked;
146 int left_version; /* when node->left was updated */
147 int right_version; /* when node->right was updated */
148 int ld_key_version; /* when node->ld_key was updated */
149 int rd_key_version; /* when node->rd_key was updated */
150 #endif
152 } __attribute__ ((aligned(16)));
154 ON_DEBUG(extern atomic_t delim_key_version;
157 /* In general I think these macros should not be exposed. */
158 #define znode_is_locked(node) (lock_is_locked(&node->lock))
159 #define znode_is_rlocked(node) (lock_is_rlocked(&node->lock))
160 #define znode_is_wlocked(node) (lock_is_wlocked(&node->lock))
161 #define znode_is_wlocked_once(node) (lock_is_wlocked_once(&node->lock))
162 #define znode_can_be_rlocked(node) (lock_can_be_rlocked(&node->lock))
163 #define is_lock_compatible(node, mode) (lock_mode_compatible(&node->lock, mode))
164 /* Macros for accessing the znode state. */
165 #define ZF_CLR(p,f) JF_CLR (ZJNODE(p), (f))
166 #define ZF_ISSET(p,f) JF_ISSET(ZJNODE(p), (f))
167 #define ZF_SET(p,f) JF_SET (ZJNODE(p), (f))
168 extern znode *zget(reiser4_tree * tree, const reiser4_block_nr * const block,
169 znode * parent, tree_level level, gfp_t gfp_flag);
170 extern znode *zlook(reiser4_tree * tree, const reiser4_block_nr * const block);
171 extern int zload(znode * node);
172 extern int zload_ra(znode * node, ra_info_t * info);
173 extern int zinit_new(znode * node, gfp_t gfp_flags);
174 extern void zrelse(znode * node);
175 extern void znode_change_parent(znode * new_parent, reiser4_block_nr * block);
177 /* size of data in znode */
178 static inline unsigned
179 znode_size(const znode * node UNUSED_ARG /* znode to query */ )
181 assert("nikita-1416", node != NULL);
182 return PAGE_CACHE_SIZE;
185 extern void parent_coord_to_coord(const parent_coord_t * pcoord,
186 coord_t * coord);
187 extern void coord_to_parent_coord(const coord_t * coord,
188 parent_coord_t * pcoord);
189 extern void init_parent_coord(parent_coord_t * pcoord, const znode * node);
191 extern unsigned znode_free_space(znode * node);
193 extern reiser4_key *znode_get_rd_key(znode * node);
194 extern reiser4_key *znode_get_ld_key(znode * node);
196 extern reiser4_key *znode_set_rd_key(znode * node, const reiser4_key * key);
197 extern reiser4_key *znode_set_ld_key(znode * node, const reiser4_key * key);
199 /* `connected' state checks */
200 static inline int znode_is_right_connected(const znode * node)
202 return ZF_ISSET(node, JNODE_RIGHT_CONNECTED);
205 static inline int znode_is_left_connected(const znode * node)
207 return ZF_ISSET(node, JNODE_LEFT_CONNECTED);
210 static inline int znode_is_connected(const znode * node)
212 return znode_is_right_connected(node) && znode_is_left_connected(node);
215 extern int znode_shift_order;
216 extern int znode_rehash(znode * node, const reiser4_block_nr * new_block_nr);
217 extern void znode_remove(znode *, reiser4_tree *);
218 extern znode *znode_parent(const znode * node);
219 extern znode *znode_parent_nolock(const znode * node);
220 extern int znode_above_root(const znode * node);
221 extern int init_znodes(void);
222 extern void done_znodes(void);
223 extern int znodes_tree_init(reiser4_tree * ztree);
224 extern void znodes_tree_done(reiser4_tree * ztree);
225 extern int znode_contains_key(znode * node, const reiser4_key * key);
226 extern int znode_contains_key_lock(znode * node, const reiser4_key * key);
227 extern unsigned znode_save_free_space(znode * node);
228 extern unsigned znode_recover_free_space(znode * node);
229 extern znode *zalloc(gfp_t gfp_flag);
230 extern void zinit(znode *, const znode * parent, reiser4_tree *);
231 extern int zparse(znode * node);
233 extern int znode_just_created(const znode * node);
235 extern void zfree(znode * node);
237 #if REISER4_DEBUG
238 extern void print_znode(const char *prefix, const znode * node);
239 #else
240 #define print_znode( p, n ) noop
241 #endif
243 /* Make it look like various znode functions exist instead of treating znodes as
244 jnodes in znode-specific code. */
245 #define znode_page(x) jnode_page ( ZJNODE(x) )
246 #define zdata(x) jdata ( ZJNODE(x) )
247 #define znode_get_block(x) jnode_get_block ( ZJNODE(x) )
248 #define znode_created(x) jnode_created ( ZJNODE(x) )
249 #define znode_set_created(x) jnode_set_created ( ZJNODE(x) )
250 #define znode_convertible(x) jnode_convertible (ZJNODE(x))
251 #define znode_set_convertible(x) jnode_set_convertible (ZJNODE(x))
253 #define znode_is_dirty(x) jnode_is_dirty ( ZJNODE(x) )
254 #define znode_check_dirty(x) jnode_check_dirty ( ZJNODE(x) )
255 #define znode_make_clean(x) jnode_make_clean ( ZJNODE(x) )
256 #define znode_set_block(x, b) jnode_set_block ( ZJNODE(x), (b) )
258 #define spin_lock_znode(x) spin_lock_jnode ( ZJNODE(x) )
259 #define spin_unlock_znode(x) spin_unlock_jnode ( ZJNODE(x) )
260 #define spin_trylock_znode(x) spin_trylock_jnode ( ZJNODE(x) )
261 #define spin_znode_is_locked(x) spin_jnode_is_locked ( ZJNODE(x) )
262 #define spin_znode_is_not_locked(x) spin_jnode_is_not_locked ( ZJNODE(x) )
264 #if REISER4_DEBUG
265 extern int znode_x_count_is_protected(const znode * node);
266 extern int znode_invariant(znode * node);
267 #endif
269 /* acquire reference to @node */
270 static inline znode *zref(znode * node)
272 /* change of x_count from 0 to 1 is protected by tree spin-lock */
273 return JZNODE(jref(ZJNODE(node)));
276 /* release reference to @node */
277 static inline void zput(znode * node)
279 assert("nikita-3564", znode_invariant(node));
280 jput(ZJNODE(node));
283 /* get the level field for a znode */
284 static inline tree_level znode_get_level(const znode * node)
286 return node->level;
289 /* get the level field for a jnode */
290 static inline tree_level jnode_get_level(const jnode * node)
292 if (jnode_is_znode(node))
293 return znode_get_level(JZNODE(node));
294 else
295 /* unformatted nodes are all at the LEAF_LEVEL and for
296 "semi-formatted" nodes like bitmaps, level doesn't matter. */
297 return LEAF_LEVEL;
300 /* true if jnode is on leaf level */
301 static inline int jnode_is_leaf(const jnode * node)
303 if (jnode_is_znode(node))
304 return (znode_get_level(JZNODE(node)) == LEAF_LEVEL);
305 if (jnode_get_type(node) == JNODE_UNFORMATTED_BLOCK)
306 return 1;
307 return 0;
310 /* return znode's tree */
311 static inline reiser4_tree *znode_get_tree(const znode * node)
313 assert("nikita-2692", node != NULL);
314 return jnode_get_tree(ZJNODE(node));
317 /* resolve race with zput */
318 static inline znode *znode_rip_check(reiser4_tree * tree, znode * node)
320 jnode *j;
322 j = jnode_rip_sync(tree, ZJNODE(node));
323 if (likely(j != NULL))
324 node = JZNODE(j);
325 else
326 node = NULL;
327 return node;
330 #if defined(REISER4_DEBUG)
331 int znode_is_loaded(const znode * node /* znode to query */ );
332 #endif
334 extern __u64 znode_build_version(reiser4_tree * tree);
336 /* Data-handles. A data handle object manages pairing calls to zload() and zrelse(). We
337 must load the data for a node in many places. We could do this by simply calling
338 zload() everywhere, the difficulty arises when we must release the loaded data by
339 calling zrelse. In a function with many possible error/return paths, it requires extra
340 work to figure out which exit paths must call zrelse and those which do not. The data
341 handle automatically calls zrelse for every zload that it is responsible for. In that
342 sense, it acts much like a lock_handle.
344 typedef struct load_count {
345 znode *node;
346 int d_ref;
347 } load_count;
349 extern void init_load_count(load_count * lc); /* Initialize a load_count set the current node to NULL. */
350 extern void done_load_count(load_count * dh); /* Finalize a load_count: call zrelse() if necessary */
351 extern int incr_load_count_znode(load_count * dh, znode * node); /* Set the argument znode to the current node, call zload(). */
352 extern int incr_load_count_jnode(load_count * dh, jnode * node); /* If the argument jnode is formatted, do the same as
353 * incr_load_count_znode, otherwise do nothing (unformatted nodes
354 * don't require zload/zrelse treatment). */
355 extern void move_load_count(load_count * new, load_count * old); /* Move the contents of a load_count. Old handle is released. */
356 extern void copy_load_count(load_count * new, load_count * old); /* Copy the contents of a load_count. Old handle remains held. */
358 /* Variable initializers for load_count. */
359 #define INIT_LOAD_COUNT ( load_count * ){ .node = NULL, .d_ref = 0 }
360 #define INIT_LOAD_COUNT_NODE( n ) ( load_count ){ .node = ( n ), .d_ref = 0 }
361 /* A convenience macro for use in assertions or debug-only code, where loaded
362 data is only required to perform the debugging check. This macro
363 encapsulates an expression inside a pair of calls to zload()/zrelse(). */
364 #define WITH_DATA( node, exp ) \
365 ({ \
366 long __with_dh_result; \
367 znode *__with_dh_node; \
369 __with_dh_node = ( node ); \
370 __with_dh_result = zload( __with_dh_node ); \
371 if( __with_dh_result == 0 ) { \
372 __with_dh_result = ( long )( exp ); \
373 zrelse( __with_dh_node ); \
375 __with_dh_result; \
378 /* Same as above, but accepts a return value in case zload fails. */
379 #define WITH_DATA_RET( node, ret, exp ) \
380 ({ \
381 int __with_dh_result; \
382 znode *__with_dh_node; \
384 __with_dh_node = ( node ); \
385 __with_dh_result = zload( __with_dh_node ); \
386 if( __with_dh_result == 0 ) { \
387 __with_dh_result = ( int )( exp ); \
388 zrelse( __with_dh_node ); \
389 } else \
390 __with_dh_result = ( ret ); \
391 __with_dh_result; \
394 #define WITH_COORD(coord, exp) \
395 ({ \
396 coord_t *__coord; \
398 __coord = (coord); \
399 coord_clear_iplug(__coord); \
400 WITH_DATA(__coord->node, exp); \
403 #if REISER4_DEBUG
404 #define STORE_COUNTERS \
405 reiser4_lock_cnt_info __entry_counters = \
406 *reiser4_lock_counters()
407 #define CHECK_COUNTERS \
408 ON_DEBUG_CONTEXT( \
409 ({ \
410 __entry_counters.x_refs = reiser4_lock_counters() -> x_refs; \
411 __entry_counters.t_refs = reiser4_lock_counters() -> t_refs; \
412 __entry_counters.d_refs = reiser4_lock_counters() -> d_refs; \
413 assert("nikita-2159", \
414 !memcmp(&__entry_counters, reiser4_lock_counters(), \
415 sizeof __entry_counters)); \
416 }) )
418 #else
419 #define STORE_COUNTERS
420 #define CHECK_COUNTERS noop
421 #endif
423 /* __ZNODE_H__ */
424 #endif
426 /* Make Linus happy.
427 Local variables:
428 c-indentation-style: "K&R"
429 mode-name: "LC"
430 c-basic-offset: 8
431 tab-width: 8
432 fill-column: 120
433 End: