On Tue, Nov 06, 2007 at 02:33:53AM -0800, akpm@linux-foundation.org wrote:
[mmotm.git] / fs / reiser4 / key.h
blob2ad4ee277e61ee291bde65a3bcae313d640a5392
1 /* Copyright 2000, 2001, 2002, 2003 by Hans Reiser, licensing governed by
2 * reiser4/README */
4 /* Declarations of key-related data-structures and operations on keys. */
6 #if !defined(__REISER4_KEY_H__)
7 #define __REISER4_KEY_H__
9 #include "dformat.h"
10 #include "forward.h"
11 #include "debug.h"
13 #include <linux/types.h> /* for __u?? */
15 /* Operations on keys in reiser4 tree */
17 /* No access to any of these fields shall be done except via a
18 wrapping macro/function, and that wrapping macro/function shall
19 convert to little endian order. Compare keys will consider cpu byte order. */
21 /* A storage layer implementation difference between a regular unix file body
22 and its attributes is in the typedef below which causes all of the attributes
23 of a file to be near in key to all of the other attributes for all of the
24 files within that directory, and not near to the file itself. It is
25 interesting to consider whether this is the wrong approach, and whether there
26 should be no difference at all. For current usage patterns this choice is
27 probably the right one. */
29 /* possible values for minor packing locality (4 bits required) */
30 typedef enum {
31 /* file name */
32 KEY_FILE_NAME_MINOR = 0,
33 /* stat-data */
34 KEY_SD_MINOR = 1,
35 /* file attribute name */
36 KEY_ATTR_NAME_MINOR = 2,
37 /* file attribute value */
38 KEY_ATTR_BODY_MINOR = 3,
39 /* file body (tail or extent) */
40 KEY_BODY_MINOR = 4,
41 } key_minor_locality;
43 /* Everything stored in the tree has a unique key, which means that the tree is
44 (logically) fully ordered by key. Physical order is determined by dynamic
45 heuristics that attempt to reflect key order when allocating available space,
46 and by the repacker. It is stylistically better to put aggregation
47 information into the key. Thus, if you want to segregate extents from tails,
48 it is better to give them distinct minor packing localities rather than
49 changing block_alloc.c to check the node type when deciding where to allocate
50 the node.
52 The need to randomly displace new directories and large files disturbs this
53 symmetry unfortunately. However, it should be noted that this is a need that
54 is not clearly established given the existence of a repacker. Also, in our
55 current implementation tails have a different minor packing locality from
56 extents, and no files have both extents and tails, so maybe symmetry can be
57 had without performance cost after all. Symmetry is what we ship for now....
60 /* Arbitrary major packing localities can be assigned to objects using
61 the reiser4(filenameA/..packing<=some_number) system call.
63 In reiser4, the creat() syscall creates a directory
65 whose default flow (that which is referred to if the directory is
66 read as a file) is the traditional unix file body.
68 whose directory plugin is the 'filedir'
70 whose major packing locality is that of the parent of the object created.
72 The static_stat item is a particular commonly used directory
73 compression (the one for normal unix files).
75 The filedir plugin checks to see if the static_stat item exists.
76 There is a unique key for static_stat. If yes, then it uses the
77 static_stat item for all of the values that it contains. The
78 static_stat item contains a flag for each stat it contains which
79 indicates whether one should look outside the static_stat item for its
80 contents.
83 /* offset of fields in reiser4_key. Value of each element of this enum
84 is index within key (thought as array of __u64's) where this field
85 is. */
86 typedef enum {
87 /* major "locale", aka dirid. Sits in 1st element */
88 KEY_LOCALITY_INDEX = 0,
89 /* minor "locale", aka item type. Sits in 1st element */
90 KEY_TYPE_INDEX = 0,
91 ON_LARGE_KEY(KEY_ORDERING_INDEX,)
92 /* "object band". Sits in 2nd element */
93 KEY_BAND_INDEX,
94 /* objectid. Sits in 2nd element */
95 KEY_OBJECTID_INDEX = KEY_BAND_INDEX,
96 /* full objectid. Sits in 2nd element */
97 KEY_FULLOID_INDEX = KEY_BAND_INDEX,
98 /* Offset. Sits in 3rd element */
99 KEY_OFFSET_INDEX,
100 /* Name hash. Sits in 3rd element */
101 KEY_HASH_INDEX = KEY_OFFSET_INDEX,
102 KEY_CACHELINE_END = KEY_OFFSET_INDEX,
103 KEY_LAST_INDEX
104 } reiser4_key_field_index;
106 /* key in reiser4 internal "balanced" tree. It is just array of three
107 64bit integers in disk byte order (little-endian by default). This
108 array is actually indexed by reiser4_key_field. Each __u64 within
109 this array is called "element". Logical key component encoded within
110 elements are called "fields".
112 We declare this as union with second component dummy to suppress
113 inconvenient array<->pointer casts implied in C. */
114 union reiser4_key {
115 __le64 el[KEY_LAST_INDEX];
116 int pad;
119 /* bitmasks showing where within reiser4_key particular key is stored. */
120 /* major locality occupies higher 60 bits of the first element */
121 #define KEY_LOCALITY_MASK 0xfffffffffffffff0ull
123 /* minor locality occupies lower 4 bits of the first element */
124 #define KEY_TYPE_MASK 0xfull
126 /* controversial band occupies higher 4 bits of the 2nd element */
127 #define KEY_BAND_MASK 0xf000000000000000ull
129 /* objectid occupies lower 60 bits of the 2nd element */
130 #define KEY_OBJECTID_MASK 0x0fffffffffffffffull
132 /* full 64bit objectid*/
133 #define KEY_FULLOID_MASK 0xffffffffffffffffull
135 /* offset is just 3rd L.M.Nt itself */
136 #define KEY_OFFSET_MASK 0xffffffffffffffffull
138 /* ordering is whole second element */
139 #define KEY_ORDERING_MASK 0xffffffffffffffffull
141 /* how many bits key element should be shifted to left to get particular field
143 typedef enum {
144 KEY_LOCALITY_SHIFT = 4,
145 KEY_TYPE_SHIFT = 0,
146 KEY_BAND_SHIFT = 60,
147 KEY_OBJECTID_SHIFT = 0,
148 KEY_FULLOID_SHIFT = 0,
149 KEY_OFFSET_SHIFT = 0,
150 KEY_ORDERING_SHIFT = 0,
151 } reiser4_key_field_shift;
153 static inline __u64
154 get_key_el(const reiser4_key * key, reiser4_key_field_index off)
156 assert("nikita-753", key != NULL);
157 assert("nikita-754", off < KEY_LAST_INDEX);
158 return le64_to_cpu(get_unaligned(&key->el[off]));
161 static inline void
162 set_key_el(reiser4_key * key, reiser4_key_field_index off, __u64 value)
164 assert("nikita-755", key != NULL);
165 assert("nikita-756", off < KEY_LAST_INDEX);
166 put_unaligned(cpu_to_le64(value), &key->el[off]);
169 /* macro to define getter and setter functions for field F with type T */
170 #define DEFINE_KEY_FIELD(L, U, T) \
171 static inline T get_key_ ## L(const reiser4_key *key) \
173 assert("nikita-750", key != NULL); \
174 return (T) (get_key_el(key, KEY_ ## U ## _INDEX) & \
175 KEY_ ## U ## _MASK) >> KEY_ ## U ## _SHIFT; \
178 static inline void set_key_ ## L(reiser4_key * key, T loc) \
180 __u64 el; \
182 assert("nikita-752", key != NULL); \
184 el = get_key_el(key, KEY_ ## U ## _INDEX); \
185 /* clear field bits in the key */ \
186 el &= ~KEY_ ## U ## _MASK; \
187 /* actually it should be \
189 el |= ( loc << KEY_ ## U ## _SHIFT ) & KEY_ ## U ## _MASK; \
191 but we trust user to never pass values that wouldn't fit \
192 into field. Clearing extra bits is one operation, but this \
193 function is time-critical. \
194 But check this in assertion. */ \
195 assert("nikita-759", ((loc << KEY_ ## U ## _SHIFT) & \
196 ~KEY_ ## U ## _MASK) == 0); \
197 el |= (loc << KEY_ ## U ## _SHIFT); \
198 set_key_el(key, KEY_ ## U ## _INDEX, el); \
201 typedef __u64 oid_t;
203 /* define get_key_locality(), set_key_locality() */
204 DEFINE_KEY_FIELD(locality, LOCALITY, oid_t);
205 /* define get_key_type(), set_key_type() */
206 DEFINE_KEY_FIELD(type, TYPE, key_minor_locality);
207 /* define get_key_band(), set_key_band() */
208 DEFINE_KEY_FIELD(band, BAND, __u64);
209 /* define get_key_objectid(), set_key_objectid() */
210 DEFINE_KEY_FIELD(objectid, OBJECTID, oid_t);
211 /* define get_key_fulloid(), set_key_fulloid() */
212 DEFINE_KEY_FIELD(fulloid, FULLOID, oid_t);
213 /* define get_key_offset(), set_key_offset() */
214 DEFINE_KEY_FIELD(offset, OFFSET, __u64);
215 #if (REISER4_LARGE_KEY)
216 /* define get_key_ordering(), set_key_ordering() */
217 DEFINE_KEY_FIELD(ordering, ORDERING, __u64);
218 #else
219 static inline __u64 get_key_ordering(const reiser4_key * key)
221 return 0;
224 static inline void set_key_ordering(reiser4_key * key, __u64 val)
227 #endif
229 /* key comparison result */
230 typedef enum { LESS_THAN = -1, /* if first key is less than second */
231 EQUAL_TO = 0, /* if keys are equal */
232 GREATER_THAN = +1 /* if first key is greater than second */
233 } cmp_t;
235 void reiser4_key_init(reiser4_key * key);
237 /* minimal possible key in the tree. Return pointer to the static storage. */
238 extern const reiser4_key *reiser4_min_key(void);
239 extern const reiser4_key *reiser4_max_key(void);
241 /* helper macro for keycmp() */
242 #define KEY_DIFF(k1, k2, field) \
243 ({ \
244 typeof(get_key_ ## field(k1)) f1; \
245 typeof(get_key_ ## field(k2)) f2; \
247 f1 = get_key_ ## field(k1); \
248 f2 = get_key_ ## field(k2); \
250 (f1 < f2) ? LESS_THAN : ((f1 == f2) ? EQUAL_TO : GREATER_THAN); \
253 /* helper macro for keycmp() */
254 #define KEY_DIFF_EL(k1, k2, off) \
255 ({ \
256 __u64 e1; \
257 __u64 e2; \
259 e1 = get_key_el(k1, off); \
260 e2 = get_key_el(k2, off); \
262 (e1 < e2) ? LESS_THAN : ((e1 == e2) ? EQUAL_TO : GREATER_THAN); \
265 /* compare `k1' and `k2'. This function is a heart of "key allocation
266 policy". All you need to implement new policy is to add yet another
267 clause here. */
268 static inline cmp_t keycmp(const reiser4_key * k1 /* first key to compare */ ,
269 const reiser4_key * k2/* second key to compare */)
271 cmp_t result;
274 * This function is the heart of reiser4 tree-routines. Key comparison
275 * is among most heavily used operations in the file system.
278 assert("nikita-439", k1 != NULL);
279 assert("nikita-440", k2 != NULL);
281 /* there is no actual branch here: condition is compile time constant
282 * and constant folding and propagation ensures that only one branch
283 * is actually compiled in. */
285 if (REISER4_PLANA_KEY_ALLOCATION) {
286 /* if physical order of fields in a key is identical
287 with logical order, we can implement key comparison
288 as three 64bit comparisons. */
289 /* logical order of fields in plan-a:
290 locality->type->objectid->offset. */
291 /* compare locality and type at once */
292 result = KEY_DIFF_EL(k1, k2, 0);
293 if (result == EQUAL_TO) {
294 /* compare objectid (and band if it's there) */
295 result = KEY_DIFF_EL(k1, k2, 1);
296 /* compare offset */
297 if (result == EQUAL_TO) {
298 result = KEY_DIFF_EL(k1, k2, 2);
299 if (REISER4_LARGE_KEY && result == EQUAL_TO)
300 result = KEY_DIFF_EL(k1, k2, 3);
303 } else if (REISER4_3_5_KEY_ALLOCATION) {
304 result = KEY_DIFF(k1, k2, locality);
305 if (result == EQUAL_TO) {
306 result = KEY_DIFF(k1, k2, objectid);
307 if (result == EQUAL_TO) {
308 result = KEY_DIFF(k1, k2, type);
309 if (result == EQUAL_TO)
310 result = KEY_DIFF(k1, k2, offset);
313 } else
314 impossible("nikita-441", "Unknown key allocation scheme!");
315 return result;
318 /* true if @k1 equals @k2 */
319 static inline int keyeq(const reiser4_key * k1 /* first key to compare */ ,
320 const reiser4_key * k2/* second key to compare */)
322 assert("nikita-1879", k1 != NULL);
323 assert("nikita-1880", k2 != NULL);
324 return !memcmp(k1, k2, sizeof *k1);
327 /* true if @k1 is less than @k2 */
328 static inline int keylt(const reiser4_key * k1 /* first key to compare */ ,
329 const reiser4_key * k2/* second key to compare */)
331 assert("nikita-1952", k1 != NULL);
332 assert("nikita-1953", k2 != NULL);
333 return keycmp(k1, k2) == LESS_THAN;
336 /* true if @k1 is less than or equal to @k2 */
337 static inline int keyle(const reiser4_key * k1 /* first key to compare */ ,
338 const reiser4_key * k2/* second key to compare */)
340 assert("nikita-1954", k1 != NULL);
341 assert("nikita-1955", k2 != NULL);
342 return keycmp(k1, k2) != GREATER_THAN;
345 /* true if @k1 is greater than @k2 */
346 static inline int keygt(const reiser4_key * k1 /* first key to compare */ ,
347 const reiser4_key * k2/* second key to compare */)
349 assert("nikita-1959", k1 != NULL);
350 assert("nikita-1960", k2 != NULL);
351 return keycmp(k1, k2) == GREATER_THAN;
354 /* true if @k1 is greater than or equal to @k2 */
355 static inline int keyge(const reiser4_key * k1 /* first key to compare */ ,
356 const reiser4_key * k2/* second key to compare */)
358 assert("nikita-1956", k1 != NULL);
359 assert("nikita-1957", k2 != NULL); /* October 4: sputnik launched
360 * November 3: Laika */
361 return keycmp(k1, k2) != LESS_THAN;
364 static inline void prefetchkey(reiser4_key * key)
366 prefetch(key);
367 prefetch(&key->el[KEY_CACHELINE_END]);
370 /* (%Lx:%x:%Lx:%Lx:%Lx:%Lx) =
371 1 + 16 + 1 + 1 + 1 + 1 + 1 + 16 + 1 + 16 + 1 + 16 + 1 */
372 /* size of a buffer suitable to hold human readable key representation */
373 #define KEY_BUF_LEN (80)
375 #if REISER4_DEBUG
376 extern void reiser4_print_key(const char *prefix, const reiser4_key * key);
377 #else
378 #define reiser4_print_key(p, k) noop
379 #endif
381 /* __FS_REISERFS_KEY_H__ */
382 #endif
384 /* Make Linus happy.
385 Local variables:
386 c-indentation-style: "K&R"
387 mode-name: "LC"
388 c-basic-offset: 8
389 tab-width: 8
390 fill-column: 120
391 End: