1 /* Copyright 2000, 2001, 2002, 2003 by Hans Reiser, licensing governed by reiser4/README */
3 /* Declarations of key-related data-structures and operations on keys. */
5 #if !defined( __REISER4_KEY_H__ )
6 #define __REISER4_KEY_H__
12 #include <linux/types.h> /* for __u?? */
14 /* Operations on keys in reiser4 tree */
16 /* No access to any of these fields shall be done except via a
17 wrapping macro/function, and that wrapping macro/function shall
18 convert to little endian order. Compare keys will consider cpu byte order. */
20 /* A storage layer implementation difference between a regular unix file body and its attributes is in the typedef below
21 which causes all of the attributes of a file to be near in key to all of the other attributes for all of the files
22 within that directory, and not near to the file itself. It is interesting to consider whether this is the wrong
23 approach, and whether there should be no difference at all. For current usage patterns this choice is probably the
26 /* possible values for minor packing locality (4 bits required) */
29 KEY_FILE_NAME_MINOR
= 0,
32 /* file attribute name */
33 KEY_ATTR_NAME_MINOR
= 2,
34 /* file attribute value */
35 KEY_ATTR_BODY_MINOR
= 3,
36 /* file body (tail or extent) */
40 /* everything stored in the tree has a unique key, which means that the tree is (logically) fully ordered by key.
41 Physical order is determined by dynamic heuristics that attempt to reflect key order when allocating available space,
42 and by the repacker. It is stylistically better to put aggregation information into the key. Thus, if you want to
43 segregate extents from tails, it is better to give them distinct minor packing localities rather than changing
44 block_alloc.c to check the node type when deciding where to allocate the node.
46 The need to randomly displace new directories and large files disturbs this symmetry unfortunately. However, it
47 should be noted that this is a need that is not clearly established given the existence of a repacker. Also, in our
48 current implementation tails have a different minor packing locality from extents, and no files have both extents and
49 tails, so maybe symmetry can be had without performance cost after all. Symmetry is what we ship for now....
52 /* Arbitrary major packing localities can be assigned to objects using
53 the reiser4(filenameA/..packing<=some_number) system call.
55 In reiser4, the creat() syscall creates a directory
57 whose default flow (that which is referred to if the directory is
58 read as a file) is the traditional unix file body.
60 whose directory plugin is the 'filedir'
62 whose major packing locality is that of the parent of the object created.
64 The static_stat item is a particular commonly used directory
65 compression (the one for normal unix files).
67 The filedir plugin checks to see if the static_stat item exists.
68 There is a unique key for static_stat. If yes, then it uses the
69 static_stat item for all of the values that it contains. The
70 static_stat item contains a flag for each stat it contains which
71 indicates whether one should look outside the static_stat item for its
75 /* offset of fields in reiser4_key. Value of each element of this enum
76 is index within key (thought as array of __u64's) where this field
79 /* major "locale", aka dirid. Sits in 1st element */
80 KEY_LOCALITY_INDEX
= 0,
81 /* minor "locale", aka item type. Sits in 1st element */
83 ON_LARGE_KEY(KEY_ORDERING_INDEX
,)
84 /* "object band". Sits in 2nd element */
86 /* objectid. Sits in 2nd element */
87 KEY_OBJECTID_INDEX
= KEY_BAND_INDEX
,
88 /* full objectid. Sits in 2nd element */
89 KEY_FULLOID_INDEX
= KEY_BAND_INDEX
,
90 /* Offset. Sits in 3rd element */
92 /* Name hash. Sits in 3rd element */
93 KEY_HASH_INDEX
= KEY_OFFSET_INDEX
,
94 KEY_CACHELINE_END
= KEY_OFFSET_INDEX
,
96 } reiser4_key_field_index
;
98 /* key in reiser4 internal "balanced" tree. It is just array of three
99 64bit integers in disk byte order (little-endian by default). This
100 array is actually indexed by reiser4_key_field. Each __u64 within
101 this array is called "element". Logical key component encoded within
102 elements are called "fields".
104 We declare this as union with second component dummy to suppress
105 inconvenient array<->pointer casts implied in C. */
107 __le64 el
[KEY_LAST_INDEX
];
111 /* bitmasks showing where within reiser4_key particular key is stored. */
112 /* major locality occupies higher 60 bits of the first element */
113 #define KEY_LOCALITY_MASK 0xfffffffffffffff0ull
115 /* minor locality occupies lower 4 bits of the first element */
116 #define KEY_TYPE_MASK 0xfull
118 /* controversial band occupies higher 4 bits of the 2nd element */
119 #define KEY_BAND_MASK 0xf000000000000000ull
121 /* objectid occupies lower 60 bits of the 2nd element */
122 #define KEY_OBJECTID_MASK 0x0fffffffffffffffull
124 /* full 64bit objectid*/
125 #define KEY_FULLOID_MASK 0xffffffffffffffffull
127 /* offset is just 3rd L.M.Nt itself */
128 #define KEY_OFFSET_MASK 0xffffffffffffffffull
130 /* ordering is whole second element */
131 #define KEY_ORDERING_MASK 0xffffffffffffffffull
133 /* how many bits key element should be shifted to left to get particular field */
135 KEY_LOCALITY_SHIFT
= 4,
138 KEY_OBJECTID_SHIFT
= 0,
139 KEY_FULLOID_SHIFT
= 0,
140 KEY_OFFSET_SHIFT
= 0,
141 KEY_ORDERING_SHIFT
= 0,
142 } reiser4_key_field_shift
;
145 get_key_el(const reiser4_key
* key
, reiser4_key_field_index off
)
147 assert("nikita-753", key
!= NULL
);
148 assert("nikita-754", off
< KEY_LAST_INDEX
);
149 return le64_to_cpu(get_unaligned(&key
->el
[off
]));
153 set_key_el(reiser4_key
* key
, reiser4_key_field_index off
, __u64 value
)
155 assert("nikita-755", key
!= NULL
);
156 assert("nikita-756", off
< KEY_LAST_INDEX
);
157 put_unaligned(cpu_to_le64(value
), &key
->el
[off
]);
160 /* macro to define getter and setter functions for field F with type T */
161 #define DEFINE_KEY_FIELD( L, U, T ) \
162 static inline T get_key_ ## L ( const reiser4_key *key ) \
164 assert( "nikita-750", key != NULL ); \
165 return ( T ) ( get_key_el( key, KEY_ ## U ## _INDEX ) & \
166 KEY_ ## U ## _MASK ) >> KEY_ ## U ## _SHIFT; \
169 static inline void set_key_ ## L ( reiser4_key *key, T loc ) \
173 assert( "nikita-752", key != NULL ); \
175 el = get_key_el( key, KEY_ ## U ## _INDEX ); \
176 /* clear field bits in the key */ \
177 el &= ~KEY_ ## U ## _MASK; \
178 /* actually it should be \
180 el |= ( loc << KEY_ ## U ## _SHIFT ) & KEY_ ## U ## _MASK; \
182 but we trust user to never pass values that wouldn't fit \
183 into field. Clearing extra bits is one operation, but this \
184 function is time-critical. \
185 But check this in assertion. */ \
186 assert( "nikita-759", ( ( loc << KEY_ ## U ## _SHIFT ) & \
187 ~KEY_ ## U ## _MASK ) == 0 ); \
188 el |= ( loc << KEY_ ## U ## _SHIFT ); \
189 set_key_el( key, KEY_ ## U ## _INDEX, el ); \
194 /* define get_key_locality(), set_key_locality() */
195 DEFINE_KEY_FIELD(locality
, LOCALITY
, oid_t
);
196 /* define get_key_type(), set_key_type() */
197 DEFINE_KEY_FIELD(type
, TYPE
, key_minor_locality
);
198 /* define get_key_band(), set_key_band() */
199 DEFINE_KEY_FIELD(band
, BAND
, __u64
);
200 /* define get_key_objectid(), set_key_objectid() */
201 DEFINE_KEY_FIELD(objectid
, OBJECTID
, oid_t
);
202 /* define get_key_fulloid(), set_key_fulloid() */
203 DEFINE_KEY_FIELD(fulloid
, FULLOID
, oid_t
);
204 /* define get_key_offset(), set_key_offset() */
205 DEFINE_KEY_FIELD(offset
, OFFSET
, __u64
);
206 #if (REISER4_LARGE_KEY)
207 /* define get_key_ordering(), set_key_ordering() */
208 DEFINE_KEY_FIELD(ordering
, ORDERING
, __u64
);
210 static inline __u64
get_key_ordering(const reiser4_key
* key
)
215 static inline void set_key_ordering(reiser4_key
* key
, __u64 val
)
220 /* key comparison result */
221 typedef enum { LESS_THAN
= -1, /* if first key is less than second */
222 EQUAL_TO
= 0, /* if keys are equal */
223 GREATER_THAN
= +1 /* if first key is greater than second */
226 void reiser4_key_init(reiser4_key
* key
);
228 /* minimal possible key in the tree. Return pointer to the static storage. */
229 extern const reiser4_key
*reiser4_min_key(void);
230 extern const reiser4_key
*reiser4_max_key(void);
232 /* helper macro for keycmp() */
233 #define KEY_DIFF(k1, k2, field) \
235 typeof (get_key_ ## field (k1)) f1; \
236 typeof (get_key_ ## field (k2)) f2; \
238 f1 = get_key_ ## field (k1); \
239 f2 = get_key_ ## field (k2); \
241 (f1 < f2) ? LESS_THAN : ((f1 == f2) ? EQUAL_TO : GREATER_THAN); \
244 /* helper macro for keycmp() */
245 #define KEY_DIFF_EL(k1, k2, off) \
250 e1 = get_key_el(k1, off); \
251 e2 = get_key_el(k2, off); \
253 (e1 < e2) ? LESS_THAN : ((e1 == e2) ? EQUAL_TO : GREATER_THAN); \
256 /* compare `k1' and `k2'. This function is a heart of "key allocation
257 policy". All you need to implement new policy is to add yet another
259 static inline cmp_t
keycmp(const reiser4_key
* k1
/* first key to compare */ ,
260 const reiser4_key
* k2
/* second key to compare */ )
265 * This function is the heart of reiser4 tree-routines. Key comparison
266 * is among most heavily used operations in the file system.
269 assert("nikita-439", k1
!= NULL
);
270 assert("nikita-440", k2
!= NULL
);
272 /* there is no actual branch here: condition is compile time constant
273 * and constant folding and propagation ensures that only one branch
274 * is actually compiled in. */
276 if (REISER4_PLANA_KEY_ALLOCATION
) {
277 /* if physical order of fields in a key is identical
278 with logical order, we can implement key comparison
279 as three 64bit comparisons. */
280 /* logical order of fields in plan-a:
281 locality->type->objectid->offset. */
282 /* compare locality and type at once */
283 result
= KEY_DIFF_EL(k1
, k2
, 0);
284 if (result
== EQUAL_TO
) {
285 /* compare objectid (and band if it's there) */
286 result
= KEY_DIFF_EL(k1
, k2
, 1);
288 if (result
== EQUAL_TO
) {
289 result
= KEY_DIFF_EL(k1
, k2
, 2);
290 if (REISER4_LARGE_KEY
&& result
== EQUAL_TO
) {
291 result
= KEY_DIFF_EL(k1
, k2
, 3);
295 } else if (REISER4_3_5_KEY_ALLOCATION
) {
296 result
= KEY_DIFF(k1
, k2
, locality
);
297 if (result
== EQUAL_TO
) {
298 result
= KEY_DIFF(k1
, k2
, objectid
);
299 if (result
== EQUAL_TO
) {
300 result
= KEY_DIFF(k1
, k2
, type
);
301 if (result
== EQUAL_TO
)
302 result
= KEY_DIFF(k1
, k2
, offset
);
306 impossible("nikita-441", "Unknown key allocation scheme!");
310 /* true if @k1 equals @k2 */
311 static inline int keyeq(const reiser4_key
* k1
/* first key to compare */ ,
312 const reiser4_key
* k2
/* second key to compare */ )
314 assert("nikita-1879", k1
!= NULL
);
315 assert("nikita-1880", k2
!= NULL
);
316 return !memcmp(k1
, k2
, sizeof *k1
);
319 /* true if @k1 is less than @k2 */
320 static inline int keylt(const reiser4_key
* k1
/* first key to compare */ ,
321 const reiser4_key
* k2
/* second key to compare */ )
323 assert("nikita-1952", k1
!= NULL
);
324 assert("nikita-1953", k2
!= NULL
);
325 return keycmp(k1
, k2
) == LESS_THAN
;
328 /* true if @k1 is less than or equal to @k2 */
329 static inline int keyle(const reiser4_key
* k1
/* first key to compare */ ,
330 const reiser4_key
* k2
/* second key to compare */ )
332 assert("nikita-1954", k1
!= NULL
);
333 assert("nikita-1955", k2
!= NULL
);
334 return keycmp(k1
, k2
) != GREATER_THAN
;
337 /* true if @k1 is greater than @k2 */
338 static inline int keygt(const reiser4_key
* k1
/* first key to compare */ ,
339 const reiser4_key
* k2
/* second key to compare */ )
341 assert("nikita-1959", k1
!= NULL
);
342 assert("nikita-1960", k2
!= NULL
);
343 return keycmp(k1
, k2
) == GREATER_THAN
;
346 /* true if @k1 is greater than or equal to @k2 */
347 static inline int keyge(const reiser4_key
* k1
/* first key to compare */ ,
348 const reiser4_key
* k2
/* second key to compare */ )
350 assert("nikita-1956", k1
!= NULL
);
351 assert("nikita-1957", k2
!= NULL
); /* October 4: sputnik launched
352 * November 3: Laika */
353 return keycmp(k1
, k2
) != LESS_THAN
;
356 static inline void prefetchkey(reiser4_key
* key
)
359 prefetch(&key
->el
[KEY_CACHELINE_END
]);
362 /* (%Lx:%x:%Lx:%Lx:%Lx:%Lx) =
363 1 + 16 + 1 + 1 + 1 + 1 + 1 + 16 + 1 + 16 + 1 + 16 + 1 */
364 /* size of a buffer suitable to hold human readable key representation */
365 #define KEY_BUF_LEN (80)
368 extern void reiser4_print_key(const char *prefix
, const reiser4_key
* key
);
370 #define reiser4_print_key(p,k) noop
373 /* __FS_REISERFS_KEY_H__ */
378 c-indentation-style: "K&R"