1 /* Copyright 2000, 2001, 2002, 2003 by Hans Reiser, licensing governed by
4 /* Declarations of key-related data-structures and operations on keys. */
6 #if !defined(__REISER4_KEY_H__)
7 #define __REISER4_KEY_H__
13 #include <linux/types.h> /* for __u?? */
15 /* Operations on keys in reiser4 tree */
17 /* No access to any of these fields shall be done except via a
18 wrapping macro/function, and that wrapping macro/function shall
19 convert to little endian order. Compare keys will consider cpu byte order. */
21 /* A storage layer implementation difference between a regular unix file body
22 and its attributes is in the typedef below which causes all of the attributes
23 of a file to be near in key to all of the other attributes for all of the
24 files within that directory, and not near to the file itself. It is
25 interesting to consider whether this is the wrong approach, and whether there
26 should be no difference at all. For current usage patterns this choice is
27 probably the right one. */
29 /* possible values for minor packing locality (4 bits required) */
32 KEY_FILE_NAME_MINOR
= 0,
35 /* file attribute name */
36 KEY_ATTR_NAME_MINOR
= 2,
37 /* file attribute value */
38 KEY_ATTR_BODY_MINOR
= 3,
39 /* file body (tail or extent) */
43 /* Everything stored in the tree has a unique key, which means that the tree is
44 (logically) fully ordered by key. Physical order is determined by dynamic
45 heuristics that attempt to reflect key order when allocating available space,
46 and by the repacker. It is stylistically better to put aggregation
47 information into the key. Thus, if you want to segregate extents from tails,
48 it is better to give them distinct minor packing localities rather than
49 changing block_alloc.c to check the node type when deciding where to allocate
52 The need to randomly displace new directories and large files disturbs this
53 symmetry unfortunately. However, it should be noted that this is a need that
54 is not clearly established given the existence of a repacker. Also, in our
55 current implementation tails have a different minor packing locality from
56 extents, and no files have both extents and tails, so maybe symmetry can be
57 had without performance cost after all. Symmetry is what we ship for now....
60 /* Arbitrary major packing localities can be assigned to objects using
61 the reiser4(filenameA/..packing<=some_number) system call.
63 In reiser4, the creat() syscall creates a directory
65 whose default flow (that which is referred to if the directory is
66 read as a file) is the traditional unix file body.
68 whose directory plugin is the 'filedir'
70 whose major packing locality is that of the parent of the object created.
72 The static_stat item is a particular commonly used directory
73 compression (the one for normal unix files).
75 The filedir plugin checks to see if the static_stat item exists.
76 There is a unique key for static_stat. If yes, then it uses the
77 static_stat item for all of the values that it contains. The
78 static_stat item contains a flag for each stat it contains which
79 indicates whether one should look outside the static_stat item for its
83 /* offset of fields in reiser4_key. Value of each element of this enum
84 is index within key (thought as array of __u64's) where this field
87 /* major "locale", aka dirid. Sits in 1st element */
88 KEY_LOCALITY_INDEX
= 0,
89 /* minor "locale", aka item type. Sits in 1st element */
91 ON_LARGE_KEY(KEY_ORDERING_INDEX
,)
92 /* "object band". Sits in 2nd element */
94 /* objectid. Sits in 2nd element */
95 KEY_OBJECTID_INDEX
= KEY_BAND_INDEX
,
96 /* full objectid. Sits in 2nd element */
97 KEY_FULLOID_INDEX
= KEY_BAND_INDEX
,
98 /* Offset. Sits in 3rd element */
100 /* Name hash. Sits in 3rd element */
101 KEY_HASH_INDEX
= KEY_OFFSET_INDEX
,
102 KEY_CACHELINE_END
= KEY_OFFSET_INDEX
,
104 } reiser4_key_field_index
;
106 /* key in reiser4 internal "balanced" tree. It is just array of three
107 64bit integers in disk byte order (little-endian by default). This
108 array is actually indexed by reiser4_key_field. Each __u64 within
109 this array is called "element". Logical key component encoded within
110 elements are called "fields".
112 We declare this as union with second component dummy to suppress
113 inconvenient array<->pointer casts implied in C. */
115 __le64 el
[KEY_LAST_INDEX
];
119 /* bitmasks showing where within reiser4_key particular key is stored. */
120 /* major locality occupies higher 60 bits of the first element */
121 #define KEY_LOCALITY_MASK 0xfffffffffffffff0ull
123 /* minor locality occupies lower 4 bits of the first element */
124 #define KEY_TYPE_MASK 0xfull
126 /* controversial band occupies higher 4 bits of the 2nd element */
127 #define KEY_BAND_MASK 0xf000000000000000ull
129 /* objectid occupies lower 60 bits of the 2nd element */
130 #define KEY_OBJECTID_MASK 0x0fffffffffffffffull
132 /* full 64bit objectid*/
133 #define KEY_FULLOID_MASK 0xffffffffffffffffull
135 /* offset is just 3rd L.M.Nt itself */
136 #define KEY_OFFSET_MASK 0xffffffffffffffffull
138 /* ordering is whole second element */
139 #define KEY_ORDERING_MASK 0xffffffffffffffffull
141 /* how many bits key element should be shifted to left to get particular field
144 KEY_LOCALITY_SHIFT
= 4,
147 KEY_OBJECTID_SHIFT
= 0,
148 KEY_FULLOID_SHIFT
= 0,
149 KEY_OFFSET_SHIFT
= 0,
150 KEY_ORDERING_SHIFT
= 0,
151 } reiser4_key_field_shift
;
154 get_key_el(const reiser4_key
* key
, reiser4_key_field_index off
)
156 assert("nikita-753", key
!= NULL
);
157 assert("nikita-754", off
< KEY_LAST_INDEX
);
158 return le64_to_cpu(get_unaligned(&key
->el
[off
]));
162 set_key_el(reiser4_key
* key
, reiser4_key_field_index off
, __u64 value
)
164 assert("nikita-755", key
!= NULL
);
165 assert("nikita-756", off
< KEY_LAST_INDEX
);
166 put_unaligned(cpu_to_le64(value
), &key
->el
[off
]);
169 /* macro to define getter and setter functions for field F with type T */
170 #define DEFINE_KEY_FIELD(L, U, T) \
171 static inline T get_key_ ## L(const reiser4_key *key) \
173 assert("nikita-750", key != NULL); \
174 return (T) (get_key_el(key, KEY_ ## U ## _INDEX) & \
175 KEY_ ## U ## _MASK) >> KEY_ ## U ## _SHIFT; \
178 static inline void set_key_ ## L(reiser4_key * key, T loc) \
182 assert("nikita-752", key != NULL); \
184 el = get_key_el(key, KEY_ ## U ## _INDEX); \
185 /* clear field bits in the key */ \
186 el &= ~KEY_ ## U ## _MASK; \
187 /* actually it should be \
189 el |= ( loc << KEY_ ## U ## _SHIFT ) & KEY_ ## U ## _MASK; \
191 but we trust user to never pass values that wouldn't fit \
192 into field. Clearing extra bits is one operation, but this \
193 function is time-critical. \
194 But check this in assertion. */ \
195 assert("nikita-759", ((loc << KEY_ ## U ## _SHIFT) & \
196 ~KEY_ ## U ## _MASK) == 0); \
197 el |= (loc << KEY_ ## U ## _SHIFT); \
198 set_key_el(key, KEY_ ## U ## _INDEX, el); \
203 /* define get_key_locality(), set_key_locality() */
204 DEFINE_KEY_FIELD(locality
, LOCALITY
, oid_t
);
205 /* define get_key_type(), set_key_type() */
206 DEFINE_KEY_FIELD(type
, TYPE
, key_minor_locality
);
207 /* define get_key_band(), set_key_band() */
208 DEFINE_KEY_FIELD(band
, BAND
, __u64
);
209 /* define get_key_objectid(), set_key_objectid() */
210 DEFINE_KEY_FIELD(objectid
, OBJECTID
, oid_t
);
211 /* define get_key_fulloid(), set_key_fulloid() */
212 DEFINE_KEY_FIELD(fulloid
, FULLOID
, oid_t
);
213 /* define get_key_offset(), set_key_offset() */
214 DEFINE_KEY_FIELD(offset
, OFFSET
, __u64
);
215 #if (REISER4_LARGE_KEY)
216 /* define get_key_ordering(), set_key_ordering() */
217 DEFINE_KEY_FIELD(ordering
, ORDERING
, __u64
);
219 static inline __u64
get_key_ordering(const reiser4_key
* key
)
224 static inline void set_key_ordering(reiser4_key
* key
, __u64 val
)
229 /* key comparison result */
230 typedef enum { LESS_THAN
= -1, /* if first key is less than second */
231 EQUAL_TO
= 0, /* if keys are equal */
232 GREATER_THAN
= +1 /* if first key is greater than second */
235 void reiser4_key_init(reiser4_key
* key
);
237 /* minimal possible key in the tree. Return pointer to the static storage. */
238 extern const reiser4_key
*reiser4_min_key(void);
239 extern const reiser4_key
*reiser4_max_key(void);
241 /* helper macro for keycmp() */
242 #define KEY_DIFF(k1, k2, field) \
244 typeof(get_key_ ## field(k1)) f1; \
245 typeof(get_key_ ## field(k2)) f2; \
247 f1 = get_key_ ## field(k1); \
248 f2 = get_key_ ## field(k2); \
250 (f1 < f2) ? LESS_THAN : ((f1 == f2) ? EQUAL_TO : GREATER_THAN); \
253 /* helper macro for keycmp() */
254 #define KEY_DIFF_EL(k1, k2, off) \
259 e1 = get_key_el(k1, off); \
260 e2 = get_key_el(k2, off); \
262 (e1 < e2) ? LESS_THAN : ((e1 == e2) ? EQUAL_TO : GREATER_THAN); \
265 /* compare `k1' and `k2'. This function is a heart of "key allocation
266 policy". All you need to implement new policy is to add yet another
268 static inline cmp_t
keycmp(const reiser4_key
* k1
/* first key to compare */ ,
269 const reiser4_key
* k2
/* second key to compare */)
274 * This function is the heart of reiser4 tree-routines. Key comparison
275 * is among most heavily used operations in the file system.
278 assert("nikita-439", k1
!= NULL
);
279 assert("nikita-440", k2
!= NULL
);
281 /* there is no actual branch here: condition is compile time constant
282 * and constant folding and propagation ensures that only one branch
283 * is actually compiled in. */
285 if (REISER4_PLANA_KEY_ALLOCATION
) {
286 /* if physical order of fields in a key is identical
287 with logical order, we can implement key comparison
288 as three 64bit comparisons. */
289 /* logical order of fields in plan-a:
290 locality->type->objectid->offset. */
291 /* compare locality and type at once */
292 result
= KEY_DIFF_EL(k1
, k2
, 0);
293 if (result
== EQUAL_TO
) {
294 /* compare objectid (and band if it's there) */
295 result
= KEY_DIFF_EL(k1
, k2
, 1);
297 if (result
== EQUAL_TO
) {
298 result
= KEY_DIFF_EL(k1
, k2
, 2);
299 if (REISER4_LARGE_KEY
&& result
== EQUAL_TO
)
300 result
= KEY_DIFF_EL(k1
, k2
, 3);
303 } else if (REISER4_3_5_KEY_ALLOCATION
) {
304 result
= KEY_DIFF(k1
, k2
, locality
);
305 if (result
== EQUAL_TO
) {
306 result
= KEY_DIFF(k1
, k2
, objectid
);
307 if (result
== EQUAL_TO
) {
308 result
= KEY_DIFF(k1
, k2
, type
);
309 if (result
== EQUAL_TO
)
310 result
= KEY_DIFF(k1
, k2
, offset
);
314 impossible("nikita-441", "Unknown key allocation scheme!");
318 /* true if @k1 equals @k2 */
319 static inline int keyeq(const reiser4_key
* k1
/* first key to compare */ ,
320 const reiser4_key
* k2
/* second key to compare */)
322 assert("nikita-1879", k1
!= NULL
);
323 assert("nikita-1880", k2
!= NULL
);
324 return !memcmp(k1
, k2
, sizeof *k1
);
327 /* true if @k1 is less than @k2 */
328 static inline int keylt(const reiser4_key
* k1
/* first key to compare */ ,
329 const reiser4_key
* k2
/* second key to compare */)
331 assert("nikita-1952", k1
!= NULL
);
332 assert("nikita-1953", k2
!= NULL
);
333 return keycmp(k1
, k2
) == LESS_THAN
;
336 /* true if @k1 is less than or equal to @k2 */
337 static inline int keyle(const reiser4_key
* k1
/* first key to compare */ ,
338 const reiser4_key
* k2
/* second key to compare */)
340 assert("nikita-1954", k1
!= NULL
);
341 assert("nikita-1955", k2
!= NULL
);
342 return keycmp(k1
, k2
) != GREATER_THAN
;
345 /* true if @k1 is greater than @k2 */
346 static inline int keygt(const reiser4_key
* k1
/* first key to compare */ ,
347 const reiser4_key
* k2
/* second key to compare */)
349 assert("nikita-1959", k1
!= NULL
);
350 assert("nikita-1960", k2
!= NULL
);
351 return keycmp(k1
, k2
) == GREATER_THAN
;
354 /* true if @k1 is greater than or equal to @k2 */
355 static inline int keyge(const reiser4_key
* k1
/* first key to compare */ ,
356 const reiser4_key
* k2
/* second key to compare */)
358 assert("nikita-1956", k1
!= NULL
);
359 assert("nikita-1957", k2
!= NULL
); /* October 4: sputnik launched
360 * November 3: Laika */
361 return keycmp(k1
, k2
) != LESS_THAN
;
364 static inline void prefetchkey(reiser4_key
* key
)
367 prefetch(&key
->el
[KEY_CACHELINE_END
]);
370 /* (%Lx:%x:%Lx:%Lx:%Lx:%Lx) =
371 1 + 16 + 1 + 1 + 1 + 1 + 1 + 16 + 1 + 16 + 1 + 16 + 1 */
372 /* size of a buffer suitable to hold human readable key representation */
373 #define KEY_BUF_LEN (80)
376 extern void reiser4_print_key(const char *prefix
, const reiser4_key
* key
);
378 #define reiser4_print_key(p, k) noop
381 /* __FS_REISERFS_KEY_H__ */
386 c-indentation-style: "K&R"