[PATCH] fix memory scribble in arch/i386/pci/fixup.c
[linux-2.6/verdex.git] / fs / reiserfs / stree.c
blobda23ba75f3d51006fe6b9486916e8dcd5e23497f
1 /*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
5 /*
6 * Written by Anatoly P. Pinchuk pap@namesys.botik.ru
7 * Programm System Institute
8 * Pereslavl-Zalessky Russia
9 */
12 * This file contains functions dealing with S+tree
14 * B_IS_IN_TREE
15 * copy_item_head
16 * comp_short_keys
17 * comp_keys
18 * comp_short_le_keys
19 * le_key2cpu_key
20 * comp_le_keys
21 * bin_search
22 * get_lkey
23 * get_rkey
24 * key_in_buffer
25 * decrement_bcount
26 * decrement_counters_in_path
27 * reiserfs_check_path
28 * pathrelse_and_restore
29 * pathrelse
30 * search_by_key_reada
31 * search_by_key
32 * search_for_position_by_key
33 * comp_items
34 * prepare_for_direct_item
35 * prepare_for_direntry_item
36 * prepare_for_delete_or_cut
37 * calc_deleted_bytes_number
38 * init_tb_struct
39 * padd_item
40 * reiserfs_delete_item
41 * reiserfs_delete_solid_item
42 * reiserfs_delete_object
43 * maybe_indirect_to_direct
44 * indirect_to_direct_roll_back
45 * reiserfs_cut_from_item
46 * truncate_directory
47 * reiserfs_do_truncate
48 * reiserfs_paste_into_item
49 * reiserfs_insert_item
52 #include <linux/config.h>
53 #include <linux/time.h>
54 #include <linux/string.h>
55 #include <linux/pagemap.h>
56 #include <linux/reiserfs_fs.h>
57 #include <linux/smp_lock.h>
58 #include <linux/buffer_head.h>
59 #include <linux/quotaops.h>
61 /* Does the buffer contain a disk block which is in the tree. */
62 inline int B_IS_IN_TREE (const struct buffer_head * p_s_bh)
65 RFALSE( B_LEVEL (p_s_bh) > MAX_HEIGHT,
66 "PAP-1010: block (%b) has too big level (%z)", p_s_bh, p_s_bh);
68 return ( B_LEVEL (p_s_bh) != FREE_LEVEL );
72 // to gets item head in le form
74 inline void copy_item_head(struct item_head * p_v_to,
75 const struct item_head * p_v_from)
77 memcpy (p_v_to, p_v_from, IH_SIZE);
81 /* k1 is pointer to on-disk structure which is stored in little-endian
82 form. k2 is pointer to cpu variable. For key of items of the same
83 object this returns 0.
84 Returns: -1 if key1 < key2
85 0 if key1 == key2
86 1 if key1 > key2 */
87 inline int comp_short_keys (const struct reiserfs_key * le_key,
88 const struct cpu_key * cpu_key)
90 __u32 n;
91 n = le32_to_cpu(le_key->k_dir_id);
92 if (n < cpu_key->on_disk_key.k_dir_id)
93 return -1;
94 if (n > cpu_key->on_disk_key.k_dir_id)
95 return 1;
96 n = le32_to_cpu(le_key->k_objectid);
97 if (n < cpu_key->on_disk_key.k_objectid)
98 return -1;
99 if (n > cpu_key->on_disk_key.k_objectid)
100 return 1;
101 return 0;
104 /* k1 is pointer to on-disk structure which is stored in little-endian
105 form. k2 is pointer to cpu variable.
106 Compare keys using all 4 key fields.
107 Returns: -1 if key1 < key2 0
108 if key1 = key2 1 if key1 > key2 */
109 static inline int comp_keys (const struct reiserfs_key * le_key, const struct cpu_key * cpu_key)
111 int retval;
113 retval = comp_short_keys (le_key, cpu_key);
114 if (retval)
115 return retval;
116 if (le_key_k_offset (le_key_version(le_key), le_key) < cpu_key_k_offset (cpu_key))
117 return -1;
118 if (le_key_k_offset (le_key_version(le_key), le_key) > cpu_key_k_offset (cpu_key))
119 return 1;
121 if (cpu_key->key_length == 3)
122 return 0;
124 /* this part is needed only when tail conversion is in progress */
125 if (le_key_k_type (le_key_version(le_key), le_key) < cpu_key_k_type (cpu_key))
126 return -1;
128 if (le_key_k_type (le_key_version(le_key), le_key) > cpu_key_k_type (cpu_key))
129 return 1;
131 return 0;
135 inline int comp_short_le_keys (const struct reiserfs_key * key1, const struct reiserfs_key * key2)
137 __u32 * p_s_1_u32, * p_s_2_u32;
138 int n_key_length = REISERFS_SHORT_KEY_LEN;
140 p_s_1_u32 = (__u32 *)key1;
141 p_s_2_u32 = (__u32 *)key2;
142 for( ; n_key_length--; ++p_s_1_u32, ++p_s_2_u32 ) {
143 if ( le32_to_cpu (*p_s_1_u32) < le32_to_cpu (*p_s_2_u32) )
144 return -1;
145 if ( le32_to_cpu (*p_s_1_u32) > le32_to_cpu (*p_s_2_u32) )
146 return 1;
148 return 0;
151 inline void le_key2cpu_key (struct cpu_key * to, const struct reiserfs_key * from)
153 int version;
154 to->on_disk_key.k_dir_id = le32_to_cpu (from->k_dir_id);
155 to->on_disk_key.k_objectid = le32_to_cpu (from->k_objectid);
157 // find out version of the key
158 version = le_key_version (from);
159 to->version = version;
160 to->on_disk_key.k_offset = le_key_k_offset(version, from);
161 to->on_disk_key.k_type = le_key_k_type(version, from);
166 // this does not say which one is bigger, it only returns 1 if keys
167 // are not equal, 0 otherwise
168 inline int comp_le_keys (const struct reiserfs_key * k1, const struct reiserfs_key * k2)
170 return memcmp (k1, k2, sizeof (struct reiserfs_key));
173 /**************************************************************************
174 * Binary search toolkit function *
175 * Search for an item in the array by the item key *
176 * Returns: 1 if found, 0 if not found; *
177 * *p_n_pos = number of the searched element if found, else the *
178 * number of the first element that is larger than p_v_key. *
179 **************************************************************************/
180 /* For those not familiar with binary search: n_lbound is the leftmost item that it
181 could be, n_rbound the rightmost item that it could be. We examine the item
182 halfway between n_lbound and n_rbound, and that tells us either that we can increase
183 n_lbound, or decrease n_rbound, or that we have found it, or if n_lbound <= n_rbound that
184 there are no possible items, and we have not found it. With each examination we
185 cut the number of possible items it could be by one more than half rounded down,
186 or we find it. */
187 static inline int bin_search (
188 const void * p_v_key, /* Key to search for. */
189 const void * p_v_base,/* First item in the array. */
190 int p_n_num, /* Number of items in the array. */
191 int p_n_width, /* Item size in the array.
192 searched. Lest the reader be
193 confused, note that this is crafted
194 as a general function, and when it
195 is applied specifically to the array
196 of item headers in a node, p_n_width
197 is actually the item header size not
198 the item size. */
199 int * p_n_pos /* Number of the searched for element. */
201 int n_rbound, n_lbound, n_j;
203 for ( n_j = ((n_rbound = p_n_num - 1) + (n_lbound = 0))/2; n_lbound <= n_rbound; n_j = (n_rbound + n_lbound)/2 )
204 switch( comp_keys((struct reiserfs_key *)((char * )p_v_base + n_j * p_n_width), (struct cpu_key *)p_v_key) ) {
205 case -1: n_lbound = n_j + 1; continue;
206 case 1: n_rbound = n_j - 1; continue;
207 case 0: *p_n_pos = n_j; return ITEM_FOUND; /* Key found in the array. */
210 /* bin_search did not find given key, it returns position of key,
211 that is minimal and greater than the given one. */
212 *p_n_pos = n_lbound;
213 return ITEM_NOT_FOUND;
216 #ifdef CONFIG_REISERFS_CHECK
217 extern struct tree_balance * cur_tb;
218 #endif
222 /* Minimal possible key. It is never in the tree. */
223 const struct reiserfs_key MIN_KEY = {0, 0, {{0, 0},}};
225 /* Maximal possible key. It is never in the tree. */
226 const struct reiserfs_key MAX_KEY = {
227 __constant_cpu_to_le32(0xffffffff),
228 __constant_cpu_to_le32(0xffffffff),
229 {{__constant_cpu_to_le32(0xffffffff),
230 __constant_cpu_to_le32(0xffffffff)},}
233 const struct in_core_key MAX_IN_CORE_KEY = {~0U, ~0U, ~0ULL>>4, 15};
235 /* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom
236 of the path, and going upwards. We must check the path's validity at each step. If the key is not in
237 the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this
238 case we return a special key, either MIN_KEY or MAX_KEY. */
239 static inline const struct reiserfs_key * get_lkey (
240 const struct path * p_s_chk_path,
241 const struct super_block * p_s_sb
243 int n_position, n_path_offset = p_s_chk_path->path_length;
244 struct buffer_head * p_s_parent;
246 RFALSE( n_path_offset < FIRST_PATH_ELEMENT_OFFSET,
247 "PAP-5010: invalid offset in the path");
249 /* While not higher in path than first element. */
250 while ( n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET ) {
252 RFALSE( ! buffer_uptodate(PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)),
253 "PAP-5020: parent is not uptodate");
255 /* Parent at the path is not in the tree now. */
256 if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)) )
257 return &MAX_KEY;
258 /* Check whether position in the parent is correct. */
259 if ( (n_position = PATH_OFFSET_POSITION(p_s_chk_path, n_path_offset)) > B_NR_ITEMS(p_s_parent) )
260 return &MAX_KEY;
261 /* Check whether parent at the path really points to the child. */
262 if ( B_N_CHILD_NUM(p_s_parent, n_position) !=
263 PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset + 1)->b_blocknr )
264 return &MAX_KEY;
265 /* Return delimiting key if position in the parent is not equal to zero. */
266 if ( n_position )
267 return B_N_PDELIM_KEY(p_s_parent, n_position - 1);
269 /* Return MIN_KEY if we are in the root of the buffer tree. */
270 if ( PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
271 SB_ROOT_BLOCK (p_s_sb) )
272 return &MIN_KEY;
273 return &MAX_KEY;
277 /* Get delimiting key of the buffer at the path and its right neighbor. */
278 inline const struct reiserfs_key * get_rkey (
279 const struct path * p_s_chk_path,
280 const struct super_block * p_s_sb
282 int n_position,
283 n_path_offset = p_s_chk_path->path_length;
284 struct buffer_head * p_s_parent;
286 RFALSE( n_path_offset < FIRST_PATH_ELEMENT_OFFSET,
287 "PAP-5030: invalid offset in the path");
289 while ( n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET ) {
291 RFALSE( ! buffer_uptodate(PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)),
292 "PAP-5040: parent is not uptodate");
294 /* Parent at the path is not in the tree now. */
295 if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)) )
296 return &MIN_KEY;
297 /* Check whether position in the parent is correct. */
298 if ( (n_position = PATH_OFFSET_POSITION(p_s_chk_path, n_path_offset)) > B_NR_ITEMS(p_s_parent) )
299 return &MIN_KEY;
300 /* Check whether parent at the path really points to the child. */
301 if ( B_N_CHILD_NUM(p_s_parent, n_position) !=
302 PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset + 1)->b_blocknr )
303 return &MIN_KEY;
304 /* Return delimiting key if position in the parent is not the last one. */
305 if ( n_position != B_NR_ITEMS(p_s_parent) )
306 return B_N_PDELIM_KEY(p_s_parent, n_position);
308 /* Return MAX_KEY if we are in the root of the buffer tree. */
309 if ( PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
310 SB_ROOT_BLOCK (p_s_sb) )
311 return &MAX_KEY;
312 return &MIN_KEY;
316 /* Check whether a key is contained in the tree rooted from a buffer at a path. */
317 /* This works by looking at the left and right delimiting keys for the buffer in the last path_element in
318 the path. These delimiting keys are stored at least one level above that buffer in the tree. If the
319 buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in
320 this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */
321 static inline int key_in_buffer (
322 struct path * p_s_chk_path, /* Path which should be checked. */
323 const struct cpu_key * p_s_key, /* Key which should be checked. */
324 struct super_block * p_s_sb /* Super block pointer. */
327 RFALSE( ! p_s_key || p_s_chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET ||
328 p_s_chk_path->path_length > MAX_HEIGHT,
329 "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
330 p_s_key, p_s_chk_path->path_length);
331 RFALSE( !PATH_PLAST_BUFFER(p_s_chk_path)->b_bdev,
332 "PAP-5060: device must not be NODEV");
334 if ( comp_keys(get_lkey(p_s_chk_path, p_s_sb), p_s_key) == 1 )
335 /* left delimiting key is bigger, that the key we look for */
336 return 0;
337 // if ( comp_keys(p_s_key, get_rkey(p_s_chk_path, p_s_sb)) != -1 )
338 if ( comp_keys(get_rkey(p_s_chk_path, p_s_sb), p_s_key) != 1 )
339 /* p_s_key must be less than right delimitiing key */
340 return 0;
341 return 1;
345 inline void decrement_bcount(
346 struct buffer_head * p_s_bh
347 ) {
348 if ( p_s_bh ) {
349 if ( atomic_read (&(p_s_bh->b_count)) ) {
350 put_bh(p_s_bh) ;
351 return;
353 reiserfs_panic(NULL, "PAP-5070: decrement_bcount: trying to free free buffer %b", p_s_bh);
358 /* Decrement b_count field of the all buffers in the path. */
359 void decrement_counters_in_path (
360 struct path * p_s_search_path
362 int n_path_offset = p_s_search_path->path_length;
364 RFALSE( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET ||
365 n_path_offset > EXTENDED_MAX_HEIGHT - 1,
366 "PAP-5080: invalid path offset of %d", n_path_offset);
368 while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET ) {
369 struct buffer_head * bh;
371 bh = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--);
372 decrement_bcount (bh);
374 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
378 int reiserfs_check_path(struct path *p) {
379 RFALSE( p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
380 "path not properly relsed") ;
381 return 0 ;
385 /* Release all buffers in the path. Restore dirty bits clean
386 ** when preparing the buffer for the log
388 ** only called from fix_nodes()
390 void pathrelse_and_restore (
391 struct super_block *s,
392 struct path * p_s_search_path
394 int n_path_offset = p_s_search_path->path_length;
396 RFALSE( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
397 "clm-4000: invalid path offset");
399 while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET ) {
400 reiserfs_restore_prepared_buffer(s, PATH_OFFSET_PBUFFER(p_s_search_path,
401 n_path_offset));
402 brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--));
404 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
407 /* Release all buffers in the path. */
408 void pathrelse (
409 struct path * p_s_search_path
411 int n_path_offset = p_s_search_path->path_length;
413 RFALSE( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
414 "PAP-5090: invalid path offset");
416 while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET )
417 brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--));
419 p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
424 static int is_leaf (char * buf, int blocksize, struct buffer_head * bh)
426 struct block_head * blkh;
427 struct item_head * ih;
428 int used_space;
429 int prev_location;
430 int i;
431 int nr;
433 blkh = (struct block_head *)buf;
434 if ( blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
435 reiserfs_warning (NULL, "is_leaf: this should be caught earlier");
436 return 0;
439 nr = blkh_nr_item(blkh);
440 if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
441 /* item number is too big or too small */
442 reiserfs_warning (NULL, "is_leaf: nr_item seems wrong: %z", bh);
443 return 0;
445 ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
446 used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location (ih));
447 if (used_space != blocksize - blkh_free_space(blkh)) {
448 /* free space does not match to calculated amount of use space */
449 reiserfs_warning (NULL, "is_leaf: free space seems wrong: %z", bh);
450 return 0;
453 // FIXME: it is_leaf will hit performance too much - we may have
454 // return 1 here
456 /* check tables of item heads */
457 ih = (struct item_head *)(buf + BLKH_SIZE);
458 prev_location = blocksize;
459 for (i = 0; i < nr; i ++, ih ++) {
460 if ( le_ih_k_type(ih) == TYPE_ANY) {
461 reiserfs_warning (NULL, "is_leaf: wrong item type for item %h",ih);
462 return 0;
464 if (ih_location (ih) >= blocksize || ih_location (ih) < IH_SIZE * nr) {
465 reiserfs_warning (NULL, "is_leaf: item location seems wrong: %h", ih);
466 return 0;
468 if (ih_item_len (ih) < 1 || ih_item_len (ih) > MAX_ITEM_LEN (blocksize)) {
469 reiserfs_warning (NULL, "is_leaf: item length seems wrong: %h", ih);
470 return 0;
472 if (prev_location - ih_location (ih) != ih_item_len (ih)) {
473 reiserfs_warning (NULL, "is_leaf: item location seems wrong (second one): %h", ih);
474 return 0;
476 prev_location = ih_location (ih);
479 // one may imagine much more checks
480 return 1;
484 /* returns 1 if buf looks like an internal node, 0 otherwise */
485 static int is_internal (char * buf, int blocksize, struct buffer_head * bh)
487 struct block_head * blkh;
488 int nr;
489 int used_space;
491 blkh = (struct block_head *)buf;
492 nr = blkh_level(blkh);
493 if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
494 /* this level is not possible for internal nodes */
495 reiserfs_warning (NULL, "is_internal: this should be caught earlier");
496 return 0;
499 nr = blkh_nr_item(blkh);
500 if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
501 /* for internal which is not root we might check min number of keys */
502 reiserfs_warning (NULL, "is_internal: number of key seems wrong: %z", bh);
503 return 0;
506 used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
507 if (used_space != blocksize - blkh_free_space(blkh)) {
508 reiserfs_warning (NULL, "is_internal: free space seems wrong: %z", bh);
509 return 0;
512 // one may imagine much more checks
513 return 1;
517 // make sure that bh contains formatted node of reiserfs tree of
518 // 'level'-th level
519 static int is_tree_node (struct buffer_head * bh, int level)
521 if (B_LEVEL (bh) != level) {
522 reiserfs_warning (NULL, "is_tree_node: node level %d does not match to the expected one %d",
523 B_LEVEL (bh), level);
524 return 0;
526 if (level == DISK_LEAF_NODE_LEVEL)
527 return is_leaf (bh->b_data, bh->b_size, bh);
529 return is_internal (bh->b_data, bh->b_size, bh);
534 #define SEARCH_BY_KEY_READA 16
536 /* The function is NOT SCHEDULE-SAFE! */
537 static void search_by_key_reada (struct super_block * s,
538 struct buffer_head **bh,
539 unsigned long *b, int num)
541 int i,j;
543 for (i = 0 ; i < num ; i++) {
544 bh[i] = sb_getblk (s, b[i]);
546 for (j = 0 ; j < i ; j++) {
548 * note, this needs attention if we are getting rid of the BKL
549 * you have to make sure the prepared bit isn't set on this buffer
551 if (!buffer_uptodate(bh[j]))
552 ll_rw_block(READA, 1, bh + j);
553 brelse(bh[j]);
557 /**************************************************************************
558 * Algorithm SearchByKey *
559 * look for item in the Disk S+Tree by its key *
560 * Input: p_s_sb - super block *
561 * p_s_key - pointer to the key to search *
562 * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR *
563 * p_s_search_path - path from the root to the needed leaf *
564 **************************************************************************/
566 /* This function fills up the path from the root to the leaf as it
567 descends the tree looking for the key. It uses reiserfs_bread to
568 try to find buffers in the cache given their block number. If it
569 does not find them in the cache it reads them from disk. For each
570 node search_by_key finds using reiserfs_bread it then uses
571 bin_search to look through that node. bin_search will find the
572 position of the block_number of the next node if it is looking
573 through an internal node. If it is looking through a leaf node
574 bin_search will find the position of the item which has key either
575 equal to given key, or which is the maximal key less than the given
576 key. search_by_key returns a path that must be checked for the
577 correctness of the top of the path but need not be checked for the
578 correctness of the bottom of the path */
579 /* The function is NOT SCHEDULE-SAFE! */
580 int search_by_key (struct super_block * p_s_sb,
581 const struct cpu_key * p_s_key, /* Key to search. */
582 struct path * p_s_search_path, /* This structure was
583 allocated and initialized
584 by the calling
585 function. It is filled up
586 by this function. */
587 int n_stop_level /* How far down the tree to search. To
588 stop at leaf level - set to
589 DISK_LEAF_NODE_LEVEL */
591 int n_block_number;
592 int expected_level;
593 struct buffer_head * p_s_bh;
594 struct path_element * p_s_last_element;
595 int n_node_level, n_retval;
596 int right_neighbor_of_leaf_node;
597 int fs_gen;
598 struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
599 unsigned long reada_blocks[SEARCH_BY_KEY_READA];
600 int reada_count = 0;
602 #ifdef CONFIG_REISERFS_CHECK
603 int n_repeat_counter = 0;
604 #endif
606 PROC_INFO_INC( p_s_sb, search_by_key );
608 /* As we add each node to a path we increase its count. This means that
609 we must be careful to release all nodes in a path before we either
610 discard the path struct or re-use the path struct, as we do here. */
612 decrement_counters_in_path(p_s_search_path);
614 right_neighbor_of_leaf_node = 0;
616 /* With each iteration of this loop we search through the items in the
617 current node, and calculate the next current node(next path element)
618 for the next iteration of this loop.. */
619 n_block_number = SB_ROOT_BLOCK (p_s_sb);
620 expected_level = -1;
621 while ( 1 ) {
623 #ifdef CONFIG_REISERFS_CHECK
624 if ( !(++n_repeat_counter % 50000) )
625 reiserfs_warning (p_s_sb, "PAP-5100: search_by_key: %s:"
626 "there were %d iterations of while loop "
627 "looking for key %K",
628 current->comm, n_repeat_counter, p_s_key);
629 #endif
631 /* prep path to have another element added to it. */
632 p_s_last_element = PATH_OFFSET_PELEMENT(p_s_search_path, ++p_s_search_path->path_length);
633 fs_gen = get_generation (p_s_sb);
635 /* Read the next tree node, and set the last element in the path to
636 have a pointer to it. */
637 if ((p_s_bh = p_s_last_element->pe_buffer =
638 sb_getblk(p_s_sb, n_block_number)) ) {
639 if (!buffer_uptodate(p_s_bh) && reada_count > 1) {
640 search_by_key_reada (p_s_sb, reada_bh,
641 reada_blocks, reada_count);
643 ll_rw_block(READ, 1, &p_s_bh);
644 wait_on_buffer(p_s_bh);
645 if (!buffer_uptodate(p_s_bh))
646 goto io_error;
647 } else {
648 io_error:
649 p_s_search_path->path_length --;
650 pathrelse(p_s_search_path);
651 return IO_ERROR;
653 reada_count = 0;
654 if (expected_level == -1)
655 expected_level = SB_TREE_HEIGHT (p_s_sb);
656 expected_level --;
658 /* It is possible that schedule occurred. We must check whether the key
659 to search is still in the tree rooted from the current buffer. If
660 not then repeat search from the root. */
661 if ( fs_changed (fs_gen, p_s_sb) &&
662 (!B_IS_IN_TREE (p_s_bh) ||
663 B_LEVEL(p_s_bh) != expected_level ||
664 !key_in_buffer(p_s_search_path, p_s_key, p_s_sb))) {
665 PROC_INFO_INC( p_s_sb, search_by_key_fs_changed );
666 PROC_INFO_INC( p_s_sb, search_by_key_restarted );
667 PROC_INFO_INC( p_s_sb, sbk_restarted[ expected_level - 1 ] );
668 decrement_counters_in_path(p_s_search_path);
670 /* Get the root block number so that we can repeat the search
671 starting from the root. */
672 n_block_number = SB_ROOT_BLOCK (p_s_sb);
673 expected_level = -1;
674 right_neighbor_of_leaf_node = 0;
676 /* repeat search from the root */
677 continue;
680 /* only check that the key is in the buffer if p_s_key is not
681 equal to the MAX_KEY. Latter case is only possible in
682 "finish_unfinished()" processing during mount. */
683 RFALSE( comp_keys( &MAX_KEY, p_s_key ) &&
684 ! key_in_buffer(p_s_search_path, p_s_key, p_s_sb),
685 "PAP-5130: key is not in the buffer");
686 #ifdef CONFIG_REISERFS_CHECK
687 if ( cur_tb ) {
688 print_cur_tb ("5140");
689 reiserfs_panic(p_s_sb, "PAP-5140: search_by_key: schedule occurred in do_balance!");
691 #endif
693 // make sure, that the node contents look like a node of
694 // certain level
695 if (!is_tree_node (p_s_bh, expected_level)) {
696 reiserfs_warning (p_s_sb, "vs-5150: search_by_key: "
697 "invalid format found in block %ld. Fsck?",
698 p_s_bh->b_blocknr);
699 pathrelse (p_s_search_path);
700 return IO_ERROR;
703 /* ok, we have acquired next formatted node in the tree */
704 n_node_level = B_LEVEL (p_s_bh);
706 PROC_INFO_BH_STAT( p_s_sb, p_s_bh, n_node_level - 1 );
708 RFALSE( n_node_level < n_stop_level,
709 "vs-5152: tree level (%d) is less than stop level (%d)",
710 n_node_level, n_stop_level);
712 n_retval = bin_search( p_s_key, B_N_PITEM_HEAD(p_s_bh, 0),
713 B_NR_ITEMS(p_s_bh),
714 ( n_node_level == DISK_LEAF_NODE_LEVEL ) ? IH_SIZE : KEY_SIZE,
715 &(p_s_last_element->pe_position));
716 if (n_node_level == n_stop_level) {
717 return n_retval;
720 /* we are not in the stop level */
721 if (n_retval == ITEM_FOUND)
722 /* item has been found, so we choose the pointer which is to the right of the found one */
723 p_s_last_element->pe_position++;
725 /* if item was not found we choose the position which is to
726 the left of the found item. This requires no code,
727 bin_search did it already.*/
729 /* So we have chosen a position in the current node which is
730 an internal node. Now we calculate child block number by
731 position in the node. */
732 n_block_number = B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position);
734 /* if we are going to read leaf nodes, try for read ahead as well */
735 if ((p_s_search_path->reada & PATH_READA) &&
736 n_node_level == DISK_LEAF_NODE_LEVEL + 1)
738 int pos = p_s_last_element->pe_position;
739 int limit = B_NR_ITEMS(p_s_bh);
740 struct reiserfs_key *le_key;
742 if (p_s_search_path->reada & PATH_READA_BACK)
743 limit = 0;
744 while(reada_count < SEARCH_BY_KEY_READA) {
745 if (pos == limit)
746 break;
747 reada_blocks[reada_count++] = B_N_CHILD_NUM(p_s_bh, pos);
748 if (p_s_search_path->reada & PATH_READA_BACK)
749 pos--;
750 else
751 pos++;
754 * check to make sure we're in the same object
756 le_key = B_N_PDELIM_KEY(p_s_bh, pos);
757 if (le32_to_cpu(le_key->k_objectid) !=
758 p_s_key->on_disk_key.k_objectid)
760 break;
768 /* Form the path to an item and position in this item which contains
769 file byte defined by p_s_key. If there is no such item
770 corresponding to the key, we point the path to the item with
771 maximal key less than p_s_key, and *p_n_pos_in_item is set to one
772 past the last entry/byte in the item. If searching for entry in a
773 directory item, and it is not found, *p_n_pos_in_item is set to one
774 entry more than the entry with maximal key which is less than the
775 sought key.
777 Note that if there is no entry in this same node which is one more,
778 then we point to an imaginary entry. for direct items, the
779 position is in units of bytes, for indirect items the position is
780 in units of blocknr entries, for directory items the position is in
781 units of directory entries. */
783 /* The function is NOT SCHEDULE-SAFE! */
784 int search_for_position_by_key (struct super_block * p_s_sb, /* Pointer to the super block. */
785 const struct cpu_key * p_cpu_key, /* Key to search (cpu variable) */
786 struct path * p_s_search_path /* Filled up by this function. */
788 struct item_head * p_le_ih; /* pointer to on-disk structure */
789 int n_blk_size;
790 loff_t item_offset, offset;
791 struct reiserfs_dir_entry de;
792 int retval;
794 /* If searching for directory entry. */
795 if ( is_direntry_cpu_key (p_cpu_key) )
796 return search_by_entry_key (p_s_sb, p_cpu_key, p_s_search_path, &de);
798 /* If not searching for directory entry. */
800 /* If item is found. */
801 retval = search_item (p_s_sb, p_cpu_key, p_s_search_path);
802 if (retval == IO_ERROR)
803 return retval;
804 if ( retval == ITEM_FOUND ) {
806 RFALSE( ! ih_item_len(
807 B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path),
808 PATH_LAST_POSITION(p_s_search_path))),
809 "PAP-5165: item length equals zero");
811 pos_in_item(p_s_search_path) = 0;
812 return POSITION_FOUND;
815 RFALSE( ! PATH_LAST_POSITION(p_s_search_path),
816 "PAP-5170: position equals zero");
818 /* Item is not found. Set path to the previous item. */
819 p_le_ih = B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path), --PATH_LAST_POSITION(p_s_search_path));
820 n_blk_size = p_s_sb->s_blocksize;
822 if (comp_short_keys (&(p_le_ih->ih_key), p_cpu_key)) {
823 return FILE_NOT_FOUND;
826 // FIXME: quite ugly this far
828 item_offset = le_ih_k_offset (p_le_ih);
829 offset = cpu_key_k_offset (p_cpu_key);
831 /* Needed byte is contained in the item pointed to by the path.*/
832 if (item_offset <= offset &&
833 item_offset + op_bytes_number (p_le_ih, n_blk_size) > offset) {
834 pos_in_item (p_s_search_path) = offset - item_offset;
835 if ( is_indirect_le_ih(p_le_ih) ) {
836 pos_in_item (p_s_search_path) /= n_blk_size;
838 return POSITION_FOUND;
841 /* Needed byte is not contained in the item pointed to by the
842 path. Set pos_in_item out of the item. */
843 if ( is_indirect_le_ih (p_le_ih) )
844 pos_in_item (p_s_search_path) = ih_item_len(p_le_ih) / UNFM_P_SIZE;
845 else
846 pos_in_item (p_s_search_path) = ih_item_len( p_le_ih );
848 return POSITION_NOT_FOUND;
852 /* Compare given item and item pointed to by the path. */
853 int comp_items (const struct item_head * stored_ih, const struct path * p_s_path)
855 struct buffer_head * p_s_bh;
856 struct item_head * ih;
858 /* Last buffer at the path is not in the tree. */
859 if ( ! B_IS_IN_TREE(p_s_bh = PATH_PLAST_BUFFER(p_s_path)) )
860 return 1;
862 /* Last path position is invalid. */
863 if ( PATH_LAST_POSITION(p_s_path) >= B_NR_ITEMS(p_s_bh) )
864 return 1;
866 /* we need only to know, whether it is the same item */
867 ih = get_ih (p_s_path);
868 return memcmp (stored_ih, ih, IH_SIZE);
872 /* unformatted nodes are not logged anymore, ever. This is safe
873 ** now
875 #define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1)
877 // block can not be forgotten as it is in I/O or held by someone
878 #define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh)))
882 // prepare for delete or cut of direct item
883 static inline int prepare_for_direct_item (struct path * path,
884 struct item_head * le_ih,
885 struct inode * inode,
886 loff_t new_file_length,
887 int * cut_size)
889 loff_t round_len;
892 if ( new_file_length == max_reiserfs_offset (inode) ) {
893 /* item has to be deleted */
894 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
895 return M_DELETE;
898 // new file gets truncated
899 if (get_inode_item_key_version (inode) == KEY_FORMAT_3_6) {
901 round_len = ROUND_UP (new_file_length);
902 /* this was n_new_file_length < le_ih ... */
903 if ( round_len < le_ih_k_offset (le_ih) ) {
904 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
905 return M_DELETE; /* Delete this item. */
907 /* Calculate first position and size for cutting from item. */
908 pos_in_item (path) = round_len - (le_ih_k_offset (le_ih) - 1);
909 *cut_size = -(ih_item_len(le_ih) - pos_in_item(path));
911 return M_CUT; /* Cut from this item. */
915 // old file: items may have any length
917 if ( new_file_length < le_ih_k_offset (le_ih) ) {
918 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
919 return M_DELETE; /* Delete this item. */
921 /* Calculate first position and size for cutting from item. */
922 *cut_size = -(ih_item_len(le_ih) -
923 (pos_in_item (path) = new_file_length + 1 - le_ih_k_offset (le_ih)));
924 return M_CUT; /* Cut from this item. */
928 static inline int prepare_for_direntry_item (struct path * path,
929 struct item_head * le_ih,
930 struct inode * inode,
931 loff_t new_file_length,
932 int * cut_size)
934 if (le_ih_k_offset (le_ih) == DOT_OFFSET &&
935 new_file_length == max_reiserfs_offset (inode)) {
936 RFALSE( ih_entry_count (le_ih) != 2,
937 "PAP-5220: incorrect empty directory item (%h)", le_ih);
938 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
939 return M_DELETE; /* Delete the directory item containing "." and ".." entry. */
942 if ( ih_entry_count (le_ih) == 1 ) {
943 /* Delete the directory item such as there is one record only
944 in this item*/
945 *cut_size = -(IH_SIZE + ih_item_len(le_ih));
946 return M_DELETE;
949 /* Cut one record from the directory item. */
950 *cut_size = -(DEH_SIZE + entry_length (get_last_bh (path), le_ih, pos_in_item (path)));
951 return M_CUT;
955 /* If the path points to a directory or direct item, calculate mode and the size cut, for balance.
956 If the path points to an indirect item, remove some number of its unformatted nodes.
957 In case of file truncate calculate whether this item must be deleted/truncated or last
958 unformatted node of this item will be converted to a direct item.
959 This function returns a determination of what balance mode the calling function should employ. */
960 static char prepare_for_delete_or_cut(
961 struct reiserfs_transaction_handle *th,
962 struct inode * inode,
963 struct path * p_s_path,
964 const struct cpu_key * p_s_item_key,
965 int * p_n_removed, /* Number of unformatted nodes which were removed
966 from end of the file. */
967 int * p_n_cut_size,
968 unsigned long long n_new_file_length /* MAX_KEY_OFFSET in case of delete. */
970 struct super_block * p_s_sb = inode->i_sb;
971 struct item_head * p_le_ih = PATH_PITEM_HEAD(p_s_path);
972 struct buffer_head * p_s_bh = PATH_PLAST_BUFFER(p_s_path);
974 BUG_ON (!th->t_trans_id);
976 /* Stat_data item. */
977 if ( is_statdata_le_ih (p_le_ih) ) {
979 RFALSE( n_new_file_length != max_reiserfs_offset (inode),
980 "PAP-5210: mode must be M_DELETE");
982 *p_n_cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
983 return M_DELETE;
987 /* Directory item. */
988 if ( is_direntry_le_ih (p_le_ih) )
989 return prepare_for_direntry_item (p_s_path, p_le_ih, inode, n_new_file_length, p_n_cut_size);
991 /* Direct item. */
992 if ( is_direct_le_ih (p_le_ih) )
993 return prepare_for_direct_item (p_s_path, p_le_ih, inode, n_new_file_length, p_n_cut_size);
996 /* Case of an indirect item. */
998 int n_unfm_number, /* Number of the item unformatted nodes. */
999 n_counter,
1000 n_blk_size;
1001 __le32 * p_n_unfm_pointer; /* Pointer to the unformatted node number. */
1002 __u32 tmp;
1003 struct item_head s_ih; /* Item header. */
1004 char c_mode; /* Returned mode of the balance. */
1005 int need_research;
1008 n_blk_size = p_s_sb->s_blocksize;
1010 /* Search for the needed object indirect item until there are no unformatted nodes to be removed. */
1011 do {
1012 need_research = 0;
1013 p_s_bh = PATH_PLAST_BUFFER(p_s_path);
1014 /* Copy indirect item header to a temp variable. */
1015 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
1016 /* Calculate number of unformatted nodes in this item. */
1017 n_unfm_number = I_UNFM_NUM(&s_ih);
1019 RFALSE( ! is_indirect_le_ih(&s_ih) || ! n_unfm_number ||
1020 pos_in_item (p_s_path) + 1 != n_unfm_number,
1021 "PAP-5240: invalid item %h "
1022 "n_unfm_number = %d *p_n_pos_in_item = %d",
1023 &s_ih, n_unfm_number, pos_in_item (p_s_path));
1025 /* Calculate balance mode and position in the item to remove unformatted nodes. */
1026 if ( n_new_file_length == max_reiserfs_offset (inode) ) {/* Case of delete. */
1027 pos_in_item (p_s_path) = 0;
1028 *p_n_cut_size = -(IH_SIZE + ih_item_len(&s_ih));
1029 c_mode = M_DELETE;
1031 else { /* Case of truncate. */
1032 if ( n_new_file_length < le_ih_k_offset (&s_ih) ) {
1033 pos_in_item (p_s_path) = 0;
1034 *p_n_cut_size = -(IH_SIZE + ih_item_len(&s_ih));
1035 c_mode = M_DELETE; /* Delete this item. */
1037 else {
1038 /* indirect item must be truncated starting from *p_n_pos_in_item-th position */
1039 pos_in_item (p_s_path) = (n_new_file_length + n_blk_size - le_ih_k_offset (&s_ih) ) >> p_s_sb->s_blocksize_bits;
1041 RFALSE( pos_in_item (p_s_path) > n_unfm_number,
1042 "PAP-5250: invalid position in the item");
1044 /* Either convert last unformatted node of indirect item to direct item or increase
1045 its free space. */
1046 if ( pos_in_item (p_s_path) == n_unfm_number ) {
1047 *p_n_cut_size = 0; /* Nothing to cut. */
1048 return M_CONVERT; /* Maybe convert last unformatted node to the direct item. */
1050 /* Calculate size to cut. */
1051 *p_n_cut_size = -(ih_item_len(&s_ih) - pos_in_item(p_s_path) * UNFM_P_SIZE);
1053 c_mode = M_CUT; /* Cut from this indirect item. */
1057 RFALSE( n_unfm_number <= pos_in_item (p_s_path),
1058 "PAP-5260: invalid position in the indirect item");
1060 /* pointers to be cut */
1061 n_unfm_number -= pos_in_item (p_s_path);
1062 /* Set pointer to the last unformatted node pointer that is to be cut. */
1063 p_n_unfm_pointer = (__le32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1 - *p_n_removed;
1066 /* We go through the unformatted nodes pointers of the indirect
1067 item and look for the unformatted nodes in the cache. If we
1068 found some of them we free it, zero corresponding indirect item
1069 entry and log buffer containing that indirect item. For this we
1070 need to prepare last path element for logging. If some
1071 unformatted node has b_count > 1 we must not free this
1072 unformatted node since it is in use. */
1073 reiserfs_prepare_for_journal(p_s_sb, p_s_bh, 1);
1074 // note: path could be changed, first line in for loop takes care
1075 // of it
1077 for (n_counter = *p_n_removed;
1078 n_counter < n_unfm_number; n_counter++, p_n_unfm_pointer-- ) {
1080 cond_resched();
1081 if (item_moved (&s_ih, p_s_path)) {
1082 need_research = 1 ;
1083 break;
1085 RFALSE( p_n_unfm_pointer < (__le32 *)B_I_PITEM(p_s_bh, &s_ih) ||
1086 p_n_unfm_pointer > (__le32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1,
1087 "vs-5265: pointer out of range");
1089 /* Hole, nothing to remove. */
1090 if ( ! get_block_num(p_n_unfm_pointer,0) ) {
1091 (*p_n_removed)++;
1092 continue;
1095 (*p_n_removed)++;
1097 tmp = get_block_num(p_n_unfm_pointer,0);
1098 put_block_num(p_n_unfm_pointer, 0, 0);
1099 journal_mark_dirty (th, p_s_sb, p_s_bh);
1100 reiserfs_free_block(th, inode, tmp, 1);
1101 if ( item_moved (&s_ih, p_s_path) ) {
1102 need_research = 1;
1103 break ;
1107 /* a trick. If the buffer has been logged, this
1108 ** will do nothing. If we've broken the loop without
1109 ** logging it, it will restore the buffer
1112 reiserfs_restore_prepared_buffer(p_s_sb, p_s_bh);
1114 /* This loop can be optimized. */
1115 } while ( (*p_n_removed < n_unfm_number || need_research) &&
1116 search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_FOUND );
1118 RFALSE( *p_n_removed < n_unfm_number,
1119 "PAP-5310: indirect item is not found");
1120 RFALSE( item_moved (&s_ih, p_s_path),
1121 "after while, comp failed, retry") ;
1123 if (c_mode == M_CUT)
1124 pos_in_item (p_s_path) *= UNFM_P_SIZE;
1125 return c_mode;
1129 /* Calculate number of bytes which will be deleted or cut during balance */
1130 static int calc_deleted_bytes_number(
1131 struct tree_balance * p_s_tb,
1132 char c_mode
1134 int n_del_size;
1135 struct item_head * p_le_ih = PATH_PITEM_HEAD(p_s_tb->tb_path);
1137 if ( is_statdata_le_ih (p_le_ih) )
1138 return 0;
1140 n_del_size = ( c_mode == M_DELETE ) ? ih_item_len(p_le_ih) : -p_s_tb->insert_size[0];
1141 if ( is_direntry_le_ih (p_le_ih) ) {
1142 // return EMPTY_DIR_SIZE; /* We delete emty directoris only. */
1143 // we can't use EMPTY_DIR_SIZE, as old format dirs have a different
1144 // empty size. ick. FIXME, is this right?
1146 return n_del_size ;
1149 if ( is_indirect_le_ih (p_le_ih) )
1150 n_del_size = (n_del_size/UNFM_P_SIZE)*
1151 (PATH_PLAST_BUFFER(p_s_tb->tb_path)->b_size);// - get_ih_free_space (p_le_ih);
1152 return n_del_size;
1155 static void init_tb_struct(
1156 struct reiserfs_transaction_handle *th,
1157 struct tree_balance * p_s_tb,
1158 struct super_block * p_s_sb,
1159 struct path * p_s_path,
1160 int n_size
1163 BUG_ON (!th->t_trans_id);
1165 memset (p_s_tb,'\0',sizeof(struct tree_balance));
1166 p_s_tb->transaction_handle = th ;
1167 p_s_tb->tb_sb = p_s_sb;
1168 p_s_tb->tb_path = p_s_path;
1169 PATH_OFFSET_PBUFFER(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
1170 PATH_OFFSET_POSITION(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
1171 p_s_tb->insert_size[0] = n_size;
1176 void padd_item (char * item, int total_length, int length)
1178 int i;
1180 for (i = total_length; i > length; )
1181 item [--i] = 0;
1184 #ifdef REISERQUOTA_DEBUG
1185 char key2type(struct reiserfs_key *ih)
1187 if (is_direntry_le_key(2, ih))
1188 return 'd';
1189 if (is_direct_le_key(2, ih))
1190 return 'D';
1191 if (is_indirect_le_key(2, ih))
1192 return 'i';
1193 if (is_statdata_le_key(2, ih))
1194 return 's';
1195 return 'u';
1198 char head2type(struct item_head *ih)
1200 if (is_direntry_le_ih(ih))
1201 return 'd';
1202 if (is_direct_le_ih(ih))
1203 return 'D';
1204 if (is_indirect_le_ih(ih))
1205 return 'i';
1206 if (is_statdata_le_ih(ih))
1207 return 's';
1208 return 'u';
1210 #endif
1212 /* Delete object item. */
1213 int reiserfs_delete_item (struct reiserfs_transaction_handle *th,
1214 struct path * p_s_path, /* Path to the deleted item. */
1215 const struct cpu_key * p_s_item_key, /* Key to search for the deleted item. */
1216 struct inode * p_s_inode,/* inode is here just to update i_blocks and quotas */
1217 struct buffer_head * p_s_un_bh) /* NULL or unformatted node pointer. */
1219 struct super_block * p_s_sb = p_s_inode->i_sb;
1220 struct tree_balance s_del_balance;
1221 struct item_head s_ih;
1222 struct item_head *q_ih;
1223 int quota_cut_bytes;
1224 int n_ret_value,
1225 n_del_size,
1226 n_removed;
1228 #ifdef CONFIG_REISERFS_CHECK
1229 char c_mode;
1230 int n_iter = 0;
1231 #endif
1233 BUG_ON (!th->t_trans_id);
1235 init_tb_struct(th, &s_del_balance, p_s_sb, p_s_path, 0/*size is unknown*/);
1237 while ( 1 ) {
1238 n_removed = 0;
1240 #ifdef CONFIG_REISERFS_CHECK
1241 n_iter++;
1242 c_mode =
1243 #endif
1244 prepare_for_delete_or_cut(th, p_s_inode, p_s_path, p_s_item_key, &n_removed, &n_del_size, max_reiserfs_offset (p_s_inode));
1246 RFALSE( c_mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
1248 copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
1249 s_del_balance.insert_size[0] = n_del_size;
1251 n_ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
1252 if ( n_ret_value != REPEAT_SEARCH )
1253 break;
1255 PROC_INFO_INC( p_s_sb, delete_item_restarted );
1257 // file system changed, repeat search
1258 n_ret_value = search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path);
1259 if (n_ret_value == IO_ERROR)
1260 break;
1261 if (n_ret_value == FILE_NOT_FOUND) {
1262 reiserfs_warning (p_s_sb, "vs-5340: reiserfs_delete_item: "
1263 "no items of the file %K found", p_s_item_key);
1264 break;
1266 } /* while (1) */
1268 if ( n_ret_value != CARRY_ON ) {
1269 unfix_nodes(&s_del_balance);
1270 return 0;
1273 // reiserfs_delete_item returns item length when success
1274 n_ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
1275 q_ih = get_ih(p_s_path) ;
1276 quota_cut_bytes = ih_item_len(q_ih) ;
1278 /* hack so the quota code doesn't have to guess if the file
1279 ** has a tail. On tail insert, we allocate quota for 1 unformatted node.
1280 ** We test the offset because the tail might have been
1281 ** split into multiple items, and we only want to decrement for
1282 ** the unfm node once
1284 if (!S_ISLNK (p_s_inode->i_mode) && is_direct_le_ih(q_ih)) {
1285 if ((le_ih_k_offset(q_ih) & (p_s_sb->s_blocksize - 1)) == 1) {
1286 quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE;
1287 } else {
1288 quota_cut_bytes = 0 ;
1292 if ( p_s_un_bh ) {
1293 int off;
1294 char *data ;
1296 /* We are in direct2indirect conversion, so move tail contents
1297 to the unformatted node */
1298 /* note, we do the copy before preparing the buffer because we
1299 ** don't care about the contents of the unformatted node yet.
1300 ** the only thing we really care about is the direct item's data
1301 ** is in the unformatted node.
1303 ** Otherwise, we would have to call reiserfs_prepare_for_journal on
1304 ** the unformatted node, which might schedule, meaning we'd have to
1305 ** loop all the way back up to the start of the while loop.
1307 ** The unformatted node must be dirtied later on. We can't be
1308 ** sure here if the entire tail has been deleted yet.
1310 ** p_s_un_bh is from the page cache (all unformatted nodes are
1311 ** from the page cache) and might be a highmem page. So, we
1312 ** can't use p_s_un_bh->b_data.
1313 ** -clm
1316 data = kmap_atomic(p_s_un_bh->b_page, KM_USER0);
1317 off = ((le_ih_k_offset (&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
1318 memcpy(data + off,
1319 B_I_PITEM(PATH_PLAST_BUFFER(p_s_path), &s_ih), n_ret_value);
1320 kunmap_atomic(data, KM_USER0);
1322 /* Perform balancing after all resources have been collected at once. */
1323 do_balance(&s_del_balance, NULL, NULL, M_DELETE);
1325 #ifdef REISERQUOTA_DEBUG
1326 reiserfs_debug (p_s_sb, REISERFS_DEBUG_CODE, "reiserquota delete_item(): freeing %u, id=%u type=%c", quota_cut_bytes, p_s_inode->i_uid, head2type(&s_ih));
1327 #endif
1328 DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes);
1330 /* Return deleted body length */
1331 return n_ret_value;
1335 /* Summary Of Mechanisms For Handling Collisions Between Processes:
1337 deletion of the body of the object is performed by iput(), with the
1338 result that if multiple processes are operating on a file, the
1339 deletion of the body of the file is deferred until the last process
1340 that has an open inode performs its iput().
1342 writes and truncates are protected from collisions by use of
1343 semaphores.
1345 creates, linking, and mknod are protected from collisions with other
1346 processes by making the reiserfs_add_entry() the last step in the
1347 creation, and then rolling back all changes if there was a collision.
1348 - Hans
1352 /* this deletes item which never gets split */
1353 void reiserfs_delete_solid_item (struct reiserfs_transaction_handle *th,
1354 struct inode *inode,
1355 struct reiserfs_key * key)
1357 struct tree_balance tb;
1358 INITIALIZE_PATH (path);
1359 int item_len = 0;
1360 int tb_init = 0 ;
1361 struct cpu_key cpu_key;
1362 int retval;
1363 int quota_cut_bytes = 0;
1365 BUG_ON (!th->t_trans_id);
1367 le_key2cpu_key (&cpu_key, key);
1369 while (1) {
1370 retval = search_item (th->t_super, &cpu_key, &path);
1371 if (retval == IO_ERROR) {
1372 reiserfs_warning (th->t_super,
1373 "vs-5350: reiserfs_delete_solid_item: "
1374 "i/o failure occurred trying to delete %K",
1375 &cpu_key);
1376 break;
1378 if (retval != ITEM_FOUND) {
1379 pathrelse (&path);
1380 // No need for a warning, if there is just no free space to insert '..' item into the newly-created subdir
1381 if ( !( (unsigned long long) GET_HASH_VALUE (le_key_k_offset (le_key_version (key), key)) == 0 && \
1382 (unsigned long long) GET_GENERATION_NUMBER (le_key_k_offset (le_key_version (key), key)) == 1 ) )
1383 reiserfs_warning (th->t_super, "vs-5355: reiserfs_delete_solid_item: %k not found", key);
1384 break;
1386 if (!tb_init) {
1387 tb_init = 1 ;
1388 item_len = ih_item_len( PATH_PITEM_HEAD(&path) );
1389 init_tb_struct (th, &tb, th->t_super, &path, - (IH_SIZE + item_len));
1391 quota_cut_bytes = ih_item_len(PATH_PITEM_HEAD(&path)) ;
1393 retval = fix_nodes (M_DELETE, &tb, NULL, NULL);
1394 if (retval == REPEAT_SEARCH) {
1395 PROC_INFO_INC( th -> t_super, delete_solid_item_restarted );
1396 continue;
1399 if (retval == CARRY_ON) {
1400 do_balance (&tb, NULL, NULL, M_DELETE);
1401 if (inode) { /* Should we count quota for item? (we don't count quotas for save-links) */
1402 #ifdef REISERQUOTA_DEBUG
1403 reiserfs_debug (th->t_super, REISERFS_DEBUG_CODE, "reiserquota delete_solid_item(): freeing %u id=%u type=%c", quota_cut_bytes, inode->i_uid, key2type(key));
1404 #endif
1405 DQUOT_FREE_SPACE_NODIRTY(inode, quota_cut_bytes);
1407 break;
1410 // IO_ERROR, NO_DISK_SPACE, etc
1411 reiserfs_warning (th->t_super, "vs-5360: reiserfs_delete_solid_item: "
1412 "could not delete %K due to fix_nodes failure", &cpu_key);
1413 unfix_nodes (&tb);
1414 break;
1417 reiserfs_check_path(&path) ;
1421 int reiserfs_delete_object (struct reiserfs_transaction_handle *th, struct inode * inode)
1423 int err;
1424 inode->i_size = 0;
1425 BUG_ON (!th->t_trans_id);
1427 /* for directory this deletes item containing "." and ".." */
1428 err = reiserfs_do_truncate (th, inode, NULL, 0/*no timestamp updates*/);
1429 if (err)
1430 return err;
1432 #if defined( USE_INODE_GENERATION_COUNTER )
1433 if( !old_format_only ( th -> t_super ) )
1435 __le32 *inode_generation;
1437 inode_generation =
1438 &REISERFS_SB(th -> t_super) -> s_rs -> s_inode_generation;
1439 *inode_generation = cpu_to_le32( le32_to_cpu( *inode_generation ) + 1 );
1441 /* USE_INODE_GENERATION_COUNTER */
1442 #endif
1443 reiserfs_delete_solid_item (th, inode, INODE_PKEY (inode));
1445 return err;
1448 static void
1449 unmap_buffers(struct page *page, loff_t pos) {
1450 struct buffer_head *bh ;
1451 struct buffer_head *head ;
1452 struct buffer_head *next ;
1453 unsigned long tail_index ;
1454 unsigned long cur_index ;
1456 if (page) {
1457 if (page_has_buffers(page)) {
1458 tail_index = pos & (PAGE_CACHE_SIZE - 1) ;
1459 cur_index = 0 ;
1460 head = page_buffers(page) ;
1461 bh = head ;
1462 do {
1463 next = bh->b_this_page ;
1465 /* we want to unmap the buffers that contain the tail, and
1466 ** all the buffers after it (since the tail must be at the
1467 ** end of the file). We don't want to unmap file data
1468 ** before the tail, since it might be dirty and waiting to
1469 ** reach disk
1471 cur_index += bh->b_size ;
1472 if (cur_index > tail_index) {
1473 reiserfs_unmap_buffer(bh) ;
1475 bh = next ;
1476 } while (bh != head) ;
1477 if ( PAGE_SIZE == bh->b_size ) {
1478 clear_page_dirty(page);
1484 static int maybe_indirect_to_direct (struct reiserfs_transaction_handle *th,
1485 struct inode * p_s_inode,
1486 struct page *page,
1487 struct path * p_s_path,
1488 const struct cpu_key * p_s_item_key,
1489 loff_t n_new_file_size,
1490 char * p_c_mode
1492 struct super_block * p_s_sb = p_s_inode->i_sb;
1493 int n_block_size = p_s_sb->s_blocksize;
1494 int cut_bytes;
1495 BUG_ON (!th->t_trans_id);
1497 if (n_new_file_size != p_s_inode->i_size)
1498 BUG ();
1500 /* the page being sent in could be NULL if there was an i/o error
1501 ** reading in the last block. The user will hit problems trying to
1502 ** read the file, but for now we just skip the indirect2direct
1504 if (atomic_read(&p_s_inode->i_count) > 1 ||
1505 !tail_has_to_be_packed (p_s_inode) ||
1506 !page || (REISERFS_I(p_s_inode)->i_flags & i_nopack_mask)) {
1507 // leave tail in an unformatted node
1508 *p_c_mode = M_SKIP_BALANCING;
1509 cut_bytes = n_block_size - (n_new_file_size & (n_block_size - 1));
1510 pathrelse(p_s_path);
1511 return cut_bytes;
1513 /* Permorm the conversion to a direct_item. */
1514 /*return indirect_to_direct (p_s_inode, p_s_path, p_s_item_key, n_new_file_size, p_c_mode);*/
1515 return indirect2direct (th, p_s_inode, page, p_s_path, p_s_item_key, n_new_file_size, p_c_mode);
1519 /* we did indirect_to_direct conversion. And we have inserted direct
1520 item successesfully, but there were no disk space to cut unfm
1521 pointer being converted. Therefore we have to delete inserted
1522 direct item(s) */
1523 static void indirect_to_direct_roll_back (struct reiserfs_transaction_handle *th, struct inode * inode, struct path * path)
1525 struct cpu_key tail_key;
1526 int tail_len;
1527 int removed;
1528 BUG_ON (!th->t_trans_id);
1530 make_cpu_key (&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);// !!!!
1531 tail_key.key_length = 4;
1533 tail_len = (cpu_key_k_offset (&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1;
1534 while (tail_len) {
1535 /* look for the last byte of the tail */
1536 if (search_for_position_by_key (inode->i_sb, &tail_key, path) == POSITION_NOT_FOUND)
1537 reiserfs_panic (inode->i_sb, "vs-5615: indirect_to_direct_roll_back: found invalid item");
1538 RFALSE( path->pos_in_item != ih_item_len(PATH_PITEM_HEAD (path)) - 1,
1539 "vs-5616: appended bytes found");
1540 PATH_LAST_POSITION (path) --;
1542 removed = reiserfs_delete_item (th, path, &tail_key, inode, NULL/*unbh not needed*/);
1543 RFALSE( removed <= 0 || removed > tail_len,
1544 "vs-5617: there was tail %d bytes, removed item length %d bytes",
1545 tail_len, removed);
1546 tail_len -= removed;
1547 set_cpu_key_k_offset (&tail_key, cpu_key_k_offset (&tail_key) - removed);
1549 reiserfs_warning (inode->i_sb, "indirect_to_direct_roll_back: indirect_to_direct conversion has been rolled back due to lack of disk space");
1550 //mark_file_without_tail (inode);
1551 mark_inode_dirty (inode);
1555 /* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
1556 int reiserfs_cut_from_item (struct reiserfs_transaction_handle *th,
1557 struct path * p_s_path,
1558 struct cpu_key * p_s_item_key,
1559 struct inode * p_s_inode,
1560 struct page *page,
1561 loff_t n_new_file_size)
1563 struct super_block * p_s_sb = p_s_inode->i_sb;
1564 /* Every function which is going to call do_balance must first
1565 create a tree_balance structure. Then it must fill up this
1566 structure by using the init_tb_struct and fix_nodes functions.
1567 After that we can make tree balancing. */
1568 struct tree_balance s_cut_balance;
1569 struct item_head *p_le_ih;
1570 int n_cut_size = 0, /* Amount to be cut. */
1571 n_ret_value = CARRY_ON,
1572 n_removed = 0, /* Number of the removed unformatted nodes. */
1573 n_is_inode_locked = 0;
1574 char c_mode; /* Mode of the balance. */
1575 int retval2 = -1;
1576 int quota_cut_bytes;
1577 loff_t tail_pos = 0;
1579 BUG_ON (!th->t_trans_id);
1581 init_tb_struct(th, &s_cut_balance, p_s_inode->i_sb, p_s_path, n_cut_size);
1584 /* Repeat this loop until we either cut the item without needing
1585 to balance, or we fix_nodes without schedule occurring */
1586 while ( 1 ) {
1587 /* Determine the balance mode, position of the first byte to
1588 be cut, and size to be cut. In case of the indirect item
1589 free unformatted nodes which are pointed to by the cut
1590 pointers. */
1592 c_mode = prepare_for_delete_or_cut(th, p_s_inode, p_s_path, p_s_item_key, &n_removed,
1593 &n_cut_size, n_new_file_size);
1594 if ( c_mode == M_CONVERT ) {
1595 /* convert last unformatted node to direct item or leave
1596 tail in the unformatted node */
1597 RFALSE( n_ret_value != CARRY_ON, "PAP-5570: can not convert twice");
1599 n_ret_value = maybe_indirect_to_direct (th, p_s_inode, page, p_s_path, p_s_item_key,
1600 n_new_file_size, &c_mode);
1601 if ( c_mode == M_SKIP_BALANCING )
1602 /* tail has been left in the unformatted node */
1603 return n_ret_value;
1605 n_is_inode_locked = 1;
1607 /* removing of last unformatted node will change value we
1608 have to return to truncate. Save it */
1609 retval2 = n_ret_value;
1610 /*retval2 = p_s_sb->s_blocksize - (n_new_file_size & (p_s_sb->s_blocksize - 1));*/
1612 /* So, we have performed the first part of the conversion:
1613 inserting the new direct item. Now we are removing the
1614 last unformatted node pointer. Set key to search for
1615 it. */
1616 set_cpu_key_k_type (p_s_item_key, TYPE_INDIRECT);
1617 p_s_item_key->key_length = 4;
1618 n_new_file_size -= (n_new_file_size & (p_s_sb->s_blocksize - 1));
1619 tail_pos = n_new_file_size;
1620 set_cpu_key_k_offset (p_s_item_key, n_new_file_size + 1);
1621 if ( search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_NOT_FOUND ){
1622 print_block (PATH_PLAST_BUFFER (p_s_path), 3, PATH_LAST_POSITION (p_s_path) - 1, PATH_LAST_POSITION (p_s_path) + 1);
1623 reiserfs_panic(p_s_sb, "PAP-5580: reiserfs_cut_from_item: item to convert does not exist (%K)", p_s_item_key);
1625 continue;
1627 if (n_cut_size == 0) {
1628 pathrelse (p_s_path);
1629 return 0;
1632 s_cut_balance.insert_size[0] = n_cut_size;
1634 n_ret_value = fix_nodes(c_mode, &s_cut_balance, NULL, NULL);
1635 if ( n_ret_value != REPEAT_SEARCH )
1636 break;
1638 PROC_INFO_INC( p_s_sb, cut_from_item_restarted );
1640 n_ret_value = search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path);
1641 if (n_ret_value == POSITION_FOUND)
1642 continue;
1644 reiserfs_warning (p_s_sb, "PAP-5610: reiserfs_cut_from_item: item %K not found", p_s_item_key);
1645 unfix_nodes (&s_cut_balance);
1646 return (n_ret_value == IO_ERROR) ? -EIO : -ENOENT;
1647 } /* while */
1649 // check fix_nodes results (IO_ERROR or NO_DISK_SPACE)
1650 if ( n_ret_value != CARRY_ON ) {
1651 if ( n_is_inode_locked ) {
1652 // FIXME: this seems to be not needed: we are always able
1653 // to cut item
1654 indirect_to_direct_roll_back (th, p_s_inode, p_s_path);
1656 if (n_ret_value == NO_DISK_SPACE)
1657 reiserfs_warning (p_s_sb, "NO_DISK_SPACE");
1658 unfix_nodes (&s_cut_balance);
1659 return -EIO;
1662 /* go ahead and perform balancing */
1664 RFALSE( c_mode == M_PASTE || c_mode == M_INSERT, "invalid mode");
1666 /* Calculate number of bytes that need to be cut from the item. */
1667 quota_cut_bytes = ( c_mode == M_DELETE ) ? ih_item_len(get_ih(p_s_path)) : -s_cut_balance.insert_size[0];
1668 if (retval2 == -1)
1669 n_ret_value = calc_deleted_bytes_number(&s_cut_balance, c_mode);
1670 else
1671 n_ret_value = retval2;
1674 /* For direct items, we only change the quota when deleting the last
1675 ** item.
1677 p_le_ih = PATH_PITEM_HEAD (s_cut_balance.tb_path);
1678 if (!S_ISLNK (p_s_inode->i_mode) && is_direct_le_ih(p_le_ih)) {
1679 if (c_mode == M_DELETE &&
1680 (le_ih_k_offset (p_le_ih) & (p_s_sb->s_blocksize - 1)) == 1 ) {
1681 // FIXME: this is to keep 3.5 happy
1682 REISERFS_I(p_s_inode)->i_first_direct_byte = U32_MAX;
1683 quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE ;
1684 } else {
1685 quota_cut_bytes = 0 ;
1688 #ifdef CONFIG_REISERFS_CHECK
1689 if (n_is_inode_locked) {
1690 struct item_head * le_ih = PATH_PITEM_HEAD (s_cut_balance.tb_path);
1691 /* we are going to complete indirect2direct conversion. Make
1692 sure, that we exactly remove last unformatted node pointer
1693 of the item */
1694 if (!is_indirect_le_ih (le_ih))
1695 reiserfs_panic (p_s_sb, "vs-5652: reiserfs_cut_from_item: "
1696 "item must be indirect %h", le_ih);
1698 if (c_mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE)
1699 reiserfs_panic (p_s_sb, "vs-5653: reiserfs_cut_from_item: "
1700 "completing indirect2direct conversion indirect item %h "
1701 "being deleted must be of 4 byte long", le_ih);
1703 if (c_mode == M_CUT && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) {
1704 reiserfs_panic (p_s_sb, "vs-5654: reiserfs_cut_from_item: "
1705 "can not complete indirect2direct conversion of %h (CUT, insert_size==%d)",
1706 le_ih, s_cut_balance.insert_size[0]);
1708 /* it would be useful to make sure, that right neighboring
1709 item is direct item of this file */
1711 #endif
1713 do_balance(&s_cut_balance, NULL, NULL, c_mode);
1714 if ( n_is_inode_locked ) {
1715 /* we've done an indirect->direct conversion. when the data block
1716 ** was freed, it was removed from the list of blocks that must
1717 ** be flushed before the transaction commits, make sure to
1718 ** unmap and invalidate it
1720 unmap_buffers(page, tail_pos);
1721 REISERFS_I(p_s_inode)->i_flags &= ~i_pack_on_close_mask ;
1723 #ifdef REISERQUOTA_DEBUG
1724 reiserfs_debug (p_s_inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota cut_from_item(): freeing %u id=%u type=%c", quota_cut_bytes, p_s_inode->i_uid, '?');
1725 #endif
1726 DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes);
1727 return n_ret_value;
1730 static void truncate_directory (struct reiserfs_transaction_handle *th, struct inode * inode)
1732 BUG_ON (!th->t_trans_id);
1733 if (inode->i_nlink)
1734 reiserfs_warning (inode->i_sb,
1735 "vs-5655: truncate_directory: link count != 0");
1737 set_le_key_k_offset (KEY_FORMAT_3_5, INODE_PKEY (inode), DOT_OFFSET);
1738 set_le_key_k_type (KEY_FORMAT_3_5, INODE_PKEY (inode), TYPE_DIRENTRY);
1739 reiserfs_delete_solid_item (th, inode, INODE_PKEY (inode));
1740 reiserfs_update_sd(th, inode) ;
1741 set_le_key_k_offset (KEY_FORMAT_3_5, INODE_PKEY (inode), SD_OFFSET);
1742 set_le_key_k_type (KEY_FORMAT_3_5, INODE_PKEY (inode), TYPE_STAT_DATA);
1748 /* Truncate file to the new size. Note, this must be called with a transaction
1749 already started */
1750 int reiserfs_do_truncate (struct reiserfs_transaction_handle *th,
1751 struct inode * p_s_inode, /* ->i_size contains new
1752 size */
1753 struct page *page, /* up to date for last block */
1754 int update_timestamps /* when it is called by
1755 file_release to convert
1756 the tail - no timestamps
1757 should be updated */
1759 INITIALIZE_PATH (s_search_path); /* Path to the current object item. */
1760 struct item_head * p_le_ih; /* Pointer to an item header. */
1761 struct cpu_key s_item_key; /* Key to search for a previous file item. */
1762 loff_t n_file_size, /* Old file size. */
1763 n_new_file_size;/* New file size. */
1764 int n_deleted; /* Number of deleted or truncated bytes. */
1765 int retval;
1766 int err = 0;
1768 BUG_ON (!th->t_trans_id);
1769 if ( ! (S_ISREG(p_s_inode->i_mode) || S_ISDIR(p_s_inode->i_mode) || S_ISLNK(p_s_inode->i_mode)) )
1770 return 0;
1772 if (S_ISDIR(p_s_inode->i_mode)) {
1773 // deletion of directory - no need to update timestamps
1774 truncate_directory (th, p_s_inode);
1775 return 0;
1778 /* Get new file size. */
1779 n_new_file_size = p_s_inode->i_size;
1781 // FIXME: note, that key type is unimportant here
1782 make_cpu_key (&s_item_key, p_s_inode, max_reiserfs_offset (p_s_inode), TYPE_DIRECT, 3);
1784 retval = search_for_position_by_key(p_s_inode->i_sb, &s_item_key, &s_search_path);
1785 if (retval == IO_ERROR) {
1786 reiserfs_warning (p_s_inode->i_sb, "vs-5657: reiserfs_do_truncate: "
1787 "i/o failure occurred trying to truncate %K", &s_item_key);
1788 err = -EIO;
1789 goto out;
1791 if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
1792 reiserfs_warning (p_s_inode->i_sb, "PAP-5660: reiserfs_do_truncate: "
1793 "wrong result %d of search for %K", retval, &s_item_key);
1795 err = -EIO;
1796 goto out;
1799 s_search_path.pos_in_item --;
1801 /* Get real file size (total length of all file items) */
1802 p_le_ih = PATH_PITEM_HEAD(&s_search_path);
1803 if ( is_statdata_le_ih (p_le_ih) )
1804 n_file_size = 0;
1805 else {
1806 loff_t offset = le_ih_k_offset (p_le_ih);
1807 int bytes = op_bytes_number (p_le_ih,p_s_inode->i_sb->s_blocksize);
1809 /* this may mismatch with real file size: if last direct item
1810 had no padding zeros and last unformatted node had no free
1811 space, this file would have this file size */
1812 n_file_size = offset + bytes - 1;
1815 * are we doing a full truncate or delete, if so
1816 * kick in the reada code
1818 if (n_new_file_size == 0)
1819 s_search_path.reada = PATH_READA | PATH_READA_BACK;
1821 if ( n_file_size == 0 || n_file_size < n_new_file_size ) {
1822 goto update_and_out ;
1825 /* Update key to search for the last file item. */
1826 set_cpu_key_k_offset (&s_item_key, n_file_size);
1828 do {
1829 /* Cut or delete file item. */
1830 n_deleted = reiserfs_cut_from_item(th, &s_search_path, &s_item_key, p_s_inode, page, n_new_file_size);
1831 if (n_deleted < 0) {
1832 reiserfs_warning (p_s_inode->i_sb, "vs-5665: reiserfs_do_truncate: reiserfs_cut_from_item failed");
1833 reiserfs_check_path(&s_search_path) ;
1834 return 0;
1837 RFALSE( n_deleted > n_file_size,
1838 "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K",
1839 n_deleted, n_file_size, &s_item_key);
1841 /* Change key to search the last file item. */
1842 n_file_size -= n_deleted;
1844 set_cpu_key_k_offset (&s_item_key, n_file_size);
1846 /* While there are bytes to truncate and previous file item is presented in the tree. */
1849 ** This loop could take a really long time, and could log
1850 ** many more blocks than a transaction can hold. So, we do a polite
1851 ** journal end here, and if the transaction needs ending, we make
1852 ** sure the file is consistent before ending the current trans
1853 ** and starting a new one
1855 if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
1856 int orig_len_alloc = th->t_blocks_allocated ;
1857 decrement_counters_in_path(&s_search_path) ;
1859 if (update_timestamps) {
1860 p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME_SEC;
1862 reiserfs_update_sd(th, p_s_inode) ;
1864 err = journal_end(th, p_s_inode->i_sb, orig_len_alloc) ;
1865 if (err)
1866 goto out;
1867 err = journal_begin (th, p_s_inode->i_sb,
1868 JOURNAL_PER_BALANCE_CNT * 6);
1869 if (err)
1870 goto out;
1871 reiserfs_update_inode_transaction(p_s_inode) ;
1873 } while ( n_file_size > ROUND_UP (n_new_file_size) &&
1874 search_for_position_by_key(p_s_inode->i_sb, &s_item_key, &s_search_path) == POSITION_FOUND ) ;
1876 RFALSE( n_file_size > ROUND_UP (n_new_file_size),
1877 "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d",
1878 n_new_file_size, n_file_size, s_item_key.on_disk_key.k_objectid);
1880 update_and_out:
1881 if (update_timestamps) {
1882 // this is truncate, not file closing
1883 p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME_SEC;
1885 reiserfs_update_sd (th, p_s_inode);
1887 out:
1888 pathrelse(&s_search_path) ;
1889 return err;
1893 #ifdef CONFIG_REISERFS_CHECK
1894 // this makes sure, that we __append__, not overwrite or add holes
1895 static void check_research_for_paste (struct path * path,
1896 const struct cpu_key * p_s_key)
1898 struct item_head * found_ih = get_ih (path);
1900 if (is_direct_le_ih (found_ih)) {
1901 if (le_ih_k_offset (found_ih) + op_bytes_number (found_ih, get_last_bh (path)->b_size) !=
1902 cpu_key_k_offset (p_s_key) ||
1903 op_bytes_number (found_ih, get_last_bh (path)->b_size) != pos_in_item (path))
1904 reiserfs_panic (NULL, "PAP-5720: check_research_for_paste: "
1905 "found direct item %h or position (%d) does not match to key %K",
1906 found_ih, pos_in_item (path), p_s_key);
1908 if (is_indirect_le_ih (found_ih)) {
1909 if (le_ih_k_offset (found_ih) + op_bytes_number (found_ih, get_last_bh (path)->b_size) != cpu_key_k_offset (p_s_key) ||
1910 I_UNFM_NUM (found_ih) != pos_in_item (path) ||
1911 get_ih_free_space (found_ih) != 0)
1912 reiserfs_panic (NULL, "PAP-5730: check_research_for_paste: "
1913 "found indirect item (%h) or position (%d) does not match to key (%K)",
1914 found_ih, pos_in_item (path), p_s_key);
1917 #endif /* config reiserfs check */
1920 /* Paste bytes to the existing item. Returns bytes number pasted into the item. */
1921 int reiserfs_paste_into_item (struct reiserfs_transaction_handle *th,
1922 struct path * p_s_search_path, /* Path to the pasted item. */
1923 const struct cpu_key * p_s_key, /* Key to search for the needed item.*/
1924 struct inode * inode, /* Inode item belongs to */
1925 const char * p_c_body, /* Pointer to the bytes to paste. */
1926 int n_pasted_size) /* Size of pasted bytes. */
1928 struct tree_balance s_paste_balance;
1929 int retval;
1930 int fs_gen;
1932 BUG_ON (!th->t_trans_id);
1934 fs_gen = get_generation(inode->i_sb) ;
1936 #ifdef REISERQUOTA_DEBUG
1937 reiserfs_debug (inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota paste_into_item(): allocating %u id=%u type=%c", n_pasted_size, inode->i_uid, key2type(&(p_s_key->on_disk_key)));
1938 #endif
1940 if (DQUOT_ALLOC_SPACE_NODIRTY(inode, n_pasted_size)) {
1941 pathrelse(p_s_search_path);
1942 return -EDQUOT;
1944 init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path, n_pasted_size);
1945 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
1946 s_paste_balance.key = p_s_key->on_disk_key;
1947 #endif
1949 /* DQUOT_* can schedule, must check before the fix_nodes */
1950 if (fs_changed(fs_gen, inode->i_sb)) {
1951 goto search_again;
1954 while ((retval = fix_nodes(M_PASTE, &s_paste_balance, NULL, p_c_body)) ==
1955 REPEAT_SEARCH ) {
1956 search_again:
1957 /* file system changed while we were in the fix_nodes */
1958 PROC_INFO_INC( th -> t_super, paste_into_item_restarted );
1959 retval = search_for_position_by_key (th->t_super, p_s_key, p_s_search_path);
1960 if (retval == IO_ERROR) {
1961 retval = -EIO ;
1962 goto error_out ;
1964 if (retval == POSITION_FOUND) {
1965 reiserfs_warning (inode->i_sb, "PAP-5710: reiserfs_paste_into_item: entry or pasted byte (%K) exists", p_s_key);
1966 retval = -EEXIST ;
1967 goto error_out ;
1970 #ifdef CONFIG_REISERFS_CHECK
1971 check_research_for_paste (p_s_search_path, p_s_key);
1972 #endif
1975 /* Perform balancing after all resources are collected by fix_nodes, and
1976 accessing them will not risk triggering schedule. */
1977 if ( retval == CARRY_ON ) {
1978 do_balance(&s_paste_balance, NULL/*ih*/, p_c_body, M_PASTE);
1979 return 0;
1981 retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
1982 error_out:
1983 /* this also releases the path */
1984 unfix_nodes(&s_paste_balance);
1985 #ifdef REISERQUOTA_DEBUG
1986 reiserfs_debug (inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota paste_into_item(): freeing %u id=%u type=%c", n_pasted_size, inode->i_uid, key2type(&(p_s_key->on_disk_key)));
1987 #endif
1988 DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size);
1989 return retval ;
1993 /* Insert new item into the buffer at the path. */
1994 int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
1995 struct path * p_s_path, /* Path to the inserteded item. */
1996 const struct cpu_key * key,
1997 struct item_head * p_s_ih, /* Pointer to the item header to insert.*/
1998 struct inode * inode,
1999 const char * p_c_body) /* Pointer to the bytes to insert. */
2001 struct tree_balance s_ins_balance;
2002 int retval;
2003 int fs_gen = 0 ;
2004 int quota_bytes = 0 ;
2006 BUG_ON (!th->t_trans_id);
2008 if (inode) { /* Do we count quotas for item? */
2009 fs_gen = get_generation(inode->i_sb);
2010 quota_bytes = ih_item_len(p_s_ih);
2012 /* hack so the quota code doesn't have to guess if the file has
2013 ** a tail, links are always tails, so there's no guessing needed
2015 if (!S_ISLNK (inode->i_mode) && is_direct_le_ih(p_s_ih)) {
2016 quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE ;
2018 #ifdef REISERQUOTA_DEBUG
2019 reiserfs_debug (inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota insert_item(): allocating %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(p_s_ih));
2020 #endif
2021 /* We can't dirty inode here. It would be immediately written but
2022 * appropriate stat item isn't inserted yet... */
2023 if (DQUOT_ALLOC_SPACE_NODIRTY(inode, quota_bytes)) {
2024 pathrelse(p_s_path);
2025 return -EDQUOT;
2028 init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path, IH_SIZE + ih_item_len(p_s_ih));
2029 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
2030 s_ins_balance.key = key->on_disk_key;
2031 #endif
2032 /* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */
2033 if (inode && fs_changed(fs_gen, inode->i_sb)) {
2034 goto search_again;
2037 while ( (retval = fix_nodes(M_INSERT, &s_ins_balance, p_s_ih, p_c_body)) == REPEAT_SEARCH) {
2038 search_again:
2039 /* file system changed while we were in the fix_nodes */
2040 PROC_INFO_INC( th -> t_super, insert_item_restarted );
2041 retval = search_item (th->t_super, key, p_s_path);
2042 if (retval == IO_ERROR) {
2043 retval = -EIO;
2044 goto error_out ;
2046 if (retval == ITEM_FOUND) {
2047 reiserfs_warning (th->t_super, "PAP-5760: reiserfs_insert_item: "
2048 "key %K already exists in the tree", key);
2049 retval = -EEXIST ;
2050 goto error_out;
2054 /* make balancing after all resources will be collected at a time */
2055 if ( retval == CARRY_ON ) {
2056 do_balance (&s_ins_balance, p_s_ih, p_c_body, M_INSERT);
2057 return 0;
2060 retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2061 error_out:
2062 /* also releases the path */
2063 unfix_nodes(&s_ins_balance);
2064 #ifdef REISERQUOTA_DEBUG
2065 reiserfs_debug (th->t_super, REISERFS_DEBUG_CODE, "reiserquota insert_item(): freeing %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(p_s_ih));
2066 #endif
2067 if (inode)
2068 DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes) ;
2069 return retval;