Merge branch 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[linux/fpc-iii.git] / fs / reiserfs / do_balan.c
blob9a3c68cf6026ae0405ef91c07f3ec9ef02854f1a
1 /*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
5 /* Now we have all buffers that must be used in balancing of the tree */
6 /* Further calculations can not cause schedule(), and thus the buffer */
7 /* tree will be stable until the balancing will be finished */
8 /* balance the tree according to the analysis made before, */
9 /* and using buffers obtained after all above. */
11 /**
12 ** balance_leaf_when_delete
13 ** balance_leaf
14 ** do_balance
16 **/
18 #include <asm/uaccess.h>
19 #include <linux/time.h>
20 #include "reiserfs.h"
21 #include <linux/buffer_head.h>
22 #include <linux/kernel.h>
24 static inline void buffer_info_init_left(struct tree_balance *tb,
25 struct buffer_info *bi)
27 bi->tb = tb;
28 bi->bi_bh = tb->L[0];
29 bi->bi_parent = tb->FL[0];
30 bi->bi_position = get_left_neighbor_position(tb, 0);
33 static inline void buffer_info_init_right(struct tree_balance *tb,
34 struct buffer_info *bi)
36 bi->tb = tb;
37 bi->bi_bh = tb->R[0];
38 bi->bi_parent = tb->FR[0];
39 bi->bi_position = get_right_neighbor_position(tb, 0);
42 static inline void buffer_info_init_tbS0(struct tree_balance *tb,
43 struct buffer_info *bi)
45 bi->tb = tb;
46 bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path);
47 bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
48 bi->bi_position = PATH_H_POSITION(tb->tb_path, 1);
51 static inline void buffer_info_init_bh(struct tree_balance *tb,
52 struct buffer_info *bi,
53 struct buffer_head *bh)
55 bi->tb = tb;
56 bi->bi_bh = bh;
57 bi->bi_parent = NULL;
58 bi->bi_position = 0;
61 inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
62 struct buffer_head *bh, int flag)
64 journal_mark_dirty(tb->transaction_handle,
65 tb->transaction_handle->t_super, bh);
68 #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
69 #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
71 /* summary:
72 if deleting something ( tb->insert_size[0] < 0 )
73 return(balance_leaf_when_delete()); (flag d handled here)
74 else
75 if lnum is larger than 0 we put items into the left node
76 if rnum is larger than 0 we put items into the right node
77 if snum1 is larger than 0 we put items into the new node s1
78 if snum2 is larger than 0 we put items into the new node s2
79 Note that all *num* count new items being created.
81 It would be easier to read balance_leaf() if each of these summary
82 lines was a separate procedure rather than being inlined. I think
83 that there are many passages here and in balance_leaf_when_delete() in
84 which two calls to one procedure can replace two passages, and it
85 might save cache space and improve software maintenance costs to do so.
87 Vladimir made the perceptive comment that we should offload most of
88 the decision making in this function into fix_nodes/check_balance, and
89 then create some sort of structure in tb that says what actions should
90 be performed by do_balance.
92 -Hans */
94 /* Balance leaf node in case of delete or cut: insert_size[0] < 0
96 * lnum, rnum can have values >= -1
97 * -1 means that the neighbor must be joined with S
98 * 0 means that nothing should be done with the neighbor
99 * >0 means to shift entirely or partly the specified number of items to the neighbor
101 static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
103 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
104 int item_pos = PATH_LAST_POSITION(tb->tb_path);
105 int pos_in_item = tb->tb_path->pos_in_item;
106 struct buffer_info bi;
107 int n;
108 struct item_head *ih;
110 RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
111 "vs- 12000: level: wrong FR %z", tb->FR[0]);
112 RFALSE(tb->blknum[0] > 1,
113 "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
114 RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0),
115 "PAP-12010: tree can not be empty");
117 ih = B_N_PITEM_HEAD(tbS0, item_pos);
118 buffer_info_init_tbS0(tb, &bi);
120 /* Delete or truncate the item */
122 switch (flag) {
123 case M_DELETE: /* delete item in S[0] */
125 RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
126 "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
127 -tb->insert_size[0], ih);
129 leaf_delete_items(&bi, 0, item_pos, 1, -1);
131 if (!item_pos && tb->CFL[0]) {
132 if (B_NR_ITEMS(tbS0)) {
133 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0,
135 } else {
136 if (!PATH_H_POSITION(tb->tb_path, 1))
137 replace_key(tb, tb->CFL[0], tb->lkey[0],
138 PATH_H_PPARENT(tb->tb_path,
139 0), 0);
143 RFALSE(!item_pos && !tb->CFL[0],
144 "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0],
145 tb->L[0]);
147 break;
149 case M_CUT:{ /* cut item in S[0] */
150 if (is_direntry_le_ih(ih)) {
152 /* UFS unlink semantics are such that you can only delete one directory entry at a time. */
153 /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */
154 tb->insert_size[0] = -1;
155 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
156 -tb->insert_size[0]);
158 RFALSE(!item_pos && !pos_in_item && !tb->CFL[0],
159 "PAP-12030: can not change delimiting key. CFL[0]=%p",
160 tb->CFL[0]);
162 if (!item_pos && !pos_in_item && tb->CFL[0]) {
163 replace_key(tb, tb->CFL[0], tb->lkey[0],
164 tbS0, 0);
166 } else {
167 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
168 -tb->insert_size[0]);
170 RFALSE(!ih_item_len(ih),
171 "PAP-12035: cut must leave non-zero dynamic length of item");
173 break;
176 default:
177 print_cur_tb("12040");
178 reiserfs_panic(tb->tb_sb, "PAP-12040",
179 "unexpected mode: %s(%d)",
180 (flag ==
181 M_PASTE) ? "PASTE" : ((flag ==
182 M_INSERT) ? "INSERT" :
183 "UNKNOWN"), flag);
186 /* the rule is that no shifting occurs unless by shifting a node can be freed */
187 n = B_NR_ITEMS(tbS0);
188 if (tb->lnum[0]) { /* L[0] takes part in balancing */
189 if (tb->lnum[0] == -1) { /* L[0] must be joined with S[0] */
190 if (tb->rnum[0] == -1) { /* R[0] must be also joined with S[0] */
191 if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) {
192 /* all contents of all the 3 buffers will be in L[0] */
193 if (PATH_H_POSITION(tb->tb_path, 1) == 0
194 && 1 < B_NR_ITEMS(tb->FR[0]))
195 replace_key(tb, tb->CFL[0],
196 tb->lkey[0],
197 tb->FR[0], 1);
199 leaf_move_items(LEAF_FROM_S_TO_L, tb, n,
200 -1, NULL);
201 leaf_move_items(LEAF_FROM_R_TO_L, tb,
202 B_NR_ITEMS(tb->R[0]),
203 -1, NULL);
205 reiserfs_invalidate_buffer(tb, tbS0);
206 reiserfs_invalidate_buffer(tb,
207 tb->R[0]);
209 return 0;
211 /* all contents of all the 3 buffers will be in R[0] */
212 leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1,
213 NULL);
214 leaf_move_items(LEAF_FROM_L_TO_R, tb,
215 B_NR_ITEMS(tb->L[0]), -1, NULL);
217 /* right_delimiting_key is correct in R[0] */
218 replace_key(tb, tb->CFR[0], tb->rkey[0],
219 tb->R[0], 0);
221 reiserfs_invalidate_buffer(tb, tbS0);
222 reiserfs_invalidate_buffer(tb, tb->L[0]);
224 return -1;
227 RFALSE(tb->rnum[0] != 0,
228 "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
229 /* all contents of L[0] and S[0] will be in L[0] */
230 leaf_shift_left(tb, n, -1);
232 reiserfs_invalidate_buffer(tb, tbS0);
234 return 0;
236 /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */
238 RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
239 (tb->lnum[0] + tb->rnum[0] > n + 1),
240 "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
241 tb->rnum[0], tb->lnum[0], n);
242 RFALSE((tb->lnum[0] + tb->rnum[0] == n) &&
243 (tb->lbytes != -1 || tb->rbytes != -1),
244 "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
245 tb->rbytes, tb->lbytes);
246 RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) &&
247 (tb->lbytes < 1 || tb->rbytes != -1),
248 "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
249 tb->rbytes, tb->lbytes);
251 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
252 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
254 reiserfs_invalidate_buffer(tb, tbS0);
256 return 0;
259 if (tb->rnum[0] == -1) {
260 /* all contents of R[0] and S[0] will be in R[0] */
261 leaf_shift_right(tb, n, -1);
262 reiserfs_invalidate_buffer(tb, tbS0);
263 return 0;
266 RFALSE(tb->rnum[0],
267 "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
268 return 0;
271 static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item header of inserted item (this is on little endian) */
272 const char *body, /* body of inserted item or bytes to paste */
273 int flag, /* i - insert, d - delete, c - cut, p - paste
274 (see comment to do_balance) */
275 struct item_head *insert_key, /* in our processing of one level we sometimes determine what
276 must be inserted into the next higher level. This insertion
277 consists of a key or two keys and their corresponding
278 pointers */
279 struct buffer_head **insert_ptr /* inserted node-ptrs for the next level */
282 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
283 int item_pos = PATH_LAST_POSITION(tb->tb_path); /* index into the array of item headers in S[0]
284 of the affected item */
285 struct buffer_info bi;
286 struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */
287 int snum[2]; /* number of items that will be placed
288 into S_new (includes partially shifted
289 items) */
290 int sbytes[2]; /* if an item is partially shifted into S_new then
291 if it is a directory item
292 it is the number of entries from the item that are shifted into S_new
293 else
294 it is the number of bytes from the item that are shifted into S_new
296 int n, i;
297 int ret_val;
298 int pos_in_item;
299 int zeros_num;
301 PROC_INFO_INC(tb->tb_sb, balance_at[0]);
303 /* Make balance in case insert_size[0] < 0 */
304 if (tb->insert_size[0] < 0)
305 return balance_leaf_when_delete(tb, flag);
307 zeros_num = 0;
308 if (flag == M_INSERT && !body)
309 zeros_num = ih_item_len(ih);
311 pos_in_item = tb->tb_path->pos_in_item;
312 /* for indirect item pos_in_item is measured in unformatted node
313 pointers. Recalculate to bytes */
314 if (flag != M_INSERT
315 && is_indirect_le_ih(B_N_PITEM_HEAD(tbS0, item_pos)))
316 pos_in_item *= UNFM_P_SIZE;
318 if (tb->lnum[0] > 0) {
319 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */
320 if (item_pos < tb->lnum[0]) {
321 /* new item or it part falls to L[0], shift it too */
322 n = B_NR_ITEMS(tb->L[0]);
324 switch (flag) {
325 case M_INSERT: /* insert item into L[0] */
327 if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
328 /* part of new item falls into L[0] */
329 int new_item_len;
330 int version;
332 ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, -1);
334 /* Calculate item length to insert to S[0] */
335 new_item_len = ih_item_len(ih) - tb->lbytes;
336 /* Calculate and check item length to insert to L[0] */
337 put_ih_item_len(ih, ih_item_len(ih) - new_item_len);
339 RFALSE(ih_item_len(ih) <= 0,
340 "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
341 ih_item_len(ih));
343 /* Insert new item into L[0] */
344 buffer_info_init_left(tb, &bi);
345 leaf_insert_into_buf(&bi,
346 n + item_pos - ret_val, ih, body,
347 zeros_num > ih_item_len(ih) ? ih_item_len(ih) : zeros_num);
349 version = ih_version(ih);
351 /* Calculate key component, item length and body to insert into S[0] */
352 set_le_ih_k_offset(ih, le_ih_k_offset(ih) +
353 (tb-> lbytes << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0)));
355 put_ih_item_len(ih, new_item_len);
356 if (tb->lbytes > zeros_num) {
357 body += (tb->lbytes - zeros_num);
358 zeros_num = 0;
359 } else
360 zeros_num -= tb->lbytes;
362 RFALSE(ih_item_len(ih) <= 0,
363 "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
364 ih_item_len(ih));
365 } else {
366 /* new item in whole falls into L[0] */
367 /* Shift lnum[0]-1 items to L[0] */
368 ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, tb->lbytes);
369 /* Insert new item into L[0] */
370 buffer_info_init_left(tb, &bi);
371 leaf_insert_into_buf(&bi, n + item_pos - ret_val, ih, body, zeros_num);
372 tb->insert_size[0] = 0;
373 zeros_num = 0;
375 break;
377 case M_PASTE: /* append item in L[0] */
379 if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
380 /* we must shift the part of the appended item */
381 if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) {
383 RFALSE(zeros_num,
384 "PAP-12090: invalid parameter in case of a directory");
385 /* directory item */
386 if (tb->lbytes > pos_in_item) {
387 /* new directory entry falls into L[0] */
388 struct item_head *pasted;
389 int l_pos_in_item = pos_in_item;
391 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
392 ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes-1);
393 if (ret_val && !item_pos) {
394 pasted = B_N_PITEM_HEAD(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1);
395 l_pos_in_item += I_ENTRY_COUNT(pasted) - (tb->lbytes -1);
398 /* Append given directory entry to directory item */
399 buffer_info_init_left(tb, &bi);
400 leaf_paste_in_buffer(&bi, n + item_pos - ret_val, l_pos_in_item, tb->insert_size[0], body, zeros_num);
402 /* previous string prepared space for pasting new entry, following string pastes this entry */
404 /* when we have merge directory item, pos_in_item has been changed too */
406 /* paste new directory entry. 1 is entry number */
407 leaf_paste_entries(&bi, n + item_pos - ret_val, l_pos_in_item,
408 1, (struct reiserfs_de_head *) body,
409 body + DEH_SIZE, tb->insert_size[0]);
410 tb->insert_size[0] = 0;
411 } else {
412 /* new directory item doesn't fall into L[0] */
413 /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
414 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
416 /* Calculate new position to append in item body */
417 pos_in_item -= tb->lbytes;
418 } else {
419 /* regular object */
420 RFALSE(tb->lbytes <= 0, "PAP-12095: there is nothing to shift to L[0]. lbytes=%d", tb->lbytes);
421 RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),
422 "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
423 ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),pos_in_item);
425 if (tb->lbytes >= pos_in_item) {
426 /* appended item will be in L[0] in whole */
427 int l_n;
429 /* this bytes number must be appended to the last item of L[h] */
430 l_n = tb->lbytes - pos_in_item;
432 /* Calculate new insert_size[0] */
433 tb->insert_size[0] -= l_n;
435 RFALSE(tb->insert_size[0] <= 0,
436 "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
437 tb->insert_size[0]);
438 ret_val = leaf_shift_left(tb, tb->lnum[0], ih_item_len
439 (B_N_PITEM_HEAD(tbS0, item_pos)));
440 /* Append to body of item in L[0] */
441 buffer_info_init_left(tb, &bi);
442 leaf_paste_in_buffer
443 (&bi, n + item_pos - ret_val, ih_item_len
444 (B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val)),
445 l_n, body,
446 zeros_num > l_n ? l_n : zeros_num);
447 /* 0-th item in S0 can be only of DIRECT type when l_n != 0 */
449 int version;
450 int temp_l = l_n;
452 RFALSE(ih_item_len(B_N_PITEM_HEAD(tbS0, 0)),
453 "PAP-12106: item length must be 0");
454 RFALSE(comp_short_le_keys(B_N_PKEY(tbS0, 0), B_N_PKEY
455 (tb->L[0], n + item_pos - ret_val)),
456 "PAP-12107: items must be of the same file");
457 if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val))) {
458 temp_l = l_n << (tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT);
460 /* update key of first item in S0 */
461 version = ih_version(B_N_PITEM_HEAD(tbS0, 0));
462 set_le_key_k_offset(version, B_N_PKEY(tbS0, 0),
463 le_key_k_offset(version,B_N_PKEY(tbS0, 0)) + temp_l);
464 /* update left delimiting key */
465 set_le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
466 le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0])) + temp_l);
469 /* Calculate new body, position in item and insert_size[0] */
470 if (l_n > zeros_num) {
471 body += (l_n - zeros_num);
472 zeros_num = 0;
473 } else
474 zeros_num -= l_n;
475 pos_in_item = 0;
477 RFALSE(comp_short_le_keys(B_N_PKEY(tbS0, 0), B_N_PKEY(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1))
478 || !op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)
479 || !op_is_left_mergeable(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]), tbS0->b_size),
480 "PAP-12120: item must be merge-able with left neighboring item");
481 } else { /* only part of the appended item will be in L[0] */
483 /* Calculate position in item for append in S[0] */
484 pos_in_item -= tb->lbytes;
486 RFALSE(pos_in_item <= 0, "PAP-12125: no place for paste. pos_in_item=%d", pos_in_item);
488 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
489 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
492 } else { /* appended item will be in L[0] in whole */
494 struct item_head *pasted;
496 if (!item_pos && op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)) { /* if we paste into first item of S[0] and it is left mergable */
497 /* then increment pos_in_item by the size of the last item in L[0] */
498 pasted = B_N_PITEM_HEAD(tb->L[0], n - 1);
499 if (is_direntry_le_ih(pasted))
500 pos_in_item += ih_entry_count(pasted);
501 else
502 pos_in_item += ih_item_len(pasted);
505 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
506 ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
507 /* Append to body of item in L[0] */
508 buffer_info_init_left(tb, &bi);
509 leaf_paste_in_buffer(&bi, n + item_pos - ret_val,
510 pos_in_item,
511 tb->insert_size[0],
512 body, zeros_num);
514 /* if appended item is directory, paste entry */
515 pasted = B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val);
516 if (is_direntry_le_ih(pasted))
517 leaf_paste_entries(&bi, n + item_pos - ret_val,
518 pos_in_item, 1,
519 (struct reiserfs_de_head *) body,
520 body + DEH_SIZE,
521 tb->insert_size[0]);
522 /* if appended item is indirect item, put unformatted node into un list */
523 if (is_indirect_le_ih(pasted))
524 set_ih_free_space(pasted, 0);
525 tb->insert_size[0] = 0;
526 zeros_num = 0;
528 break;
529 default: /* cases d and t */
530 reiserfs_panic(tb->tb_sb, "PAP-12130",
531 "lnum > 0: unexpected mode: "
532 " %s(%d)",
533 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
535 } else {
536 /* new item doesn't fall into L[0] */
537 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
541 /* tb->lnum[0] > 0 */
542 /* Calculate new item position */
543 item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0));
545 if (tb->rnum[0] > 0) {
546 /* shift rnum[0] items from S[0] to the right neighbor R[0] */
547 n = B_NR_ITEMS(tbS0);
548 switch (flag) {
550 case M_INSERT: /* insert item */
551 if (n - tb->rnum[0] < item_pos) { /* new item or its part falls to R[0] */
552 if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) { /* part of new item falls into R[0] */
553 loff_t old_key_comp, old_len, r_zeros_number;
554 const char *r_body;
555 int version;
556 loff_t offset;
558 leaf_shift_right(tb, tb->rnum[0] - 1, -1);
560 version = ih_version(ih);
561 /* Remember key component and item length */
562 old_key_comp = le_ih_k_offset(ih);
563 old_len = ih_item_len(ih);
565 /* Calculate key component and item length to insert into R[0] */
566 offset = le_ih_k_offset(ih) + ((old_len - tb->rbytes) << (is_indirect_le_ih(ih) ? tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT : 0));
567 set_le_ih_k_offset(ih, offset);
568 put_ih_item_len(ih, tb->rbytes);
569 /* Insert part of the item into R[0] */
570 buffer_info_init_right(tb, &bi);
571 if ((old_len - tb->rbytes) > zeros_num) {
572 r_zeros_number = 0;
573 r_body = body + (old_len - tb->rbytes) - zeros_num;
574 } else {
575 r_body = body;
576 r_zeros_number = zeros_num - (old_len - tb->rbytes);
577 zeros_num -= r_zeros_number;
580 leaf_insert_into_buf(&bi, 0, ih, r_body,
581 r_zeros_number);
583 /* Replace right delimiting key by first key in R[0] */
584 replace_key(tb, tb->CFR[0], tb->rkey[0],
585 tb->R[0], 0);
587 /* Calculate key component and item length to insert into S[0] */
588 set_le_ih_k_offset(ih, old_key_comp);
589 put_ih_item_len(ih, old_len - tb->rbytes);
591 tb->insert_size[0] -= tb->rbytes;
593 } else { /* whole new item falls into R[0] */
595 /* Shift rnum[0]-1 items to R[0] */
596 ret_val = leaf_shift_right(tb, tb->rnum[0] - 1, tb->rbytes);
597 /* Insert new item into R[0] */
598 buffer_info_init_right(tb, &bi);
599 leaf_insert_into_buf(&bi, item_pos - n + tb->rnum[0] - 1,
600 ih, body, zeros_num);
602 if (item_pos - n + tb->rnum[0] - 1 == 0) {
603 replace_key(tb, tb->CFR[0],
604 tb->rkey[0],
605 tb->R[0], 0);
608 zeros_num = tb->insert_size[0] = 0;
610 } else { /* new item or part of it doesn't fall into R[0] */
612 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
614 break;
616 case M_PASTE: /* append item */
618 if (n - tb->rnum[0] <= item_pos) { /* pasted item or part of it falls to R[0] */
619 if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) { /* we must shift the part of the appended item */
620 if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) { /* we append to directory item */
621 int entry_count;
623 RFALSE(zeros_num,
624 "PAP-12145: invalid parameter in case of a directory");
625 entry_count = I_ENTRY_COUNT(B_N_PITEM_HEAD
626 (tbS0, item_pos));
627 if (entry_count - tb->rbytes <
628 pos_in_item)
629 /* new directory entry falls into R[0] */
631 int paste_entry_position;
633 RFALSE(tb->rbytes - 1 >= entry_count || !tb-> insert_size[0],
634 "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
635 tb->rbytes, entry_count);
636 /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
637 leaf_shift_right(tb, tb->rnum[0], tb->rbytes - 1);
638 /* Paste given directory entry to directory item */
639 paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1;
640 buffer_info_init_right(tb, &bi);
641 leaf_paste_in_buffer(&bi, 0, paste_entry_position, tb->insert_size[0], body, zeros_num);
642 /* paste entry */
643 leaf_paste_entries(&bi, 0, paste_entry_position, 1,
644 (struct reiserfs_de_head *) body,
645 body + DEH_SIZE, tb->insert_size[0]);
647 if (paste_entry_position == 0) {
648 /* change delimiting keys */
649 replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0],0);
652 tb->insert_size[0] = 0;
653 pos_in_item++;
654 } else { /* new directory entry doesn't fall into R[0] */
656 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
658 } else { /* regular object */
660 int n_shift, n_rem, r_zeros_number;
661 const char *r_body;
663 /* Calculate number of bytes which must be shifted from appended item */
664 if ((n_shift = tb->rbytes - tb->insert_size[0]) < 0)
665 n_shift = 0;
667 RFALSE(pos_in_item != ih_item_len
668 (B_N_PITEM_HEAD(tbS0, item_pos)),
669 "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
670 pos_in_item, ih_item_len
671 (B_N_PITEM_HEAD(tbS0, item_pos)));
673 leaf_shift_right(tb, tb->rnum[0], n_shift);
674 /* Calculate number of bytes which must remain in body after appending to R[0] */
675 if ((n_rem = tb->insert_size[0] - tb->rbytes) < 0)
676 n_rem = 0;
679 int version;
680 unsigned long temp_rem = n_rem;
682 version = ih_version(B_N_PITEM_HEAD(tb->R[0], 0));
683 if (is_indirect_le_key(version, B_N_PKEY(tb->R[0], 0))) {
684 temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT);
686 set_le_key_k_offset(version, B_N_PKEY(tb->R[0], 0),
687 le_key_k_offset(version, B_N_PKEY(tb->R[0], 0)) + temp_rem);
688 set_le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]),
689 le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0])) + temp_rem);
691 /* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem;
692 k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/
693 do_balance_mark_internal_dirty(tb, tb->CFR[0], 0);
695 /* Append part of body into R[0] */
696 buffer_info_init_right(tb, &bi);
697 if (n_rem > zeros_num) {
698 r_zeros_number = 0;
699 r_body = body + n_rem - zeros_num;
700 } else {
701 r_body = body;
702 r_zeros_number = zeros_num - n_rem;
703 zeros_num -= r_zeros_number;
706 leaf_paste_in_buffer(&bi, 0, n_shift,
707 tb->insert_size[0] - n_rem,
708 r_body, r_zeros_number);
710 if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->R[0], 0))) {
711 #if 0
712 RFALSE(n_rem,
713 "PAP-12160: paste more than one unformatted node pointer");
714 #endif
715 set_ih_free_space(B_N_PITEM_HEAD(tb->R[0], 0), 0);
717 tb->insert_size[0] = n_rem;
718 if (!n_rem)
719 pos_in_item++;
721 } else { /* pasted item in whole falls into R[0] */
723 struct item_head *pasted;
725 ret_val = leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
726 /* append item in R[0] */
727 if (pos_in_item >= 0) {
728 buffer_info_init_right(tb, &bi);
729 leaf_paste_in_buffer(&bi, item_pos - n + tb->rnum[0], pos_in_item,
730 tb->insert_size[0], body, zeros_num);
733 /* paste new entry, if item is directory item */
734 pasted = B_N_PITEM_HEAD(tb->R[0], item_pos - n + tb->rnum[0]);
735 if (is_direntry_le_ih(pasted) && pos_in_item >= 0) {
736 leaf_paste_entries(&bi, item_pos - n + tb->rnum[0],
737 pos_in_item, 1,
738 (struct reiserfs_de_head *) body,
739 body + DEH_SIZE, tb->insert_size[0]);
740 if (!pos_in_item) {
742 RFALSE(item_pos - n + tb->rnum[0],
743 "PAP-12165: directory item must be first item of node when pasting is in 0th position");
745 /* update delimiting keys */
746 replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);
750 if (is_indirect_le_ih(pasted))
751 set_ih_free_space(pasted, 0);
752 zeros_num = tb->insert_size[0] = 0;
754 } else { /* new item doesn't fall into R[0] */
756 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
758 break;
759 default: /* cases d and t */
760 reiserfs_panic(tb->tb_sb, "PAP-12175",
761 "rnum > 0: unexpected mode: %s(%d)",
762 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
767 /* tb->rnum[0] > 0 */
768 RFALSE(tb->blknum[0] > 3,
769 "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]);
770 RFALSE(tb->blknum[0] < 0,
771 "PAP-12185: blknum can not be %d. It must be >= 0", tb->blknum[0]);
773 /* if while adding to a node we discover that it is possible to split
774 it in two, and merge the left part into the left neighbor and the
775 right part into the right neighbor, eliminating the node */
776 if (tb->blknum[0] == 0) { /* node S[0] is empty now */
778 RFALSE(!tb->lnum[0] || !tb->rnum[0],
779 "PAP-12190: lnum and rnum must not be zero");
780 /* if insertion was done before 0-th position in R[0], right
781 delimiting key of the tb->L[0]'s and left delimiting key are
782 not set correctly */
783 if (tb->CFL[0]) {
784 if (!tb->CFR[0])
785 reiserfs_panic(tb->tb_sb, "vs-12195",
786 "CFR not initialized");
787 copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
788 B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]));
789 do_balance_mark_internal_dirty(tb, tb->CFL[0], 0);
792 reiserfs_invalidate_buffer(tb, tbS0);
793 return 0;
796 /* Fill new nodes that appear in place of S[0] */
798 /* I am told that this copying is because we need an array to enable
799 the looping code. -Hans */
800 snum[0] = tb->s1num, snum[1] = tb->s2num;
801 sbytes[0] = tb->s1bytes;
802 sbytes[1] = tb->s2bytes;
803 for (i = tb->blknum[0] - 2; i >= 0; i--) {
805 RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i,
806 snum[i]);
808 /* here we shift from S to S_new nodes */
810 S_new[i] = get_FEB(tb);
812 /* initialized block type and tree level */
813 set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL);
815 n = B_NR_ITEMS(tbS0);
817 switch (flag) {
818 case M_INSERT: /* insert item */
820 if (n - snum[i] < item_pos) { /* new item or it's part falls to first new node S_new[i] */
821 if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) { /* part of new item falls into S_new[i] */
822 int old_key_comp, old_len, r_zeros_number;
823 const char *r_body;
824 int version;
826 /* Move snum[i]-1 items from S[0] to S_new[i] */
827 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
828 snum[i] - 1, -1,
829 S_new[i]);
830 /* Remember key component and item length */
831 version = ih_version(ih);
832 old_key_comp = le_ih_k_offset(ih);
833 old_len = ih_item_len(ih);
835 /* Calculate key component and item length to insert into S_new[i] */
836 set_le_ih_k_offset(ih, le_ih_k_offset(ih) +
837 ((old_len - sbytes[i]) << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0)));
839 put_ih_item_len(ih, sbytes[i]);
841 /* Insert part of the item into S_new[i] before 0-th item */
842 buffer_info_init_bh(tb, &bi, S_new[i]);
844 if ((old_len - sbytes[i]) > zeros_num) {
845 r_zeros_number = 0;
846 r_body = body + (old_len - sbytes[i]) - zeros_num;
847 } else {
848 r_body = body;
849 r_zeros_number = zeros_num - (old_len - sbytes[i]);
850 zeros_num -= r_zeros_number;
853 leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeros_number);
855 /* Calculate key component and item length to insert into S[i] */
856 set_le_ih_k_offset(ih, old_key_comp);
857 put_ih_item_len(ih, old_len - sbytes[i]);
858 tb->insert_size[0] -= sbytes[i];
859 } else { /* whole new item falls into S_new[i] */
861 /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
862 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
863 snum[i] - 1, sbytes[i], S_new[i]);
865 /* Insert new item into S_new[i] */
866 buffer_info_init_bh(tb, &bi, S_new[i]);
867 leaf_insert_into_buf(&bi, item_pos - n + snum[i] - 1,
868 ih, body, zeros_num);
870 zeros_num = tb->insert_size[0] = 0;
874 else { /* new item or it part don't falls into S_new[i] */
876 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
877 snum[i], sbytes[i], S_new[i]);
879 break;
881 case M_PASTE: /* append item */
883 if (n - snum[i] <= item_pos) { /* pasted item or part if it falls to S_new[i] */
884 if (item_pos == n - snum[i] && sbytes[i] != -1) { /* we must shift part of the appended item */
885 struct item_head *aux_ih;
887 RFALSE(ih, "PAP-12210: ih must be 0");
889 aux_ih = B_N_PITEM_HEAD(tbS0, item_pos);
890 if (is_direntry_le_ih(aux_ih)) {
891 /* we append to directory item */
893 int entry_count;
895 entry_count = ih_entry_count(aux_ih);
897 if (entry_count - sbytes[i] < pos_in_item && pos_in_item <= entry_count) {
898 /* new directory entry falls into S_new[i] */
900 RFALSE(!tb->insert_size[0], "PAP-12215: insert_size is already 0");
901 RFALSE(sbytes[i] - 1 >= entry_count,
902 "PAP-12220: there are no so much entries (%d), only %d",
903 sbytes[i] - 1, entry_count);
905 /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
906 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i] - 1, S_new[i]);
907 /* Paste given directory entry to directory item */
908 buffer_info_init_bh(tb, &bi, S_new[i]);
909 leaf_paste_in_buffer(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1,
910 tb->insert_size[0], body, zeros_num);
911 /* paste new directory entry */
912 leaf_paste_entries(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1, 1,
913 (struct reiserfs_de_head *) body,
914 body + DEH_SIZE, tb->insert_size[0]);
915 tb->insert_size[0] = 0;
916 pos_in_item++;
917 } else { /* new directory entry doesn't fall into S_new[i] */
918 leaf_move_items(LEAF_FROM_S_TO_SNEW,tb, snum[i], sbytes[i], S_new[i]);
920 } else { /* regular object */
922 int n_shift, n_rem, r_zeros_number;
923 const char *r_body;
925 RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)) || tb->insert_size[0] <= 0,
926 "PAP-12225: item too short or insert_size <= 0");
928 /* Calculate number of bytes which must be shifted from appended item */
929 n_shift = sbytes[i] - tb->insert_size[0];
930 if (n_shift < 0)
931 n_shift = 0;
932 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]);
934 /* Calculate number of bytes which must remain in body after append to S_new[i] */
935 n_rem = tb->insert_size[0] - sbytes[i];
936 if (n_rem < 0)
937 n_rem = 0;
938 /* Append part of body into S_new[0] */
939 buffer_info_init_bh(tb, &bi, S_new[i]);
940 if (n_rem > zeros_num) {
941 r_zeros_number = 0;
942 r_body = body + n_rem - zeros_num;
943 } else {
944 r_body = body;
945 r_zeros_number = zeros_num - n_rem;
946 zeros_num -= r_zeros_number;
949 leaf_paste_in_buffer(&bi, 0, n_shift,
950 tb->insert_size[0] - n_rem,
951 r_body, r_zeros_number);
953 struct item_head *tmp;
955 tmp = B_N_PITEM_HEAD(S_new[i], 0);
956 if (is_indirect_le_ih
957 (tmp)) {
958 set_ih_free_space(tmp, 0);
959 set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + (n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT)));
960 } else {
961 set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + n_rem);
965 tb->insert_size[0] = n_rem;
966 if (!n_rem)
967 pos_in_item++;
969 } else
970 /* item falls wholly into S_new[i] */
972 int leaf_mi;
973 struct item_head *pasted;
975 #ifdef CONFIG_REISERFS_CHECK
976 struct item_head *ih_check = B_N_PITEM_HEAD(tbS0, item_pos);
978 if (!is_direntry_le_ih(ih_check)
979 && (pos_in_item != ih_item_len(ih_check)
980 || tb->insert_size[0] <= 0))
981 reiserfs_panic(tb->tb_sb,
982 "PAP-12235",
983 "pos_in_item "
984 "must be equal "
985 "to ih_item_len");
986 #endif /* CONFIG_REISERFS_CHECK */
988 leaf_mi = leaf_move_items(LEAF_FROM_S_TO_SNEW,
989 tb, snum[i],
990 sbytes[i],
991 S_new[i]);
993 RFALSE(leaf_mi,
994 "PAP-12240: unexpected value returned by leaf_move_items (%d)",
995 leaf_mi);
997 /* paste into item */
998 buffer_info_init_bh(tb, &bi, S_new[i]);
999 leaf_paste_in_buffer(&bi,
1000 item_pos - n + snum[i],
1001 pos_in_item,
1002 tb->insert_size[0],
1003 body, zeros_num);
1005 pasted = B_N_PITEM_HEAD(S_new[i], item_pos - n + snum[i]);
1006 if (is_direntry_le_ih(pasted)) {
1007 leaf_paste_entries(&bi,
1008 item_pos - n + snum[i],
1009 pos_in_item, 1,
1010 (struct reiserfs_de_head *)body,
1011 body + DEH_SIZE,
1012 tb->insert_size[0]
1016 /* if we paste to indirect item update ih_free_space */
1017 if (is_indirect_le_ih(pasted))
1018 set_ih_free_space(pasted, 0);
1019 zeros_num = tb->insert_size[0] = 0;
1023 else { /* pasted item doesn't fall into S_new[i] */
1025 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1026 snum[i], sbytes[i], S_new[i]);
1028 break;
1029 default: /* cases d and t */
1030 reiserfs_panic(tb->tb_sb, "PAP-12245",
1031 "blknum > 2: unexpected mode: %s(%d)",
1032 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
1035 memcpy(insert_key + i, B_N_PKEY(S_new[i], 0), KEY_SIZE);
1036 insert_ptr[i] = S_new[i];
1038 RFALSE(!buffer_journaled(S_new[i])
1039 || buffer_journal_dirty(S_new[i])
1040 || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)",
1041 i, S_new[i]);
1044 /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
1045 affected item which remains in S */
1046 if (0 <= item_pos && item_pos < tb->s0num) { /* if we must insert or append into buffer S[0] */
1048 switch (flag) {
1049 case M_INSERT: /* insert item into S[0] */
1050 buffer_info_init_tbS0(tb, &bi);
1051 leaf_insert_into_buf(&bi, item_pos, ih, body,
1052 zeros_num);
1054 /* If we insert the first key change the delimiting key */
1055 if (item_pos == 0) {
1056 if (tb->CFL[0]) /* can be 0 in reiserfsck */
1057 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
1059 break;
1061 case M_PASTE:{ /* append item in S[0] */
1062 struct item_head *pasted;
1064 pasted = B_N_PITEM_HEAD(tbS0, item_pos);
1065 /* when directory, may be new entry already pasted */
1066 if (is_direntry_le_ih(pasted)) {
1067 if (pos_in_item >= 0 && pos_in_item <= ih_entry_count(pasted)) {
1069 RFALSE(!tb->insert_size[0],
1070 "PAP-12260: insert_size is 0 already");
1072 /* prepare space */
1073 buffer_info_init_tbS0(tb, &bi);
1074 leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
1075 tb->insert_size[0], body,
1076 zeros_num);
1078 /* paste entry */
1079 leaf_paste_entries(&bi, item_pos, pos_in_item, 1,
1080 (struct reiserfs_de_head *)body,
1081 body + DEH_SIZE,
1082 tb->insert_size[0]);
1083 if (!item_pos && !pos_in_item) {
1084 RFALSE(!tb->CFL[0] || !tb->L[0],
1085 "PAP-12270: CFL[0]/L[0] must be specified");
1086 if (tb->CFL[0])
1087 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
1089 tb->insert_size[0] = 0;
1091 } else { /* regular object */
1092 if (pos_in_item == ih_item_len(pasted)) {
1094 RFALSE(tb->insert_size[0] <= 0,
1095 "PAP-12275: insert size must not be %d",
1096 tb->insert_size[0]);
1097 buffer_info_init_tbS0(tb, &bi);
1098 leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
1099 tb->insert_size[0], body, zeros_num);
1101 if (is_indirect_le_ih(pasted)) {
1102 #if 0
1103 RFALSE(tb->
1104 insert_size[0] !=
1105 UNFM_P_SIZE,
1106 "PAP-12280: insert_size for indirect item must be %d, not %d",
1107 UNFM_P_SIZE,
1108 tb->
1109 insert_size[0]);
1110 #endif
1111 set_ih_free_space(pasted, 0);
1113 tb->insert_size[0] = 0;
1115 #ifdef CONFIG_REISERFS_CHECK
1116 else {
1117 if (tb->insert_size[0]) {
1118 print_cur_tb("12285");
1119 reiserfs_panic(tb->tb_sb,
1120 "PAP-12285",
1121 "insert_size "
1122 "must be 0 "
1123 "(%d)",
1124 tb->insert_size[0]);
1127 #endif /* CONFIG_REISERFS_CHECK */
1130 } /* case M_PASTE: */
1133 #ifdef CONFIG_REISERFS_CHECK
1134 if (flag == M_PASTE && tb->insert_size[0]) {
1135 print_cur_tb("12290");
1136 reiserfs_panic(tb->tb_sb,
1137 "PAP-12290", "insert_size is still not 0 (%d)",
1138 tb->insert_size[0]);
1140 #endif /* CONFIG_REISERFS_CHECK */
1141 return 0;
1142 } /* Leaf level of the tree is balanced (end of balance_leaf) */
1144 /* Make empty node */
1145 void make_empty_node(struct buffer_info *bi)
1147 struct block_head *blkh;
1149 RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");
1151 blkh = B_BLK_HEAD(bi->bi_bh);
1152 set_blkh_nr_item(blkh, 0);
1153 set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh));
1155 if (bi->bi_parent)
1156 B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */
1159 /* Get first empty buffer */
1160 struct buffer_head *get_FEB(struct tree_balance *tb)
1162 int i;
1163 struct buffer_info bi;
1165 for (i = 0; i < MAX_FEB_SIZE; i++)
1166 if (tb->FEB[i] != NULL)
1167 break;
1169 if (i == MAX_FEB_SIZE)
1170 reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty");
1172 buffer_info_init_bh(tb, &bi, tb->FEB[i]);
1173 make_empty_node(&bi);
1174 set_buffer_uptodate(tb->FEB[i]);
1175 tb->used[i] = tb->FEB[i];
1176 tb->FEB[i] = NULL;
1178 return tb->used[i];
1181 /* This is now used because reiserfs_free_block has to be able to
1182 ** schedule.
1184 static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
1186 int i;
1188 if (buffer_dirty(bh))
1189 reiserfs_warning(tb->tb_sb, "reiserfs-12320",
1190 "called with dirty buffer");
1191 for (i = 0; i < ARRAY_SIZE(tb->thrown); i++)
1192 if (!tb->thrown[i]) {
1193 tb->thrown[i] = bh;
1194 get_bh(bh); /* free_thrown puts this */
1195 return;
1197 reiserfs_warning(tb->tb_sb, "reiserfs-12321",
1198 "too many thrown buffers");
1201 static void free_thrown(struct tree_balance *tb)
1203 int i;
1204 b_blocknr_t blocknr;
1205 for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) {
1206 if (tb->thrown[i]) {
1207 blocknr = tb->thrown[i]->b_blocknr;
1208 if (buffer_dirty(tb->thrown[i]))
1209 reiserfs_warning(tb->tb_sb, "reiserfs-12322",
1210 "called with dirty buffer %d",
1211 blocknr);
1212 brelse(tb->thrown[i]); /* incremented in store_thrown */
1213 reiserfs_free_block(tb->transaction_handle, NULL,
1214 blocknr, 0);
1219 void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh)
1221 struct block_head *blkh;
1222 blkh = B_BLK_HEAD(bh);
1223 set_blkh_level(blkh, FREE_LEVEL);
1224 set_blkh_nr_item(blkh, 0);
1226 clear_buffer_dirty(bh);
1227 store_thrown(tb, bh);
1230 /* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
1231 void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest,
1232 struct buffer_head *src, int n_src)
1235 RFALSE(dest == NULL || src == NULL,
1236 "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
1237 src, dest);
1238 RFALSE(!B_IS_KEYS_LEVEL(dest),
1239 "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
1240 dest);
1241 RFALSE(n_dest < 0 || n_src < 0,
1242 "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
1243 RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
1244 "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
1245 n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));
1247 if (B_IS_ITEMS_LEVEL(src))
1248 /* source buffer contains leaf node */
1249 memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PITEM_HEAD(src, n_src),
1250 KEY_SIZE);
1251 else
1252 memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PDELIM_KEY(src, n_src),
1253 KEY_SIZE);
1255 do_balance_mark_internal_dirty(tb, dest, 0);
1258 int get_left_neighbor_position(struct tree_balance *tb, int h)
1260 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1262 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FL[h] == NULL,
1263 "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
1264 h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h));
1266 if (Sh_position == 0)
1267 return B_NR_ITEMS(tb->FL[h]);
1268 else
1269 return Sh_position - 1;
1272 int get_right_neighbor_position(struct tree_balance *tb, int h)
1274 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1276 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FR[h] == NULL,
1277 "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
1278 h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]);
1280 if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h)))
1281 return 0;
1282 else
1283 return Sh_position + 1;
1286 #ifdef CONFIG_REISERFS_CHECK
1288 int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);
1289 static void check_internal_node(struct super_block *s, struct buffer_head *bh,
1290 char *mes)
1292 struct disk_child *dc;
1293 int i;
1295 RFALSE(!bh, "PAP-12336: bh == 0");
1297 if (!bh || !B_IS_IN_TREE(bh))
1298 return;
1300 RFALSE(!buffer_dirty(bh) &&
1301 !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
1302 "PAP-12337: buffer (%b) must be dirty", bh);
1303 dc = B_N_CHILD(bh, 0);
1305 for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) {
1306 if (!is_reusable(s, dc_block_number(dc), 1)) {
1307 print_cur_tb(mes);
1308 reiserfs_panic(s, "PAP-12338",
1309 "invalid child pointer %y in %b",
1310 dc, bh);
1315 static int locked_or_not_in_tree(struct tree_balance *tb,
1316 struct buffer_head *bh, char *which)
1318 if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
1319 !B_IS_IN_TREE(bh)) {
1320 reiserfs_warning(tb->tb_sb, "vs-12339", "%s (%b)", which, bh);
1321 return 1;
1323 return 0;
1326 static int check_before_balancing(struct tree_balance *tb)
1328 int retval = 0;
1330 if (REISERFS_SB(tb->tb_sb)->cur_tb) {
1331 reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule "
1332 "occurred based on cur_tb not being null at "
1333 "this point in code. do_balance cannot properly "
1334 "handle concurrent tree accesses on a same "
1335 "mount point.");
1338 /* double check that buffers that we will modify are unlocked. (fix_nodes should already have
1339 prepped all of these for us). */
1340 if (tb->lnum[0]) {
1341 retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]");
1342 retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]");
1343 retval |= locked_or_not_in_tree(tb, tb->CFL[0], "CFL[0]");
1344 check_leaf(tb->L[0]);
1346 if (tb->rnum[0]) {
1347 retval |= locked_or_not_in_tree(tb, tb->R[0], "R[0]");
1348 retval |= locked_or_not_in_tree(tb, tb->FR[0], "FR[0]");
1349 retval |= locked_or_not_in_tree(tb, tb->CFR[0], "CFR[0]");
1350 check_leaf(tb->R[0]);
1352 retval |= locked_or_not_in_tree(tb, PATH_PLAST_BUFFER(tb->tb_path),
1353 "S[0]");
1354 check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1356 return retval;
1359 static void check_after_balance_leaf(struct tree_balance *tb)
1361 if (tb->lnum[0]) {
1362 if (B_FREE_SPACE(tb->L[0]) !=
1363 MAX_CHILD_SIZE(tb->L[0]) -
1364 dc_size(B_N_CHILD
1365 (tb->FL[0], get_left_neighbor_position(tb, 0)))) {
1366 print_cur_tb("12221");
1367 reiserfs_panic(tb->tb_sb, "PAP-12355",
1368 "shift to left was incorrect");
1371 if (tb->rnum[0]) {
1372 if (B_FREE_SPACE(tb->R[0]) !=
1373 MAX_CHILD_SIZE(tb->R[0]) -
1374 dc_size(B_N_CHILD
1375 (tb->FR[0], get_right_neighbor_position(tb, 0)))) {
1376 print_cur_tb("12222");
1377 reiserfs_panic(tb->tb_sb, "PAP-12360",
1378 "shift to right was incorrect");
1381 if (PATH_H_PBUFFER(tb->tb_path, 1) &&
1382 (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) !=
1383 (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1384 dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1385 PATH_H_POSITION(tb->tb_path, 1)))))) {
1386 int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0));
1387 int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1388 dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1389 PATH_H_POSITION(tb->tb_path,
1390 1))));
1391 print_cur_tb("12223");
1392 reiserfs_warning(tb->tb_sb, "reiserfs-12363",
1393 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
1394 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
1395 left,
1396 MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)),
1397 PATH_H_PBUFFER(tb->tb_path, 1),
1398 PATH_H_POSITION(tb->tb_path, 1),
1399 dc_size(B_N_CHILD
1400 (PATH_H_PBUFFER(tb->tb_path, 1),
1401 PATH_H_POSITION(tb->tb_path, 1))),
1402 right);
1403 reiserfs_panic(tb->tb_sb, "PAP-12365", "S is incorrect");
1407 static void check_leaf_level(struct tree_balance *tb)
1409 check_leaf(tb->L[0]);
1410 check_leaf(tb->R[0]);
1411 check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1414 static void check_internal_levels(struct tree_balance *tb)
1416 int h;
1418 /* check all internal nodes */
1419 for (h = 1; tb->insert_size[h]; h++) {
1420 check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h),
1421 "BAD BUFFER ON PATH");
1422 if (tb->lnum[h])
1423 check_internal_node(tb->tb_sb, tb->L[h], "BAD L");
1424 if (tb->rnum[h])
1425 check_internal_node(tb->tb_sb, tb->R[h], "BAD R");
1430 #endif
1432 /* Now we have all of the buffers that must be used in balancing of
1433 the tree. We rely on the assumption that schedule() will not occur
1434 while do_balance works. ( Only interrupt handlers are acceptable.)
1435 We balance the tree according to the analysis made before this,
1436 using buffers already obtained. For SMP support it will someday be
1437 necessary to add ordered locking of tb. */
1439 /* Some interesting rules of balancing:
1441 we delete a maximum of two nodes per level per balancing: we never
1442 delete R, when we delete two of three nodes L, S, R then we move
1443 them into R.
1445 we only delete L if we are deleting two nodes, if we delete only
1446 one node we delete S
1448 if we shift leaves then we shift as much as we can: this is a
1449 deliberate policy of extremism in node packing which results in
1450 higher average utilization after repeated random balance operations
1451 at the cost of more memory copies and more balancing as a result of
1452 small insertions to full nodes.
1454 if we shift internal nodes we try to evenly balance the node
1455 utilization, with consequent less balancing at the cost of lower
1456 utilization.
1458 one could argue that the policy for directories in leaves should be
1459 that of internal nodes, but we will wait until another day to
1460 evaluate this.... It would be nice to someday measure and prove
1461 these assumptions as to what is optimal....
1465 static inline void do_balance_starts(struct tree_balance *tb)
1467 /* use print_cur_tb() to see initial state of struct
1468 tree_balance */
1470 /* store_print_tb (tb); */
1472 /* do not delete, just comment it out */
1473 /* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
1474 "check");*/
1475 RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
1476 #ifdef CONFIG_REISERFS_CHECK
1477 REISERFS_SB(tb->tb_sb)->cur_tb = tb;
1478 #endif
1481 static inline void do_balance_completed(struct tree_balance *tb)
1484 #ifdef CONFIG_REISERFS_CHECK
1485 check_leaf_level(tb);
1486 check_internal_levels(tb);
1487 REISERFS_SB(tb->tb_sb)->cur_tb = NULL;
1488 #endif
1490 /* reiserfs_free_block is no longer schedule safe. So, we need to
1491 ** put the buffers we want freed on the thrown list during do_balance,
1492 ** and then free them now
1495 REISERFS_SB(tb->tb_sb)->s_do_balance++;
1497 /* release all nodes hold to perform the balancing */
1498 unfix_nodes(tb);
1500 free_thrown(tb);
1503 void do_balance(struct tree_balance *tb, /* tree_balance structure */
1504 struct item_head *ih, /* item header of inserted item */
1505 const char *body, /* body of inserted item or bytes to paste */
1506 int flag)
1507 { /* i - insert, d - delete
1508 c - cut, p - paste
1510 Cut means delete part of an item
1511 (includes removing an entry from a
1512 directory).
1514 Delete means delete whole item.
1516 Insert means add a new item into the
1517 tree.
1519 Paste means to append to the end of an
1520 existing file or to insert a directory
1521 entry. */
1522 int child_pos, /* position of a child node in its parent */
1523 h; /* level of the tree being processed */
1524 struct item_head insert_key[2]; /* in our processing of one level
1525 we sometimes determine what
1526 must be inserted into the next
1527 higher level. This insertion
1528 consists of a key or two keys
1529 and their corresponding
1530 pointers */
1531 struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next
1532 level */
1534 tb->tb_mode = flag;
1535 tb->need_balance_dirty = 0;
1537 if (FILESYSTEM_CHANGED_TB(tb)) {
1538 reiserfs_panic(tb->tb_sb, "clm-6000", "fs generation has "
1539 "changed");
1541 /* if we have no real work to do */
1542 if (!tb->insert_size[0]) {
1543 reiserfs_warning(tb->tb_sb, "PAP-12350",
1544 "insert_size == 0, mode == %c", flag);
1545 unfix_nodes(tb);
1546 return;
1549 atomic_inc(&(fs_generation(tb->tb_sb)));
1550 do_balance_starts(tb);
1552 /* balance leaf returns 0 except if combining L R and S into
1553 one node. see balance_internal() for explanation of this
1554 line of code. */
1555 child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
1556 balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);
1558 #ifdef CONFIG_REISERFS_CHECK
1559 check_after_balance_leaf(tb);
1560 #endif
1562 /* Balance internal level of the tree. */
1563 for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++)
1564 child_pos =
1565 balance_internal(tb, h, child_pos, insert_key, insert_ptr);
1567 do_balance_completed(tb);