netpoll queue cleanup
[linux/fpc-iii.git] / fs / reiserfs / do_balan.c
blobfba304e64de81d51805d52434b152a6fc8aed50b
1 /*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
5 /* Now we have all buffers that must be used in balancing of the tree */
6 /* Further calculations can not cause schedule(), and thus the buffer */
7 /* tree will be stable until the balancing will be finished */
8 /* balance the tree according to the analysis made before, */
9 /* and using buffers obtained after all above. */
11 /**
12 ** balance_leaf_when_delete
13 ** balance_leaf
14 ** do_balance
16 **/
18 #include <asm/uaccess.h>
19 #include <linux/time.h>
20 #include <linux/reiserfs_fs.h>
21 #include <linux/buffer_head.h>
23 #ifdef CONFIG_REISERFS_CHECK
25 struct tree_balance *cur_tb = NULL; /* detects whether more than one
26 copy of tb exists as a means
27 of checking whether schedule
28 is interrupting do_balance */
29 #endif
31 inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
32 struct buffer_head *bh, int flag)
34 journal_mark_dirty(tb->transaction_handle,
35 tb->transaction_handle->t_super, bh);
38 #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
39 #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
41 /* summary:
42 if deleting something ( tb->insert_size[0] < 0 )
43 return(balance_leaf_when_delete()); (flag d handled here)
44 else
45 if lnum is larger than 0 we put items into the left node
46 if rnum is larger than 0 we put items into the right node
47 if snum1 is larger than 0 we put items into the new node s1
48 if snum2 is larger than 0 we put items into the new node s2
49 Note that all *num* count new items being created.
51 It would be easier to read balance_leaf() if each of these summary
52 lines was a separate procedure rather than being inlined. I think
53 that there are many passages here and in balance_leaf_when_delete() in
54 which two calls to one procedure can replace two passages, and it
55 might save cache space and improve software maintenance costs to do so.
57 Vladimir made the perceptive comment that we should offload most of
58 the decision making in this function into fix_nodes/check_balance, and
59 then create some sort of structure in tb that says what actions should
60 be performed by do_balance.
62 -Hans */
64 /* Balance leaf node in case of delete or cut: insert_size[0] < 0
66 * lnum, rnum can have values >= -1
67 * -1 means that the neighbor must be joined with S
68 * 0 means that nothing should be done with the neighbor
69 * >0 means to shift entirely or partly the specified number of items to the neighbor
71 static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
73 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
74 int item_pos = PATH_LAST_POSITION(tb->tb_path);
75 int pos_in_item = tb->tb_path->pos_in_item;
76 struct buffer_info bi;
77 int n;
78 struct item_head *ih;
80 RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
81 "vs- 12000: level: wrong FR %z", tb->FR[0]);
82 RFALSE(tb->blknum[0] > 1,
83 "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
84 RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0),
85 "PAP-12010: tree can not be empty");
87 ih = B_N_PITEM_HEAD(tbS0, item_pos);
89 /* Delete or truncate the item */
91 switch (flag) {
92 case M_DELETE: /* delete item in S[0] */
94 RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
95 "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
96 -tb->insert_size[0], ih);
98 bi.tb = tb;
99 bi.bi_bh = tbS0;
100 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
101 bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
102 leaf_delete_items(&bi, 0, item_pos, 1, -1);
104 if (!item_pos && tb->CFL[0]) {
105 if (B_NR_ITEMS(tbS0)) {
106 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0,
108 } else {
109 if (!PATH_H_POSITION(tb->tb_path, 1))
110 replace_key(tb, tb->CFL[0], tb->lkey[0],
111 PATH_H_PPARENT(tb->tb_path,
112 0), 0);
116 RFALSE(!item_pos && !tb->CFL[0],
117 "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0],
118 tb->L[0]);
120 break;
122 case M_CUT:{ /* cut item in S[0] */
123 bi.tb = tb;
124 bi.bi_bh = tbS0;
125 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
126 bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
127 if (is_direntry_le_ih(ih)) {
129 /* UFS unlink semantics are such that you can only delete one directory entry at a time. */
130 /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */
131 tb->insert_size[0] = -1;
132 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
133 -tb->insert_size[0]);
135 RFALSE(!item_pos && !pos_in_item && !tb->CFL[0],
136 "PAP-12030: can not change delimiting key. CFL[0]=%p",
137 tb->CFL[0]);
139 if (!item_pos && !pos_in_item && tb->CFL[0]) {
140 replace_key(tb, tb->CFL[0], tb->lkey[0],
141 tbS0, 0);
143 } else {
144 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
145 -tb->insert_size[0]);
147 RFALSE(!ih_item_len(ih),
148 "PAP-12035: cut must leave non-zero dynamic length of item");
150 break;
153 default:
154 print_cur_tb("12040");
155 reiserfs_panic(tb->tb_sb,
156 "PAP-12040: balance_leaf_when_delete: unexpectable mode: %s(%d)",
157 (flag ==
158 M_PASTE) ? "PASTE" : ((flag ==
159 M_INSERT) ? "INSERT" :
160 "UNKNOWN"), flag);
163 /* the rule is that no shifting occurs unless by shifting a node can be freed */
164 n = B_NR_ITEMS(tbS0);
165 if (tb->lnum[0]) { /* L[0] takes part in balancing */
166 if (tb->lnum[0] == -1) { /* L[0] must be joined with S[0] */
167 if (tb->rnum[0] == -1) { /* R[0] must be also joined with S[0] */
168 if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) {
169 /* all contents of all the 3 buffers will be in L[0] */
170 if (PATH_H_POSITION(tb->tb_path, 1) == 0
171 && 1 < B_NR_ITEMS(tb->FR[0]))
172 replace_key(tb, tb->CFL[0],
173 tb->lkey[0],
174 tb->FR[0], 1);
176 leaf_move_items(LEAF_FROM_S_TO_L, tb, n,
177 -1, NULL);
178 leaf_move_items(LEAF_FROM_R_TO_L, tb,
179 B_NR_ITEMS(tb->R[0]),
180 -1, NULL);
182 reiserfs_invalidate_buffer(tb, tbS0);
183 reiserfs_invalidate_buffer(tb,
184 tb->R[0]);
186 return 0;
188 /* all contents of all the 3 buffers will be in R[0] */
189 leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1,
190 NULL);
191 leaf_move_items(LEAF_FROM_L_TO_R, tb,
192 B_NR_ITEMS(tb->L[0]), -1, NULL);
194 /* right_delimiting_key is correct in R[0] */
195 replace_key(tb, tb->CFR[0], tb->rkey[0],
196 tb->R[0], 0);
198 reiserfs_invalidate_buffer(tb, tbS0);
199 reiserfs_invalidate_buffer(tb, tb->L[0]);
201 return -1;
204 RFALSE(tb->rnum[0] != 0,
205 "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
206 /* all contents of L[0] and S[0] will be in L[0] */
207 leaf_shift_left(tb, n, -1);
209 reiserfs_invalidate_buffer(tb, tbS0);
211 return 0;
213 /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */
215 RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
216 (tb->lnum[0] + tb->rnum[0] > n + 1),
217 "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
218 tb->rnum[0], tb->lnum[0], n);
219 RFALSE((tb->lnum[0] + tb->rnum[0] == n) &&
220 (tb->lbytes != -1 || tb->rbytes != -1),
221 "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
222 tb->rbytes, tb->lbytes);
223 RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) &&
224 (tb->lbytes < 1 || tb->rbytes != -1),
225 "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
226 tb->rbytes, tb->lbytes);
228 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
229 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
231 reiserfs_invalidate_buffer(tb, tbS0);
233 return 0;
236 if (tb->rnum[0] == -1) {
237 /* all contents of R[0] and S[0] will be in R[0] */
238 leaf_shift_right(tb, n, -1);
239 reiserfs_invalidate_buffer(tb, tbS0);
240 return 0;
243 RFALSE(tb->rnum[0],
244 "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
245 return 0;
248 static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item header of inserted item (this is on little endian) */
249 const char *body, /* body of inserted item or bytes to paste */
250 int flag, /* i - insert, d - delete, c - cut, p - paste
251 (see comment to do_balance) */
252 struct item_head *insert_key, /* in our processing of one level we sometimes determine what
253 must be inserted into the next higher level. This insertion
254 consists of a key or two keys and their corresponding
255 pointers */
256 struct buffer_head **insert_ptr /* inserted node-ptrs for the next level */
259 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
260 int item_pos = PATH_LAST_POSITION(tb->tb_path); /* index into the array of item headers in S[0]
261 of the affected item */
262 struct buffer_info bi;
263 struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */
264 int snum[2]; /* number of items that will be placed
265 into S_new (includes partially shifted
266 items) */
267 int sbytes[2]; /* if an item is partially shifted into S_new then
268 if it is a directory item
269 it is the number of entries from the item that are shifted into S_new
270 else
271 it is the number of bytes from the item that are shifted into S_new
273 int n, i;
274 int ret_val;
275 int pos_in_item;
276 int zeros_num;
278 PROC_INFO_INC(tb->tb_sb, balance_at[0]);
280 /* Make balance in case insert_size[0] < 0 */
281 if (tb->insert_size[0] < 0)
282 return balance_leaf_when_delete(tb, flag);
284 zeros_num = 0;
285 if (flag == M_INSERT && body == 0)
286 zeros_num = ih_item_len(ih);
288 pos_in_item = tb->tb_path->pos_in_item;
289 /* for indirect item pos_in_item is measured in unformatted node
290 pointers. Recalculate to bytes */
291 if (flag != M_INSERT
292 && is_indirect_le_ih(B_N_PITEM_HEAD(tbS0, item_pos)))
293 pos_in_item *= UNFM_P_SIZE;
295 if (tb->lnum[0] > 0) {
296 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */
297 if (item_pos < tb->lnum[0]) {
298 /* new item or it part falls to L[0], shift it too */
299 n = B_NR_ITEMS(tb->L[0]);
301 switch (flag) {
302 case M_INSERT: /* insert item into L[0] */
304 if (item_pos == tb->lnum[0] - 1
305 && tb->lbytes != -1) {
306 /* part of new item falls into L[0] */
307 int new_item_len;
308 int version;
310 ret_val =
311 leaf_shift_left(tb, tb->lnum[0] - 1,
312 -1);
314 /* Calculate item length to insert to S[0] */
315 new_item_len =
316 ih_item_len(ih) - tb->lbytes;
317 /* Calculate and check item length to insert to L[0] */
318 put_ih_item_len(ih,
319 ih_item_len(ih) -
320 new_item_len);
322 RFALSE(ih_item_len(ih) <= 0,
323 "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
324 ih_item_len(ih));
326 /* Insert new item into L[0] */
327 bi.tb = tb;
328 bi.bi_bh = tb->L[0];
329 bi.bi_parent = tb->FL[0];
330 bi.bi_position =
331 get_left_neighbor_position(tb, 0);
332 leaf_insert_into_buf(&bi,
333 n + item_pos -
334 ret_val, ih, body,
335 zeros_num >
336 ih_item_len(ih) ?
337 ih_item_len(ih) :
338 zeros_num);
340 version = ih_version(ih);
342 /* Calculate key component, item length and body to insert into S[0] */
343 set_le_ih_k_offset(ih,
344 le_ih_k_offset(ih) +
345 (tb->
346 lbytes <<
347 (is_indirect_le_ih
348 (ih) ? tb->tb_sb->
349 s_blocksize_bits -
350 UNFM_P_SHIFT :
351 0)));
353 put_ih_item_len(ih, new_item_len);
354 if (tb->lbytes > zeros_num) {
355 body +=
356 (tb->lbytes - zeros_num);
357 zeros_num = 0;
358 } else
359 zeros_num -= tb->lbytes;
361 RFALSE(ih_item_len(ih) <= 0,
362 "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
363 ih_item_len(ih));
364 } else {
365 /* new item in whole falls into L[0] */
366 /* Shift lnum[0]-1 items to L[0] */
367 ret_val =
368 leaf_shift_left(tb, tb->lnum[0] - 1,
369 tb->lbytes);
370 /* Insert new item into L[0] */
371 bi.tb = tb;
372 bi.bi_bh = tb->L[0];
373 bi.bi_parent = tb->FL[0];
374 bi.bi_position =
375 get_left_neighbor_position(tb, 0);
376 leaf_insert_into_buf(&bi,
377 n + item_pos -
378 ret_val, ih, body,
379 zeros_num);
380 tb->insert_size[0] = 0;
381 zeros_num = 0;
383 break;
385 case M_PASTE: /* append item in L[0] */
387 if (item_pos == tb->lnum[0] - 1
388 && tb->lbytes != -1) {
389 /* we must shift the part of the appended item */
390 if (is_direntry_le_ih
391 (B_N_PITEM_HEAD(tbS0, item_pos))) {
393 RFALSE(zeros_num,
394 "PAP-12090: invalid parameter in case of a directory");
395 /* directory item */
396 if (tb->lbytes > pos_in_item) {
397 /* new directory entry falls into L[0] */
398 struct item_head
399 *pasted;
400 int l_pos_in_item =
401 pos_in_item;
403 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
404 ret_val =
405 leaf_shift_left(tb,
406 tb->
407 lnum
408 [0],
409 tb->
410 lbytes
413 if (ret_val
414 && !item_pos) {
415 pasted =
416 B_N_PITEM_HEAD
417 (tb->L[0],
418 B_NR_ITEMS
419 (tb->
420 L[0]) -
422 l_pos_in_item +=
423 I_ENTRY_COUNT
424 (pasted) -
425 (tb->
426 lbytes -
430 /* Append given directory entry to directory item */
431 bi.tb = tb;
432 bi.bi_bh = tb->L[0];
433 bi.bi_parent =
434 tb->FL[0];
435 bi.bi_position =
436 get_left_neighbor_position
437 (tb, 0);
438 leaf_paste_in_buffer
439 (&bi,
440 n + item_pos -
441 ret_val,
442 l_pos_in_item,
443 tb->insert_size[0],
444 body, zeros_num);
446 /* previous string prepared space for pasting new entry, following string pastes this entry */
448 /* when we have merge directory item, pos_in_item has been changed too */
450 /* paste new directory entry. 1 is entry number */
451 leaf_paste_entries(bi.
452 bi_bh,
454 item_pos
456 ret_val,
457 l_pos_in_item,
459 (struct
460 reiserfs_de_head
462 body,
463 body
465 DEH_SIZE,
466 tb->
467 insert_size
470 tb->insert_size[0] = 0;
471 } else {
472 /* new directory item doesn't fall into L[0] */
473 /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
474 leaf_shift_left(tb,
475 tb->
476 lnum[0],
477 tb->
478 lbytes);
480 /* Calculate new position to append in item body */
481 pos_in_item -= tb->lbytes;
482 } else {
483 /* regular object */
484 RFALSE(tb->lbytes <= 0,
485 "PAP-12095: there is nothing to shift to L[0]. lbytes=%d",
486 tb->lbytes);
487 RFALSE(pos_in_item !=
488 ih_item_len
489 (B_N_PITEM_HEAD
490 (tbS0, item_pos)),
491 "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
492 ih_item_len
493 (B_N_PITEM_HEAD
494 (tbS0, item_pos)),
495 pos_in_item);
497 if (tb->lbytes >= pos_in_item) {
498 /* appended item will be in L[0] in whole */
499 int l_n;
501 /* this bytes number must be appended to the last item of L[h] */
502 l_n =
503 tb->lbytes -
504 pos_in_item;
506 /* Calculate new insert_size[0] */
507 tb->insert_size[0] -=
508 l_n;
510 RFALSE(tb->
511 insert_size[0] <=
513 "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
514 tb->
515 insert_size[0]);
516 ret_val =
517 leaf_shift_left(tb,
518 tb->
519 lnum
520 [0],
521 ih_item_len
522 (B_N_PITEM_HEAD
523 (tbS0,
524 item_pos)));
525 /* Append to body of item in L[0] */
526 bi.tb = tb;
527 bi.bi_bh = tb->L[0];
528 bi.bi_parent =
529 tb->FL[0];
530 bi.bi_position =
531 get_left_neighbor_position
532 (tb, 0);
533 leaf_paste_in_buffer
534 (&bi,
535 n + item_pos -
536 ret_val,
537 ih_item_len
538 (B_N_PITEM_HEAD
539 (tb->L[0],
540 n + item_pos -
541 ret_val)), l_n,
542 body,
543 zeros_num >
544 l_n ? l_n :
545 zeros_num);
546 /* 0-th item in S0 can be only of DIRECT type when l_n != 0 */
548 int version;
549 int temp_l =
550 l_n;
552 RFALSE
553 (ih_item_len
554 (B_N_PITEM_HEAD
555 (tbS0,
556 0)),
557 "PAP-12106: item length must be 0");
558 RFALSE
559 (comp_short_le_keys
560 (B_N_PKEY
561 (tbS0, 0),
562 B_N_PKEY
563 (tb->L[0],
565 item_pos
567 ret_val)),
568 "PAP-12107: items must be of the same file");
569 if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val))) {
570 temp_l =
573 (tb->
574 tb_sb->
575 s_blocksize_bits
577 UNFM_P_SHIFT);
579 /* update key of first item in S0 */
580 version =
581 ih_version
582 (B_N_PITEM_HEAD
583 (tbS0, 0));
584 set_le_key_k_offset
585 (version,
586 B_N_PKEY
587 (tbS0, 0),
588 le_key_k_offset
589 (version,
590 B_N_PKEY
591 (tbS0,
592 0)) +
593 temp_l);
594 /* update left delimiting key */
595 set_le_key_k_offset
596 (version,
597 B_N_PDELIM_KEY
598 (tb->
599 CFL[0],
600 tb->
601 lkey[0]),
602 le_key_k_offset
603 (version,
604 B_N_PDELIM_KEY
605 (tb->
606 CFL[0],
607 tb->
608 lkey[0]))
609 + temp_l);
612 /* Calculate new body, position in item and insert_size[0] */
613 if (l_n > zeros_num) {
614 body +=
615 (l_n -
616 zeros_num);
617 zeros_num = 0;
618 } else
619 zeros_num -=
620 l_n;
621 pos_in_item = 0;
623 RFALSE
624 (comp_short_le_keys
625 (B_N_PKEY(tbS0, 0),
626 B_N_PKEY(tb->L[0],
627 B_NR_ITEMS
628 (tb->
629 L[0]) -
632 !op_is_left_mergeable
633 (B_N_PKEY(tbS0, 0),
634 tbS0->b_size)
636 !op_is_left_mergeable
637 (B_N_PDELIM_KEY
638 (tb->CFL[0],
639 tb->lkey[0]),
640 tbS0->b_size),
641 "PAP-12120: item must be merge-able with left neighboring item");
642 } else { /* only part of the appended item will be in L[0] */
644 /* Calculate position in item for append in S[0] */
645 pos_in_item -=
646 tb->lbytes;
648 RFALSE(pos_in_item <= 0,
649 "PAP-12125: no place for paste. pos_in_item=%d",
650 pos_in_item);
652 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
653 leaf_shift_left(tb,
654 tb->
655 lnum[0],
656 tb->
657 lbytes);
660 } else { /* appended item will be in L[0] in whole */
662 struct item_head *pasted;
664 if (!item_pos && op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)) { /* if we paste into first item of S[0] and it is left mergable */
665 /* then increment pos_in_item by the size of the last item in L[0] */
666 pasted =
667 B_N_PITEM_HEAD(tb->L[0],
668 n - 1);
669 if (is_direntry_le_ih(pasted))
670 pos_in_item +=
671 ih_entry_count
672 (pasted);
673 else
674 pos_in_item +=
675 ih_item_len(pasted);
678 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
679 ret_val =
680 leaf_shift_left(tb, tb->lnum[0],
681 tb->lbytes);
682 /* Append to body of item in L[0] */
683 bi.tb = tb;
684 bi.bi_bh = tb->L[0];
685 bi.bi_parent = tb->FL[0];
686 bi.bi_position =
687 get_left_neighbor_position(tb, 0);
688 leaf_paste_in_buffer(&bi,
689 n + item_pos -
690 ret_val,
691 pos_in_item,
692 tb->insert_size[0],
693 body, zeros_num);
695 /* if appended item is directory, paste entry */
696 pasted =
697 B_N_PITEM_HEAD(tb->L[0],
698 n + item_pos -
699 ret_val);
700 if (is_direntry_le_ih(pasted))
701 leaf_paste_entries(bi.bi_bh,
703 item_pos -
704 ret_val,
705 pos_in_item,
707 (struct
708 reiserfs_de_head
709 *)body,
710 body +
711 DEH_SIZE,
712 tb->
713 insert_size
716 /* if appended item is indirect item, put unformatted node into un list */
717 if (is_indirect_le_ih(pasted))
718 set_ih_free_space(pasted, 0);
719 tb->insert_size[0] = 0;
720 zeros_num = 0;
722 break;
723 default: /* cases d and t */
724 reiserfs_panic(tb->tb_sb,
725 "PAP-12130: balance_leaf: lnum > 0: unexpectable mode: %s(%d)",
726 (flag ==
727 M_DELETE) ? "DELETE" : ((flag ==
728 M_CUT)
729 ? "CUT"
731 "UNKNOWN"),
732 flag);
734 } else {
735 /* new item doesn't fall into L[0] */
736 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
740 /* tb->lnum[0] > 0 */
741 /* Calculate new item position */
742 item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0));
744 if (tb->rnum[0] > 0) {
745 /* shift rnum[0] items from S[0] to the right neighbor R[0] */
746 n = B_NR_ITEMS(tbS0);
747 switch (flag) {
749 case M_INSERT: /* insert item */
750 if (n - tb->rnum[0] < item_pos) { /* new item or its part falls to R[0] */
751 if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) { /* part of new item falls into R[0] */
752 loff_t old_key_comp, old_len,
753 r_zeros_number;
754 const char *r_body;
755 int version;
756 loff_t offset;
758 leaf_shift_right(tb, tb->rnum[0] - 1,
759 -1);
761 version = ih_version(ih);
762 /* Remember key component and item length */
763 old_key_comp = le_ih_k_offset(ih);
764 old_len = ih_item_len(ih);
766 /* Calculate key component and item length to insert into R[0] */
767 offset =
768 le_ih_k_offset(ih) +
769 ((old_len -
770 tb->
771 rbytes) << (is_indirect_le_ih(ih)
772 ? tb->tb_sb->
773 s_blocksize_bits -
774 UNFM_P_SHIFT : 0));
775 set_le_ih_k_offset(ih, offset);
776 put_ih_item_len(ih, tb->rbytes);
777 /* Insert part of the item into R[0] */
778 bi.tb = tb;
779 bi.bi_bh = tb->R[0];
780 bi.bi_parent = tb->FR[0];
781 bi.bi_position =
782 get_right_neighbor_position(tb, 0);
783 if ((old_len - tb->rbytes) > zeros_num) {
784 r_zeros_number = 0;
785 r_body =
786 body + (old_len -
787 tb->rbytes) -
788 zeros_num;
789 } else {
790 r_body = body;
791 r_zeros_number =
792 zeros_num - (old_len -
793 tb->rbytes);
794 zeros_num -= r_zeros_number;
797 leaf_insert_into_buf(&bi, 0, ih, r_body,
798 r_zeros_number);
800 /* Replace right delimiting key by first key in R[0] */
801 replace_key(tb, tb->CFR[0], tb->rkey[0],
802 tb->R[0], 0);
804 /* Calculate key component and item length to insert into S[0] */
805 set_le_ih_k_offset(ih, old_key_comp);
806 put_ih_item_len(ih,
807 old_len - tb->rbytes);
809 tb->insert_size[0] -= tb->rbytes;
811 } else { /* whole new item falls into R[0] */
813 /* Shift rnum[0]-1 items to R[0] */
814 ret_val =
815 leaf_shift_right(tb,
816 tb->rnum[0] - 1,
817 tb->rbytes);
818 /* Insert new item into R[0] */
819 bi.tb = tb;
820 bi.bi_bh = tb->R[0];
821 bi.bi_parent = tb->FR[0];
822 bi.bi_position =
823 get_right_neighbor_position(tb, 0);
824 leaf_insert_into_buf(&bi,
825 item_pos - n +
826 tb->rnum[0] - 1,
827 ih, body,
828 zeros_num);
830 if (item_pos - n + tb->rnum[0] - 1 == 0) {
831 replace_key(tb, tb->CFR[0],
832 tb->rkey[0],
833 tb->R[0], 0);
836 zeros_num = tb->insert_size[0] = 0;
838 } else { /* new item or part of it doesn't fall into R[0] */
840 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
842 break;
844 case M_PASTE: /* append item */
846 if (n - tb->rnum[0] <= item_pos) { /* pasted item or part of it falls to R[0] */
847 if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) { /* we must shift the part of the appended item */
848 if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) { /* we append to directory item */
849 int entry_count;
851 RFALSE(zeros_num,
852 "PAP-12145: invalid parameter in case of a directory");
853 entry_count =
854 I_ENTRY_COUNT(B_N_PITEM_HEAD
855 (tbS0,
856 item_pos));
857 if (entry_count - tb->rbytes <
858 pos_in_item)
859 /* new directory entry falls into R[0] */
861 int paste_entry_position;
863 RFALSE(tb->rbytes - 1 >=
864 entry_count
865 || !tb->
866 insert_size[0],
867 "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
868 tb->rbytes,
869 entry_count);
870 /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
871 leaf_shift_right(tb,
872 tb->
873 rnum
874 [0],
875 tb->
876 rbytes
877 - 1);
878 /* Paste given directory entry to directory item */
879 paste_entry_position =
880 pos_in_item -
881 entry_count +
882 tb->rbytes - 1;
883 bi.tb = tb;
884 bi.bi_bh = tb->R[0];
885 bi.bi_parent =
886 tb->FR[0];
887 bi.bi_position =
888 get_right_neighbor_position
889 (tb, 0);
890 leaf_paste_in_buffer
891 (&bi, 0,
892 paste_entry_position,
893 tb->insert_size[0],
894 body, zeros_num);
895 /* paste entry */
896 leaf_paste_entries(bi.
897 bi_bh,
899 paste_entry_position,
901 (struct
902 reiserfs_de_head
904 body,
905 body
907 DEH_SIZE,
908 tb->
909 insert_size
913 if (paste_entry_position
914 == 0) {
915 /* change delimiting keys */
916 replace_key(tb,
917 tb->
919 [0],
920 tb->
921 rkey
922 [0],
923 tb->
925 [0],
929 tb->insert_size[0] = 0;
930 pos_in_item++;
931 } else { /* new directory entry doesn't fall into R[0] */
933 leaf_shift_right(tb,
934 tb->
935 rnum
936 [0],
937 tb->
938 rbytes);
940 } else { /* regular object */
942 int n_shift, n_rem,
943 r_zeros_number;
944 const char *r_body;
946 /* Calculate number of bytes which must be shifted from appended item */
947 if ((n_shift =
948 tb->rbytes -
949 tb->insert_size[0]) < 0)
950 n_shift = 0;
952 RFALSE(pos_in_item !=
953 ih_item_len
954 (B_N_PITEM_HEAD
955 (tbS0, item_pos)),
956 "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
957 pos_in_item,
958 ih_item_len
959 (B_N_PITEM_HEAD
960 (tbS0, item_pos)));
962 leaf_shift_right(tb,
963 tb->rnum[0],
964 n_shift);
965 /* Calculate number of bytes which must remain in body after appending to R[0] */
966 if ((n_rem =
967 tb->insert_size[0] -
968 tb->rbytes) < 0)
969 n_rem = 0;
972 int version;
973 unsigned long temp_rem =
974 n_rem;
976 version =
977 ih_version
978 (B_N_PITEM_HEAD
979 (tb->R[0], 0));
980 if (is_indirect_le_key
981 (version,
982 B_N_PKEY(tb->R[0],
983 0))) {
984 temp_rem =
985 n_rem <<
986 (tb->tb_sb->
987 s_blocksize_bits
989 UNFM_P_SHIFT);
991 set_le_key_k_offset
992 (version,
993 B_N_PKEY(tb->R[0],
995 le_key_k_offset
996 (version,
997 B_N_PKEY(tb->R[0],
998 0)) +
999 temp_rem);
1000 set_le_key_k_offset
1001 (version,
1002 B_N_PDELIM_KEY(tb->
1004 [0],
1005 tb->
1006 rkey
1007 [0]),
1008 le_key_k_offset
1009 (version,
1010 B_N_PDELIM_KEY
1011 (tb->CFR[0],
1012 tb->rkey[0])) +
1013 temp_rem);
1015 /* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem;
1016 k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/
1017 do_balance_mark_internal_dirty
1018 (tb, tb->CFR[0], 0);
1020 /* Append part of body into R[0] */
1021 bi.tb = tb;
1022 bi.bi_bh = tb->R[0];
1023 bi.bi_parent = tb->FR[0];
1024 bi.bi_position =
1025 get_right_neighbor_position
1026 (tb, 0);
1027 if (n_rem > zeros_num) {
1028 r_zeros_number = 0;
1029 r_body =
1030 body + n_rem -
1031 zeros_num;
1032 } else {
1033 r_body = body;
1034 r_zeros_number =
1035 zeros_num - n_rem;
1036 zeros_num -=
1037 r_zeros_number;
1040 leaf_paste_in_buffer(&bi, 0,
1041 n_shift,
1042 tb->
1043 insert_size
1044 [0] -
1045 n_rem,
1046 r_body,
1047 r_zeros_number);
1049 if (is_indirect_le_ih
1050 (B_N_PITEM_HEAD
1051 (tb->R[0], 0))) {
1052 #if 0
1053 RFALSE(n_rem,
1054 "PAP-12160: paste more than one unformatted node pointer");
1055 #endif
1056 set_ih_free_space
1057 (B_N_PITEM_HEAD
1058 (tb->R[0], 0), 0);
1060 tb->insert_size[0] = n_rem;
1061 if (!n_rem)
1062 pos_in_item++;
1064 } else { /* pasted item in whole falls into R[0] */
1066 struct item_head *pasted;
1068 ret_val =
1069 leaf_shift_right(tb, tb->rnum[0],
1070 tb->rbytes);
1071 /* append item in R[0] */
1072 if (pos_in_item >= 0) {
1073 bi.tb = tb;
1074 bi.bi_bh = tb->R[0];
1075 bi.bi_parent = tb->FR[0];
1076 bi.bi_position =
1077 get_right_neighbor_position
1078 (tb, 0);
1079 leaf_paste_in_buffer(&bi,
1080 item_pos -
1082 tb->
1083 rnum[0],
1084 pos_in_item,
1085 tb->
1086 insert_size
1087 [0], body,
1088 zeros_num);
1091 /* paste new entry, if item is directory item */
1092 pasted =
1093 B_N_PITEM_HEAD(tb->R[0],
1094 item_pos - n +
1095 tb->rnum[0]);
1096 if (is_direntry_le_ih(pasted)
1097 && pos_in_item >= 0) {
1098 leaf_paste_entries(bi.bi_bh,
1099 item_pos -
1101 tb->rnum[0],
1102 pos_in_item,
1104 (struct
1105 reiserfs_de_head
1106 *)body,
1107 body +
1108 DEH_SIZE,
1109 tb->
1110 insert_size
1113 if (!pos_in_item) {
1115 RFALSE(item_pos - n +
1116 tb->rnum[0],
1117 "PAP-12165: directory item must be first item of node when pasting is in 0th position");
1119 /* update delimiting keys */
1120 replace_key(tb,
1121 tb->CFR[0],
1122 tb->rkey[0],
1123 tb->R[0],
1128 if (is_indirect_le_ih(pasted))
1129 set_ih_free_space(pasted, 0);
1130 zeros_num = tb->insert_size[0] = 0;
1132 } else { /* new item doesn't fall into R[0] */
1134 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
1136 break;
1137 default: /* cases d and t */
1138 reiserfs_panic(tb->tb_sb,
1139 "PAP-12175: balance_leaf: rnum > 0: unexpectable mode: %s(%d)",
1140 (flag ==
1141 M_DELETE) ? "DELETE" : ((flag ==
1142 M_CUT) ? "CUT"
1143 : "UNKNOWN"),
1144 flag);
1149 /* tb->rnum[0] > 0 */
1150 RFALSE(tb->blknum[0] > 3,
1151 "PAP-12180: blknum can not be %d. It must be <= 3",
1152 tb->blknum[0]);
1153 RFALSE(tb->blknum[0] < 0,
1154 "PAP-12185: blknum can not be %d. It must be >= 0",
1155 tb->blknum[0]);
1157 /* if while adding to a node we discover that it is possible to split
1158 it in two, and merge the left part into the left neighbor and the
1159 right part into the right neighbor, eliminating the node */
1160 if (tb->blknum[0] == 0) { /* node S[0] is empty now */
1162 RFALSE(!tb->lnum[0] || !tb->rnum[0],
1163 "PAP-12190: lnum and rnum must not be zero");
1164 /* if insertion was done before 0-th position in R[0], right
1165 delimiting key of the tb->L[0]'s and left delimiting key are
1166 not set correctly */
1167 if (tb->CFL[0]) {
1168 if (!tb->CFR[0])
1169 reiserfs_panic(tb->tb_sb,
1170 "vs-12195: balance_leaf: CFR not initialized");
1171 copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
1172 B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]));
1173 do_balance_mark_internal_dirty(tb, tb->CFL[0], 0);
1176 reiserfs_invalidate_buffer(tb, tbS0);
1177 return 0;
1180 /* Fill new nodes that appear in place of S[0] */
1182 /* I am told that this copying is because we need an array to enable
1183 the looping code. -Hans */
1184 snum[0] = tb->s1num, snum[1] = tb->s2num;
1185 sbytes[0] = tb->s1bytes;
1186 sbytes[1] = tb->s2bytes;
1187 for (i = tb->blknum[0] - 2; i >= 0; i--) {
1189 RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i,
1190 snum[i]);
1192 /* here we shift from S to S_new nodes */
1194 S_new[i] = get_FEB(tb);
1196 /* initialized block type and tree level */
1197 set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL);
1199 n = B_NR_ITEMS(tbS0);
1201 switch (flag) {
1202 case M_INSERT: /* insert item */
1204 if (n - snum[i] < item_pos) { /* new item or it's part falls to first new node S_new[i] */
1205 if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) { /* part of new item falls into S_new[i] */
1206 int old_key_comp, old_len,
1207 r_zeros_number;
1208 const char *r_body;
1209 int version;
1211 /* Move snum[i]-1 items from S[0] to S_new[i] */
1212 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1213 snum[i] - 1, -1,
1214 S_new[i]);
1215 /* Remember key component and item length */
1216 version = ih_version(ih);
1217 old_key_comp = le_ih_k_offset(ih);
1218 old_len = ih_item_len(ih);
1220 /* Calculate key component and item length to insert into S_new[i] */
1221 set_le_ih_k_offset(ih,
1222 le_ih_k_offset(ih) +
1223 ((old_len -
1224 sbytes[i]) <<
1225 (is_indirect_le_ih
1226 (ih) ? tb->tb_sb->
1227 s_blocksize_bits -
1228 UNFM_P_SHIFT :
1229 0)));
1231 put_ih_item_len(ih, sbytes[i]);
1233 /* Insert part of the item into S_new[i] before 0-th item */
1234 bi.tb = tb;
1235 bi.bi_bh = S_new[i];
1236 bi.bi_parent = NULL;
1237 bi.bi_position = 0;
1239 if ((old_len - sbytes[i]) > zeros_num) {
1240 r_zeros_number = 0;
1241 r_body =
1242 body + (old_len -
1243 sbytes[i]) -
1244 zeros_num;
1245 } else {
1246 r_body = body;
1247 r_zeros_number =
1248 zeros_num - (old_len -
1249 sbytes[i]);
1250 zeros_num -= r_zeros_number;
1253 leaf_insert_into_buf(&bi, 0, ih, r_body,
1254 r_zeros_number);
1256 /* Calculate key component and item length to insert into S[i] */
1257 set_le_ih_k_offset(ih, old_key_comp);
1258 put_ih_item_len(ih,
1259 old_len - sbytes[i]);
1260 tb->insert_size[0] -= sbytes[i];
1261 } else { /* whole new item falls into S_new[i] */
1263 /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
1264 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1265 snum[i] - 1, sbytes[i],
1266 S_new[i]);
1268 /* Insert new item into S_new[i] */
1269 bi.tb = tb;
1270 bi.bi_bh = S_new[i];
1271 bi.bi_parent = NULL;
1272 bi.bi_position = 0;
1273 leaf_insert_into_buf(&bi,
1274 item_pos - n +
1275 snum[i] - 1, ih,
1276 body, zeros_num);
1278 zeros_num = tb->insert_size[0] = 0;
1282 else { /* new item or it part don't falls into S_new[i] */
1284 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1285 snum[i], sbytes[i], S_new[i]);
1287 break;
1289 case M_PASTE: /* append item */
1291 if (n - snum[i] <= item_pos) { /* pasted item or part if it falls to S_new[i] */
1292 if (item_pos == n - snum[i] && sbytes[i] != -1) { /* we must shift part of the appended item */
1293 struct item_head *aux_ih;
1295 RFALSE(ih, "PAP-12210: ih must be 0");
1297 if (is_direntry_le_ih
1298 (aux_ih =
1299 B_N_PITEM_HEAD(tbS0, item_pos))) {
1300 /* we append to directory item */
1302 int entry_count;
1304 entry_count =
1305 ih_entry_count(aux_ih);
1307 if (entry_count - sbytes[i] <
1308 pos_in_item
1309 && pos_in_item <=
1310 entry_count) {
1311 /* new directory entry falls into S_new[i] */
1313 RFALSE(!tb->
1314 insert_size[0],
1315 "PAP-12215: insert_size is already 0");
1316 RFALSE(sbytes[i] - 1 >=
1317 entry_count,
1318 "PAP-12220: there are no so much entries (%d), only %d",
1319 sbytes[i] - 1,
1320 entry_count);
1322 /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
1323 leaf_move_items
1324 (LEAF_FROM_S_TO_SNEW,
1325 tb, snum[i],
1326 sbytes[i] - 1,
1327 S_new[i]);
1328 /* Paste given directory entry to directory item */
1329 bi.tb = tb;
1330 bi.bi_bh = S_new[i];
1331 bi.bi_parent = NULL;
1332 bi.bi_position = 0;
1333 leaf_paste_in_buffer
1334 (&bi, 0,
1335 pos_in_item -
1336 entry_count +
1337 sbytes[i] - 1,
1338 tb->insert_size[0],
1339 body, zeros_num);
1340 /* paste new directory entry */
1341 leaf_paste_entries(bi.
1342 bi_bh,
1344 pos_in_item
1346 entry_count
1348 sbytes
1349 [i] -
1350 1, 1,
1351 (struct
1352 reiserfs_de_head
1354 body,
1355 body
1357 DEH_SIZE,
1358 tb->
1359 insert_size
1362 tb->insert_size[0] = 0;
1363 pos_in_item++;
1364 } else { /* new directory entry doesn't fall into S_new[i] */
1365 leaf_move_items
1366 (LEAF_FROM_S_TO_SNEW,
1367 tb, snum[i],
1368 sbytes[i],
1369 S_new[i]);
1371 } else { /* regular object */
1373 int n_shift, n_rem,
1374 r_zeros_number;
1375 const char *r_body;
1377 RFALSE(pos_in_item !=
1378 ih_item_len
1379 (B_N_PITEM_HEAD
1380 (tbS0, item_pos))
1381 || tb->insert_size[0] <=
1383 "PAP-12225: item too short or insert_size <= 0");
1385 /* Calculate number of bytes which must be shifted from appended item */
1386 n_shift =
1387 sbytes[i] -
1388 tb->insert_size[0];
1389 if (n_shift < 0)
1390 n_shift = 0;
1391 leaf_move_items
1392 (LEAF_FROM_S_TO_SNEW, tb,
1393 snum[i], n_shift,
1394 S_new[i]);
1396 /* Calculate number of bytes which must remain in body after append to S_new[i] */
1397 n_rem =
1398 tb->insert_size[0] -
1399 sbytes[i];
1400 if (n_rem < 0)
1401 n_rem = 0;
1402 /* Append part of body into S_new[0] */
1403 bi.tb = tb;
1404 bi.bi_bh = S_new[i];
1405 bi.bi_parent = NULL;
1406 bi.bi_position = 0;
1408 if (n_rem > zeros_num) {
1409 r_zeros_number = 0;
1410 r_body =
1411 body + n_rem -
1412 zeros_num;
1413 } else {
1414 r_body = body;
1415 r_zeros_number =
1416 zeros_num - n_rem;
1417 zeros_num -=
1418 r_zeros_number;
1421 leaf_paste_in_buffer(&bi, 0,
1422 n_shift,
1423 tb->
1424 insert_size
1425 [0] -
1426 n_rem,
1427 r_body,
1428 r_zeros_number);
1430 struct item_head *tmp;
1432 tmp =
1433 B_N_PITEM_HEAD(S_new
1434 [i],
1436 if (is_indirect_le_ih
1437 (tmp)) {
1438 set_ih_free_space
1439 (tmp, 0);
1440 set_le_ih_k_offset
1441 (tmp,
1442 le_ih_k_offset
1443 (tmp) +
1444 (n_rem <<
1445 (tb->
1446 tb_sb->
1447 s_blocksize_bits
1449 UNFM_P_SHIFT)));
1450 } else {
1451 set_le_ih_k_offset
1452 (tmp,
1453 le_ih_k_offset
1454 (tmp) +
1455 n_rem);
1459 tb->insert_size[0] = n_rem;
1460 if (!n_rem)
1461 pos_in_item++;
1463 } else
1464 /* item falls wholly into S_new[i] */
1466 int ret_val;
1467 struct item_head *pasted;
1469 #ifdef CONFIG_REISERFS_CHECK
1470 struct item_head *ih =
1471 B_N_PITEM_HEAD(tbS0, item_pos);
1473 if (!is_direntry_le_ih(ih)
1474 && (pos_in_item != ih_item_len(ih)
1475 || tb->insert_size[0] <= 0))
1476 reiserfs_panic(tb->tb_sb,
1477 "PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len");
1478 #endif /* CONFIG_REISERFS_CHECK */
1480 ret_val =
1481 leaf_move_items(LEAF_FROM_S_TO_SNEW,
1482 tb, snum[i],
1483 sbytes[i],
1484 S_new[i]);
1486 RFALSE(ret_val,
1487 "PAP-12240: unexpected value returned by leaf_move_items (%d)",
1488 ret_val);
1490 /* paste into item */
1491 bi.tb = tb;
1492 bi.bi_bh = S_new[i];
1493 bi.bi_parent = NULL;
1494 bi.bi_position = 0;
1495 leaf_paste_in_buffer(&bi,
1496 item_pos - n +
1497 snum[i],
1498 pos_in_item,
1499 tb->insert_size[0],
1500 body, zeros_num);
1502 pasted =
1503 B_N_PITEM_HEAD(S_new[i],
1504 item_pos - n +
1505 snum[i]);
1506 if (is_direntry_le_ih(pasted)) {
1507 leaf_paste_entries(bi.bi_bh,
1508 item_pos -
1509 n + snum[i],
1510 pos_in_item,
1512 (struct
1513 reiserfs_de_head
1514 *)body,
1515 body +
1516 DEH_SIZE,
1517 tb->
1518 insert_size
1523 /* if we paste to indirect item update ih_free_space */
1524 if (is_indirect_le_ih(pasted))
1525 set_ih_free_space(pasted, 0);
1526 zeros_num = tb->insert_size[0] = 0;
1530 else { /* pasted item doesn't fall into S_new[i] */
1532 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1533 snum[i], sbytes[i], S_new[i]);
1535 break;
1536 default: /* cases d and t */
1537 reiserfs_panic(tb->tb_sb,
1538 "PAP-12245: balance_leaf: blknum > 2: unexpectable mode: %s(%d)",
1539 (flag ==
1540 M_DELETE) ? "DELETE" : ((flag ==
1541 M_CUT) ? "CUT"
1542 : "UNKNOWN"),
1543 flag);
1546 memcpy(insert_key + i, B_N_PKEY(S_new[i], 0), KEY_SIZE);
1547 insert_ptr[i] = S_new[i];
1549 RFALSE(!buffer_journaled(S_new[i])
1550 || buffer_journal_dirty(S_new[i])
1551 || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)",
1552 i, S_new[i]);
1555 /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
1556 affected item which remains in S */
1557 if (0 <= item_pos && item_pos < tb->s0num) { /* if we must insert or append into buffer S[0] */
1559 switch (flag) {
1560 case M_INSERT: /* insert item into S[0] */
1561 bi.tb = tb;
1562 bi.bi_bh = tbS0;
1563 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
1564 bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
1565 leaf_insert_into_buf(&bi, item_pos, ih, body,
1566 zeros_num);
1568 /* If we insert the first key change the delimiting key */
1569 if (item_pos == 0) {
1570 if (tb->CFL[0]) /* can be 0 in reiserfsck */
1571 replace_key(tb, tb->CFL[0], tb->lkey[0],
1572 tbS0, 0);
1575 break;
1577 case M_PASTE:{ /* append item in S[0] */
1578 struct item_head *pasted;
1580 pasted = B_N_PITEM_HEAD(tbS0, item_pos);
1581 /* when directory, may be new entry already pasted */
1582 if (is_direntry_le_ih(pasted)) {
1583 if (pos_in_item >= 0 &&
1584 pos_in_item <=
1585 ih_entry_count(pasted)) {
1587 RFALSE(!tb->insert_size[0],
1588 "PAP-12260: insert_size is 0 already");
1590 /* prepare space */
1591 bi.tb = tb;
1592 bi.bi_bh = tbS0;
1593 bi.bi_parent =
1594 PATH_H_PPARENT(tb->tb_path,
1596 bi.bi_position =
1597 PATH_H_POSITION(tb->tb_path,
1599 leaf_paste_in_buffer(&bi,
1600 item_pos,
1601 pos_in_item,
1602 tb->
1603 insert_size
1604 [0], body,
1605 zeros_num);
1607 /* paste entry */
1608 leaf_paste_entries(bi.bi_bh,
1609 item_pos,
1610 pos_in_item,
1612 (struct
1613 reiserfs_de_head
1614 *)body,
1615 body +
1616 DEH_SIZE,
1617 tb->
1618 insert_size
1621 if (!item_pos && !pos_in_item) {
1622 RFALSE(!tb->CFL[0]
1623 || !tb->L[0],
1624 "PAP-12270: CFL[0]/L[0] must be specified");
1625 if (tb->CFL[0]) {
1626 replace_key(tb,
1627 tb->
1629 [0],
1630 tb->
1631 lkey
1632 [0],
1633 tbS0,
1638 tb->insert_size[0] = 0;
1640 } else { /* regular object */
1641 if (pos_in_item == ih_item_len(pasted)) {
1643 RFALSE(tb->insert_size[0] <= 0,
1644 "PAP-12275: insert size must not be %d",
1645 tb->insert_size[0]);
1646 bi.tb = tb;
1647 bi.bi_bh = tbS0;
1648 bi.bi_parent =
1649 PATH_H_PPARENT(tb->tb_path,
1651 bi.bi_position =
1652 PATH_H_POSITION(tb->tb_path,
1654 leaf_paste_in_buffer(&bi,
1655 item_pos,
1656 pos_in_item,
1657 tb->
1658 insert_size
1659 [0], body,
1660 zeros_num);
1662 if (is_indirect_le_ih(pasted)) {
1663 #if 0
1664 RFALSE(tb->
1665 insert_size[0] !=
1666 UNFM_P_SIZE,
1667 "PAP-12280: insert_size for indirect item must be %d, not %d",
1668 UNFM_P_SIZE,
1669 tb->
1670 insert_size[0]);
1671 #endif
1672 set_ih_free_space
1673 (pasted, 0);
1675 tb->insert_size[0] = 0;
1677 #ifdef CONFIG_REISERFS_CHECK
1678 else {
1679 if (tb->insert_size[0]) {
1680 print_cur_tb("12285");
1681 reiserfs_panic(tb->
1682 tb_sb,
1683 "PAP-12285: balance_leaf: insert_size must be 0 (%d)",
1684 tb->
1685 insert_size
1686 [0]);
1689 #endif /* CONFIG_REISERFS_CHECK */
1692 } /* case M_PASTE: */
1695 #ifdef CONFIG_REISERFS_CHECK
1696 if (flag == M_PASTE && tb->insert_size[0]) {
1697 print_cur_tb("12290");
1698 reiserfs_panic(tb->tb_sb,
1699 "PAP-12290: balance_leaf: insert_size is still not 0 (%d)",
1700 tb->insert_size[0]);
1702 #endif /* CONFIG_REISERFS_CHECK */
1704 return 0;
1705 } /* Leaf level of the tree is balanced (end of balance_leaf) */
1707 /* Make empty node */
1708 void make_empty_node(struct buffer_info *bi)
1710 struct block_head *blkh;
1712 RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");
1714 blkh = B_BLK_HEAD(bi->bi_bh);
1715 set_blkh_nr_item(blkh, 0);
1716 set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh));
1718 if (bi->bi_parent)
1719 B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */
1722 /* Get first empty buffer */
1723 struct buffer_head *get_FEB(struct tree_balance *tb)
1725 int i;
1726 struct buffer_head *first_b;
1727 struct buffer_info bi;
1729 for (i = 0; i < MAX_FEB_SIZE; i++)
1730 if (tb->FEB[i] != 0)
1731 break;
1733 if (i == MAX_FEB_SIZE)
1734 reiserfs_panic(tb->tb_sb,
1735 "vs-12300: get_FEB: FEB list is empty");
1737 bi.tb = tb;
1738 bi.bi_bh = first_b = tb->FEB[i];
1739 bi.bi_parent = NULL;
1740 bi.bi_position = 0;
1741 make_empty_node(&bi);
1742 set_buffer_uptodate(first_b);
1743 tb->FEB[i] = NULL;
1744 tb->used[i] = first_b;
1746 return (first_b);
1749 /* This is now used because reiserfs_free_block has to be able to
1750 ** schedule.
1752 static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
1754 int i;
1756 if (buffer_dirty(bh))
1757 reiserfs_warning(tb->tb_sb,
1758 "store_thrown deals with dirty buffer");
1759 for (i = 0; i < sizeof(tb->thrown) / sizeof(tb->thrown[0]); i++)
1760 if (!tb->thrown[i]) {
1761 tb->thrown[i] = bh;
1762 get_bh(bh); /* free_thrown puts this */
1763 return;
1765 reiserfs_warning(tb->tb_sb, "store_thrown: too many thrown buffers");
1768 static void free_thrown(struct tree_balance *tb)
1770 int i;
1771 b_blocknr_t blocknr;
1772 for (i = 0; i < sizeof(tb->thrown) / sizeof(tb->thrown[0]); i++) {
1773 if (tb->thrown[i]) {
1774 blocknr = tb->thrown[i]->b_blocknr;
1775 if (buffer_dirty(tb->thrown[i]))
1776 reiserfs_warning(tb->tb_sb,
1777 "free_thrown deals with dirty buffer %d",
1778 blocknr);
1779 brelse(tb->thrown[i]); /* incremented in store_thrown */
1780 reiserfs_free_block(tb->transaction_handle, NULL,
1781 blocknr, 0);
1786 void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh)
1788 struct block_head *blkh;
1789 blkh = B_BLK_HEAD(bh);
1790 set_blkh_level(blkh, FREE_LEVEL);
1791 set_blkh_nr_item(blkh, 0);
1793 clear_buffer_dirty(bh);
1794 store_thrown(tb, bh);
1797 /* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
1798 void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest,
1799 struct buffer_head *src, int n_src)
1802 RFALSE(dest == NULL || src == NULL,
1803 "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
1804 src, dest);
1805 RFALSE(!B_IS_KEYS_LEVEL(dest),
1806 "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
1807 dest);
1808 RFALSE(n_dest < 0 || n_src < 0,
1809 "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
1810 RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
1811 "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
1812 n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));
1814 if (B_IS_ITEMS_LEVEL(src))
1815 /* source buffer contains leaf node */
1816 memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PITEM_HEAD(src, n_src),
1817 KEY_SIZE);
1818 else
1819 memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PDELIM_KEY(src, n_src),
1820 KEY_SIZE);
1822 do_balance_mark_internal_dirty(tb, dest, 0);
1825 int get_left_neighbor_position(struct tree_balance *tb, int h)
1827 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1829 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == 0 || tb->FL[h] == 0,
1830 "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
1831 h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h));
1833 if (Sh_position == 0)
1834 return B_NR_ITEMS(tb->FL[h]);
1835 else
1836 return Sh_position - 1;
1839 int get_right_neighbor_position(struct tree_balance *tb, int h)
1841 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1843 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == 0 || tb->FR[h] == 0,
1844 "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
1845 h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]);
1847 if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h)))
1848 return 0;
1849 else
1850 return Sh_position + 1;
1853 #ifdef CONFIG_REISERFS_CHECK
1855 int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);
1856 static void check_internal_node(struct super_block *s, struct buffer_head *bh,
1857 char *mes)
1859 struct disk_child *dc;
1860 int i;
1862 RFALSE(!bh, "PAP-12336: bh == 0");
1864 if (!bh || !B_IS_IN_TREE(bh))
1865 return;
1867 RFALSE(!buffer_dirty(bh) &&
1868 !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
1869 "PAP-12337: buffer (%b) must be dirty", bh);
1870 dc = B_N_CHILD(bh, 0);
1872 for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) {
1873 if (!is_reusable(s, dc_block_number(dc), 1)) {
1874 print_cur_tb(mes);
1875 reiserfs_panic(s,
1876 "PAP-12338: check_internal_node: invalid child pointer %y in %b",
1877 dc, bh);
1882 static int locked_or_not_in_tree(struct buffer_head *bh, char *which)
1884 if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
1885 !B_IS_IN_TREE(bh)) {
1886 reiserfs_warning(NULL,
1887 "vs-12339: locked_or_not_in_tree: %s (%b)",
1888 which, bh);
1889 return 1;
1891 return 0;
1894 static int check_before_balancing(struct tree_balance *tb)
1896 int retval = 0;
1898 if (cur_tb) {
1899 reiserfs_panic(tb->tb_sb, "vs-12335: check_before_balancing: "
1900 "suspect that schedule occurred based on cur_tb not being null at this point in code. "
1901 "do_balance cannot properly handle schedule occurring while it runs.");
1904 /* double check that buffers that we will modify are unlocked. (fix_nodes should already have
1905 prepped all of these for us). */
1906 if (tb->lnum[0]) {
1907 retval |= locked_or_not_in_tree(tb->L[0], "L[0]");
1908 retval |= locked_or_not_in_tree(tb->FL[0], "FL[0]");
1909 retval |= locked_or_not_in_tree(tb->CFL[0], "CFL[0]");
1910 check_leaf(tb->L[0]);
1912 if (tb->rnum[0]) {
1913 retval |= locked_or_not_in_tree(tb->R[0], "R[0]");
1914 retval |= locked_or_not_in_tree(tb->FR[0], "FR[0]");
1915 retval |= locked_or_not_in_tree(tb->CFR[0], "CFR[0]");
1916 check_leaf(tb->R[0]);
1918 retval |= locked_or_not_in_tree(PATH_PLAST_BUFFER(tb->tb_path), "S[0]");
1919 check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1921 return retval;
1924 static void check_after_balance_leaf(struct tree_balance *tb)
1926 if (tb->lnum[0]) {
1927 if (B_FREE_SPACE(tb->L[0]) !=
1928 MAX_CHILD_SIZE(tb->L[0]) -
1929 dc_size(B_N_CHILD
1930 (tb->FL[0], get_left_neighbor_position(tb, 0)))) {
1931 print_cur_tb("12221");
1932 reiserfs_panic(tb->tb_sb,
1933 "PAP-12355: check_after_balance_leaf: shift to left was incorrect");
1936 if (tb->rnum[0]) {
1937 if (B_FREE_SPACE(tb->R[0]) !=
1938 MAX_CHILD_SIZE(tb->R[0]) -
1939 dc_size(B_N_CHILD
1940 (tb->FR[0], get_right_neighbor_position(tb, 0)))) {
1941 print_cur_tb("12222");
1942 reiserfs_panic(tb->tb_sb,
1943 "PAP-12360: check_after_balance_leaf: shift to right was incorrect");
1946 if (PATH_H_PBUFFER(tb->tb_path, 1) &&
1947 (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) !=
1948 (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1949 dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1950 PATH_H_POSITION(tb->tb_path, 1)))))) {
1951 int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0));
1952 int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1953 dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1954 PATH_H_POSITION(tb->tb_path,
1955 1))));
1956 print_cur_tb("12223");
1957 reiserfs_warning(tb->tb_sb,
1958 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
1959 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
1960 left,
1961 MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)),
1962 PATH_H_PBUFFER(tb->tb_path, 1),
1963 PATH_H_POSITION(tb->tb_path, 1),
1964 dc_size(B_N_CHILD
1965 (PATH_H_PBUFFER(tb->tb_path, 1),
1966 PATH_H_POSITION(tb->tb_path, 1))),
1967 right);
1968 reiserfs_panic(tb->tb_sb,
1969 "PAP-12365: check_after_balance_leaf: S is incorrect");
1973 static void check_leaf_level(struct tree_balance *tb)
1975 check_leaf(tb->L[0]);
1976 check_leaf(tb->R[0]);
1977 check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1980 static void check_internal_levels(struct tree_balance *tb)
1982 int h;
1984 /* check all internal nodes */
1985 for (h = 1; tb->insert_size[h]; h++) {
1986 check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h),
1987 "BAD BUFFER ON PATH");
1988 if (tb->lnum[h])
1989 check_internal_node(tb->tb_sb, tb->L[h], "BAD L");
1990 if (tb->rnum[h])
1991 check_internal_node(tb->tb_sb, tb->R[h], "BAD R");
1996 #endif
1998 /* Now we have all of the buffers that must be used in balancing of
1999 the tree. We rely on the assumption that schedule() will not occur
2000 while do_balance works. ( Only interrupt handlers are acceptable.)
2001 We balance the tree according to the analysis made before this,
2002 using buffers already obtained. For SMP support it will someday be
2003 necessary to add ordered locking of tb. */
2005 /* Some interesting rules of balancing:
2007 we delete a maximum of two nodes per level per balancing: we never
2008 delete R, when we delete two of three nodes L, S, R then we move
2009 them into R.
2011 we only delete L if we are deleting two nodes, if we delete only
2012 one node we delete S
2014 if we shift leaves then we shift as much as we can: this is a
2015 deliberate policy of extremism in node packing which results in
2016 higher average utilization after repeated random balance operations
2017 at the cost of more memory copies and more balancing as a result of
2018 small insertions to full nodes.
2020 if we shift internal nodes we try to evenly balance the node
2021 utilization, with consequent less balancing at the cost of lower
2022 utilization.
2024 one could argue that the policy for directories in leaves should be
2025 that of internal nodes, but we will wait until another day to
2026 evaluate this.... It would be nice to someday measure and prove
2027 these assumptions as to what is optimal....
2031 static inline void do_balance_starts(struct tree_balance *tb)
2033 /* use print_cur_tb() to see initial state of struct
2034 tree_balance */
2036 /* store_print_tb (tb); */
2038 /* do not delete, just comment it out */
2039 /* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
2040 "check");*/
2041 RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
2042 #ifdef CONFIG_REISERFS_CHECK
2043 cur_tb = tb;
2044 #endif
2047 static inline void do_balance_completed(struct tree_balance *tb)
2050 #ifdef CONFIG_REISERFS_CHECK
2051 check_leaf_level(tb);
2052 check_internal_levels(tb);
2053 cur_tb = NULL;
2054 #endif
2056 /* reiserfs_free_block is no longer schedule safe. So, we need to
2057 ** put the buffers we want freed on the thrown list during do_balance,
2058 ** and then free them now
2061 REISERFS_SB(tb->tb_sb)->s_do_balance++;
2063 /* release all nodes hold to perform the balancing */
2064 unfix_nodes(tb);
2066 free_thrown(tb);
2069 void do_balance(struct tree_balance *tb, /* tree_balance structure */
2070 struct item_head *ih, /* item header of inserted item */
2071 const char *body, /* body of inserted item or bytes to paste */
2072 int flag)
2073 { /* i - insert, d - delete
2074 c - cut, p - paste
2076 Cut means delete part of an item
2077 (includes removing an entry from a
2078 directory).
2080 Delete means delete whole item.
2082 Insert means add a new item into the
2083 tree.
2085 Paste means to append to the end of an
2086 existing file or to insert a directory
2087 entry. */
2088 int child_pos, /* position of a child node in its parent */
2089 h; /* level of the tree being processed */
2090 struct item_head insert_key[2]; /* in our processing of one level
2091 we sometimes determine what
2092 must be inserted into the next
2093 higher level. This insertion
2094 consists of a key or two keys
2095 and their corresponding
2096 pointers */
2097 struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next
2098 level */
2100 tb->tb_mode = flag;
2101 tb->need_balance_dirty = 0;
2103 if (FILESYSTEM_CHANGED_TB(tb)) {
2104 reiserfs_panic(tb->tb_sb,
2105 "clm-6000: do_balance, fs generation has changed\n");
2107 /* if we have no real work to do */
2108 if (!tb->insert_size[0]) {
2109 reiserfs_warning(tb->tb_sb,
2110 "PAP-12350: do_balance: insert_size == 0, mode == %c",
2111 flag);
2112 unfix_nodes(tb);
2113 return;
2116 atomic_inc(&(fs_generation(tb->tb_sb)));
2117 do_balance_starts(tb);
2119 /* balance leaf returns 0 except if combining L R and S into
2120 one node. see balance_internal() for explanation of this
2121 line of code. */
2122 child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
2123 balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);
2125 #ifdef CONFIG_REISERFS_CHECK
2126 check_after_balance_leaf(tb);
2127 #endif
2129 /* Balance internal level of the tree. */
2130 for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++)
2131 child_pos =
2132 balance_internal(tb, h, child_pos, insert_key, insert_ptr);
2134 do_balance_completed(tb);