/*
 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
 */

/* Now we have all buffers that must be used in balancing of the tree.  */
/* Further calculations cannot cause schedule(), and thus the buffer    */
/* tree will be stable until the balancing is finished.                  */
/* Balance the tree according to the analysis made before,              */
/* using the buffers obtained by all of the above.                       */

/*
 ** balance_leaf_when_delete
 ** balance_leaf
 ** do_balance
 **/

#include <asm/uaccess.h>
#include <linux/time.h>
#include <linux/reiserfs_fs.h>
#include <linux/buffer_head.h>
#ifdef CONFIG_REISERFS_CHECK

struct tree_balance *cur_tb = NULL;	/* detects whether more than one
					   copy of tb exists as a means
					   of checking whether schedule
					   is interrupting do_balance */
#endif
inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
				       struct buffer_head *bh, int flag)
{
	journal_mark_dirty(tb->transaction_handle,
			   tb->transaction_handle->t_super, bh);
}

#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
/* summary:
    if deleting something ( tb->insert_size[0] < 0 )
	return balance_leaf_when_delete();	(flag d handled here)
    else
	if lnum is larger than 0 we put items into the left node
	if rnum is larger than 0 we put items into the right node
	if snum1 is larger than 0 we put items into the new node s1
	if snum2 is larger than 0 we put items into the new node s2
    Note that all *num* values count new items being created.

    It would be easier to read balance_leaf() if each of these summary
    lines was a separate procedure rather than being inlined.  I think
    there are many passages here and in balance_leaf_when_delete() in
    which two calls to one procedure could replace two passages, and
    doing so might save cache space and reduce software maintenance
    costs.

    Vladimir made the perceptive comment that we should offload most of
    the decision making in this function into fix_nodes/check_balance,
    and then create some sort of structure in tb that says what actions
    should be performed by do_balance. */
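/* Illustrative sketch (not part of the original file): one way to read the
   dispatch summarized above is as a precomputed "plan" that do_balance would
   merely execute, as Vladimir suggests.  The names balance_plan and
   apply_plan are hypothetical, chosen only to illustrate the idea; this is a
   stand-alone user-space toy, kept out of the build by #if 0. */
#if 0	/* example only, never compiled into the kernel */
#include <stdio.h>

struct balance_plan {
	int lnum;	/* items to shift into the left neighbor  */
	int rnum;	/* items to shift into the right neighbor */
	int snum[2];	/* items to place into new nodes s1, s2   */
};

static void apply_plan(const struct balance_plan *p)
{
	if (p->lnum > 0)
		printf("put %d item(s) into the left node\n", p->lnum);
	if (p->rnum > 0)
		printf("put %d item(s) into the right node\n", p->rnum);
	if (p->snum[0] > 0)
		printf("put %d item(s) into new node s1\n", p->snum[0]);
	if (p->snum[1] > 0)
		printf("put %d item(s) into new node s2\n", p->snum[1]);
}

int main(void)
{
	struct balance_plan p = { .lnum = 2, .rnum = 1, .snum = { 3, 0 } };
	apply_plan(&p);
	return 0;
}
#endif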
/* Balance leaf node in case of delete or cut: insert_size[0] < 0
 *
 * lnum, rnum can have values >= -1
 *	-1 means that the neighbor must be joined with S
 *	 0 means that nothing should be done with the neighbor
 *	>0 means to shift entirely or partly the specified number of items to the neighbor
 */
static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
{
	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
	int item_pos = PATH_LAST_POSITION(tb->tb_path);
	int pos_in_item = tb->tb_path->pos_in_item;
	struct buffer_info bi;
	int n;
	struct item_head *ih;

	RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
	       "vs-12000: level: wrong FR %z", tb->FR[0]);
	RFALSE(tb->blknum[0] > 1,
	       "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
	RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0),
	       "PAP-12010: tree can not be empty");

	ih = B_N_PITEM_HEAD(tbS0, item_pos);
	/* Delete or truncate the item */

	switch (flag) {
	case M_DELETE:		/* delete item in S[0] */

		RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
		       "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
		       -tb->insert_size[0], ih);

		bi.tb = tb;
		bi.bi_bh = tbS0;
		bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
		bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
		leaf_delete_items(&bi, 0, item_pos, 1, -1);

		if (!item_pos && tb->CFL[0]) {
			if (B_NR_ITEMS(tbS0)) {
				replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0,
					    0);
			} else {
				if (!PATH_H_POSITION(tb->tb_path, 1))
					replace_key(tb, tb->CFL[0], tb->lkey[0],
						    PATH_H_PPARENT(tb->tb_path,
								   0), 0);
			}
		}

		RFALSE(!item_pos && !tb->CFL[0],
		       "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0],
		       tb->L[0]);

		break;
	case M_CUT:{		/* cut item in S[0] */
			bi.tb = tb;
			bi.bi_bh = tbS0;
			bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
			bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
			if (is_direntry_le_ih(ih)) {
				/* UFS unlink semantics are such that you can
				   only delete one directory entry at a time. */
				/* when we cut a directory tb->insert_size[0]
				   means number of entries to be cut (always 1) */
				tb->insert_size[0] = -1;
				leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
						     -tb->insert_size[0]);

				RFALSE(!item_pos && !pos_in_item && !tb->CFL[0],
				       "PAP-12030: can not change delimiting key. CFL[0]=%p",
				       tb->CFL[0]);

				if (!item_pos && !pos_in_item && tb->CFL[0]) {
					replace_key(tb, tb->CFL[0], tb->lkey[0],
						    tbS0, 0);
				}
			} else {
				leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
						     -tb->insert_size[0]);

				RFALSE(!ih_item_len(ih),
				       "PAP-12035: cut must leave non-zero dynamic length of item");
			}
			break;
		}
	default:
		print_cur_tb("12040");
		reiserfs_panic(tb->tb_sb,
			       "PAP-12040: balance_leaf_when_delete: unexpected mode: %s(%d)",
			       (flag == M_PASTE) ? "PASTE" :
			       ((flag == M_INSERT) ? "INSERT" : "UNKNOWN"),
			       flag);
	}
	/* the rule is that no shifting occurs unless, by shifting, a node can be freed */
	n = B_NR_ITEMS(tbS0);
	if (tb->lnum[0]) {	/* L[0] takes part in balancing */
		if (tb->lnum[0] == -1) {	/* L[0] must be joined with S[0] */
			if (tb->rnum[0] == -1) {	/* R[0] must be also joined with S[0] */
				if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) {
					/* all contents of all the 3 buffers will be in L[0] */
					if (PATH_H_POSITION(tb->tb_path, 1) == 0
					    && 1 < B_NR_ITEMS(tb->FR[0]))
						replace_key(tb, tb->CFL[0],
							    tb->lkey[0],
							    tb->FR[0], 1);

					leaf_move_items(LEAF_FROM_S_TO_L, tb, n,
							-1, NULL);
					leaf_move_items(LEAF_FROM_R_TO_L, tb,
							B_NR_ITEMS(tb->R[0]),
							-1, NULL);

					reiserfs_invalidate_buffer(tb, tbS0);
					reiserfs_invalidate_buffer(tb,
								   tb->R[0]);

					return 0;
				}
				/* all contents of all the 3 buffers will be in R[0] */
				leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1,
						NULL);
				leaf_move_items(LEAF_FROM_L_TO_R, tb,
						B_NR_ITEMS(tb->L[0]), -1, NULL);

				/* right_delimiting_key is correct in R[0] */
				replace_key(tb, tb->CFR[0], tb->rkey[0],
					    tb->R[0], 0);

				reiserfs_invalidate_buffer(tb, tbS0);
				reiserfs_invalidate_buffer(tb, tb->L[0]);

				return -1;
			}

			RFALSE(tb->rnum[0] != 0,
			       "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
			/* all contents of L[0] and S[0] will be in L[0] */
			leaf_shift_left(tb, n, -1);

			reiserfs_invalidate_buffer(tb, tbS0);

			return 0;
		}
		/* a part of the contents of S[0] will be in L[0] and the
		   rest of S[0] will be in R[0] */

		RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
		       (tb->lnum[0] + tb->rnum[0] > n + 1),
		       "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
		       tb->rnum[0], tb->lnum[0], n);
		RFALSE((tb->lnum[0] + tb->rnum[0] == n) &&
		       (tb->lbytes != -1 || tb->rbytes != -1),
		       "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
		       tb->rbytes, tb->lbytes);
		RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) &&
		       (tb->lbytes < 1 || tb->rbytes != -1),
		       "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
		       tb->rbytes, tb->lbytes);

		leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
		leaf_shift_right(tb, tb->rnum[0], tb->rbytes);

		reiserfs_invalidate_buffer(tb, tbS0);

		return 0;
	}
	if (tb->rnum[0] == -1) {
		/* all contents of R[0] and S[0] will be in R[0] */
		leaf_shift_right(tb, n, -1);
		reiserfs_invalidate_buffer(tb, tbS0);
		return 0;
	}

	RFALSE(tb->rnum[0],
	       "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
	return 0;
}
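/* Illustrative sketch (not part of the original file): the -1/0/>0
   convention for lnum/rnum documented above balance_leaf_when_delete,
   decoded for one neighbor.  A stand-alone toy; the function name
   describe_neighbor_action is hypothetical. */
#if 0	/* example only */
#include <stdio.h>

static void describe_neighbor_action(const char *name, int num)
{
	if (num == -1)
		printf("%s must be joined with S\n", name);
	else if (num == 0)
		printf("nothing should be done with %s\n", name);
	else
		printf("shift %d item(s), entirely or partly, to %s\n",
		       num, name);
}

int main(void)
{
	describe_neighbor_action("L[0]", -1);	/* join       */
	describe_neighbor_action("R[0]", 0);	/* leave      */
	describe_neighbor_action("R[0]", 2);	/* shift two  */
	return 0;
}
#endif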
static int balance_leaf(struct tree_balance *tb, struct item_head *ih,	/* item header of inserted item (this is on little endian) */
			const char *body,	/* body of inserted item or bytes to paste */
			int flag,	/* i - insert, d - delete, c - cut, p - paste
					   (see comment to do_balance) */
			struct item_head *insert_key,	/* in our processing of one level we sometimes determine what
							   must be inserted into the next higher level.  This insertion
							   consists of a key or two keys and their corresponding
							   pointers */
			struct buffer_head **insert_ptr	/* inserted node-ptrs for the next level */
    )
{
	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
	int item_pos = PATH_LAST_POSITION(tb->tb_path);	/* index into the array of item
							   headers in S[0] of the
							   affected item */
	struct buffer_info bi;
	struct buffer_head *S_new[2];	/* new nodes allocated to hold what could not fit into S */
	int snum[2];	/* number of items that will be placed
			   into S_new (includes partially shifted
			   items) */
	int sbytes[2];	/* if an item is partially shifted into S_new then
			   if it is a directory item
			   it is the number of entries from the item that are shifted into S_new
			   else
			   it is the number of bytes from the item that are shifted into S_new */
	int n, i;
	int ret_val;
	int pos_in_item;
	int zeros_num;

	PROC_INFO_INC(tb->tb_sb, balance_at[0]);
	/* Make balance in case insert_size[0] < 0 */
	if (tb->insert_size[0] < 0)
		return balance_leaf_when_delete(tb, flag);

	zeros_num = 0;
	if (flag == M_INSERT && body == 0)
		zeros_num = ih_item_len(ih);

	pos_in_item = tb->tb_path->pos_in_item;
	/* for indirect item pos_in_item is measured in unformatted node
	   pointers. Recalculate to bytes */
	if (flag != M_INSERT
	    && is_indirect_le_ih(B_N_PITEM_HEAD(tbS0, item_pos)))
		pos_in_item *= UNFM_P_SIZE;
	if (tb->lnum[0] > 0) {
		/* Shift lnum[0] items from S[0] to the left neighbor L[0] */
		if (item_pos < tb->lnum[0]) {
			/* new item or its part falls into L[0], shift it too */
			n = B_NR_ITEMS(tb->L[0]);

			switch (flag) {
			case M_INSERT:	/* insert item into L[0] */

				if (item_pos == tb->lnum[0] - 1
				    && tb->lbytes != -1) {
					/* part of new item falls into L[0] */
					int new_item_len;
					int version;

					ret_val = leaf_shift_left(tb, tb->lnum[0] - 1,
								  -1);

					/* Calculate item length to insert to S[0] */
					new_item_len =
					    ih_item_len(ih) - tb->lbytes;
					/* Calculate and check item length to insert to L[0] */
					put_ih_item_len(ih,
							ih_item_len(ih) - new_item_len);

					RFALSE(ih_item_len(ih) <= 0,
					       "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
					       ih_item_len(ih));

					/* Insert new item into L[0] */
					bi.tb = tb;
					bi.bi_bh = tb->L[0];
					bi.bi_parent = tb->FL[0];
					bi.bi_position =
					    get_left_neighbor_position(tb, 0);
					leaf_insert_into_buf(&bi,
							     n + item_pos - ret_val,
							     ih, body,
							     zeros_num > ih_item_len(ih) ?
							     ih_item_len(ih) : zeros_num);

					version = ih_version(ih);

					/* Calculate key component, item length and body to insert into S[0] */
					set_le_ih_k_offset(ih,
							   le_ih_k_offset(ih) +
							   (tb->lbytes <<
							    (is_indirect_le_ih(ih) ?
							     tb->tb_sb->s_blocksize_bits -
							     UNFM_P_SHIFT : 0)));

					put_ih_item_len(ih, new_item_len);
					if (tb->lbytes > zeros_num) {
						body +=
						    (tb->lbytes - zeros_num);
						zeros_num = 0;
					} else
						zeros_num -= tb->lbytes;

					RFALSE(ih_item_len(ih) <= 0,
					       "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
					       ih_item_len(ih));
				} else {
					/* new item in whole falls into L[0] */
					/* Shift lnum[0]-1 items to L[0] */
					ret_val = leaf_shift_left(tb, tb->lnum[0] - 1,
								  tb->lbytes);
					/* Insert new item into L[0] */
					bi.tb = tb;
					bi.bi_bh = tb->L[0];
					bi.bi_parent = tb->FL[0];
					bi.bi_position =
					    get_left_neighbor_position(tb, 0);
					leaf_insert_into_buf(&bi,
							     n + item_pos - ret_val,
							     ih, body, zeros_num);

					tb->insert_size[0] = 0;
					zeros_num = 0;
				}
				break;
			case M_PASTE:	/* append item in L[0] */

				if (item_pos == tb->lnum[0] - 1
				    && tb->lbytes != -1) {
					/* we must shift the part of the appended item */
					if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) {
						/* directory item */
						RFALSE(zeros_num,
						       "PAP-12090: invalid parameter in case of a directory");
						if (tb->lbytes > pos_in_item) {
							/* new directory entry falls into L[0] */
							struct item_head *pasted;
							int l_pos_in_item = pos_in_item;

							/* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
							ret_val = leaf_shift_left(tb, tb->lnum[0],
										  tb->lbytes - 1);
							if (ret_val && !item_pos) {
								pasted = B_N_PITEM_HEAD(tb->L[0],
											B_NR_ITEMS(tb->L[0]) - 1);
								l_pos_in_item += I_ENTRY_COUNT(pasted) -
								    (tb->lbytes - 1);
							}

							/* Append given directory entry to directory item */
							bi.tb = tb;
							bi.bi_bh = tb->L[0];
							bi.bi_parent = tb->FL[0];
							bi.bi_position =
							    get_left_neighbor_position(tb, 0);
							leaf_paste_in_buffer(&bi,
									     n + item_pos - ret_val,
									     l_pos_in_item,
									     tb->insert_size[0],
									     body, zeros_num);

							/* previous string prepared space for pasting new entry, following string pastes this entry */

							/* when we have merge directory item, pos_in_item has been changed too */

							/* paste new directory entry. 1 is entry number */
							leaf_paste_entries(bi.bi_bh,
									   n + item_pos - ret_val,
									   l_pos_in_item, 1,
									   (struct reiserfs_de_head *)body,
									   body + DEH_SIZE,
									   tb->insert_size[0]);
							tb->insert_size[0] = 0;
						} else {
							/* new directory item doesn't fall into L[0] */
							/* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
							leaf_shift_left(tb, tb->lnum[0],
									tb->lbytes);
							/* Calculate new position to append in item body */
							pos_in_item -= tb->lbytes;
						}
					} else {
						/* regular object */
						RFALSE(tb->lbytes <= 0,
						       "PAP-12095: there is nothing to shift to L[0]. lbytes=%d",
						       tb->lbytes);
						RFALSE(pos_in_item !=
						       ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),
						       "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
						       ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),
						       pos_in_item);

						if (tb->lbytes >= pos_in_item) {
							/* appended item will be in L[0] in whole */
							int l_n, version, temp_l;

							/* this number of bytes must be appended to the last item of L[h] */
							l_n = tb->lbytes - pos_in_item;

							/* Calculate new insert_size[0] */
							tb->insert_size[0] -= l_n;

							RFALSE(tb->insert_size[0] <= 0,
							       "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
							       tb->insert_size[0]);
							ret_val = leaf_shift_left(tb, tb->lnum[0],
										  ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)));
							/* Append to body of item in L[0] */
							bi.tb = tb;
							bi.bi_bh = tb->L[0];
							bi.bi_parent = tb->FL[0];
							bi.bi_position =
							    get_left_neighbor_position(tb, 0);
							leaf_paste_in_buffer(&bi,
									     n + item_pos - ret_val,
									     ih_item_len(B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val)),
									     l_n, body,
									     zeros_num > l_n ? l_n : zeros_num);

							/* 0-th item in S0 can be only of DIRECT type when l_n != 0 */
							temp_l = l_n;

							RFALSE(ih_item_len(B_N_PITEM_HEAD(tbS0, 0)),
							       "PAP-12106: item length must be 0");
							RFALSE(comp_short_le_keys(B_N_PKEY(tbS0, 0),
										  B_N_PKEY(tb->L[0], n + item_pos - ret_val)),
							       "PAP-12107: items must be of the same file");
							if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val))) {
								temp_l = l_n <<
								    (tb->tb_sb->s_blocksize_bits -
								     UNFM_P_SHIFT);
							}
							/* update key of first item in S0 */
							version = ih_version(B_N_PITEM_HEAD(tbS0, 0));
							set_le_key_k_offset(version, B_N_PKEY(tbS0, 0),
									    le_key_k_offset(version, B_N_PKEY(tbS0, 0)) + temp_l);
							/* update left delimiting key */
							set_le_key_k_offset(version,
									    B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
									    le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0])) + temp_l);

							/* Calculate new body, position in item and insert_size[0] */
							if (l_n > zeros_num) {
								body += (l_n - zeros_num);
								zeros_num = 0;
							} else
								zeros_num -= l_n;
							pos_in_item = 0;

							RFALSE(comp_short_le_keys(B_N_PKEY(tbS0, 0),
										  B_N_PKEY(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1)) ||
							       !op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size) ||
							       !op_is_left_mergeable(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]), tbS0->b_size),
							       "PAP-12120: item must be merge-able with left neighboring item");
						} else {	/* only part of the appended item will be in L[0] */

							/* Calculate position in item for append in S[0] */
							pos_in_item -= tb->lbytes;

							RFALSE(pos_in_item <= 0,
							       "PAP-12125: no place for paste. pos_in_item=%d",
							       pos_in_item);

							/* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
							leaf_shift_left(tb, tb->lnum[0],
									tb->lbytes);
						}
					}
				} else {	/* appended item will be in L[0] in whole */

					struct item_head *pasted;

					if (!item_pos && op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)) {
						/* if we paste into first item of S[0] and it is left mergeable */
						/* then increment pos_in_item by the size of the last item in L[0] */
						pasted = B_N_PITEM_HEAD(tb->L[0], n - 1);
						if (is_direntry_le_ih(pasted))
							pos_in_item += ih_entry_count(pasted);
						else
							pos_in_item += ih_item_len(pasted);
					}

					/* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
					ret_val = leaf_shift_left(tb, tb->lnum[0],
								  tb->lbytes);
					/* Append to body of item in L[0] */
					bi.tb = tb;
					bi.bi_bh = tb->L[0];
					bi.bi_parent = tb->FL[0];
					bi.bi_position =
					    get_left_neighbor_position(tb, 0);
					leaf_paste_in_buffer(&bi,
							     n + item_pos - ret_val,
							     pos_in_item,
							     tb->insert_size[0],
							     body, zeros_num);

					/* if appended item is directory, paste entry */
					pasted = B_N_PITEM_HEAD(tb->L[0],
								n + item_pos - ret_val);
					if (is_direntry_le_ih(pasted))
						leaf_paste_entries(bi.bi_bh,
								   n + item_pos - ret_val,
								   pos_in_item, 1,
								   (struct reiserfs_de_head *)body,
								   body + DEH_SIZE,
								   tb->insert_size[0]);
					/* if appended item is indirect item, put unformatted node into un list */
					if (is_indirect_le_ih(pasted))
						set_ih_free_space(pasted, 0);
					tb->insert_size[0] = 0;
					zeros_num = 0;
				}
				break;
			default:	/* cases d and t */
				reiserfs_panic(tb->tb_sb,
					       "PAP-12130: balance_leaf: lnum > 0: unexpected mode: %s(%d)",
					       (flag == M_DELETE) ? "DELETE" :
					       ((flag == M_CUT) ? "CUT" : "UNKNOWN"),
					       flag);
			}
		} else {
			/* new item doesn't fall into L[0] */
			leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
		}
	}			/* tb->lnum[0] > 0 */

	/* Calculate new item position */
	item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0));
	if (tb->rnum[0] > 0) {
		/* shift rnum[0] items from S[0] to the right neighbor R[0] */
		n = B_NR_ITEMS(tbS0);
		switch (flag) {

		case M_INSERT:	/* insert item */
			if (n - tb->rnum[0] < item_pos) {	/* new item or its part falls to R[0] */
				if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) {	/* part of new item falls into R[0] */
					loff_t old_key_comp, old_len,
					    r_zeros_number;
					const char *r_body;
					int version;
					loff_t offset;

					leaf_shift_right(tb, tb->rnum[0] - 1,
							 -1);

					version = ih_version(ih);
					/* Remember key component and item length */
					old_key_comp = le_ih_k_offset(ih);
					old_len = ih_item_len(ih);

					/* Calculate key component and item length to insert into R[0] */
					offset = le_ih_k_offset(ih) +
					    ((old_len -
					      tb->rbytes) << (is_indirect_le_ih(ih) ?
							      tb->tb_sb->s_blocksize_bits -
							      UNFM_P_SHIFT : 0));
					set_le_ih_k_offset(ih, offset);
					put_ih_item_len(ih, tb->rbytes);
					/* Insert part of the item into R[0] */
					bi.tb = tb;
					bi.bi_bh = tb->R[0];
					bi.bi_parent = tb->FR[0];
					bi.bi_position =
					    get_right_neighbor_position(tb, 0);
					if ((old_len - tb->rbytes) > zeros_num) {
						r_zeros_number = 0;
						r_body = body + (old_len - tb->rbytes) -
						    zeros_num;
					} else {
						r_body = body;
						r_zeros_number =
						    zeros_num - (old_len -
								 tb->rbytes);
						zeros_num -= r_zeros_number;
					}

					leaf_insert_into_buf(&bi, 0, ih, r_body,
							     r_zeros_number);

					/* Replace right delimiting key by first key in R[0] */
					replace_key(tb, tb->CFR[0], tb->rkey[0],
						    tb->R[0], 0);

					/* Calculate key component and item length to insert into S[0] */
					set_le_ih_k_offset(ih, old_key_comp);
					put_ih_item_len(ih,
							old_len - tb->rbytes);

					tb->insert_size[0] -= tb->rbytes;

				} else {	/* whole new item falls into R[0] */

					/* Shift rnum[0]-1 items to R[0] */
					ret_val = leaf_shift_right(tb,
								   tb->rnum[0] - 1,
								   tb->rbytes);
					/* Insert new item into R[0] */
					bi.tb = tb;
					bi.bi_bh = tb->R[0];
					bi.bi_parent = tb->FR[0];
					bi.bi_position =
					    get_right_neighbor_position(tb, 0);
					leaf_insert_into_buf(&bi,
							     item_pos - n + tb->rnum[0] - 1,
							     ih, body, zeros_num);

					if (item_pos - n + tb->rnum[0] - 1 == 0) {
						replace_key(tb, tb->CFR[0],
							    tb->rkey[0],
							    tb->R[0], 0);
					}
					zeros_num = tb->insert_size[0] = 0;
				}
			} else {	/* new item or part of it doesn't fall into R[0] */

				leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
			}
			break;
		case M_PASTE:	/* append item */

			if (n - tb->rnum[0] <= item_pos) {	/* pasted item or part of it falls to R[0] */
				if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) {	/* we must shift the part of the appended item */
					if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) {	/* we append to directory item */
						int entry_count;

						RFALSE(zeros_num,
						       "PAP-12145: invalid parameter in case of a directory");
						entry_count =
						    I_ENTRY_COUNT(B_N_PITEM_HEAD(tbS0, item_pos));
						if (entry_count - tb->rbytes <
						    pos_in_item) {
							/* new directory entry falls into R[0] */
							int paste_entry_position;

							RFALSE(tb->rbytes - 1 >= entry_count ||
							       !tb->insert_size[0],
							       "PAP-12150: not enough entries to shift to R[0]: rbytes=%d, entry_count=%d",
							       tb->rbytes, entry_count);
							/* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
							leaf_shift_right(tb, tb->rnum[0],
									 tb->rbytes - 1);
							/* Paste given directory entry to directory item */
							paste_entry_position = pos_in_item -
							    entry_count + tb->rbytes - 1;
							bi.tb = tb;
							bi.bi_bh = tb->R[0];
							bi.bi_parent = tb->FR[0];
							bi.bi_position =
							    get_right_neighbor_position(tb, 0);
							leaf_paste_in_buffer(&bi, 0,
									     paste_entry_position,
									     tb->insert_size[0],
									     body, zeros_num);
							/* paste entry */
							leaf_paste_entries(bi.bi_bh, 0,
									   paste_entry_position, 1,
									   (struct reiserfs_de_head *)body,
									   body + DEH_SIZE,
									   tb->insert_size[0]);

							if (paste_entry_position == 0) {
								/* change delimiting keys */
								replace_key(tb, tb->CFR[0],
									    tb->rkey[0],
									    tb->R[0], 0);
							}

							tb->insert_size[0] = 0;
							pos_in_item++;
						} else {	/* new directory entry doesn't fall into R[0] */

							leaf_shift_right(tb, tb->rnum[0],
									 tb->rbytes);
						}
					} else {	/* regular object */

						int n_shift, n_rem, r_zeros_number;
						const char *r_body;

						/* Calculate number of bytes which must be shifted from appended item */
						if ((n_shift = tb->rbytes -
						     tb->insert_size[0]) < 0)
							n_shift = 0;

						RFALSE(pos_in_item !=
						       ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),
						       "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
						       ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),
						       pos_in_item);

						leaf_shift_right(tb, tb->rnum[0], n_shift);
						/* Calculate number of bytes which must remain in body after appending to R[0] */
						if ((n_rem = tb->insert_size[0] -
						     tb->rbytes) < 0)
							n_rem = 0;

						{
							int version;
							unsigned long temp_rem = n_rem;

							version = ih_version(B_N_PITEM_HEAD(tb->R[0], 0));
							if (is_indirect_le_key(version,
									       B_N_PKEY(tb->R[0], 0))) {
								temp_rem = n_rem <<
								    (tb->tb_sb->s_blocksize_bits -
								     UNFM_P_SHIFT);
							}
							set_le_key_k_offset(version,
									    B_N_PKEY(tb->R[0], 0),
									    le_key_k_offset(version, B_N_PKEY(tb->R[0], 0)) + temp_rem);
							set_le_key_k_offset(version,
									    B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]),
									    le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0])) + temp_rem);
						}
/*		  k_offset (B_N_PKEY(tb->R[0],0)) += n_rem;
		  k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/
						do_balance_mark_internal_dirty
						    (tb, tb->CFR[0], 0);

						/* Append part of body into R[0] */
						bi.tb = tb;
						bi.bi_bh = tb->R[0];
						bi.bi_parent = tb->FR[0];
						bi.bi_position =
						    get_right_neighbor_position(tb, 0);
						if (n_rem > zeros_num) {
							r_zeros_number = 0;
							r_body = body + n_rem - zeros_num;
						} else {
							r_body = body;
							r_zeros_number = zeros_num - n_rem;
							zeros_num -= r_zeros_number;
						}

						leaf_paste_in_buffer(&bi, 0, n_shift,
								     tb->insert_size[0] - n_rem,
								     r_body, r_zeros_number);

						if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->R[0], 0))) {
#if 0
							RFALSE(n_rem,
							       "PAP-12160: paste more than one unformatted node pointer");
#endif
							set_ih_free_space(B_N_PITEM_HEAD(tb->R[0], 0), 0);
						}
						tb->insert_size[0] = n_rem;
						if (!n_rem)
							pos_in_item++;
					}
				} else {	/* pasted item in whole falls into R[0] */

					struct item_head *pasted;

					ret_val = leaf_shift_right(tb, tb->rnum[0],
								   tb->rbytes);
					/* append item in R[0] */
					if (pos_in_item >= 0) {
						bi.tb = tb;
						bi.bi_bh = tb->R[0];
						bi.bi_parent = tb->FR[0];
						bi.bi_position =
						    get_right_neighbor_position(tb, 0);
						leaf_paste_in_buffer(&bi,
								     item_pos - n + tb->rnum[0],
								     pos_in_item,
								     tb->insert_size[0],
								     body, zeros_num);
					}

					/* paste new entry, if item is directory item */
					pasted = B_N_PITEM_HEAD(tb->R[0],
								item_pos - n + tb->rnum[0]);
					if (is_direntry_le_ih(pasted)
					    && pos_in_item >= 0) {
						leaf_paste_entries(bi.bi_bh,
								   item_pos - n + tb->rnum[0],
								   pos_in_item, 1,
								   (struct reiserfs_de_head *)body,
								   body + DEH_SIZE,
								   tb->insert_size[0]);
						if (!pos_in_item) {

							RFALSE(item_pos - n + tb->rnum[0],
							       "PAP-12165: directory item must be first item of node when pasting is in 0th position");

							/* update delimiting keys */
							replace_key(tb, tb->CFR[0],
								    tb->rkey[0],
								    tb->R[0], 0);
						}
					}

					if (is_indirect_le_ih(pasted))
						set_ih_free_space(pasted, 0);
					zeros_num = tb->insert_size[0] = 0;
				}
			} else {	/* new item doesn't fall into R[0] */

				leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
			}
			break;
		default:	/* cases d and t */
			reiserfs_panic(tb->tb_sb,
				       "PAP-12175: balance_leaf: rnum > 0: unexpected mode: %s(%d)",
				       (flag == M_DELETE) ? "DELETE" :
				       ((flag == M_CUT) ? "CUT" : "UNKNOWN"),
				       flag);
		}
	}			/* tb->rnum[0] > 0 */
	RFALSE(tb->blknum[0] > 3,
	       "PAP-12180: blknum can not be %d. It must be <= 3",
	       tb->blknum[0]);
	RFALSE(tb->blknum[0] < 0,
	       "PAP-12185: blknum can not be %d. It must be >= 0",
	       tb->blknum[0]);

	/* if while adding to a node we discover that it is possible to split
	   it in two, and merge the left part into the left neighbor and the
	   right part into the right neighbor, eliminating the node */
	if (tb->blknum[0] == 0) {	/* node S[0] is empty now */

		RFALSE(!tb->lnum[0] || !tb->rnum[0],
		       "PAP-12190: lnum and rnum must not be zero");
		/* if insertion was done before 0-th position in R[0], right
		   delimiting key of the tb->L[0]'s and left delimiting key are
		   not set correctly */
		if (tb->CFL[0]) {
			if (!tb->CFR[0])
				reiserfs_panic(tb->tb_sb,
					       "vs-12195: balance_leaf: CFR not initialized");
			copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
				 B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]));
			do_balance_mark_internal_dirty(tb, tb->CFL[0], 0);
		}

		reiserfs_invalidate_buffer(tb, tbS0);
		return 0;
	}
	/* Fill new nodes that appear in place of S[0] */

	/* I am told that this copying is because we need an array to enable
	   the looping code. -Hans */
	snum[0] = tb->s1num, snum[1] = tb->s2num;
	sbytes[0] = tb->s1bytes;
	sbytes[1] = tb->s2bytes;
	for (i = tb->blknum[0] - 2; i >= 0; i--) {

		RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i,
		       snum[i]);

		/* here we shift from S to S_new nodes */

		S_new[i] = get_FEB(tb);

		/* initialize block type and tree level */
		set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL);

		n = B_NR_ITEMS(tbS0);

		switch (flag) {
		case M_INSERT:	/* insert item */

			if (n - snum[i] < item_pos) {	/* new item or its part falls to first new node S_new[i] */
				if (item_pos == n - snum[i] + 1 &&
				    sbytes[i] != -1) {	/* part of new item falls into S_new[i] */
					int old_key_comp, old_len, r_zeros_number;
					const char *r_body;
					int version;

					/* Move snum[i]-1 items from S[0] to S_new[i] */
					leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
							snum[i] - 1, -1,
							S_new[i]);
					/* Remember key component and item length */
					version = ih_version(ih);
					old_key_comp = le_ih_k_offset(ih);
					old_len = ih_item_len(ih);

					/* Calculate key component and item length to insert into S_new[i] */
					set_le_ih_k_offset(ih,
							   le_ih_k_offset(ih) +
							   ((old_len - sbytes[i]) <<
							    (is_indirect_le_ih(ih) ?
							     tb->tb_sb->s_blocksize_bits -
							     UNFM_P_SHIFT : 0)));

					put_ih_item_len(ih, sbytes[i]);

					/* Insert part of the item into S_new[i] before 0-th item */
					bi.tb = tb;
					bi.bi_bh = S_new[i];
					bi.bi_parent = NULL;
					bi.bi_position = 0;

					if ((old_len - sbytes[i]) > zeros_num) {
						r_zeros_number = 0;
						r_body = body + (old_len - sbytes[i]) -
						    zeros_num;
					} else {
						r_body = body;
						r_zeros_number = zeros_num -
						    (old_len - sbytes[i]);
						zeros_num -= r_zeros_number;
					}

					leaf_insert_into_buf(&bi, 0, ih, r_body,
							     r_zeros_number);

					/* Calculate key component and item length to insert into S[i] */
					set_le_ih_k_offset(ih, old_key_comp);
					put_ih_item_len(ih,
							old_len - sbytes[i]);
					tb->insert_size[0] -= sbytes[i];
				} else {	/* whole new item falls into S_new[i] */

					/* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
					leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
							snum[i] - 1, sbytes[i],
							S_new[i]);

					/* Insert new item into S_new[i] */
					bi.tb = tb;
					bi.bi_bh = S_new[i];
					bi.bi_parent = NULL;
					bi.bi_position = 0;
					leaf_insert_into_buf(&bi,
							     item_pos - n + snum[i] - 1,
							     ih, body, zeros_num);

					zeros_num = tb->insert_size[0] = 0;
				}
			}

			else {	/* new item or its part doesn't fall into S_new[i] */

				leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
						snum[i], sbytes[i], S_new[i]);
			}
			break;

		case M_PASTE:	/* append item */

			if (n - snum[i] <= item_pos) {	/* pasted item or part of it falls to S_new[i] */
				if (item_pos == n - snum[i] &&
				    sbytes[i] != -1) {	/* we must shift part of the appended item */
					struct item_head *aux_ih;

					RFALSE(ih, "PAP-12210: ih must be 0");

					if (is_direntry_le_ih(aux_ih =
							      B_N_PITEM_HEAD(tbS0, item_pos))) {
						/* we append to directory item */
						int entry_count;

						entry_count = ih_entry_count(aux_ih);

						if (entry_count - sbytes[i] < pos_in_item
						    && pos_in_item <= entry_count) {
							/* new directory entry falls into S_new[i] */

							RFALSE(!tb->insert_size[0],
							       "PAP-12215: insert_size is already 0");
							RFALSE(sbytes[i] - 1 >= entry_count,
							       "PAP-12220: there are not so many entries (%d), only %d",
							       sbytes[i] - 1, entry_count);

							/* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
							leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
									snum[i],
									sbytes[i] - 1,
									S_new[i]);
							/* Paste given directory entry to directory item */
							bi.tb = tb;
							bi.bi_bh = S_new[i];
							bi.bi_parent = NULL;
							bi.bi_position = 0;
							leaf_paste_in_buffer(&bi, 0,
									     pos_in_item - entry_count + sbytes[i] - 1,
									     tb->insert_size[0],
									     body, zeros_num);
							/* paste new directory entry */
							leaf_paste_entries(bi.bi_bh, 0,
									   pos_in_item - entry_count + sbytes[i] - 1,
									   1,
									   (struct reiserfs_de_head *)body,
									   body + DEH_SIZE,
									   tb->insert_size[0]);
							tb->insert_size[0] = 0;
							pos_in_item++;
						} else {	/* new directory entry doesn't fall into S_new[i] */
							leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
									snum[i], sbytes[i],
									S_new[i]);
						}
					} else {	/* regular object */

						int n_shift, n_rem, r_zeros_number;
						const char *r_body;

						RFALSE(pos_in_item !=
						       ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos))
						       || tb->insert_size[0] <= 0,
						       "PAP-12225: item too short or insert_size <= 0");

						/* Calculate number of bytes which must be shifted from appended item */
						n_shift = sbytes[i] - tb->insert_size[0];
						if (n_shift < 0)
							n_shift = 0;
						leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
								snum[i], n_shift,
								S_new[i]);

						/* Calculate number of bytes which must remain in body after append to S_new[i] */
						n_rem = tb->insert_size[0] - sbytes[i];
						if (n_rem < 0)
							n_rem = 0;
						/* Append part of body into S_new[0] */
						bi.tb = tb;
						bi.bi_bh = S_new[i];
						bi.bi_parent = NULL;
						bi.bi_position = 0;

						if (n_rem > zeros_num) {
							r_zeros_number = 0;
							r_body = body + n_rem - zeros_num;
						} else {
							r_body = body;
							r_zeros_number = zeros_num - n_rem;
							zeros_num -= r_zeros_number;
						}

						leaf_paste_in_buffer(&bi, 0, n_shift,
								     tb->insert_size[0] - n_rem,
								     r_body, r_zeros_number);
						{
							struct item_head *tmp;

							tmp = B_N_PITEM_HEAD(S_new[i], 0);
							if (is_indirect_le_ih(tmp)) {
								set_ih_free_space(tmp, 0);
								set_le_ih_k_offset(tmp,
										   le_ih_k_offset(tmp) +
										   (n_rem <<
										    (tb->tb_sb->s_blocksize_bits -
										     UNFM_P_SHIFT)));
							} else {
								set_le_ih_k_offset(tmp,
										   le_ih_k_offset(tmp) +
										   n_rem);
							}
						}

						tb->insert_size[0] = n_rem;
						if (!n_rem)
							pos_in_item++;
					}
				} else {
					/* item falls wholly into S_new[i] */
					struct item_head *pasted;

#ifdef CONFIG_REISERFS_CHECK
					struct item_head *ih =
					    B_N_PITEM_HEAD(tbS0, item_pos);

					if (!is_direntry_le_ih(ih)
					    && (pos_in_item != ih_item_len(ih)
						|| tb->insert_size[0] <= 0))
						reiserfs_panic(tb->tb_sb,
							       "PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len");
#endif				/* CONFIG_REISERFS_CHECK */

					ret_val = leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
								  snum[i], sbytes[i],
								  S_new[i]);

					RFALSE(ret_val,
					       "PAP-12240: unexpected value returned by leaf_move_items (%d)",
					       ret_val);

					/* paste into item */
					bi.tb = tb;
					bi.bi_bh = S_new[i];
					bi.bi_parent = NULL;
					bi.bi_position = 0;
					leaf_paste_in_buffer(&bi,
							     item_pos - n + snum[i],
							     pos_in_item,
							     tb->insert_size[0],
							     body, zeros_num);

					pasted = B_N_PITEM_HEAD(S_new[i],
								item_pos - n + snum[i]);
					if (is_direntry_le_ih(pasted)) {
						leaf_paste_entries(bi.bi_bh,
								   item_pos - n + snum[i],
								   pos_in_item, 1,
								   (struct reiserfs_de_head *)body,
								   body + DEH_SIZE,
								   tb->insert_size[0]);
					}

					/* if we paste to indirect item update ih_free_space */
					if (is_indirect_le_ih(pasted))
						set_ih_free_space(pasted, 0);
					zeros_num = tb->insert_size[0] = 0;
				}
			}

			else {	/* pasted item doesn't fall into S_new[i] */

				leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
						snum[i], sbytes[i], S_new[i]);
			}
			break;

		default:	/* cases d and t */
			reiserfs_panic(tb->tb_sb,
				       "PAP-12245: balance_leaf: blknum > 2: unexpected mode: %s(%d)",
				       (flag == M_DELETE) ? "DELETE" :
				       ((flag == M_CUT) ? "CUT" : "UNKNOWN"),
				       flag);
		}

		memcpy(insert_key + i, B_N_PKEY(S_new[i], 0), KEY_SIZE);
		insert_ptr[i] = S_new[i];

		RFALSE(!buffer_journaled(S_new[i])
		       || buffer_journal_dirty(S_new[i])
		       || buffer_dirty(S_new[i]),
		       "PAP-12247: S_new[%d] : (%b)", i, S_new[i]);
	}
	/* if the affected item was not wholly shifted then we perform all
	   necessary operations on that part or whole of the affected item
	   which remains in S */
	if (0 <= item_pos && item_pos < tb->s0num) {	/* if we must insert or append into buffer S[0] */

		switch (flag) {
		case M_INSERT:	/* insert item into S[0] */
			bi.tb = tb;
			bi.bi_bh = tbS0;
			bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
			bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
			leaf_insert_into_buf(&bi, item_pos, ih, body,
					     zeros_num);

			/* If we insert the first key change the delimiting key */
			if (item_pos == 0) {
				if (tb->CFL[0])	/* can be 0 in reiserfsck */
					replace_key(tb, tb->CFL[0], tb->lkey[0],
						    tbS0, 0);
			}
			break;

		case M_PASTE:{	/* append item in S[0] */
				struct item_head *pasted;

				pasted = B_N_PITEM_HEAD(tbS0, item_pos);
				/* when directory, may be new entry already pasted */
				if (is_direntry_le_ih(pasted)) {
					if (pos_in_item >= 0 &&
					    pos_in_item <=
					    ih_entry_count(pasted)) {

						RFALSE(!tb->insert_size[0],
						       "PAP-12260: insert_size is 0 already");

						/* prepare space */
						bi.tb = tb;
						bi.bi_bh = tbS0;
						bi.bi_parent =
						    PATH_H_PPARENT(tb->tb_path,
								   0);
						bi.bi_position =
						    PATH_H_POSITION(tb->tb_path,
								    1);
						leaf_paste_in_buffer(&bi,
								     item_pos,
								     pos_in_item,
								     tb->insert_size[0],
								     body, zeros_num);

						/* paste entry */
						leaf_paste_entries(bi.bi_bh,
								   item_pos,
								   pos_in_item, 1,
								   (struct reiserfs_de_head *)body,
								   body + DEH_SIZE,
								   tb->insert_size[0]);

						if (!item_pos && !pos_in_item) {
							RFALSE(!tb->CFL[0] || !tb->L[0],
							       "PAP-12270: CFL[0]/L[0] must be specified");
							if (tb->CFL[0])
								replace_key(tb,
									    tb->CFL[0],
									    tb->lkey[0],
									    tbS0, 0);
						}
						tb->insert_size[0] = 0;
					}
				} else {	/* regular object */
					if (pos_in_item == ih_item_len(pasted)) {

						RFALSE(tb->insert_size[0] <= 0,
						       "PAP-12275: insert size must not be %d",
						       tb->insert_size[0]);
						bi.tb = tb;
						bi.bi_bh = tbS0;
						bi.bi_parent =
						    PATH_H_PPARENT(tb->tb_path,
								   0);
						bi.bi_position =
						    PATH_H_POSITION(tb->tb_path,
								    1);
						leaf_paste_in_buffer(&bi,
								     item_pos,
								     pos_in_item,
								     tb->insert_size[0],
								     body, zeros_num);

						if (is_indirect_le_ih(pasted)) {
#if 0
							RFALSE(tb->insert_size[0] != UNFM_P_SIZE,
							       "PAP-12280: insert_size for indirect item must be %d, not %d",
							       UNFM_P_SIZE,
							       tb->insert_size[0]);
#endif
							set_ih_free_space(pasted, 0);
						}
						tb->insert_size[0] = 0;
					}
#ifdef CONFIG_REISERFS_CHECK
					else {
						if (tb->insert_size[0]) {
							print_cur_tb("12285");
							reiserfs_panic(tb->tb_sb,
								       "PAP-12285: balance_leaf: insert_size must be 0 (%d)",
								       tb->insert_size[0]);
						}
					}
#endif				/* CONFIG_REISERFS_CHECK */
				}
			}	/* case M_PASTE: */
		}
	}
#ifdef CONFIG_REISERFS_CHECK
	if (flag == M_PASTE && tb->insert_size[0]) {
		print_cur_tb("12290");
		reiserfs_panic(tb->tb_sb,
			       "PAP-12290: balance_leaf: insert_size is still not 0 (%d)",
			       tb->insert_size[0]);
	}
#endif				/* CONFIG_REISERFS_CHECK */
	return 0;
}				/* Leaf level of the tree is balanced (end of balance_leaf) */
/* Make empty node */
void make_empty_node(struct buffer_info *bi)
{
	struct block_head *blkh;

	RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");

	blkh = B_BLK_HEAD(bi->bi_bh);
	set_blkh_nr_item(blkh, 0);
	set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh));

	if (bi->bi_parent)
		B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0;	/* Endian safe if 0 */
}
/* Get first empty buffer */
struct buffer_head *get_FEB(struct tree_balance *tb)
{
	int i;
	struct buffer_head *first_b;
	struct buffer_info bi;

	for (i = 0; i < MAX_FEB_SIZE; i++)
		if (tb->FEB[i] != 0)
			break;

	if (i == MAX_FEB_SIZE)
		reiserfs_panic(tb->tb_sb,
			       "vs-12300: get_FEB: FEB list is empty");

	bi.tb = tb;
	bi.bi_bh = first_b = tb->FEB[i];
	bi.bi_parent = NULL;
	bi.bi_position = 0;
	make_empty_node(&bi);
	set_buffer_uptodate(first_b);
	tb->FEB[i] = NULL;
	tb->used[i] = first_b;

	return first_b;
}
/* This is now used because reiserfs_free_block has to be able to
** schedule.
*/
static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
{
	int i;

	if (buffer_dirty(bh))
		reiserfs_warning(tb->tb_sb,
				 "store_thrown deals with dirty buffer");
	for (i = 0; i < sizeof(tb->thrown) / sizeof(tb->thrown[0]); i++)
		if (!tb->thrown[i]) {
			tb->thrown[i] = bh;
			get_bh(bh);	/* free_thrown puts this */
			return;
		}
	reiserfs_warning(tb->tb_sb, "store_thrown: too many thrown buffers");
}
static void free_thrown(struct tree_balance *tb)
{
	int i;
	b_blocknr_t blocknr;
	for (i = 0; i < sizeof(tb->thrown) / sizeof(tb->thrown[0]); i++) {
		if (tb->thrown[i]) {
			blocknr = tb->thrown[i]->b_blocknr;
			if (buffer_dirty(tb->thrown[i]))
				reiserfs_warning(tb->tb_sb,
						 "free_thrown deals with dirty buffer %d",
						 blocknr);
			brelse(tb->thrown[i]);	/* incremented in store_thrown */
			reiserfs_free_block(tb->transaction_handle, NULL,
					    blocknr, 0);
		}
	}
}
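/* Illustrative sketch (not part of the original file): the store-now,
   free-later pattern used by store_thrown/free_thrown above, reduced to a
   fixed slot array.  Stand-alone toy; plain malloc/free stand in for
   get_bh/brelse/reiserfs_free_block. */
#if 0	/* example only */
#include <stdio.h>
#include <stdlib.h>

#define NR_SLOTS 5

static void *thrown[NR_SLOTS];

static void store_thrown_toy(void *p)
{
	int i;
	for (i = 0; i < NR_SLOTS; i++)
		if (!thrown[i]) {
			thrown[i] = p;	/* defer the free */
			return;
		}
	fprintf(stderr, "too many thrown buffers\n");
}

static void free_thrown_toy(void)
{
	int i;
	for (i = 0; i < NR_SLOTS; i++)
		if (thrown[i]) {
			free(thrown[i]);	/* safe to block here */
			thrown[i] = NULL;
		}
}

int main(void)
{
	store_thrown_toy(malloc(16));
	store_thrown_toy(malloc(32));
	free_thrown_toy();	/* all deferred frees happen here */
	return 0;
}
#endif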
void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh)
{
	struct block_head *blkh;
	blkh = B_BLK_HEAD(bh);
	set_blkh_level(blkh, FREE_LEVEL);
	set_blkh_nr_item(blkh, 0);

	clear_buffer_dirty(bh);
	store_thrown(tb, bh);
}
/* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest,
		 struct buffer_head *src, int n_src)
{

	RFALSE(dest == NULL || src == NULL,
	       "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
	       src, dest);
	RFALSE(!B_IS_KEYS_LEVEL(dest),
	       "vs-12310: invalid level (%z) for destination buffer. dest must be internal",
	       dest);
	RFALSE(n_dest < 0 || n_src < 0,
	       "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
	RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
	       "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
	       n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));

	if (B_IS_ITEMS_LEVEL(src))
		/* source buffer contains leaf node */
		memcpy(B_N_PDELIM_KEY(dest, n_dest),
		       B_N_PITEM_HEAD(src, n_src), KEY_SIZE);
	else
		memcpy(B_N_PDELIM_KEY(dest, n_dest),
		       B_N_PDELIM_KEY(src, n_src), KEY_SIZE);

	do_balance_mark_internal_dirty(tb, dest, 0);
}
int get_left_neighbor_position(struct tree_balance *tb, int h)
{
	int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);

	RFALSE(PATH_H_PPARENT(tb->tb_path, h) == 0 || tb->FL[h] == 0,
	       "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
	       h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h));

	if (Sh_position == 0)
		return B_NR_ITEMS(tb->FL[h]);
	else
		return Sh_position - 1;
}
int get_right_neighbor_position(struct tree_balance *tb, int h)
{
	int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);

	RFALSE(PATH_H_PPARENT(tb->tb_path, h) == 0 || tb->FR[h] == 0,
	       "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
	       h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]);

	if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h)))
		return 0;
	else
		return Sh_position + 1;
}
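/* Illustrative sketch (not part of the original file): the edge rule used by
   get_left_neighbor_position/get_right_neighbor_position above.  If S[h] is
   the 0-th child, its left neighbor hangs off the far end of FL[h];
   symmetrically at the right edge of the parent.  Stand-alone toy;
   fl_nr_items and parent_nr_items play the role of B_NR_ITEMS(). */
#if 0	/* example only */
#include <stdio.h>

static int left_pos(int sh_position, int fl_nr_items)
{
	return (sh_position == 0) ? fl_nr_items : sh_position - 1;
}

static int right_pos(int sh_position, int parent_nr_items)
{
	return (sh_position == parent_nr_items) ? 0 : sh_position + 1;
}

int main(void)
{
	printf("%d %d\n", left_pos(0, 7), right_pos(7, 7));	/* edges: 7 0 */
	printf("%d %d\n", left_pos(3, 7), right_pos(3, 7));	/* inner: 2 4 */
	return 0;
}
#endif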
#ifdef CONFIG_REISERFS_CHECK

int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);
static void check_internal_node(struct super_block *s, struct buffer_head *bh,
				char *mes)
{
	struct disk_child *dc;
	int i;

	RFALSE(!bh, "PAP-12336: bh == 0");

	if (!bh || !B_IS_IN_TREE(bh))
		return;

	RFALSE(!buffer_dirty(bh) &&
	       !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
	       "PAP-12337: buffer (%b) must be dirty", bh);
	dc = B_N_CHILD(bh, 0);

	for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) {
		if (!is_reusable(s, dc_block_number(dc), 1)) {
			print_cur_tb(mes);
			reiserfs_panic(s,
				       "PAP-12338: check_internal_node: invalid child pointer %y in %b",
				       dc, bh);
		}
	}
}
static int locked_or_not_in_tree(struct buffer_head *bh, char *which)
{
	if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
	    !B_IS_IN_TREE(bh)) {
		reiserfs_warning(NULL,
				 "vs-12339: locked_or_not_in_tree: %s (%b)",
				 which, bh);
		return 1;
	}
	return 0;
}
static int check_before_balancing(struct tree_balance *tb)
{
	int retval = 0;

	if (cur_tb) {
		reiserfs_panic(tb->tb_sb, "vs-12335: check_before_balancing: "
			       "suspect that schedule occurred based on cur_tb not being null at this point in code. "
			       "do_balance cannot properly handle schedule occurring while it runs.");
	}

	/* double check that buffers that we will modify are unlocked.
	   (fix_nodes should already have prepped all of these for us). */
	if (tb->lnum[0]) {
		retval |= locked_or_not_in_tree(tb->L[0], "L[0]");
		retval |= locked_or_not_in_tree(tb->FL[0], "FL[0]");
		retval |= locked_or_not_in_tree(tb->CFL[0], "CFL[0]");
		check_leaf(tb->L[0]);
	}
	if (tb->rnum[0]) {
		retval |= locked_or_not_in_tree(tb->R[0], "R[0]");
		retval |= locked_or_not_in_tree(tb->FR[0], "FR[0]");
		retval |= locked_or_not_in_tree(tb->CFR[0], "CFR[0]");
		check_leaf(tb->R[0]);
	}
	retval |= locked_or_not_in_tree(PATH_PLAST_BUFFER(tb->tb_path), "S[0]");
	check_leaf(PATH_PLAST_BUFFER(tb->tb_path));

	return retval;
}
static void check_after_balance_leaf(struct tree_balance *tb)
{
	if (tb->lnum[0]) {
		if (B_FREE_SPACE(tb->L[0]) !=
		    MAX_CHILD_SIZE(tb->L[0]) -
		    dc_size(B_N_CHILD
			    (tb->FL[0], get_left_neighbor_position(tb, 0)))) {
			print_cur_tb("12221");
			reiserfs_panic(tb->tb_sb,
				       "PAP-12355: check_after_balance_leaf: shift to left was incorrect");
		}
	}
	if (tb->rnum[0]) {
		if (B_FREE_SPACE(tb->R[0]) !=
		    MAX_CHILD_SIZE(tb->R[0]) -
		    dc_size(B_N_CHILD
			    (tb->FR[0], get_right_neighbor_position(tb, 0)))) {
			print_cur_tb("12222");
			reiserfs_panic(tb->tb_sb,
				       "PAP-12360: check_after_balance_leaf: shift to right was incorrect");
		}
	}
	if (PATH_H_PBUFFER(tb->tb_path, 1) &&
	    (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) !=
	     (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
	      dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
				PATH_H_POSITION(tb->tb_path, 1)))))) {
		int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0));
		int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
			     dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
					       PATH_H_POSITION(tb->tb_path,
							       1))));
		print_cur_tb("12223");
		reiserfs_warning(tb->tb_sb,
				 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
				 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
				 left,
				 MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)),
				 PATH_H_PBUFFER(tb->tb_path, 1),
				 PATH_H_POSITION(tb->tb_path, 1),
				 dc_size(B_N_CHILD
					 (PATH_H_PBUFFER(tb->tb_path, 1),
					  PATH_H_POSITION(tb->tb_path, 1))),
				 right);
		reiserfs_panic(tb->tb_sb,
			       "PAP-12365: check_after_balance_leaf: S is incorrect");
	}
}
static void check_leaf_level(struct tree_balance *tb)
{
	check_leaf(tb->L[0]);
	check_leaf(tb->R[0]);
	check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
}
static void check_internal_levels(struct tree_balance *tb)
{
	int h;

	/* check all internal nodes */
	for (h = 1; tb->insert_size[h]; h++) {
		check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h),
				    "BAD BUFFER ON PATH");
		if (tb->lnum[h])
			check_internal_node(tb->tb_sb, tb->L[h], "BAD L");
		if (tb->rnum[h])
			check_internal_node(tb->tb_sb, tb->R[h], "BAD R");
	}
}

#endif
/* Now we have all of the buffers that must be used in balancing of
   the tree.  We rely on the assumption that schedule() will not occur
   while do_balance works. ( Only interrupt handlers are acceptable.)
   We balance the tree according to the analysis made before this,
   using buffers already obtained.  For SMP support it will someday be
   necessary to add ordered locking of tb. */

/* Some interesting rules of balancing:

   we delete a maximum of two nodes per level per balancing: we never
   delete R, when we delete two of three nodes L, S, R then we move
   them into one node;

   we only delete L if we are deleting two nodes, if we delete only
   one node we delete S;

   if we shift leaves then we shift as much as we can: this is a
   deliberate policy of extremism in node packing which results in
   higher average utilization after repeated random balance operations
   at the cost of more memory copies and more balancing as a result of
   small insertions to full nodes;

   if we shift internal nodes we try to evenly balance the node
   utilization, with consequent less balancing at the cost of lower
   utilization;

   one could argue that the policy for directories in leaves should be
   that of internal nodes, but we will wait until another day to
   evaluate this....  It would be nice to someday measure and prove
   these assumptions as to what is optimal.... */
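/* Illustrative sketch (not part of the original file): the "extremism in
   node packing" policy for leaves described above, contrasted with the even
   balancing preferred for internal nodes, with two item counters standing in
   for nodes.  Stand-alone toy; CAPACITY and the counts are arbitrary. */
#if 0	/* example only */
#include <stdio.h>

#define CAPACITY 10

/* leaf policy: shift as much as we can into the left node */
static void pack_left(int *l, int *s)
{
	int moved = CAPACITY - *l;
	if (moved > *s)
		moved = *s;
	*l += moved;
	*s -= moved;
}

/* internal-node policy: split the items evenly */
static void pack_even(int *l, int *s)
{
	int total = *l + *s;
	*l = total / 2;
	*s = total - total / 2;
}

int main(void)
{
	int l = 6, s = 7;
	pack_left(&l, &s);
	printf("leaf policy:     L=%d S=%d\n", l, s);	/* L=10 S=3 */
	l = 6, s = 7;
	pack_even(&l, &s);
	printf("internal policy: L=%d S=%d\n", l, s);	/* L=6 S=7 */
	return 0;
}
#endif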
static inline void do_balance_starts(struct tree_balance *tb)
{
	/* use print_cur_tb() to see initial state of struct
	   tree_balance */

	/* store_print_tb (tb); */

	/* do not delete, just comment it out */
	/* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
	   "check"); */
	RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
#ifdef CONFIG_REISERFS_CHECK
	cur_tb = tb;
#endif
}
static inline void do_balance_completed(struct tree_balance *tb)
{

#ifdef CONFIG_REISERFS_CHECK
	check_leaf_level(tb);
	check_internal_levels(tb);
	cur_tb = NULL;
#endif

	/* reiserfs_free_block is no longer schedule safe.  So, we need to
	 ** put the buffers we want freed on the thrown list during do_balance,
	 ** and then free them now
	 */

	REISERFS_SB(tb->tb_sb)->s_do_balance++;

	/* release all nodes held to perform the balancing */
	unfix_nodes(tb);

	free_thrown(tb);
}
void do_balance(struct tree_balance *tb,	/* tree_balance structure */
		struct item_head *ih,	/* item header of inserted item */
		const char *body,	/* body of inserted item or bytes to paste */
		int flag)
{				/* i - insert, d - delete,
				   c - cut, p - paste

				   Cut means delete part of an item
				   (includes removing an entry from a
				   directory).

				   Delete means delete whole item.

				   Insert means add a new item into the
				   tree.

				   Paste means to append to the end of an
				   existing file or to insert a directory
				   entry.  */
	int child_pos,		/* position of a child node in its parent */
	 h;			/* level of the tree being processed */
	struct item_head insert_key[2];	/* in our processing of one level
					   we sometimes determine what
					   must be inserted into the next
					   higher level.  This insertion
					   consists of a key or two keys
					   and their corresponding
					   pointers */
	struct buffer_head *insert_ptr[2];	/* inserted node-ptrs for the next
						   level */

	tb->tb_mode = flag;
	tb->need_balance_dirty = 0;

	if (FILESYSTEM_CHANGED_TB(tb)) {
		reiserfs_panic(tb->tb_sb,
			       "clm-6000: do_balance, fs generation has changed\n");
	}
	/* if we have no real work to do  */
	if (!tb->insert_size[0]) {
		reiserfs_warning(tb->tb_sb,
				 "PAP-12350: do_balance: insert_size == 0, mode == %c",
				 flag);
		unfix_nodes(tb);
		return;
	}

	atomic_inc(&(fs_generation(tb->tb_sb)));
	do_balance_starts(tb);

	/* balance leaf returns 0 except if combining L R and S into
	   one node.  see balance_internal() for explanation of this
	   line of code. */
	child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
	    balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);

#ifdef CONFIG_REISERFS_CHECK
	check_after_balance_leaf(tb);
#endif

	/* Balance internal level of the tree. */
	for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++)
		child_pos =
		    balance_internal(tb, h, child_pos, insert_key, insert_ptr);

	do_balance_completed(tb);
}
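/* Illustrative sketch (not part of the original file): the overall shape of
   do_balance above -- fix the leaf level first, then visit each successively
   higher level while it still has pending work.  Stand-alone toy;
   MAX_HEIGHT_TOY and the array are hypothetical stand-ins for MAX_HEIGHT and
   tb->insert_size[]. */
#if 0	/* example only */
#include <stdio.h>

#define MAX_HEIGHT_TOY 5

int main(void)
{
	/* pending work per level; level 0 is the leaf level */
	int insert_size[MAX_HEIGHT_TOY] = { 10, 4, 2, 0, 0 };
	int h;

	printf("balance leaf level 0 (%d bytes pending)\n", insert_size[0]);
	for (h = 1; h < MAX_HEIGHT_TOY && insert_size[h]; h++)
		printf("balance internal level %d (%d bytes pending)\n",
		       h, insert_size[h]);
	return 0;
}
#endif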