Linux 2.6.13-rc4
[linux-2.6/next.git] / fs / reiserfs / ibalance.c
blob6c5a726fd34bf8920da792af32563bc6a47bbc81
1 /*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
5 #include <linux/config.h>
6 #include <asm/uaccess.h>
7 #include <linux/string.h>
8 #include <linux/time.h>
9 #include <linux/reiserfs_fs.h>
10 #include <linux/buffer_head.h>
12 /* this is one and only function that is used outside (do_balance.c) */
13 int balance_internal(struct tree_balance *,
14 int, int, struct item_head *, struct buffer_head **);
16 /* modes of internal_shift_left, internal_shift_right and internal_insert_childs */
17 #define INTERNAL_SHIFT_FROM_S_TO_L 0
18 #define INTERNAL_SHIFT_FROM_R_TO_S 1
19 #define INTERNAL_SHIFT_FROM_L_TO_S 2
20 #define INTERNAL_SHIFT_FROM_S_TO_R 3
21 #define INTERNAL_INSERT_TO_S 4
22 #define INTERNAL_INSERT_TO_L 5
23 #define INTERNAL_INSERT_TO_R 6
25 static void internal_define_dest_src_infos(int shift_mode,
26 struct tree_balance *tb,
27 int h,
28 struct buffer_info *dest_bi,
29 struct buffer_info *src_bi,
30 int *d_key, struct buffer_head **cf)
32 memset(dest_bi, 0, sizeof(struct buffer_info));
33 memset(src_bi, 0, sizeof(struct buffer_info));
34 /* define dest, src, dest parent, dest position */
35 switch (shift_mode) {
36 case INTERNAL_SHIFT_FROM_S_TO_L: /* used in internal_shift_left */
37 src_bi->tb = tb;
38 src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
39 src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
40 src_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
41 dest_bi->tb = tb;
42 dest_bi->bi_bh = tb->L[h];
43 dest_bi->bi_parent = tb->FL[h];
44 dest_bi->bi_position = get_left_neighbor_position(tb, h);
45 *d_key = tb->lkey[h];
46 *cf = tb->CFL[h];
47 break;
48 case INTERNAL_SHIFT_FROM_L_TO_S:
49 src_bi->tb = tb;
50 src_bi->bi_bh = tb->L[h];
51 src_bi->bi_parent = tb->FL[h];
52 src_bi->bi_position = get_left_neighbor_position(tb, h);
53 dest_bi->tb = tb;
54 dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
55 dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
56 dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1); /* dest position is analog of dest->b_item_order */
57 *d_key = tb->lkey[h];
58 *cf = tb->CFL[h];
59 break;
61 case INTERNAL_SHIFT_FROM_R_TO_S: /* used in internal_shift_left */
62 src_bi->tb = tb;
63 src_bi->bi_bh = tb->R[h];
64 src_bi->bi_parent = tb->FR[h];
65 src_bi->bi_position = get_right_neighbor_position(tb, h);
66 dest_bi->tb = tb;
67 dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
68 dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
69 dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
70 *d_key = tb->rkey[h];
71 *cf = tb->CFR[h];
72 break;
74 case INTERNAL_SHIFT_FROM_S_TO_R:
75 src_bi->tb = tb;
76 src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
77 src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
78 src_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
79 dest_bi->tb = tb;
80 dest_bi->bi_bh = tb->R[h];
81 dest_bi->bi_parent = tb->FR[h];
82 dest_bi->bi_position = get_right_neighbor_position(tb, h);
83 *d_key = tb->rkey[h];
84 *cf = tb->CFR[h];
85 break;
87 case INTERNAL_INSERT_TO_L:
88 dest_bi->tb = tb;
89 dest_bi->bi_bh = tb->L[h];
90 dest_bi->bi_parent = tb->FL[h];
91 dest_bi->bi_position = get_left_neighbor_position(tb, h);
92 break;
94 case INTERNAL_INSERT_TO_S:
95 dest_bi->tb = tb;
96 dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
97 dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
98 dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
99 break;
101 case INTERNAL_INSERT_TO_R:
102 dest_bi->tb = tb;
103 dest_bi->bi_bh = tb->R[h];
104 dest_bi->bi_parent = tb->FR[h];
105 dest_bi->bi_position = get_right_neighbor_position(tb, h);
106 break;
108 default:
109 reiserfs_panic(tb->tb_sb,
110 "internal_define_dest_src_infos: shift type is unknown (%d)",
111 shift_mode);
115 /* Insert count node pointers into buffer cur before position to + 1.
116 * Insert count items into buffer cur before position to.
117 * Items and node pointers are specified by inserted and bh respectively.
119 static void internal_insert_childs(struct buffer_info *cur_bi,
120 int to, int count,
121 struct item_head *inserted,
122 struct buffer_head **bh)
124 struct buffer_head *cur = cur_bi->bi_bh;
125 struct block_head *blkh;
126 int nr;
127 struct reiserfs_key *ih;
128 struct disk_child new_dc[2];
129 struct disk_child *dc;
130 int i;
132 if (count <= 0)
133 return;
135 blkh = B_BLK_HEAD(cur);
136 nr = blkh_nr_item(blkh);
138 RFALSE(count > 2, "too many children (%d) are to be inserted", count);
139 RFALSE(B_FREE_SPACE(cur) < count * (KEY_SIZE + DC_SIZE),
140 "no enough free space (%d), needed %d bytes",
141 B_FREE_SPACE(cur), count * (KEY_SIZE + DC_SIZE));
143 /* prepare space for count disk_child */
144 dc = B_N_CHILD(cur, to + 1);
146 memmove(dc + count, dc, (nr + 1 - (to + 1)) * DC_SIZE);
148 /* copy to_be_insert disk children */
149 for (i = 0; i < count; i++) {
150 put_dc_size(&(new_dc[i]),
151 MAX_CHILD_SIZE(bh[i]) - B_FREE_SPACE(bh[i]));
152 put_dc_block_number(&(new_dc[i]), bh[i]->b_blocknr);
154 memcpy(dc, new_dc, DC_SIZE * count);
156 /* prepare space for count items */
157 ih = B_N_PDELIM_KEY(cur, ((to == -1) ? 0 : to));
159 memmove(ih + count, ih,
160 (nr - to) * KEY_SIZE + (nr + 1 + count) * DC_SIZE);
162 /* copy item headers (keys) */
163 memcpy(ih, inserted, KEY_SIZE);
164 if (count > 1)
165 memcpy(ih + 1, inserted + 1, KEY_SIZE);
167 /* sizes, item number */
168 set_blkh_nr_item(blkh, blkh_nr_item(blkh) + count);
169 set_blkh_free_space(blkh,
170 blkh_free_space(blkh) - count * (DC_SIZE +
171 KEY_SIZE));
173 do_balance_mark_internal_dirty(cur_bi->tb, cur, 0);
175 /*&&&&&&&&&&&&&&&&&&&&&&&& */
176 check_internal(cur);
177 /*&&&&&&&&&&&&&&&&&&&&&&&& */
179 if (cur_bi->bi_parent) {
180 struct disk_child *t_dc =
181 B_N_CHILD(cur_bi->bi_parent, cur_bi->bi_position);
182 put_dc_size(t_dc,
183 dc_size(t_dc) + (count * (DC_SIZE + KEY_SIZE)));
184 do_balance_mark_internal_dirty(cur_bi->tb, cur_bi->bi_parent,
187 /*&&&&&&&&&&&&&&&&&&&&&&&& */
188 check_internal(cur_bi->bi_parent);
189 /*&&&&&&&&&&&&&&&&&&&&&&&& */
194 /* Delete del_num items and node pointers from buffer cur starting from *
195 * the first_i'th item and first_p'th pointers respectively. */
196 static void internal_delete_pointers_items(struct buffer_info *cur_bi,
197 int first_p,
198 int first_i, int del_num)
200 struct buffer_head *cur = cur_bi->bi_bh;
201 int nr;
202 struct block_head *blkh;
203 struct reiserfs_key *key;
204 struct disk_child *dc;
206 RFALSE(cur == NULL, "buffer is 0");
207 RFALSE(del_num < 0,
208 "negative number of items (%d) can not be deleted", del_num);
209 RFALSE(first_p < 0 || first_p + del_num > B_NR_ITEMS(cur) + 1
210 || first_i < 0,
211 "first pointer order (%d) < 0 or "
212 "no so many pointers (%d), only (%d) or "
213 "first key order %d < 0", first_p, first_p + del_num,
214 B_NR_ITEMS(cur) + 1, first_i);
215 if (del_num == 0)
216 return;
218 blkh = B_BLK_HEAD(cur);
219 nr = blkh_nr_item(blkh);
221 if (first_p == 0 && del_num == nr + 1) {
222 RFALSE(first_i != 0,
223 "1st deleted key must have order 0, not %d", first_i);
224 make_empty_node(cur_bi);
225 return;
228 RFALSE(first_i + del_num > B_NR_ITEMS(cur),
229 "first_i = %d del_num = %d "
230 "no so many keys (%d) in the node (%b)(%z)",
231 first_i, del_num, first_i + del_num, cur, cur);
233 /* deleting */
234 dc = B_N_CHILD(cur, first_p);
236 memmove(dc, dc + del_num, (nr + 1 - first_p - del_num) * DC_SIZE);
237 key = B_N_PDELIM_KEY(cur, first_i);
238 memmove(key, key + del_num,
239 (nr - first_i - del_num) * KEY_SIZE + (nr + 1 -
240 del_num) * DC_SIZE);
242 /* sizes, item number */
243 set_blkh_nr_item(blkh, blkh_nr_item(blkh) - del_num);
244 set_blkh_free_space(blkh,
245 blkh_free_space(blkh) +
246 (del_num * (KEY_SIZE + DC_SIZE)));
248 do_balance_mark_internal_dirty(cur_bi->tb, cur, 0);
249 /*&&&&&&&&&&&&&&&&&&&&&&& */
250 check_internal(cur);
251 /*&&&&&&&&&&&&&&&&&&&&&&& */
253 if (cur_bi->bi_parent) {
254 struct disk_child *t_dc;
255 t_dc = B_N_CHILD(cur_bi->bi_parent, cur_bi->bi_position);
256 put_dc_size(t_dc,
257 dc_size(t_dc) - (del_num * (KEY_SIZE + DC_SIZE)));
259 do_balance_mark_internal_dirty(cur_bi->tb, cur_bi->bi_parent,
261 /*&&&&&&&&&&&&&&&&&&&&&&&& */
262 check_internal(cur_bi->bi_parent);
263 /*&&&&&&&&&&&&&&&&&&&&&&&& */
267 /* delete n node pointers and items starting from given position */
268 static void internal_delete_childs(struct buffer_info *cur_bi, int from, int n)
270 int i_from;
272 i_from = (from == 0) ? from : from - 1;
274 /* delete n pointers starting from `from' position in CUR;
275 delete n keys starting from 'i_from' position in CUR;
277 internal_delete_pointers_items(cur_bi, from, i_from, n);
280 /* copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest
281 * last_first == FIRST_TO_LAST means, that we copy first items from src to tail of dest
282 * last_first == LAST_TO_FIRST means, that we copy last items from src to head of dest
284 static void internal_copy_pointers_items(struct buffer_info *dest_bi,
285 struct buffer_head *src,
286 int last_first, int cpy_num)
288 /* ATTENTION! Number of node pointers in DEST is equal to number of items in DEST *
289 * as delimiting key have already inserted to buffer dest.*/
290 struct buffer_head *dest = dest_bi->bi_bh;
291 int nr_dest, nr_src;
292 int dest_order, src_order;
293 struct block_head *blkh;
294 struct reiserfs_key *key;
295 struct disk_child *dc;
297 nr_src = B_NR_ITEMS(src);
299 RFALSE(dest == NULL || src == NULL,
300 "src (%p) or dest (%p) buffer is 0", src, dest);
301 RFALSE(last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST,
302 "invalid last_first parameter (%d)", last_first);
303 RFALSE(nr_src < cpy_num - 1,
304 "no so many items (%d) in src (%d)", cpy_num, nr_src);
305 RFALSE(cpy_num < 0, "cpy_num less than 0 (%d)", cpy_num);
306 RFALSE(cpy_num - 1 + B_NR_ITEMS(dest) > (int)MAX_NR_KEY(dest),
307 "cpy_num (%d) + item number in dest (%d) can not be > MAX_NR_KEY(%d)",
308 cpy_num, B_NR_ITEMS(dest), MAX_NR_KEY(dest));
310 if (cpy_num == 0)
311 return;
313 /* coping */
314 blkh = B_BLK_HEAD(dest);
315 nr_dest = blkh_nr_item(blkh);
317 /*dest_order = (last_first == LAST_TO_FIRST) ? 0 : nr_dest; */
318 /*src_order = (last_first == LAST_TO_FIRST) ? (nr_src - cpy_num + 1) : 0; */
319 (last_first == LAST_TO_FIRST) ? (dest_order = 0, src_order =
320 nr_src - cpy_num + 1) : (dest_order =
321 nr_dest,
322 src_order =
325 /* prepare space for cpy_num pointers */
326 dc = B_N_CHILD(dest, dest_order);
328 memmove(dc + cpy_num, dc, (nr_dest - dest_order) * DC_SIZE);
330 /* insert pointers */
331 memcpy(dc, B_N_CHILD(src, src_order), DC_SIZE * cpy_num);
333 /* prepare space for cpy_num - 1 item headers */
334 key = B_N_PDELIM_KEY(dest, dest_order);
335 memmove(key + cpy_num - 1, key,
336 KEY_SIZE * (nr_dest - dest_order) + DC_SIZE * (nr_dest +
337 cpy_num));
339 /* insert headers */
340 memcpy(key, B_N_PDELIM_KEY(src, src_order), KEY_SIZE * (cpy_num - 1));
342 /* sizes, item number */
343 set_blkh_nr_item(blkh, blkh_nr_item(blkh) + (cpy_num - 1));
344 set_blkh_free_space(blkh,
345 blkh_free_space(blkh) - (KEY_SIZE * (cpy_num - 1) +
346 DC_SIZE * cpy_num));
348 do_balance_mark_internal_dirty(dest_bi->tb, dest, 0);
350 /*&&&&&&&&&&&&&&&&&&&&&&&& */
351 check_internal(dest);
352 /*&&&&&&&&&&&&&&&&&&&&&&&& */
354 if (dest_bi->bi_parent) {
355 struct disk_child *t_dc;
356 t_dc = B_N_CHILD(dest_bi->bi_parent, dest_bi->bi_position);
357 put_dc_size(t_dc,
358 dc_size(t_dc) + (KEY_SIZE * (cpy_num - 1) +
359 DC_SIZE * cpy_num));
361 do_balance_mark_internal_dirty(dest_bi->tb, dest_bi->bi_parent,
363 /*&&&&&&&&&&&&&&&&&&&&&&&& */
364 check_internal(dest_bi->bi_parent);
365 /*&&&&&&&&&&&&&&&&&&&&&&&& */
370 /* Copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest.
371 * Delete cpy_num - del_par items and node pointers from buffer src.
372 * last_first == FIRST_TO_LAST means, that we copy/delete first items from src.
373 * last_first == LAST_TO_FIRST means, that we copy/delete last items from src.
375 static void internal_move_pointers_items(struct buffer_info *dest_bi,
376 struct buffer_info *src_bi,
377 int last_first, int cpy_num,
378 int del_par)
380 int first_pointer;
381 int first_item;
383 internal_copy_pointers_items(dest_bi, src_bi->bi_bh, last_first,
384 cpy_num);
386 if (last_first == FIRST_TO_LAST) { /* shift_left occurs */
387 first_pointer = 0;
388 first_item = 0;
389 /* delete cpy_num - del_par pointers and keys starting for pointers with first_pointer,
390 for key - with first_item */
391 internal_delete_pointers_items(src_bi, first_pointer,
392 first_item, cpy_num - del_par);
393 } else { /* shift_right occurs */
394 int i, j;
396 i = (cpy_num - del_par ==
397 (j =
398 B_NR_ITEMS(src_bi->bi_bh)) + 1) ? 0 : j - cpy_num +
399 del_par;
401 internal_delete_pointers_items(src_bi,
402 j + 1 - cpy_num + del_par, i,
403 cpy_num - del_par);
407 /* Insert n_src'th key of buffer src before n_dest'th key of buffer dest. */
408 static void internal_insert_key(struct buffer_info *dest_bi, int dest_position_before, /* insert key before key with n_dest number */
409 struct buffer_head *src, int src_position)
411 struct buffer_head *dest = dest_bi->bi_bh;
412 int nr;
413 struct block_head *blkh;
414 struct reiserfs_key *key;
416 RFALSE(dest == NULL || src == NULL,
417 "source(%p) or dest(%p) buffer is 0", src, dest);
418 RFALSE(dest_position_before < 0 || src_position < 0,
419 "source(%d) or dest(%d) key number less than 0",
420 src_position, dest_position_before);
421 RFALSE(dest_position_before > B_NR_ITEMS(dest) ||
422 src_position >= B_NR_ITEMS(src),
423 "invalid position in dest (%d (key number %d)) or in src (%d (key number %d))",
424 dest_position_before, B_NR_ITEMS(dest),
425 src_position, B_NR_ITEMS(src));
426 RFALSE(B_FREE_SPACE(dest) < KEY_SIZE,
427 "no enough free space (%d) in dest buffer", B_FREE_SPACE(dest));
429 blkh = B_BLK_HEAD(dest);
430 nr = blkh_nr_item(blkh);
432 /* prepare space for inserting key */
433 key = B_N_PDELIM_KEY(dest, dest_position_before);
434 memmove(key + 1, key,
435 (nr - dest_position_before) * KEY_SIZE + (nr + 1) * DC_SIZE);
437 /* insert key */
438 memcpy(key, B_N_PDELIM_KEY(src, src_position), KEY_SIZE);
440 /* Change dirt, free space, item number fields. */
442 set_blkh_nr_item(blkh, blkh_nr_item(blkh) + 1);
443 set_blkh_free_space(blkh, blkh_free_space(blkh) - KEY_SIZE);
445 do_balance_mark_internal_dirty(dest_bi->tb, dest, 0);
447 if (dest_bi->bi_parent) {
448 struct disk_child *t_dc;
449 t_dc = B_N_CHILD(dest_bi->bi_parent, dest_bi->bi_position);
450 put_dc_size(t_dc, dc_size(t_dc) + KEY_SIZE);
452 do_balance_mark_internal_dirty(dest_bi->tb, dest_bi->bi_parent,
457 /* Insert d_key'th (delimiting) key from buffer cfl to tail of dest.
458 * Copy pointer_amount node pointers and pointer_amount - 1 items from buffer src to buffer dest.
459 * Replace d_key'th key in buffer cfl.
460 * Delete pointer_amount items and node pointers from buffer src.
462 /* this can be invoked both to shift from S to L and from R to S */
463 static void internal_shift_left(int mode, /* INTERNAL_FROM_S_TO_L | INTERNAL_FROM_R_TO_S */
464 struct tree_balance *tb,
465 int h, int pointer_amount)
467 struct buffer_info dest_bi, src_bi;
468 struct buffer_head *cf;
469 int d_key_position;
471 internal_define_dest_src_infos(mode, tb, h, &dest_bi, &src_bi,
472 &d_key_position, &cf);
474 /*printk("pointer_amount = %d\n",pointer_amount); */
476 if (pointer_amount) {
477 /* insert delimiting key from common father of dest and src to node dest into position B_NR_ITEM(dest) */
478 internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf,
479 d_key_position);
481 if (B_NR_ITEMS(src_bi.bi_bh) == pointer_amount - 1) {
482 if (src_bi.bi_position /*src->b_item_order */ == 0)
483 replace_key(tb, cf, d_key_position,
484 src_bi.
485 bi_parent /*src->b_parent */ , 0);
486 } else
487 replace_key(tb, cf, d_key_position, src_bi.bi_bh,
488 pointer_amount - 1);
490 /* last parameter is del_parameter */
491 internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST,
492 pointer_amount, 0);
496 /* Insert delimiting key to L[h].
497 * Copy n node pointers and n - 1 items from buffer S[h] to L[h].
498 * Delete n - 1 items and node pointers from buffer S[h].
500 /* it always shifts from S[h] to L[h] */
501 static void internal_shift1_left(struct tree_balance *tb,
502 int h, int pointer_amount)
504 struct buffer_info dest_bi, src_bi;
505 struct buffer_head *cf;
506 int d_key_position;
508 internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
509 &dest_bi, &src_bi, &d_key_position, &cf);
511 if (pointer_amount > 0) /* insert lkey[h]-th key from CFL[h] to left neighbor L[h] */
512 internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf,
513 d_key_position);
514 /* internal_insert_key (tb->L[h], B_NR_ITEM(tb->L[h]), tb->CFL[h], tb->lkey[h]); */
516 /* last parameter is del_parameter */
517 internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST,
518 pointer_amount, 1);
519 /* internal_move_pointers_items (tb->L[h], tb->S[h], FIRST_TO_LAST, pointer_amount, 1); */
522 /* Insert d_key'th (delimiting) key from buffer cfr to head of dest.
523 * Copy n node pointers and n - 1 items from buffer src to buffer dest.
524 * Replace d_key'th key in buffer cfr.
525 * Delete n items and node pointers from buffer src.
527 static void internal_shift_right(int mode, /* INTERNAL_FROM_S_TO_R | INTERNAL_FROM_L_TO_S */
528 struct tree_balance *tb,
529 int h, int pointer_amount)
531 struct buffer_info dest_bi, src_bi;
532 struct buffer_head *cf;
533 int d_key_position;
534 int nr;
536 internal_define_dest_src_infos(mode, tb, h, &dest_bi, &src_bi,
537 &d_key_position, &cf);
539 nr = B_NR_ITEMS(src_bi.bi_bh);
541 if (pointer_amount > 0) {
542 /* insert delimiting key from common father of dest and src to dest node into position 0 */
543 internal_insert_key(&dest_bi, 0, cf, d_key_position);
544 if (nr == pointer_amount - 1) {
545 RFALSE(src_bi.bi_bh != PATH_H_PBUFFER(tb->tb_path, h) /*tb->S[h] */ ||
546 dest_bi.bi_bh != tb->R[h],
547 "src (%p) must be == tb->S[h](%p) when it disappears",
548 src_bi.bi_bh, PATH_H_PBUFFER(tb->tb_path, h));
549 /* when S[h] disappers replace left delemiting key as well */
550 if (tb->CFL[h])
551 replace_key(tb, cf, d_key_position, tb->CFL[h],
552 tb->lkey[h]);
553 } else
554 replace_key(tb, cf, d_key_position, src_bi.bi_bh,
555 nr - pointer_amount);
558 /* last parameter is del_parameter */
559 internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST,
560 pointer_amount, 0);
563 /* Insert delimiting key to R[h].
564 * Copy n node pointers and n - 1 items from buffer S[h] to R[h].
565 * Delete n - 1 items and node pointers from buffer S[h].
567 /* it always shift from S[h] to R[h] */
568 static void internal_shift1_right(struct tree_balance *tb,
569 int h, int pointer_amount)
571 struct buffer_info dest_bi, src_bi;
572 struct buffer_head *cf;
573 int d_key_position;
575 internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
576 &dest_bi, &src_bi, &d_key_position, &cf);
578 if (pointer_amount > 0) /* insert rkey from CFR[h] to right neighbor R[h] */
579 internal_insert_key(&dest_bi, 0, cf, d_key_position);
580 /* internal_insert_key (tb->R[h], 0, tb->CFR[h], tb->rkey[h]); */
582 /* last parameter is del_parameter */
583 internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST,
584 pointer_amount, 1);
585 /* internal_move_pointers_items (tb->R[h], tb->S[h], LAST_TO_FIRST, pointer_amount, 1); */
588 /* Delete insert_num node pointers together with their left items
589 * and balance current node.*/
590 static void balance_internal_when_delete(struct tree_balance *tb,
591 int h, int child_pos)
593 int insert_num;
594 int n;
595 struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h);
596 struct buffer_info bi;
598 insert_num = tb->insert_size[h] / ((int)(DC_SIZE + KEY_SIZE));
600 /* delete child-node-pointer(s) together with their left item(s) */
601 bi.tb = tb;
602 bi.bi_bh = tbSh;
603 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h);
604 bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
606 internal_delete_childs(&bi, child_pos, -insert_num);
608 RFALSE(tb->blknum[h] > 1,
609 "tb->blknum[%d]=%d when insert_size < 0", h, tb->blknum[h]);
611 n = B_NR_ITEMS(tbSh);
613 if (tb->lnum[h] == 0 && tb->rnum[h] == 0) {
614 if (tb->blknum[h] == 0) {
615 /* node S[h] (root of the tree) is empty now */
616 struct buffer_head *new_root;
618 RFALSE(n
619 || B_FREE_SPACE(tbSh) !=
620 MAX_CHILD_SIZE(tbSh) - DC_SIZE,
621 "buffer must have only 0 keys (%d)", n);
622 RFALSE(bi.bi_parent, "root has parent (%p)",
623 bi.bi_parent);
625 /* choose a new root */
626 if (!tb->L[h - 1] || !B_NR_ITEMS(tb->L[h - 1]))
627 new_root = tb->R[h - 1];
628 else
629 new_root = tb->L[h - 1];
630 /* switch super block's tree root block number to the new value */
631 PUT_SB_ROOT_BLOCK(tb->tb_sb, new_root->b_blocknr);
632 //REISERFS_SB(tb->tb_sb)->s_rs->s_tree_height --;
633 PUT_SB_TREE_HEIGHT(tb->tb_sb,
634 SB_TREE_HEIGHT(tb->tb_sb) - 1);
636 do_balance_mark_sb_dirty(tb,
637 REISERFS_SB(tb->tb_sb)->s_sbh,
639 /*&&&&&&&&&&&&&&&&&&&&&& */
640 if (h > 1)
641 /* use check_internal if new root is an internal node */
642 check_internal(new_root);
643 /*&&&&&&&&&&&&&&&&&&&&&& */
645 /* do what is needed for buffer thrown from tree */
646 reiserfs_invalidate_buffer(tb, tbSh);
647 return;
649 return;
652 if (tb->L[h] && tb->lnum[h] == -B_NR_ITEMS(tb->L[h]) - 1) { /* join S[h] with L[h] */
654 RFALSE(tb->rnum[h] != 0,
655 "invalid tb->rnum[%d]==%d when joining S[h] with L[h]",
656 h, tb->rnum[h]);
658 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, n + 1);
659 reiserfs_invalidate_buffer(tb, tbSh);
661 return;
664 if (tb->R[h] && tb->rnum[h] == -B_NR_ITEMS(tb->R[h]) - 1) { /* join S[h] with R[h] */
665 RFALSE(tb->lnum[h] != 0,
666 "invalid tb->lnum[%d]==%d when joining S[h] with R[h]",
667 h, tb->lnum[h]);
669 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, n + 1);
671 reiserfs_invalidate_buffer(tb, tbSh);
672 return;
675 if (tb->lnum[h] < 0) { /* borrow from left neighbor L[h] */
676 RFALSE(tb->rnum[h] != 0,
677 "wrong tb->rnum[%d]==%d when borrow from L[h]", h,
678 tb->rnum[h]);
679 /*internal_shift_right (tb, h, tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], -tb->lnum[h]); */
680 internal_shift_right(INTERNAL_SHIFT_FROM_L_TO_S, tb, h,
681 -tb->lnum[h]);
682 return;
685 if (tb->rnum[h] < 0) { /* borrow from right neighbor R[h] */
686 RFALSE(tb->lnum[h] != 0,
687 "invalid tb->lnum[%d]==%d when borrow from R[h]",
688 h, tb->lnum[h]);
689 internal_shift_left(INTERNAL_SHIFT_FROM_R_TO_S, tb, h, -tb->rnum[h]); /*tb->S[h], tb->CFR[h], tb->rkey[h], tb->R[h], -tb->rnum[h]); */
690 return;
693 if (tb->lnum[h] > 0) { /* split S[h] into two parts and put them into neighbors */
694 RFALSE(tb->rnum[h] == 0 || tb->lnum[h] + tb->rnum[h] != n + 1,
695 "invalid tb->lnum[%d]==%d or tb->rnum[%d]==%d when S[h](item number == %d) is split between them",
696 h, tb->lnum[h], h, tb->rnum[h], n);
698 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, tb->lnum[h]); /*tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], tb->lnum[h]); */
699 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
700 tb->rnum[h]);
702 reiserfs_invalidate_buffer(tb, tbSh);
704 return;
706 reiserfs_panic(tb->tb_sb,
707 "balance_internal_when_delete: unexpected tb->lnum[%d]==%d or tb->rnum[%d]==%d",
708 h, tb->lnum[h], h, tb->rnum[h]);
711 /* Replace delimiting key of buffers L[h] and S[h] by the given key.*/
712 static void replace_lkey(struct tree_balance *tb, int h, struct item_head *key)
714 RFALSE(tb->L[h] == NULL || tb->CFL[h] == NULL,
715 "L[h](%p) and CFL[h](%p) must exist in replace_lkey",
716 tb->L[h], tb->CFL[h]);
718 if (B_NR_ITEMS(PATH_H_PBUFFER(tb->tb_path, h)) == 0)
719 return;
721 memcpy(B_N_PDELIM_KEY(tb->CFL[h], tb->lkey[h]), key, KEY_SIZE);
723 do_balance_mark_internal_dirty(tb, tb->CFL[h], 0);
726 /* Replace delimiting key of buffers S[h] and R[h] by the given key.*/
727 static void replace_rkey(struct tree_balance *tb, int h, struct item_head *key)
729 RFALSE(tb->R[h] == NULL || tb->CFR[h] == NULL,
730 "R[h](%p) and CFR[h](%p) must exist in replace_rkey",
731 tb->R[h], tb->CFR[h]);
732 RFALSE(B_NR_ITEMS(tb->R[h]) == 0,
733 "R[h] can not be empty if it exists (item number=%d)",
734 B_NR_ITEMS(tb->R[h]));
736 memcpy(B_N_PDELIM_KEY(tb->CFR[h], tb->rkey[h]), key, KEY_SIZE);
738 do_balance_mark_internal_dirty(tb, tb->CFR[h], 0);
741 int balance_internal(struct tree_balance *tb, /* tree_balance structure */
742 int h, /* level of the tree */
743 int child_pos, struct item_head *insert_key, /* key for insertion on higher level */
744 struct buffer_head **insert_ptr /* node for insertion on higher level */
746 /* if inserting/pasting
748 child_pos is the position of the node-pointer in S[h] that *
749 pointed to S[h-1] before balancing of the h-1 level; *
750 this means that new pointers and items must be inserted AFTER *
751 child_pos
753 else
755 it is the position of the leftmost pointer that must be deleted (together with
756 its corresponding key to the left of the pointer)
757 as a result of the previous level's balancing.
761 struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h);
762 struct buffer_info bi;
763 int order; /* we return this: it is 0 if there is no S[h], else it is tb->S[h]->b_item_order */
764 int insert_num, n, k;
765 struct buffer_head *S_new;
766 struct item_head new_insert_key;
767 struct buffer_head *new_insert_ptr = NULL;
768 struct item_head *new_insert_key_addr = insert_key;
770 RFALSE(h < 1, "h (%d) can not be < 1 on internal level", h);
772 PROC_INFO_INC(tb->tb_sb, balance_at[h]);
774 order =
775 (tbSh) ? PATH_H_POSITION(tb->tb_path,
776 h + 1) /*tb->S[h]->b_item_order */ : 0;
778 /* Using insert_size[h] calculate the number insert_num of items
779 that must be inserted to or deleted from S[h]. */
780 insert_num = tb->insert_size[h] / ((int)(KEY_SIZE + DC_SIZE));
782 /* Check whether insert_num is proper * */
783 RFALSE(insert_num < -2 || insert_num > 2,
784 "incorrect number of items inserted to the internal node (%d)",
785 insert_num);
786 RFALSE(h > 1 && (insert_num > 1 || insert_num < -1),
787 "incorrect number of items (%d) inserted to the internal node on a level (h=%d) higher than last internal level",
788 insert_num, h);
790 /* Make balance in case insert_num < 0 */
791 if (insert_num < 0) {
792 balance_internal_when_delete(tb, h, child_pos);
793 return order;
796 k = 0;
797 if (tb->lnum[h] > 0) {
798 /* shift lnum[h] items from S[h] to the left neighbor L[h].
799 check how many of new items fall into L[h] or CFL[h] after
800 shifting */
801 n = B_NR_ITEMS(tb->L[h]); /* number of items in L[h] */
802 if (tb->lnum[h] <= child_pos) {
803 /* new items don't fall into L[h] or CFL[h] */
804 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
805 tb->lnum[h]);
806 /*internal_shift_left (tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,tb->lnum[h]); */
807 child_pos -= tb->lnum[h];
808 } else if (tb->lnum[h] > child_pos + insert_num) {
809 /* all new items fall into L[h] */
810 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
811 tb->lnum[h] - insert_num);
812 /* internal_shift_left(tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,
813 tb->lnum[h]-insert_num);
815 /* insert insert_num keys and node-pointers into L[h] */
816 bi.tb = tb;
817 bi.bi_bh = tb->L[h];
818 bi.bi_parent = tb->FL[h];
819 bi.bi_position = get_left_neighbor_position(tb, h);
820 internal_insert_childs(&bi,
821 /*tb->L[h], tb->S[h-1]->b_next */
822 n + child_pos + 1,
823 insert_num, insert_key,
824 insert_ptr);
826 insert_num = 0;
827 } else {
828 struct disk_child *dc;
830 /* some items fall into L[h] or CFL[h], but some don't fall */
831 internal_shift1_left(tb, h, child_pos + 1);
832 /* calculate number of new items that fall into L[h] */
833 k = tb->lnum[h] - child_pos - 1;
834 bi.tb = tb;
835 bi.bi_bh = tb->L[h];
836 bi.bi_parent = tb->FL[h];
837 bi.bi_position = get_left_neighbor_position(tb, h);
838 internal_insert_childs(&bi,
839 /*tb->L[h], tb->S[h-1]->b_next, */
840 n + child_pos + 1, k,
841 insert_key, insert_ptr);
843 replace_lkey(tb, h, insert_key + k);
845 /* replace the first node-ptr in S[h] by node-ptr to insert_ptr[k] */
846 dc = B_N_CHILD(tbSh, 0);
847 put_dc_size(dc,
848 MAX_CHILD_SIZE(insert_ptr[k]) -
849 B_FREE_SPACE(insert_ptr[k]));
850 put_dc_block_number(dc, insert_ptr[k]->b_blocknr);
852 do_balance_mark_internal_dirty(tb, tbSh, 0);
854 k++;
855 insert_key += k;
856 insert_ptr += k;
857 insert_num -= k;
858 child_pos = 0;
861 /* tb->lnum[h] > 0 */
862 if (tb->rnum[h] > 0) {
863 /*shift rnum[h] items from S[h] to the right neighbor R[h] */
864 /* check how many of new items fall into R or CFR after shifting */
865 n = B_NR_ITEMS(tbSh); /* number of items in S[h] */
866 if (n - tb->rnum[h] >= child_pos)
867 /* new items fall into S[h] */
868 /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],tb->rnum[h]); */
869 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
870 tb->rnum[h]);
871 else if (n + insert_num - tb->rnum[h] < child_pos) {
872 /* all new items fall into R[h] */
873 /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],
874 tb->rnum[h] - insert_num); */
875 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
876 tb->rnum[h] - insert_num);
878 /* insert insert_num keys and node-pointers into R[h] */
879 bi.tb = tb;
880 bi.bi_bh = tb->R[h];
881 bi.bi_parent = tb->FR[h];
882 bi.bi_position = get_right_neighbor_position(tb, h);
883 internal_insert_childs(&bi,
884 /*tb->R[h],tb->S[h-1]->b_next */
885 child_pos - n - insert_num +
886 tb->rnum[h] - 1,
887 insert_num, insert_key,
888 insert_ptr);
889 insert_num = 0;
890 } else {
891 struct disk_child *dc;
893 /* one of the items falls into CFR[h] */
894 internal_shift1_right(tb, h, n - child_pos + 1);
895 /* calculate number of new items that fall into R[h] */
896 k = tb->rnum[h] - n + child_pos - 1;
897 bi.tb = tb;
898 bi.bi_bh = tb->R[h];
899 bi.bi_parent = tb->FR[h];
900 bi.bi_position = get_right_neighbor_position(tb, h);
901 internal_insert_childs(&bi,
902 /*tb->R[h], tb->R[h]->b_child, */
903 0, k, insert_key + 1,
904 insert_ptr + 1);
906 replace_rkey(tb, h, insert_key + insert_num - k - 1);
908 /* replace the first node-ptr in R[h] by node-ptr insert_ptr[insert_num-k-1] */
909 dc = B_N_CHILD(tb->R[h], 0);
910 put_dc_size(dc,
911 MAX_CHILD_SIZE(insert_ptr
912 [insert_num - k - 1]) -
913 B_FREE_SPACE(insert_ptr
914 [insert_num - k - 1]));
915 put_dc_block_number(dc,
916 insert_ptr[insert_num - k -
917 1]->b_blocknr);
919 do_balance_mark_internal_dirty(tb, tb->R[h], 0);
921 insert_num -= (k + 1);
925 /** Fill new node that appears instead of S[h] **/
926 RFALSE(tb->blknum[h] > 2, "blknum can not be > 2 for internal level");
927 RFALSE(tb->blknum[h] < 0, "blknum can not be < 0");
929 if (!tb->blknum[h]) { /* node S[h] is empty now */
930 RFALSE(!tbSh, "S[h] is equal NULL");
932 /* do what is needed for buffer thrown from tree */
933 reiserfs_invalidate_buffer(tb, tbSh);
934 return order;
937 if (!tbSh) {
938 /* create new root */
939 struct disk_child *dc;
940 struct buffer_head *tbSh_1 = PATH_H_PBUFFER(tb->tb_path, h - 1);
941 struct block_head *blkh;
943 if (tb->blknum[h] != 1)
944 reiserfs_panic(NULL,
945 "balance_internal: One new node required for creating the new root");
946 /* S[h] = empty buffer from the list FEB. */
947 tbSh = get_FEB(tb);
948 blkh = B_BLK_HEAD(tbSh);
949 set_blkh_level(blkh, h + 1);
951 /* Put the unique node-pointer to S[h] that points to S[h-1]. */
953 dc = B_N_CHILD(tbSh, 0);
954 put_dc_block_number(dc, tbSh_1->b_blocknr);
955 put_dc_size(dc,
956 (MAX_CHILD_SIZE(tbSh_1) - B_FREE_SPACE(tbSh_1)));
958 tb->insert_size[h] -= DC_SIZE;
959 set_blkh_free_space(blkh, blkh_free_space(blkh) - DC_SIZE);
961 do_balance_mark_internal_dirty(tb, tbSh, 0);
963 /*&&&&&&&&&&&&&&&&&&&&&&&& */
964 check_internal(tbSh);
965 /*&&&&&&&&&&&&&&&&&&&&&&&& */
967 /* put new root into path structure */
968 PATH_OFFSET_PBUFFER(tb->tb_path, ILLEGAL_PATH_ELEMENT_OFFSET) =
969 tbSh;
971 /* Change root in structure super block. */
972 PUT_SB_ROOT_BLOCK(tb->tb_sb, tbSh->b_blocknr);
973 PUT_SB_TREE_HEIGHT(tb->tb_sb, SB_TREE_HEIGHT(tb->tb_sb) + 1);
974 do_balance_mark_sb_dirty(tb, REISERFS_SB(tb->tb_sb)->s_sbh, 1);
977 if (tb->blknum[h] == 2) {
978 int snum;
979 struct buffer_info dest_bi, src_bi;
981 /* S_new = free buffer from list FEB */
982 S_new = get_FEB(tb);
984 set_blkh_level(B_BLK_HEAD(S_new), h + 1);
986 dest_bi.tb = tb;
987 dest_bi.bi_bh = S_new;
988 dest_bi.bi_parent = NULL;
989 dest_bi.bi_position = 0;
990 src_bi.tb = tb;
991 src_bi.bi_bh = tbSh;
992 src_bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h);
993 src_bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
995 n = B_NR_ITEMS(tbSh); /* number of items in S[h] */
996 snum = (insert_num + n + 1) / 2;
997 if (n - snum >= child_pos) {
998 /* new items don't fall into S_new */
999 /* store the delimiting key for the next level */
1000 /* new_insert_key = (n - snum)'th key in S[h] */
1001 memcpy(&new_insert_key, B_N_PDELIM_KEY(tbSh, n - snum),
1002 KEY_SIZE);
1003 /* last parameter is del_par */
1004 internal_move_pointers_items(&dest_bi, &src_bi,
1005 LAST_TO_FIRST, snum, 0);
1006 /* internal_move_pointers_items(S_new, tbSh, LAST_TO_FIRST, snum, 0); */
1007 } else if (n + insert_num - snum < child_pos) {
1008 /* all new items fall into S_new */
1009 /* store the delimiting key for the next level */
1010 /* new_insert_key = (n + insert_item - snum)'th key in S[h] */
1011 memcpy(&new_insert_key,
1012 B_N_PDELIM_KEY(tbSh, n + insert_num - snum),
1013 KEY_SIZE);
1014 /* last parameter is del_par */
1015 internal_move_pointers_items(&dest_bi, &src_bi,
1016 LAST_TO_FIRST,
1017 snum - insert_num, 0);
1018 /* internal_move_pointers_items(S_new,tbSh,1,snum - insert_num,0); */
1020 /* insert insert_num keys and node-pointers into S_new */
1021 internal_insert_childs(&dest_bi,
1022 /*S_new,tb->S[h-1]->b_next, */
1023 child_pos - n - insert_num +
1024 snum - 1,
1025 insert_num, insert_key,
1026 insert_ptr);
1028 insert_num = 0;
1029 } else {
1030 struct disk_child *dc;
1032 /* some items fall into S_new, but some don't fall */
1033 /* last parameter is del_par */
1034 internal_move_pointers_items(&dest_bi, &src_bi,
1035 LAST_TO_FIRST,
1036 n - child_pos + 1, 1);
1037 /* internal_move_pointers_items(S_new,tbSh,1,n - child_pos + 1,1); */
1038 /* calculate number of new items that fall into S_new */
1039 k = snum - n + child_pos - 1;
1041 internal_insert_childs(&dest_bi, /*S_new, */ 0, k,
1042 insert_key + 1, insert_ptr + 1);
1044 /* new_insert_key = insert_key[insert_num - k - 1] */
1045 memcpy(&new_insert_key, insert_key + insert_num - k - 1,
1046 KEY_SIZE);
1047 /* replace first node-ptr in S_new by node-ptr to insert_ptr[insert_num-k-1] */
1049 dc = B_N_CHILD(S_new, 0);
1050 put_dc_size(dc,
1051 (MAX_CHILD_SIZE
1052 (insert_ptr[insert_num - k - 1]) -
1053 B_FREE_SPACE(insert_ptr
1054 [insert_num - k - 1])));
1055 put_dc_block_number(dc,
1056 insert_ptr[insert_num - k -
1057 1]->b_blocknr);
1059 do_balance_mark_internal_dirty(tb, S_new, 0);
1061 insert_num -= (k + 1);
1063 /* new_insert_ptr = node_pointer to S_new */
1064 new_insert_ptr = S_new;
1066 RFALSE(!buffer_journaled(S_new) || buffer_journal_dirty(S_new)
1067 || buffer_dirty(S_new), "cm-00001: bad S_new (%b)",
1068 S_new);
1070 // S_new is released in unfix_nodes
1073 n = B_NR_ITEMS(tbSh); /*number of items in S[h] */
1075 if (0 <= child_pos && child_pos <= n && insert_num > 0) {
1076 bi.tb = tb;
1077 bi.bi_bh = tbSh;
1078 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h);
1079 bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
1080 internal_insert_childs(&bi, /*tbSh, */
1081 /* ( tb->S[h-1]->b_parent == tb->S[h] ) ? tb->S[h-1]->b_next : tb->S[h]->b_child->b_next, */
1082 child_pos, insert_num, insert_key,
1083 insert_ptr);
1086 memcpy(new_insert_key_addr, &new_insert_key, KEY_SIZE);
1087 insert_ptr[0] = new_insert_ptr;
1089 return order;