mm: fix exec activate_mm vs TLB shootdown and lazy tlb switching race
[linux/fpc-iii.git] / fs / reiserfs / ibalance.c
blob5db6f45b3fed64cf58034dea3efc3a09a0f4c497
1 /*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
5 #include <linux/uaccess.h>
6 #include <linux/string.h>
7 #include <linux/time.h>
8 #include "reiserfs.h"
9 #include <linux/buffer_head.h>
11 /* this is one and only function that is used outside (do_balance.c) */
12 int balance_internal(struct tree_balance *,
13 int, int, struct item_head *, struct buffer_head **);
16 * modes of internal_shift_left, internal_shift_right and
17 * internal_insert_childs
19 #define INTERNAL_SHIFT_FROM_S_TO_L 0
20 #define INTERNAL_SHIFT_FROM_R_TO_S 1
21 #define INTERNAL_SHIFT_FROM_L_TO_S 2
22 #define INTERNAL_SHIFT_FROM_S_TO_R 3
23 #define INTERNAL_INSERT_TO_S 4
24 #define INTERNAL_INSERT_TO_L 5
25 #define INTERNAL_INSERT_TO_R 6
27 static void internal_define_dest_src_infos(int shift_mode,
28 struct tree_balance *tb,
29 int h,
30 struct buffer_info *dest_bi,
31 struct buffer_info *src_bi,
32 int *d_key, struct buffer_head **cf)
34 memset(dest_bi, 0, sizeof(struct buffer_info));
35 memset(src_bi, 0, sizeof(struct buffer_info));
36 /* define dest, src, dest parent, dest position */
37 switch (shift_mode) {
39 /* used in internal_shift_left */
40 case INTERNAL_SHIFT_FROM_S_TO_L:
41 src_bi->tb = tb;
42 src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
43 src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
44 src_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
45 dest_bi->tb = tb;
46 dest_bi->bi_bh = tb->L[h];
47 dest_bi->bi_parent = tb->FL[h];
48 dest_bi->bi_position = get_left_neighbor_position(tb, h);
49 *d_key = tb->lkey[h];
50 *cf = tb->CFL[h];
51 break;
52 case INTERNAL_SHIFT_FROM_L_TO_S:
53 src_bi->tb = tb;
54 src_bi->bi_bh = tb->L[h];
55 src_bi->bi_parent = tb->FL[h];
56 src_bi->bi_position = get_left_neighbor_position(tb, h);
57 dest_bi->tb = tb;
58 dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
59 dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
60 /* dest position is analog of dest->b_item_order */
61 dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
62 *d_key = tb->lkey[h];
63 *cf = tb->CFL[h];
64 break;
66 /* used in internal_shift_left */
67 case INTERNAL_SHIFT_FROM_R_TO_S:
68 src_bi->tb = tb;
69 src_bi->bi_bh = tb->R[h];
70 src_bi->bi_parent = tb->FR[h];
71 src_bi->bi_position = get_right_neighbor_position(tb, h);
72 dest_bi->tb = tb;
73 dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
74 dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
75 dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
76 *d_key = tb->rkey[h];
77 *cf = tb->CFR[h];
78 break;
80 case INTERNAL_SHIFT_FROM_S_TO_R:
81 src_bi->tb = tb;
82 src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
83 src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
84 src_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
85 dest_bi->tb = tb;
86 dest_bi->bi_bh = tb->R[h];
87 dest_bi->bi_parent = tb->FR[h];
88 dest_bi->bi_position = get_right_neighbor_position(tb, h);
89 *d_key = tb->rkey[h];
90 *cf = tb->CFR[h];
91 break;
93 case INTERNAL_INSERT_TO_L:
94 dest_bi->tb = tb;
95 dest_bi->bi_bh = tb->L[h];
96 dest_bi->bi_parent = tb->FL[h];
97 dest_bi->bi_position = get_left_neighbor_position(tb, h);
98 break;
100 case INTERNAL_INSERT_TO_S:
101 dest_bi->tb = tb;
102 dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
103 dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
104 dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
105 break;
107 case INTERNAL_INSERT_TO_R:
108 dest_bi->tb = tb;
109 dest_bi->bi_bh = tb->R[h];
110 dest_bi->bi_parent = tb->FR[h];
111 dest_bi->bi_position = get_right_neighbor_position(tb, h);
112 break;
114 default:
115 reiserfs_panic(tb->tb_sb, "ibalance-1",
116 "shift type is unknown (%d)",
117 shift_mode);
122 * Insert count node pointers into buffer cur before position to + 1.
123 * Insert count items into buffer cur before position to.
124 * Items and node pointers are specified by inserted and bh respectively.
126 static void internal_insert_childs(struct buffer_info *cur_bi,
127 int to, int count,
128 struct item_head *inserted,
129 struct buffer_head **bh)
131 struct buffer_head *cur = cur_bi->bi_bh;
132 struct block_head *blkh;
133 int nr;
134 struct reiserfs_key *ih;
135 struct disk_child new_dc[2];
136 struct disk_child *dc;
137 int i;
139 if (count <= 0)
140 return;
142 blkh = B_BLK_HEAD(cur);
143 nr = blkh_nr_item(blkh);
145 RFALSE(count > 2, "too many children (%d) are to be inserted", count);
146 RFALSE(B_FREE_SPACE(cur) < count * (KEY_SIZE + DC_SIZE),
147 "no enough free space (%d), needed %d bytes",
148 B_FREE_SPACE(cur), count * (KEY_SIZE + DC_SIZE));
150 /* prepare space for count disk_child */
151 dc = B_N_CHILD(cur, to + 1);
153 memmove(dc + count, dc, (nr + 1 - (to + 1)) * DC_SIZE);
155 /* copy to_be_insert disk children */
156 for (i = 0; i < count; i++) {
157 put_dc_size(&new_dc[i],
158 MAX_CHILD_SIZE(bh[i]) - B_FREE_SPACE(bh[i]));
159 put_dc_block_number(&new_dc[i], bh[i]->b_blocknr);
161 memcpy(dc, new_dc, DC_SIZE * count);
163 /* prepare space for count items */
164 ih = internal_key(cur, ((to == -1) ? 0 : to));
166 memmove(ih + count, ih,
167 (nr - to) * KEY_SIZE + (nr + 1 + count) * DC_SIZE);
169 /* copy item headers (keys) */
170 memcpy(ih, inserted, KEY_SIZE);
171 if (count > 1)
172 memcpy(ih + 1, inserted + 1, KEY_SIZE);
174 /* sizes, item number */
175 set_blkh_nr_item(blkh, blkh_nr_item(blkh) + count);
176 set_blkh_free_space(blkh,
177 blkh_free_space(blkh) - count * (DC_SIZE +
178 KEY_SIZE));
180 do_balance_mark_internal_dirty(cur_bi->tb, cur, 0);
182 /*&&&&&&&&&&&&&&&&&&&&&&&& */
183 check_internal(cur);
184 /*&&&&&&&&&&&&&&&&&&&&&&&& */
186 if (cur_bi->bi_parent) {
187 struct disk_child *t_dc =
188 B_N_CHILD(cur_bi->bi_parent, cur_bi->bi_position);
189 put_dc_size(t_dc,
190 dc_size(t_dc) + (count * (DC_SIZE + KEY_SIZE)));
191 do_balance_mark_internal_dirty(cur_bi->tb, cur_bi->bi_parent,
194 /*&&&&&&&&&&&&&&&&&&&&&&&& */
195 check_internal(cur_bi->bi_parent);
196 /*&&&&&&&&&&&&&&&&&&&&&&&& */
202 * Delete del_num items and node pointers from buffer cur starting from
203 * the first_i'th item and first_p'th pointers respectively.
205 static void internal_delete_pointers_items(struct buffer_info *cur_bi,
206 int first_p,
207 int first_i, int del_num)
209 struct buffer_head *cur = cur_bi->bi_bh;
210 int nr;
211 struct block_head *blkh;
212 struct reiserfs_key *key;
213 struct disk_child *dc;
215 RFALSE(cur == NULL, "buffer is 0");
216 RFALSE(del_num < 0,
217 "negative number of items (%d) can not be deleted", del_num);
218 RFALSE(first_p < 0 || first_p + del_num > B_NR_ITEMS(cur) + 1
219 || first_i < 0,
220 "first pointer order (%d) < 0 or "
221 "no so many pointers (%d), only (%d) or "
222 "first key order %d < 0", first_p, first_p + del_num,
223 B_NR_ITEMS(cur) + 1, first_i);
224 if (del_num == 0)
225 return;
227 blkh = B_BLK_HEAD(cur);
228 nr = blkh_nr_item(blkh);
230 if (first_p == 0 && del_num == nr + 1) {
231 RFALSE(first_i != 0,
232 "1st deleted key must have order 0, not %d", first_i);
233 make_empty_node(cur_bi);
234 return;
237 RFALSE(first_i + del_num > B_NR_ITEMS(cur),
238 "first_i = %d del_num = %d "
239 "no so many keys (%d) in the node (%b)(%z)",
240 first_i, del_num, first_i + del_num, cur, cur);
242 /* deleting */
243 dc = B_N_CHILD(cur, first_p);
245 memmove(dc, dc + del_num, (nr + 1 - first_p - del_num) * DC_SIZE);
246 key = internal_key(cur, first_i);
247 memmove(key, key + del_num,
248 (nr - first_i - del_num) * KEY_SIZE + (nr + 1 -
249 del_num) * DC_SIZE);
251 /* sizes, item number */
252 set_blkh_nr_item(blkh, blkh_nr_item(blkh) - del_num);
253 set_blkh_free_space(blkh,
254 blkh_free_space(blkh) +
255 (del_num * (KEY_SIZE + DC_SIZE)));
257 do_balance_mark_internal_dirty(cur_bi->tb, cur, 0);
258 /*&&&&&&&&&&&&&&&&&&&&&&& */
259 check_internal(cur);
260 /*&&&&&&&&&&&&&&&&&&&&&&& */
262 if (cur_bi->bi_parent) {
263 struct disk_child *t_dc;
264 t_dc = B_N_CHILD(cur_bi->bi_parent, cur_bi->bi_position);
265 put_dc_size(t_dc,
266 dc_size(t_dc) - (del_num * (KEY_SIZE + DC_SIZE)));
268 do_balance_mark_internal_dirty(cur_bi->tb, cur_bi->bi_parent,
270 /*&&&&&&&&&&&&&&&&&&&&&&&& */
271 check_internal(cur_bi->bi_parent);
272 /*&&&&&&&&&&&&&&&&&&&&&&&& */
276 /* delete n node pointers and items starting from given position */
277 static void internal_delete_childs(struct buffer_info *cur_bi, int from, int n)
279 int i_from;
281 i_from = (from == 0) ? from : from - 1;
284 * delete n pointers starting from `from' position in CUR;
285 * delete n keys starting from 'i_from' position in CUR;
287 internal_delete_pointers_items(cur_bi, from, i_from, n);
291 * copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer
292 * dest
293 * last_first == FIRST_TO_LAST means that we copy first items
294 * from src to tail of dest
295 * last_first == LAST_TO_FIRST means that we copy last items
296 * from src to head of dest
298 static void internal_copy_pointers_items(struct buffer_info *dest_bi,
299 struct buffer_head *src,
300 int last_first, int cpy_num)
303 * ATTENTION! Number of node pointers in DEST is equal to number
304 * of items in DEST as delimiting key have already inserted to
305 * buffer dest.
307 struct buffer_head *dest = dest_bi->bi_bh;
308 int nr_dest, nr_src;
309 int dest_order, src_order;
310 struct block_head *blkh;
311 struct reiserfs_key *key;
312 struct disk_child *dc;
314 nr_src = B_NR_ITEMS(src);
316 RFALSE(dest == NULL || src == NULL,
317 "src (%p) or dest (%p) buffer is 0", src, dest);
318 RFALSE(last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST,
319 "invalid last_first parameter (%d)", last_first);
320 RFALSE(nr_src < cpy_num - 1,
321 "no so many items (%d) in src (%d)", cpy_num, nr_src);
322 RFALSE(cpy_num < 0, "cpy_num less than 0 (%d)", cpy_num);
323 RFALSE(cpy_num - 1 + B_NR_ITEMS(dest) > (int)MAX_NR_KEY(dest),
324 "cpy_num (%d) + item number in dest (%d) can not be > MAX_NR_KEY(%d)",
325 cpy_num, B_NR_ITEMS(dest), MAX_NR_KEY(dest));
327 if (cpy_num == 0)
328 return;
330 /* coping */
331 blkh = B_BLK_HEAD(dest);
332 nr_dest = blkh_nr_item(blkh);
334 /*dest_order = (last_first == LAST_TO_FIRST) ? 0 : nr_dest; */
335 /*src_order = (last_first == LAST_TO_FIRST) ? (nr_src - cpy_num + 1) : 0; */
336 (last_first == LAST_TO_FIRST) ? (dest_order = 0, src_order =
337 nr_src - cpy_num + 1) : (dest_order =
338 nr_dest,
339 src_order =
342 /* prepare space for cpy_num pointers */
343 dc = B_N_CHILD(dest, dest_order);
345 memmove(dc + cpy_num, dc, (nr_dest - dest_order) * DC_SIZE);
347 /* insert pointers */
348 memcpy(dc, B_N_CHILD(src, src_order), DC_SIZE * cpy_num);
350 /* prepare space for cpy_num - 1 item headers */
351 key = internal_key(dest, dest_order);
352 memmove(key + cpy_num - 1, key,
353 KEY_SIZE * (nr_dest - dest_order) + DC_SIZE * (nr_dest +
354 cpy_num));
356 /* insert headers */
357 memcpy(key, internal_key(src, src_order), KEY_SIZE * (cpy_num - 1));
359 /* sizes, item number */
360 set_blkh_nr_item(blkh, blkh_nr_item(blkh) + (cpy_num - 1));
361 set_blkh_free_space(blkh,
362 blkh_free_space(blkh) - (KEY_SIZE * (cpy_num - 1) +
363 DC_SIZE * cpy_num));
365 do_balance_mark_internal_dirty(dest_bi->tb, dest, 0);
367 /*&&&&&&&&&&&&&&&&&&&&&&&& */
368 check_internal(dest);
369 /*&&&&&&&&&&&&&&&&&&&&&&&& */
371 if (dest_bi->bi_parent) {
372 struct disk_child *t_dc;
373 t_dc = B_N_CHILD(dest_bi->bi_parent, dest_bi->bi_position);
374 put_dc_size(t_dc,
375 dc_size(t_dc) + (KEY_SIZE * (cpy_num - 1) +
376 DC_SIZE * cpy_num));
378 do_balance_mark_internal_dirty(dest_bi->tb, dest_bi->bi_parent,
380 /*&&&&&&&&&&&&&&&&&&&&&&&& */
381 check_internal(dest_bi->bi_parent);
382 /*&&&&&&&&&&&&&&&&&&&&&&&& */
388 * Copy cpy_num node pointers and cpy_num - 1 items from buffer src to
389 * buffer dest.
390 * Delete cpy_num - del_par items and node pointers from buffer src.
391 * last_first == FIRST_TO_LAST means, that we copy/delete first items from src.
392 * last_first == LAST_TO_FIRST means, that we copy/delete last items from src.
394 static void internal_move_pointers_items(struct buffer_info *dest_bi,
395 struct buffer_info *src_bi,
396 int last_first, int cpy_num,
397 int del_par)
399 int first_pointer;
400 int first_item;
402 internal_copy_pointers_items(dest_bi, src_bi->bi_bh, last_first,
403 cpy_num);
405 if (last_first == FIRST_TO_LAST) { /* shift_left occurs */
406 first_pointer = 0;
407 first_item = 0;
409 * delete cpy_num - del_par pointers and keys starting for
410 * pointers with first_pointer, for key - with first_item
412 internal_delete_pointers_items(src_bi, first_pointer,
413 first_item, cpy_num - del_par);
414 } else { /* shift_right occurs */
415 int i, j;
417 i = (cpy_num - del_par ==
418 (j =
419 B_NR_ITEMS(src_bi->bi_bh)) + 1) ? 0 : j - cpy_num +
420 del_par;
422 internal_delete_pointers_items(src_bi,
423 j + 1 - cpy_num + del_par, i,
424 cpy_num - del_par);
428 /* Insert n_src'th key of buffer src before n_dest'th key of buffer dest. */
429 static void internal_insert_key(struct buffer_info *dest_bi,
430 /* insert key before key with n_dest number */
431 int dest_position_before,
432 struct buffer_head *src, int src_position)
434 struct buffer_head *dest = dest_bi->bi_bh;
435 int nr;
436 struct block_head *blkh;
437 struct reiserfs_key *key;
439 RFALSE(dest == NULL || src == NULL,
440 "source(%p) or dest(%p) buffer is 0", src, dest);
441 RFALSE(dest_position_before < 0 || src_position < 0,
442 "source(%d) or dest(%d) key number less than 0",
443 src_position, dest_position_before);
444 RFALSE(dest_position_before > B_NR_ITEMS(dest) ||
445 src_position >= B_NR_ITEMS(src),
446 "invalid position in dest (%d (key number %d)) or in src (%d (key number %d))",
447 dest_position_before, B_NR_ITEMS(dest),
448 src_position, B_NR_ITEMS(src));
449 RFALSE(B_FREE_SPACE(dest) < KEY_SIZE,
450 "no enough free space (%d) in dest buffer", B_FREE_SPACE(dest));
452 blkh = B_BLK_HEAD(dest);
453 nr = blkh_nr_item(blkh);
455 /* prepare space for inserting key */
456 key = internal_key(dest, dest_position_before);
457 memmove(key + 1, key,
458 (nr - dest_position_before) * KEY_SIZE + (nr + 1) * DC_SIZE);
460 /* insert key */
461 memcpy(key, internal_key(src, src_position), KEY_SIZE);
463 /* Change dirt, free space, item number fields. */
465 set_blkh_nr_item(blkh, blkh_nr_item(blkh) + 1);
466 set_blkh_free_space(blkh, blkh_free_space(blkh) - KEY_SIZE);
468 do_balance_mark_internal_dirty(dest_bi->tb, dest, 0);
470 if (dest_bi->bi_parent) {
471 struct disk_child *t_dc;
472 t_dc = B_N_CHILD(dest_bi->bi_parent, dest_bi->bi_position);
473 put_dc_size(t_dc, dc_size(t_dc) + KEY_SIZE);
475 do_balance_mark_internal_dirty(dest_bi->tb, dest_bi->bi_parent,
481 * Insert d_key'th (delimiting) key from buffer cfl to tail of dest.
482 * Copy pointer_amount node pointers and pointer_amount - 1 items from
483 * buffer src to buffer dest.
484 * Replace d_key'th key in buffer cfl.
485 * Delete pointer_amount items and node pointers from buffer src.
487 /* this can be invoked both to shift from S to L and from R to S */
488 static void internal_shift_left(
490 * INTERNAL_FROM_S_TO_L | INTERNAL_FROM_R_TO_S
492 int mode,
493 struct tree_balance *tb,
494 int h, int pointer_amount)
496 struct buffer_info dest_bi, src_bi;
497 struct buffer_head *cf;
498 int d_key_position;
500 internal_define_dest_src_infos(mode, tb, h, &dest_bi, &src_bi,
501 &d_key_position, &cf);
503 /*printk("pointer_amount = %d\n",pointer_amount); */
505 if (pointer_amount) {
507 * insert delimiting key from common father of dest and
508 * src to node dest into position B_NR_ITEM(dest)
510 internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf,
511 d_key_position);
513 if (B_NR_ITEMS(src_bi.bi_bh) == pointer_amount - 1) {
514 if (src_bi.bi_position /*src->b_item_order */ == 0)
515 replace_key(tb, cf, d_key_position,
516 src_bi.
517 bi_parent /*src->b_parent */ , 0);
518 } else
519 replace_key(tb, cf, d_key_position, src_bi.bi_bh,
520 pointer_amount - 1);
522 /* last parameter is del_parameter */
523 internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST,
524 pointer_amount, 0);
529 * Insert delimiting key to L[h].
530 * Copy n node pointers and n - 1 items from buffer S[h] to L[h].
531 * Delete n - 1 items and node pointers from buffer S[h].
533 /* it always shifts from S[h] to L[h] */
534 static void internal_shift1_left(struct tree_balance *tb,
535 int h, int pointer_amount)
537 struct buffer_info dest_bi, src_bi;
538 struct buffer_head *cf;
539 int d_key_position;
541 internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
542 &dest_bi, &src_bi, &d_key_position, &cf);
544 /* insert lkey[h]-th key from CFL[h] to left neighbor L[h] */
545 if (pointer_amount > 0)
546 internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf,
547 d_key_position);
549 /* last parameter is del_parameter */
550 internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST,
551 pointer_amount, 1);
555 * Insert d_key'th (delimiting) key from buffer cfr to head of dest.
556 * Copy n node pointers and n - 1 items from buffer src to buffer dest.
557 * Replace d_key'th key in buffer cfr.
558 * Delete n items and node pointers from buffer src.
560 static void internal_shift_right(
562 * INTERNAL_FROM_S_TO_R | INTERNAL_FROM_L_TO_S
564 int mode,
565 struct tree_balance *tb,
566 int h, int pointer_amount)
568 struct buffer_info dest_bi, src_bi;
569 struct buffer_head *cf;
570 int d_key_position;
571 int nr;
573 internal_define_dest_src_infos(mode, tb, h, &dest_bi, &src_bi,
574 &d_key_position, &cf);
576 nr = B_NR_ITEMS(src_bi.bi_bh);
578 if (pointer_amount > 0) {
580 * insert delimiting key from common father of dest
581 * and src to dest node into position 0
583 internal_insert_key(&dest_bi, 0, cf, d_key_position);
584 if (nr == pointer_amount - 1) {
585 RFALSE(src_bi.bi_bh != PATH_H_PBUFFER(tb->tb_path, h) /*tb->S[h] */ ||
586 dest_bi.bi_bh != tb->R[h],
587 "src (%p) must be == tb->S[h](%p) when it disappears",
588 src_bi.bi_bh, PATH_H_PBUFFER(tb->tb_path, h));
589 /* when S[h] disappers replace left delemiting key as well */
590 if (tb->CFL[h])
591 replace_key(tb, cf, d_key_position, tb->CFL[h],
592 tb->lkey[h]);
593 } else
594 replace_key(tb, cf, d_key_position, src_bi.bi_bh,
595 nr - pointer_amount);
598 /* last parameter is del_parameter */
599 internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST,
600 pointer_amount, 0);
604 * Insert delimiting key to R[h].
605 * Copy n node pointers and n - 1 items from buffer S[h] to R[h].
606 * Delete n - 1 items and node pointers from buffer S[h].
608 /* it always shift from S[h] to R[h] */
609 static void internal_shift1_right(struct tree_balance *tb,
610 int h, int pointer_amount)
612 struct buffer_info dest_bi, src_bi;
613 struct buffer_head *cf;
614 int d_key_position;
616 internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
617 &dest_bi, &src_bi, &d_key_position, &cf);
619 /* insert rkey from CFR[h] to right neighbor R[h] */
620 if (pointer_amount > 0)
621 internal_insert_key(&dest_bi, 0, cf, d_key_position);
623 /* last parameter is del_parameter */
624 internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST,
625 pointer_amount, 1);
629 * Delete insert_num node pointers together with their left items
630 * and balance current node.
632 static void balance_internal_when_delete(struct tree_balance *tb,
633 int h, int child_pos)
635 int insert_num;
636 int n;
637 struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h);
638 struct buffer_info bi;
640 insert_num = tb->insert_size[h] / ((int)(DC_SIZE + KEY_SIZE));
642 /* delete child-node-pointer(s) together with their left item(s) */
643 bi.tb = tb;
644 bi.bi_bh = tbSh;
645 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h);
646 bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
648 internal_delete_childs(&bi, child_pos, -insert_num);
650 RFALSE(tb->blknum[h] > 1,
651 "tb->blknum[%d]=%d when insert_size < 0", h, tb->blknum[h]);
653 n = B_NR_ITEMS(tbSh);
655 if (tb->lnum[h] == 0 && tb->rnum[h] == 0) {
656 if (tb->blknum[h] == 0) {
657 /* node S[h] (root of the tree) is empty now */
658 struct buffer_head *new_root;
660 RFALSE(n
661 || B_FREE_SPACE(tbSh) !=
662 MAX_CHILD_SIZE(tbSh) - DC_SIZE,
663 "buffer must have only 0 keys (%d)", n);
664 RFALSE(bi.bi_parent, "root has parent (%p)",
665 bi.bi_parent);
667 /* choose a new root */
668 if (!tb->L[h - 1] || !B_NR_ITEMS(tb->L[h - 1]))
669 new_root = tb->R[h - 1];
670 else
671 new_root = tb->L[h - 1];
673 * switch super block's tree root block
674 * number to the new value */
675 PUT_SB_ROOT_BLOCK(tb->tb_sb, new_root->b_blocknr);
676 /*REISERFS_SB(tb->tb_sb)->s_rs->s_tree_height --; */
677 PUT_SB_TREE_HEIGHT(tb->tb_sb,
678 SB_TREE_HEIGHT(tb->tb_sb) - 1);
680 do_balance_mark_sb_dirty(tb,
681 REISERFS_SB(tb->tb_sb)->s_sbh,
683 /*&&&&&&&&&&&&&&&&&&&&&& */
684 /* use check_internal if new root is an internal node */
685 if (h > 1)
686 check_internal(new_root);
687 /*&&&&&&&&&&&&&&&&&&&&&& */
689 /* do what is needed for buffer thrown from tree */
690 reiserfs_invalidate_buffer(tb, tbSh);
691 return;
693 return;
696 /* join S[h] with L[h] */
697 if (tb->L[h] && tb->lnum[h] == -B_NR_ITEMS(tb->L[h]) - 1) {
699 RFALSE(tb->rnum[h] != 0,
700 "invalid tb->rnum[%d]==%d when joining S[h] with L[h]",
701 h, tb->rnum[h]);
703 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, n + 1);
704 reiserfs_invalidate_buffer(tb, tbSh);
706 return;
709 /* join S[h] with R[h] */
710 if (tb->R[h] && tb->rnum[h] == -B_NR_ITEMS(tb->R[h]) - 1) {
711 RFALSE(tb->lnum[h] != 0,
712 "invalid tb->lnum[%d]==%d when joining S[h] with R[h]",
713 h, tb->lnum[h]);
715 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, n + 1);
717 reiserfs_invalidate_buffer(tb, tbSh);
718 return;
721 /* borrow from left neighbor L[h] */
722 if (tb->lnum[h] < 0) {
723 RFALSE(tb->rnum[h] != 0,
724 "wrong tb->rnum[%d]==%d when borrow from L[h]", h,
725 tb->rnum[h]);
726 internal_shift_right(INTERNAL_SHIFT_FROM_L_TO_S, tb, h,
727 -tb->lnum[h]);
728 return;
731 /* borrow from right neighbor R[h] */
732 if (tb->rnum[h] < 0) {
733 RFALSE(tb->lnum[h] != 0,
734 "invalid tb->lnum[%d]==%d when borrow from R[h]",
735 h, tb->lnum[h]);
736 internal_shift_left(INTERNAL_SHIFT_FROM_R_TO_S, tb, h, -tb->rnum[h]); /*tb->S[h], tb->CFR[h], tb->rkey[h], tb->R[h], -tb->rnum[h]); */
737 return;
740 /* split S[h] into two parts and put them into neighbors */
741 if (tb->lnum[h] > 0) {
742 RFALSE(tb->rnum[h] == 0 || tb->lnum[h] + tb->rnum[h] != n + 1,
743 "invalid tb->lnum[%d]==%d or tb->rnum[%d]==%d when S[h](item number == %d) is split between them",
744 h, tb->lnum[h], h, tb->rnum[h], n);
746 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, tb->lnum[h]); /*tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], tb->lnum[h]); */
747 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
748 tb->rnum[h]);
750 reiserfs_invalidate_buffer(tb, tbSh);
752 return;
754 reiserfs_panic(tb->tb_sb, "ibalance-2",
755 "unexpected tb->lnum[%d]==%d or tb->rnum[%d]==%d",
756 h, tb->lnum[h], h, tb->rnum[h]);
759 /* Replace delimiting key of buffers L[h] and S[h] by the given key.*/
760 static void replace_lkey(struct tree_balance *tb, int h, struct item_head *key)
762 RFALSE(tb->L[h] == NULL || tb->CFL[h] == NULL,
763 "L[h](%p) and CFL[h](%p) must exist in replace_lkey",
764 tb->L[h], tb->CFL[h]);
766 if (B_NR_ITEMS(PATH_H_PBUFFER(tb->tb_path, h)) == 0)
767 return;
769 memcpy(internal_key(tb->CFL[h], tb->lkey[h]), key, KEY_SIZE);
771 do_balance_mark_internal_dirty(tb, tb->CFL[h], 0);
774 /* Replace delimiting key of buffers S[h] and R[h] by the given key.*/
775 static void replace_rkey(struct tree_balance *tb, int h, struct item_head *key)
777 RFALSE(tb->R[h] == NULL || tb->CFR[h] == NULL,
778 "R[h](%p) and CFR[h](%p) must exist in replace_rkey",
779 tb->R[h], tb->CFR[h]);
780 RFALSE(B_NR_ITEMS(tb->R[h]) == 0,
781 "R[h] can not be empty if it exists (item number=%d)",
782 B_NR_ITEMS(tb->R[h]));
784 memcpy(internal_key(tb->CFR[h], tb->rkey[h]), key, KEY_SIZE);
786 do_balance_mark_internal_dirty(tb, tb->CFR[h], 0);
791 * if inserting/pasting {
792 * child_pos is the position of the node-pointer in S[h] that
793 * pointed to S[h-1] before balancing of the h-1 level;
794 * this means that new pointers and items must be inserted AFTER
795 * child_pos
796 * } else {
797 * it is the position of the leftmost pointer that must be deleted
798 * (together with its corresponding key to the left of the pointer)
799 * as a result of the previous level's balancing.
803 int balance_internal(struct tree_balance *tb,
804 int h, /* level of the tree */
805 int child_pos,
806 /* key for insertion on higher level */
807 struct item_head *insert_key,
808 /* node for insertion on higher level */
809 struct buffer_head **insert_ptr)
811 struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h);
812 struct buffer_info bi;
815 * we return this: it is 0 if there is no S[h],
816 * else it is tb->S[h]->b_item_order
818 int order;
819 int insert_num, n, k;
820 struct buffer_head *S_new;
821 struct item_head new_insert_key;
822 struct buffer_head *new_insert_ptr = NULL;
823 struct item_head *new_insert_key_addr = insert_key;
825 RFALSE(h < 1, "h (%d) can not be < 1 on internal level", h);
827 PROC_INFO_INC(tb->tb_sb, balance_at[h]);
829 order =
830 (tbSh) ? PATH_H_POSITION(tb->tb_path,
831 h + 1) /*tb->S[h]->b_item_order */ : 0;
834 * Using insert_size[h] calculate the number insert_num of items
835 * that must be inserted to or deleted from S[h].
837 insert_num = tb->insert_size[h] / ((int)(KEY_SIZE + DC_SIZE));
839 /* Check whether insert_num is proper * */
840 RFALSE(insert_num < -2 || insert_num > 2,
841 "incorrect number of items inserted to the internal node (%d)",
842 insert_num);
843 RFALSE(h > 1 && (insert_num > 1 || insert_num < -1),
844 "incorrect number of items (%d) inserted to the internal node on a level (h=%d) higher than last internal level",
845 insert_num, h);
847 /* Make balance in case insert_num < 0 */
848 if (insert_num < 0) {
849 balance_internal_when_delete(tb, h, child_pos);
850 return order;
853 k = 0;
854 if (tb->lnum[h] > 0) {
856 * shift lnum[h] items from S[h] to the left neighbor L[h].
857 * check how many of new items fall into L[h] or CFL[h] after
858 * shifting
860 n = B_NR_ITEMS(tb->L[h]); /* number of items in L[h] */
861 if (tb->lnum[h] <= child_pos) {
862 /* new items don't fall into L[h] or CFL[h] */
863 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
864 tb->lnum[h]);
865 child_pos -= tb->lnum[h];
866 } else if (tb->lnum[h] > child_pos + insert_num) {
867 /* all new items fall into L[h] */
868 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
869 tb->lnum[h] - insert_num);
870 /* insert insert_num keys and node-pointers into L[h] */
871 bi.tb = tb;
872 bi.bi_bh = tb->L[h];
873 bi.bi_parent = tb->FL[h];
874 bi.bi_position = get_left_neighbor_position(tb, h);
875 internal_insert_childs(&bi,
876 /*tb->L[h], tb->S[h-1]->b_next */
877 n + child_pos + 1,
878 insert_num, insert_key,
879 insert_ptr);
881 insert_num = 0;
882 } else {
883 struct disk_child *dc;
886 * some items fall into L[h] or CFL[h],
887 * but some don't fall
889 internal_shift1_left(tb, h, child_pos + 1);
890 /* calculate number of new items that fall into L[h] */
891 k = tb->lnum[h] - child_pos - 1;
892 bi.tb = tb;
893 bi.bi_bh = tb->L[h];
894 bi.bi_parent = tb->FL[h];
895 bi.bi_position = get_left_neighbor_position(tb, h);
896 internal_insert_childs(&bi,
897 /*tb->L[h], tb->S[h-1]->b_next, */
898 n + child_pos + 1, k,
899 insert_key, insert_ptr);
901 replace_lkey(tb, h, insert_key + k);
904 * replace the first node-ptr in S[h] by
905 * node-ptr to insert_ptr[k]
907 dc = B_N_CHILD(tbSh, 0);
908 put_dc_size(dc,
909 MAX_CHILD_SIZE(insert_ptr[k]) -
910 B_FREE_SPACE(insert_ptr[k]));
911 put_dc_block_number(dc, insert_ptr[k]->b_blocknr);
913 do_balance_mark_internal_dirty(tb, tbSh, 0);
915 k++;
916 insert_key += k;
917 insert_ptr += k;
918 insert_num -= k;
919 child_pos = 0;
922 /* tb->lnum[h] > 0 */
923 if (tb->rnum[h] > 0) {
924 /*shift rnum[h] items from S[h] to the right neighbor R[h] */
926 * check how many of new items fall into R or CFR
927 * after shifting
929 n = B_NR_ITEMS(tbSh); /* number of items in S[h] */
930 if (n - tb->rnum[h] >= child_pos)
931 /* new items fall into S[h] */
932 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
933 tb->rnum[h]);
934 else if (n + insert_num - tb->rnum[h] < child_pos) {
935 /* all new items fall into R[h] */
936 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
937 tb->rnum[h] - insert_num);
939 /* insert insert_num keys and node-pointers into R[h] */
940 bi.tb = tb;
941 bi.bi_bh = tb->R[h];
942 bi.bi_parent = tb->FR[h];
943 bi.bi_position = get_right_neighbor_position(tb, h);
944 internal_insert_childs(&bi,
945 /*tb->R[h],tb->S[h-1]->b_next */
946 child_pos - n - insert_num +
947 tb->rnum[h] - 1,
948 insert_num, insert_key,
949 insert_ptr);
950 insert_num = 0;
951 } else {
952 struct disk_child *dc;
954 /* one of the items falls into CFR[h] */
955 internal_shift1_right(tb, h, n - child_pos + 1);
956 /* calculate number of new items that fall into R[h] */
957 k = tb->rnum[h] - n + child_pos - 1;
958 bi.tb = tb;
959 bi.bi_bh = tb->R[h];
960 bi.bi_parent = tb->FR[h];
961 bi.bi_position = get_right_neighbor_position(tb, h);
962 internal_insert_childs(&bi,
963 /*tb->R[h], tb->R[h]->b_child, */
964 0, k, insert_key + 1,
965 insert_ptr + 1);
967 replace_rkey(tb, h, insert_key + insert_num - k - 1);
970 * replace the first node-ptr in R[h] by
971 * node-ptr insert_ptr[insert_num-k-1]
973 dc = B_N_CHILD(tb->R[h], 0);
974 put_dc_size(dc,
975 MAX_CHILD_SIZE(insert_ptr
976 [insert_num - k - 1]) -
977 B_FREE_SPACE(insert_ptr
978 [insert_num - k - 1]));
979 put_dc_block_number(dc,
980 insert_ptr[insert_num - k -
981 1]->b_blocknr);
983 do_balance_mark_internal_dirty(tb, tb->R[h], 0);
985 insert_num -= (k + 1);
989 /** Fill new node that appears instead of S[h] **/
990 RFALSE(tb->blknum[h] > 2, "blknum can not be > 2 for internal level");
991 RFALSE(tb->blknum[h] < 0, "blknum can not be < 0");
993 if (!tb->blknum[h]) { /* node S[h] is empty now */
994 RFALSE(!tbSh, "S[h] is equal NULL");
996 /* do what is needed for buffer thrown from tree */
997 reiserfs_invalidate_buffer(tb, tbSh);
998 return order;
1001 if (!tbSh) {
1002 /* create new root */
1003 struct disk_child *dc;
1004 struct buffer_head *tbSh_1 = PATH_H_PBUFFER(tb->tb_path, h - 1);
1005 struct block_head *blkh;
1007 if (tb->blknum[h] != 1)
1008 reiserfs_panic(NULL, "ibalance-3", "One new node "
1009 "required for creating the new root");
1010 /* S[h] = empty buffer from the list FEB. */
1011 tbSh = get_FEB(tb);
1012 blkh = B_BLK_HEAD(tbSh);
1013 set_blkh_level(blkh, h + 1);
1015 /* Put the unique node-pointer to S[h] that points to S[h-1]. */
1017 dc = B_N_CHILD(tbSh, 0);
1018 put_dc_block_number(dc, tbSh_1->b_blocknr);
1019 put_dc_size(dc,
1020 (MAX_CHILD_SIZE(tbSh_1) - B_FREE_SPACE(tbSh_1)));
1022 tb->insert_size[h] -= DC_SIZE;
1023 set_blkh_free_space(blkh, blkh_free_space(blkh) - DC_SIZE);
1025 do_balance_mark_internal_dirty(tb, tbSh, 0);
1027 /*&&&&&&&&&&&&&&&&&&&&&&&& */
1028 check_internal(tbSh);
1029 /*&&&&&&&&&&&&&&&&&&&&&&&& */
1031 /* put new root into path structure */
1032 PATH_OFFSET_PBUFFER(tb->tb_path, ILLEGAL_PATH_ELEMENT_OFFSET) =
1033 tbSh;
1035 /* Change root in structure super block. */
1036 PUT_SB_ROOT_BLOCK(tb->tb_sb, tbSh->b_blocknr);
1037 PUT_SB_TREE_HEIGHT(tb->tb_sb, SB_TREE_HEIGHT(tb->tb_sb) + 1);
1038 do_balance_mark_sb_dirty(tb, REISERFS_SB(tb->tb_sb)->s_sbh, 1);
1041 if (tb->blknum[h] == 2) {
1042 int snum;
1043 struct buffer_info dest_bi, src_bi;
1045 /* S_new = free buffer from list FEB */
1046 S_new = get_FEB(tb);
1048 set_blkh_level(B_BLK_HEAD(S_new), h + 1);
1050 dest_bi.tb = tb;
1051 dest_bi.bi_bh = S_new;
1052 dest_bi.bi_parent = NULL;
1053 dest_bi.bi_position = 0;
1054 src_bi.tb = tb;
1055 src_bi.bi_bh = tbSh;
1056 src_bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h);
1057 src_bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
1059 n = B_NR_ITEMS(tbSh); /* number of items in S[h] */
1060 snum = (insert_num + n + 1) / 2;
1061 if (n - snum >= child_pos) {
1062 /* new items don't fall into S_new */
1063 /* store the delimiting key for the next level */
1064 /* new_insert_key = (n - snum)'th key in S[h] */
1065 memcpy(&new_insert_key, internal_key(tbSh, n - snum),
1066 KEY_SIZE);
1067 /* last parameter is del_par */
1068 internal_move_pointers_items(&dest_bi, &src_bi,
1069 LAST_TO_FIRST, snum, 0);
1070 } else if (n + insert_num - snum < child_pos) {
1071 /* all new items fall into S_new */
1072 /* store the delimiting key for the next level */
1074 * new_insert_key = (n + insert_item - snum)'th
1075 * key in S[h]
1077 memcpy(&new_insert_key,
1078 internal_key(tbSh, n + insert_num - snum),
1079 KEY_SIZE);
1080 /* last parameter is del_par */
1081 internal_move_pointers_items(&dest_bi, &src_bi,
1082 LAST_TO_FIRST,
1083 snum - insert_num, 0);
1086 * insert insert_num keys and node-pointers
1087 * into S_new
1089 internal_insert_childs(&dest_bi,
1090 /*S_new,tb->S[h-1]->b_next, */
1091 child_pos - n - insert_num +
1092 snum - 1,
1093 insert_num, insert_key,
1094 insert_ptr);
1096 insert_num = 0;
1097 } else {
1098 struct disk_child *dc;
1100 /* some items fall into S_new, but some don't fall */
1101 /* last parameter is del_par */
1102 internal_move_pointers_items(&dest_bi, &src_bi,
1103 LAST_TO_FIRST,
1104 n - child_pos + 1, 1);
1105 /* calculate number of new items that fall into S_new */
1106 k = snum - n + child_pos - 1;
1108 internal_insert_childs(&dest_bi, /*S_new, */ 0, k,
1109 insert_key + 1, insert_ptr + 1);
1111 /* new_insert_key = insert_key[insert_num - k - 1] */
1112 memcpy(&new_insert_key, insert_key + insert_num - k - 1,
1113 KEY_SIZE);
1115 * replace first node-ptr in S_new by node-ptr
1116 * to insert_ptr[insert_num-k-1]
1119 dc = B_N_CHILD(S_new, 0);
1120 put_dc_size(dc,
1121 (MAX_CHILD_SIZE
1122 (insert_ptr[insert_num - k - 1]) -
1123 B_FREE_SPACE(insert_ptr
1124 [insert_num - k - 1])));
1125 put_dc_block_number(dc,
1126 insert_ptr[insert_num - k -
1127 1]->b_blocknr);
1129 do_balance_mark_internal_dirty(tb, S_new, 0);
1131 insert_num -= (k + 1);
1133 /* new_insert_ptr = node_pointer to S_new */
1134 new_insert_ptr = S_new;
1136 RFALSE(!buffer_journaled(S_new) || buffer_journal_dirty(S_new)
1137 || buffer_dirty(S_new), "cm-00001: bad S_new (%b)",
1138 S_new);
1140 /* S_new is released in unfix_nodes */
1143 n = B_NR_ITEMS(tbSh); /*number of items in S[h] */
1145 if (0 <= child_pos && child_pos <= n && insert_num > 0) {
1146 bi.tb = tb;
1147 bi.bi_bh = tbSh;
1148 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h);
1149 bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
1150 internal_insert_childs(&bi, /*tbSh, */
1151 /* ( tb->S[h-1]->b_parent == tb->S[h] ) ? tb->S[h-1]->b_next : tb->S[h]->b_child->b_next, */
1152 child_pos, insert_num, insert_key,
1153 insert_ptr);
1156 insert_ptr[0] = new_insert_ptr;
1157 if (new_insert_ptr)
1158 memcpy(new_insert_key_addr, &new_insert_key, KEY_SIZE);
1160 return order;