IB/mthca: Query port fix
[linux-2.6/verdex.git] / fs / reiserfs / ibalance.c
blobde391a82b9999830cb8eb89b3633b960ac96e2e8
1 /*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
5 #include <asm/uaccess.h>
6 #include <linux/string.h>
7 #include <linux/time.h>
8 #include <linux/reiserfs_fs.h>
9 #include <linux/buffer_head.h>
11 /* this is one and only function that is used outside (do_balance.c) */
12 int balance_internal(struct tree_balance *,
13 int, int, struct item_head *, struct buffer_head **);
15 /* modes of internal_shift_left, internal_shift_right and internal_insert_childs */
16 #define INTERNAL_SHIFT_FROM_S_TO_L 0
17 #define INTERNAL_SHIFT_FROM_R_TO_S 1
18 #define INTERNAL_SHIFT_FROM_L_TO_S 2
19 #define INTERNAL_SHIFT_FROM_S_TO_R 3
20 #define INTERNAL_INSERT_TO_S 4
21 #define INTERNAL_INSERT_TO_L 5
22 #define INTERNAL_INSERT_TO_R 6
24 static void internal_define_dest_src_infos(int shift_mode,
25 struct tree_balance *tb,
26 int h,
27 struct buffer_info *dest_bi,
28 struct buffer_info *src_bi,
29 int *d_key, struct buffer_head **cf)
31 memset(dest_bi, 0, sizeof(struct buffer_info));
32 memset(src_bi, 0, sizeof(struct buffer_info));
33 /* define dest, src, dest parent, dest position */
34 switch (shift_mode) {
35 case INTERNAL_SHIFT_FROM_S_TO_L: /* used in internal_shift_left */
36 src_bi->tb = tb;
37 src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
38 src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
39 src_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
40 dest_bi->tb = tb;
41 dest_bi->bi_bh = tb->L[h];
42 dest_bi->bi_parent = tb->FL[h];
43 dest_bi->bi_position = get_left_neighbor_position(tb, h);
44 *d_key = tb->lkey[h];
45 *cf = tb->CFL[h];
46 break;
47 case INTERNAL_SHIFT_FROM_L_TO_S:
48 src_bi->tb = tb;
49 src_bi->bi_bh = tb->L[h];
50 src_bi->bi_parent = tb->FL[h];
51 src_bi->bi_position = get_left_neighbor_position(tb, h);
52 dest_bi->tb = tb;
53 dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
54 dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
55 dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1); /* dest position is analog of dest->b_item_order */
56 *d_key = tb->lkey[h];
57 *cf = tb->CFL[h];
58 break;
60 case INTERNAL_SHIFT_FROM_R_TO_S: /* used in internal_shift_left */
61 src_bi->tb = tb;
62 src_bi->bi_bh = tb->R[h];
63 src_bi->bi_parent = tb->FR[h];
64 src_bi->bi_position = get_right_neighbor_position(tb, h);
65 dest_bi->tb = tb;
66 dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
67 dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
68 dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
69 *d_key = tb->rkey[h];
70 *cf = tb->CFR[h];
71 break;
73 case INTERNAL_SHIFT_FROM_S_TO_R:
74 src_bi->tb = tb;
75 src_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
76 src_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
77 src_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
78 dest_bi->tb = tb;
79 dest_bi->bi_bh = tb->R[h];
80 dest_bi->bi_parent = tb->FR[h];
81 dest_bi->bi_position = get_right_neighbor_position(tb, h);
82 *d_key = tb->rkey[h];
83 *cf = tb->CFR[h];
84 break;
86 case INTERNAL_INSERT_TO_L:
87 dest_bi->tb = tb;
88 dest_bi->bi_bh = tb->L[h];
89 dest_bi->bi_parent = tb->FL[h];
90 dest_bi->bi_position = get_left_neighbor_position(tb, h);
91 break;
93 case INTERNAL_INSERT_TO_S:
94 dest_bi->tb = tb;
95 dest_bi->bi_bh = PATH_H_PBUFFER(tb->tb_path, h);
96 dest_bi->bi_parent = PATH_H_PPARENT(tb->tb_path, h);
97 dest_bi->bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
98 break;
100 case INTERNAL_INSERT_TO_R:
101 dest_bi->tb = tb;
102 dest_bi->bi_bh = tb->R[h];
103 dest_bi->bi_parent = tb->FR[h];
104 dest_bi->bi_position = get_right_neighbor_position(tb, h);
105 break;
107 default:
108 reiserfs_panic(tb->tb_sb,
109 "internal_define_dest_src_infos: shift type is unknown (%d)",
110 shift_mode);
114 /* Insert count node pointers into buffer cur before position to + 1.
115 * Insert count items into buffer cur before position to.
116 * Items and node pointers are specified by inserted and bh respectively.
118 static void internal_insert_childs(struct buffer_info *cur_bi,
119 int to, int count,
120 struct item_head *inserted,
121 struct buffer_head **bh)
123 struct buffer_head *cur = cur_bi->bi_bh;
124 struct block_head *blkh;
125 int nr;
126 struct reiserfs_key *ih;
127 struct disk_child new_dc[2];
128 struct disk_child *dc;
129 int i;
131 if (count <= 0)
132 return;
134 blkh = B_BLK_HEAD(cur);
135 nr = blkh_nr_item(blkh);
137 RFALSE(count > 2, "too many children (%d) are to be inserted", count);
138 RFALSE(B_FREE_SPACE(cur) < count * (KEY_SIZE + DC_SIZE),
139 "no enough free space (%d), needed %d bytes",
140 B_FREE_SPACE(cur), count * (KEY_SIZE + DC_SIZE));
142 /* prepare space for count disk_child */
143 dc = B_N_CHILD(cur, to + 1);
145 memmove(dc + count, dc, (nr + 1 - (to + 1)) * DC_SIZE);
147 /* copy to_be_insert disk children */
148 for (i = 0; i < count; i++) {
149 put_dc_size(&(new_dc[i]),
150 MAX_CHILD_SIZE(bh[i]) - B_FREE_SPACE(bh[i]));
151 put_dc_block_number(&(new_dc[i]), bh[i]->b_blocknr);
153 memcpy(dc, new_dc, DC_SIZE * count);
155 /* prepare space for count items */
156 ih = B_N_PDELIM_KEY(cur, ((to == -1) ? 0 : to));
158 memmove(ih + count, ih,
159 (nr - to) * KEY_SIZE + (nr + 1 + count) * DC_SIZE);
161 /* copy item headers (keys) */
162 memcpy(ih, inserted, KEY_SIZE);
163 if (count > 1)
164 memcpy(ih + 1, inserted + 1, KEY_SIZE);
166 /* sizes, item number */
167 set_blkh_nr_item(blkh, blkh_nr_item(blkh) + count);
168 set_blkh_free_space(blkh,
169 blkh_free_space(blkh) - count * (DC_SIZE +
170 KEY_SIZE));
172 do_balance_mark_internal_dirty(cur_bi->tb, cur, 0);
174 /*&&&&&&&&&&&&&&&&&&&&&&&& */
175 check_internal(cur);
176 /*&&&&&&&&&&&&&&&&&&&&&&&& */
178 if (cur_bi->bi_parent) {
179 struct disk_child *t_dc =
180 B_N_CHILD(cur_bi->bi_parent, cur_bi->bi_position);
181 put_dc_size(t_dc,
182 dc_size(t_dc) + (count * (DC_SIZE + KEY_SIZE)));
183 do_balance_mark_internal_dirty(cur_bi->tb, cur_bi->bi_parent,
186 /*&&&&&&&&&&&&&&&&&&&&&&&& */
187 check_internal(cur_bi->bi_parent);
188 /*&&&&&&&&&&&&&&&&&&&&&&&& */
193 /* Delete del_num items and node pointers from buffer cur starting from *
194 * the first_i'th item and first_p'th pointers respectively. */
195 static void internal_delete_pointers_items(struct buffer_info *cur_bi,
196 int first_p,
197 int first_i, int del_num)
199 struct buffer_head *cur = cur_bi->bi_bh;
200 int nr;
201 struct block_head *blkh;
202 struct reiserfs_key *key;
203 struct disk_child *dc;
205 RFALSE(cur == NULL, "buffer is 0");
206 RFALSE(del_num < 0,
207 "negative number of items (%d) can not be deleted", del_num);
208 RFALSE(first_p < 0 || first_p + del_num > B_NR_ITEMS(cur) + 1
209 || first_i < 0,
210 "first pointer order (%d) < 0 or "
211 "no so many pointers (%d), only (%d) or "
212 "first key order %d < 0", first_p, first_p + del_num,
213 B_NR_ITEMS(cur) + 1, first_i);
214 if (del_num == 0)
215 return;
217 blkh = B_BLK_HEAD(cur);
218 nr = blkh_nr_item(blkh);
220 if (first_p == 0 && del_num == nr + 1) {
221 RFALSE(first_i != 0,
222 "1st deleted key must have order 0, not %d", first_i);
223 make_empty_node(cur_bi);
224 return;
227 RFALSE(first_i + del_num > B_NR_ITEMS(cur),
228 "first_i = %d del_num = %d "
229 "no so many keys (%d) in the node (%b)(%z)",
230 first_i, del_num, first_i + del_num, cur, cur);
232 /* deleting */
233 dc = B_N_CHILD(cur, first_p);
235 memmove(dc, dc + del_num, (nr + 1 - first_p - del_num) * DC_SIZE);
236 key = B_N_PDELIM_KEY(cur, first_i);
237 memmove(key, key + del_num,
238 (nr - first_i - del_num) * KEY_SIZE + (nr + 1 -
239 del_num) * DC_SIZE);
241 /* sizes, item number */
242 set_blkh_nr_item(blkh, blkh_nr_item(blkh) - del_num);
243 set_blkh_free_space(blkh,
244 blkh_free_space(blkh) +
245 (del_num * (KEY_SIZE + DC_SIZE)));
247 do_balance_mark_internal_dirty(cur_bi->tb, cur, 0);
248 /*&&&&&&&&&&&&&&&&&&&&&&& */
249 check_internal(cur);
250 /*&&&&&&&&&&&&&&&&&&&&&&& */
252 if (cur_bi->bi_parent) {
253 struct disk_child *t_dc;
254 t_dc = B_N_CHILD(cur_bi->bi_parent, cur_bi->bi_position);
255 put_dc_size(t_dc,
256 dc_size(t_dc) - (del_num * (KEY_SIZE + DC_SIZE)));
258 do_balance_mark_internal_dirty(cur_bi->tb, cur_bi->bi_parent,
260 /*&&&&&&&&&&&&&&&&&&&&&&&& */
261 check_internal(cur_bi->bi_parent);
262 /*&&&&&&&&&&&&&&&&&&&&&&&& */
266 /* delete n node pointers and items starting from given position */
267 static void internal_delete_childs(struct buffer_info *cur_bi, int from, int n)
269 int i_from;
271 i_from = (from == 0) ? from : from - 1;
273 /* delete n pointers starting from `from' position in CUR;
274 delete n keys starting from 'i_from' position in CUR;
276 internal_delete_pointers_items(cur_bi, from, i_from, n);
279 /* copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest
280 * last_first == FIRST_TO_LAST means, that we copy first items from src to tail of dest
281 * last_first == LAST_TO_FIRST means, that we copy last items from src to head of dest
283 static void internal_copy_pointers_items(struct buffer_info *dest_bi,
284 struct buffer_head *src,
285 int last_first, int cpy_num)
287 /* ATTENTION! Number of node pointers in DEST is equal to number of items in DEST *
288 * as delimiting key have already inserted to buffer dest.*/
289 struct buffer_head *dest = dest_bi->bi_bh;
290 int nr_dest, nr_src;
291 int dest_order, src_order;
292 struct block_head *blkh;
293 struct reiserfs_key *key;
294 struct disk_child *dc;
296 nr_src = B_NR_ITEMS(src);
298 RFALSE(dest == NULL || src == NULL,
299 "src (%p) or dest (%p) buffer is 0", src, dest);
300 RFALSE(last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST,
301 "invalid last_first parameter (%d)", last_first);
302 RFALSE(nr_src < cpy_num - 1,
303 "no so many items (%d) in src (%d)", cpy_num, nr_src);
304 RFALSE(cpy_num < 0, "cpy_num less than 0 (%d)", cpy_num);
305 RFALSE(cpy_num - 1 + B_NR_ITEMS(dest) > (int)MAX_NR_KEY(dest),
306 "cpy_num (%d) + item number in dest (%d) can not be > MAX_NR_KEY(%d)",
307 cpy_num, B_NR_ITEMS(dest), MAX_NR_KEY(dest));
309 if (cpy_num == 0)
310 return;
312 /* coping */
313 blkh = B_BLK_HEAD(dest);
314 nr_dest = blkh_nr_item(blkh);
316 /*dest_order = (last_first == LAST_TO_FIRST) ? 0 : nr_dest; */
317 /*src_order = (last_first == LAST_TO_FIRST) ? (nr_src - cpy_num + 1) : 0; */
318 (last_first == LAST_TO_FIRST) ? (dest_order = 0, src_order =
319 nr_src - cpy_num + 1) : (dest_order =
320 nr_dest,
321 src_order =
324 /* prepare space for cpy_num pointers */
325 dc = B_N_CHILD(dest, dest_order);
327 memmove(dc + cpy_num, dc, (nr_dest - dest_order) * DC_SIZE);
329 /* insert pointers */
330 memcpy(dc, B_N_CHILD(src, src_order), DC_SIZE * cpy_num);
332 /* prepare space for cpy_num - 1 item headers */
333 key = B_N_PDELIM_KEY(dest, dest_order);
334 memmove(key + cpy_num - 1, key,
335 KEY_SIZE * (nr_dest - dest_order) + DC_SIZE * (nr_dest +
336 cpy_num));
338 /* insert headers */
339 memcpy(key, B_N_PDELIM_KEY(src, src_order), KEY_SIZE * (cpy_num - 1));
341 /* sizes, item number */
342 set_blkh_nr_item(blkh, blkh_nr_item(blkh) + (cpy_num - 1));
343 set_blkh_free_space(blkh,
344 blkh_free_space(blkh) - (KEY_SIZE * (cpy_num - 1) +
345 DC_SIZE * cpy_num));
347 do_balance_mark_internal_dirty(dest_bi->tb, dest, 0);
349 /*&&&&&&&&&&&&&&&&&&&&&&&& */
350 check_internal(dest);
351 /*&&&&&&&&&&&&&&&&&&&&&&&& */
353 if (dest_bi->bi_parent) {
354 struct disk_child *t_dc;
355 t_dc = B_N_CHILD(dest_bi->bi_parent, dest_bi->bi_position);
356 put_dc_size(t_dc,
357 dc_size(t_dc) + (KEY_SIZE * (cpy_num - 1) +
358 DC_SIZE * cpy_num));
360 do_balance_mark_internal_dirty(dest_bi->tb, dest_bi->bi_parent,
362 /*&&&&&&&&&&&&&&&&&&&&&&&& */
363 check_internal(dest_bi->bi_parent);
364 /*&&&&&&&&&&&&&&&&&&&&&&&& */
369 /* Copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest.
370 * Delete cpy_num - del_par items and node pointers from buffer src.
371 * last_first == FIRST_TO_LAST means, that we copy/delete first items from src.
372 * last_first == LAST_TO_FIRST means, that we copy/delete last items from src.
374 static void internal_move_pointers_items(struct buffer_info *dest_bi,
375 struct buffer_info *src_bi,
376 int last_first, int cpy_num,
377 int del_par)
379 int first_pointer;
380 int first_item;
382 internal_copy_pointers_items(dest_bi, src_bi->bi_bh, last_first,
383 cpy_num);
385 if (last_first == FIRST_TO_LAST) { /* shift_left occurs */
386 first_pointer = 0;
387 first_item = 0;
388 /* delete cpy_num - del_par pointers and keys starting for pointers with first_pointer,
389 for key - with first_item */
390 internal_delete_pointers_items(src_bi, first_pointer,
391 first_item, cpy_num - del_par);
392 } else { /* shift_right occurs */
393 int i, j;
395 i = (cpy_num - del_par ==
396 (j =
397 B_NR_ITEMS(src_bi->bi_bh)) + 1) ? 0 : j - cpy_num +
398 del_par;
400 internal_delete_pointers_items(src_bi,
401 j + 1 - cpy_num + del_par, i,
402 cpy_num - del_par);
406 /* Insert n_src'th key of buffer src before n_dest'th key of buffer dest. */
407 static void internal_insert_key(struct buffer_info *dest_bi, int dest_position_before, /* insert key before key with n_dest number */
408 struct buffer_head *src, int src_position)
410 struct buffer_head *dest = dest_bi->bi_bh;
411 int nr;
412 struct block_head *blkh;
413 struct reiserfs_key *key;
415 RFALSE(dest == NULL || src == NULL,
416 "source(%p) or dest(%p) buffer is 0", src, dest);
417 RFALSE(dest_position_before < 0 || src_position < 0,
418 "source(%d) or dest(%d) key number less than 0",
419 src_position, dest_position_before);
420 RFALSE(dest_position_before > B_NR_ITEMS(dest) ||
421 src_position >= B_NR_ITEMS(src),
422 "invalid position in dest (%d (key number %d)) or in src (%d (key number %d))",
423 dest_position_before, B_NR_ITEMS(dest),
424 src_position, B_NR_ITEMS(src));
425 RFALSE(B_FREE_SPACE(dest) < KEY_SIZE,
426 "no enough free space (%d) in dest buffer", B_FREE_SPACE(dest));
428 blkh = B_BLK_HEAD(dest);
429 nr = blkh_nr_item(blkh);
431 /* prepare space for inserting key */
432 key = B_N_PDELIM_KEY(dest, dest_position_before);
433 memmove(key + 1, key,
434 (nr - dest_position_before) * KEY_SIZE + (nr + 1) * DC_SIZE);
436 /* insert key */
437 memcpy(key, B_N_PDELIM_KEY(src, src_position), KEY_SIZE);
439 /* Change dirt, free space, item number fields. */
441 set_blkh_nr_item(blkh, blkh_nr_item(blkh) + 1);
442 set_blkh_free_space(blkh, blkh_free_space(blkh) - KEY_SIZE);
444 do_balance_mark_internal_dirty(dest_bi->tb, dest, 0);
446 if (dest_bi->bi_parent) {
447 struct disk_child *t_dc;
448 t_dc = B_N_CHILD(dest_bi->bi_parent, dest_bi->bi_position);
449 put_dc_size(t_dc, dc_size(t_dc) + KEY_SIZE);
451 do_balance_mark_internal_dirty(dest_bi->tb, dest_bi->bi_parent,
456 /* Insert d_key'th (delimiting) key from buffer cfl to tail of dest.
457 * Copy pointer_amount node pointers and pointer_amount - 1 items from buffer src to buffer dest.
458 * Replace d_key'th key in buffer cfl.
459 * Delete pointer_amount items and node pointers from buffer src.
461 /* this can be invoked both to shift from S to L and from R to S */
462 static void internal_shift_left(int mode, /* INTERNAL_FROM_S_TO_L | INTERNAL_FROM_R_TO_S */
463 struct tree_balance *tb,
464 int h, int pointer_amount)
466 struct buffer_info dest_bi, src_bi;
467 struct buffer_head *cf;
468 int d_key_position;
470 internal_define_dest_src_infos(mode, tb, h, &dest_bi, &src_bi,
471 &d_key_position, &cf);
473 /*printk("pointer_amount = %d\n",pointer_amount); */
475 if (pointer_amount) {
476 /* insert delimiting key from common father of dest and src to node dest into position B_NR_ITEM(dest) */
477 internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf,
478 d_key_position);
480 if (B_NR_ITEMS(src_bi.bi_bh) == pointer_amount - 1) {
481 if (src_bi.bi_position /*src->b_item_order */ == 0)
482 replace_key(tb, cf, d_key_position,
483 src_bi.
484 bi_parent /*src->b_parent */ , 0);
485 } else
486 replace_key(tb, cf, d_key_position, src_bi.bi_bh,
487 pointer_amount - 1);
489 /* last parameter is del_parameter */
490 internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST,
491 pointer_amount, 0);
495 /* Insert delimiting key to L[h].
496 * Copy n node pointers and n - 1 items from buffer S[h] to L[h].
497 * Delete n - 1 items and node pointers from buffer S[h].
499 /* it always shifts from S[h] to L[h] */
500 static void internal_shift1_left(struct tree_balance *tb,
501 int h, int pointer_amount)
503 struct buffer_info dest_bi, src_bi;
504 struct buffer_head *cf;
505 int d_key_position;
507 internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
508 &dest_bi, &src_bi, &d_key_position, &cf);
510 if (pointer_amount > 0) /* insert lkey[h]-th key from CFL[h] to left neighbor L[h] */
511 internal_insert_key(&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf,
512 d_key_position);
513 /* internal_insert_key (tb->L[h], B_NR_ITEM(tb->L[h]), tb->CFL[h], tb->lkey[h]); */
515 /* last parameter is del_parameter */
516 internal_move_pointers_items(&dest_bi, &src_bi, FIRST_TO_LAST,
517 pointer_amount, 1);
518 /* internal_move_pointers_items (tb->L[h], tb->S[h], FIRST_TO_LAST, pointer_amount, 1); */
521 /* Insert d_key'th (delimiting) key from buffer cfr to head of dest.
522 * Copy n node pointers and n - 1 items from buffer src to buffer dest.
523 * Replace d_key'th key in buffer cfr.
524 * Delete n items and node pointers from buffer src.
526 static void internal_shift_right(int mode, /* INTERNAL_FROM_S_TO_R | INTERNAL_FROM_L_TO_S */
527 struct tree_balance *tb,
528 int h, int pointer_amount)
530 struct buffer_info dest_bi, src_bi;
531 struct buffer_head *cf;
532 int d_key_position;
533 int nr;
535 internal_define_dest_src_infos(mode, tb, h, &dest_bi, &src_bi,
536 &d_key_position, &cf);
538 nr = B_NR_ITEMS(src_bi.bi_bh);
540 if (pointer_amount > 0) {
541 /* insert delimiting key from common father of dest and src to dest node into position 0 */
542 internal_insert_key(&dest_bi, 0, cf, d_key_position);
543 if (nr == pointer_amount - 1) {
544 RFALSE(src_bi.bi_bh != PATH_H_PBUFFER(tb->tb_path, h) /*tb->S[h] */ ||
545 dest_bi.bi_bh != tb->R[h],
546 "src (%p) must be == tb->S[h](%p) when it disappears",
547 src_bi.bi_bh, PATH_H_PBUFFER(tb->tb_path, h));
548 /* when S[h] disappers replace left delemiting key as well */
549 if (tb->CFL[h])
550 replace_key(tb, cf, d_key_position, tb->CFL[h],
551 tb->lkey[h]);
552 } else
553 replace_key(tb, cf, d_key_position, src_bi.bi_bh,
554 nr - pointer_amount);
557 /* last parameter is del_parameter */
558 internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST,
559 pointer_amount, 0);
562 /* Insert delimiting key to R[h].
563 * Copy n node pointers and n - 1 items from buffer S[h] to R[h].
564 * Delete n - 1 items and node pointers from buffer S[h].
566 /* it always shift from S[h] to R[h] */
567 static void internal_shift1_right(struct tree_balance *tb,
568 int h, int pointer_amount)
570 struct buffer_info dest_bi, src_bi;
571 struct buffer_head *cf;
572 int d_key_position;
574 internal_define_dest_src_infos(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
575 &dest_bi, &src_bi, &d_key_position, &cf);
577 if (pointer_amount > 0) /* insert rkey from CFR[h] to right neighbor R[h] */
578 internal_insert_key(&dest_bi, 0, cf, d_key_position);
579 /* internal_insert_key (tb->R[h], 0, tb->CFR[h], tb->rkey[h]); */
581 /* last parameter is del_parameter */
582 internal_move_pointers_items(&dest_bi, &src_bi, LAST_TO_FIRST,
583 pointer_amount, 1);
584 /* internal_move_pointers_items (tb->R[h], tb->S[h], LAST_TO_FIRST, pointer_amount, 1); */
587 /* Delete insert_num node pointers together with their left items
588 * and balance current node.*/
589 static void balance_internal_when_delete(struct tree_balance *tb,
590 int h, int child_pos)
592 int insert_num;
593 int n;
594 struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h);
595 struct buffer_info bi;
597 insert_num = tb->insert_size[h] / ((int)(DC_SIZE + KEY_SIZE));
599 /* delete child-node-pointer(s) together with their left item(s) */
600 bi.tb = tb;
601 bi.bi_bh = tbSh;
602 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h);
603 bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
605 internal_delete_childs(&bi, child_pos, -insert_num);
607 RFALSE(tb->blknum[h] > 1,
608 "tb->blknum[%d]=%d when insert_size < 0", h, tb->blknum[h]);
610 n = B_NR_ITEMS(tbSh);
612 if (tb->lnum[h] == 0 && tb->rnum[h] == 0) {
613 if (tb->blknum[h] == 0) {
614 /* node S[h] (root of the tree) is empty now */
615 struct buffer_head *new_root;
617 RFALSE(n
618 || B_FREE_SPACE(tbSh) !=
619 MAX_CHILD_SIZE(tbSh) - DC_SIZE,
620 "buffer must have only 0 keys (%d)", n);
621 RFALSE(bi.bi_parent, "root has parent (%p)",
622 bi.bi_parent);
624 /* choose a new root */
625 if (!tb->L[h - 1] || !B_NR_ITEMS(tb->L[h - 1]))
626 new_root = tb->R[h - 1];
627 else
628 new_root = tb->L[h - 1];
629 /* switch super block's tree root block number to the new value */
630 PUT_SB_ROOT_BLOCK(tb->tb_sb, new_root->b_blocknr);
631 //REISERFS_SB(tb->tb_sb)->s_rs->s_tree_height --;
632 PUT_SB_TREE_HEIGHT(tb->tb_sb,
633 SB_TREE_HEIGHT(tb->tb_sb) - 1);
635 do_balance_mark_sb_dirty(tb,
636 REISERFS_SB(tb->tb_sb)->s_sbh,
638 /*&&&&&&&&&&&&&&&&&&&&&& */
639 if (h > 1)
640 /* use check_internal if new root is an internal node */
641 check_internal(new_root);
642 /*&&&&&&&&&&&&&&&&&&&&&& */
644 /* do what is needed for buffer thrown from tree */
645 reiserfs_invalidate_buffer(tb, tbSh);
646 return;
648 return;
651 if (tb->L[h] && tb->lnum[h] == -B_NR_ITEMS(tb->L[h]) - 1) { /* join S[h] with L[h] */
653 RFALSE(tb->rnum[h] != 0,
654 "invalid tb->rnum[%d]==%d when joining S[h] with L[h]",
655 h, tb->rnum[h]);
657 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, n + 1);
658 reiserfs_invalidate_buffer(tb, tbSh);
660 return;
663 if (tb->R[h] && tb->rnum[h] == -B_NR_ITEMS(tb->R[h]) - 1) { /* join S[h] with R[h] */
664 RFALSE(tb->lnum[h] != 0,
665 "invalid tb->lnum[%d]==%d when joining S[h] with R[h]",
666 h, tb->lnum[h]);
668 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h, n + 1);
670 reiserfs_invalidate_buffer(tb, tbSh);
671 return;
674 if (tb->lnum[h] < 0) { /* borrow from left neighbor L[h] */
675 RFALSE(tb->rnum[h] != 0,
676 "wrong tb->rnum[%d]==%d when borrow from L[h]", h,
677 tb->rnum[h]);
678 /*internal_shift_right (tb, h, tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], -tb->lnum[h]); */
679 internal_shift_right(INTERNAL_SHIFT_FROM_L_TO_S, tb, h,
680 -tb->lnum[h]);
681 return;
684 if (tb->rnum[h] < 0) { /* borrow from right neighbor R[h] */
685 RFALSE(tb->lnum[h] != 0,
686 "invalid tb->lnum[%d]==%d when borrow from R[h]",
687 h, tb->lnum[h]);
688 internal_shift_left(INTERNAL_SHIFT_FROM_R_TO_S, tb, h, -tb->rnum[h]); /*tb->S[h], tb->CFR[h], tb->rkey[h], tb->R[h], -tb->rnum[h]); */
689 return;
692 if (tb->lnum[h] > 0) { /* split S[h] into two parts and put them into neighbors */
693 RFALSE(tb->rnum[h] == 0 || tb->lnum[h] + tb->rnum[h] != n + 1,
694 "invalid tb->lnum[%d]==%d or tb->rnum[%d]==%d when S[h](item number == %d) is split between them",
695 h, tb->lnum[h], h, tb->rnum[h], n);
697 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h, tb->lnum[h]); /*tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], tb->lnum[h]); */
698 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
699 tb->rnum[h]);
701 reiserfs_invalidate_buffer(tb, tbSh);
703 return;
705 reiserfs_panic(tb->tb_sb,
706 "balance_internal_when_delete: unexpected tb->lnum[%d]==%d or tb->rnum[%d]==%d",
707 h, tb->lnum[h], h, tb->rnum[h]);
710 /* Replace delimiting key of buffers L[h] and S[h] by the given key.*/
711 static void replace_lkey(struct tree_balance *tb, int h, struct item_head *key)
713 RFALSE(tb->L[h] == NULL || tb->CFL[h] == NULL,
714 "L[h](%p) and CFL[h](%p) must exist in replace_lkey",
715 tb->L[h], tb->CFL[h]);
717 if (B_NR_ITEMS(PATH_H_PBUFFER(tb->tb_path, h)) == 0)
718 return;
720 memcpy(B_N_PDELIM_KEY(tb->CFL[h], tb->lkey[h]), key, KEY_SIZE);
722 do_balance_mark_internal_dirty(tb, tb->CFL[h], 0);
725 /* Replace delimiting key of buffers S[h] and R[h] by the given key.*/
726 static void replace_rkey(struct tree_balance *tb, int h, struct item_head *key)
728 RFALSE(tb->R[h] == NULL || tb->CFR[h] == NULL,
729 "R[h](%p) and CFR[h](%p) must exist in replace_rkey",
730 tb->R[h], tb->CFR[h]);
731 RFALSE(B_NR_ITEMS(tb->R[h]) == 0,
732 "R[h] can not be empty if it exists (item number=%d)",
733 B_NR_ITEMS(tb->R[h]));
735 memcpy(B_N_PDELIM_KEY(tb->CFR[h], tb->rkey[h]), key, KEY_SIZE);
737 do_balance_mark_internal_dirty(tb, tb->CFR[h], 0);
740 int balance_internal(struct tree_balance *tb, /* tree_balance structure */
741 int h, /* level of the tree */
742 int child_pos, struct item_head *insert_key, /* key for insertion on higher level */
743 struct buffer_head **insert_ptr /* node for insertion on higher level */
745 /* if inserting/pasting
747 child_pos is the position of the node-pointer in S[h] that *
748 pointed to S[h-1] before balancing of the h-1 level; *
749 this means that new pointers and items must be inserted AFTER *
750 child_pos
752 else
754 it is the position of the leftmost pointer that must be deleted (together with
755 its corresponding key to the left of the pointer)
756 as a result of the previous level's balancing.
760 struct buffer_head *tbSh = PATH_H_PBUFFER(tb->tb_path, h);
761 struct buffer_info bi;
762 int order; /* we return this: it is 0 if there is no S[h], else it is tb->S[h]->b_item_order */
763 int insert_num, n, k;
764 struct buffer_head *S_new;
765 struct item_head new_insert_key;
766 struct buffer_head *new_insert_ptr = NULL;
767 struct item_head *new_insert_key_addr = insert_key;
769 RFALSE(h < 1, "h (%d) can not be < 1 on internal level", h);
771 PROC_INFO_INC(tb->tb_sb, balance_at[h]);
773 order =
774 (tbSh) ? PATH_H_POSITION(tb->tb_path,
775 h + 1) /*tb->S[h]->b_item_order */ : 0;
777 /* Using insert_size[h] calculate the number insert_num of items
778 that must be inserted to or deleted from S[h]. */
779 insert_num = tb->insert_size[h] / ((int)(KEY_SIZE + DC_SIZE));
781 /* Check whether insert_num is proper * */
782 RFALSE(insert_num < -2 || insert_num > 2,
783 "incorrect number of items inserted to the internal node (%d)",
784 insert_num);
785 RFALSE(h > 1 && (insert_num > 1 || insert_num < -1),
786 "incorrect number of items (%d) inserted to the internal node on a level (h=%d) higher than last internal level",
787 insert_num, h);
789 /* Make balance in case insert_num < 0 */
790 if (insert_num < 0) {
791 balance_internal_when_delete(tb, h, child_pos);
792 return order;
795 k = 0;
796 if (tb->lnum[h] > 0) {
797 /* shift lnum[h] items from S[h] to the left neighbor L[h].
798 check how many of new items fall into L[h] or CFL[h] after
799 shifting */
800 n = B_NR_ITEMS(tb->L[h]); /* number of items in L[h] */
801 if (tb->lnum[h] <= child_pos) {
802 /* new items don't fall into L[h] or CFL[h] */
803 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
804 tb->lnum[h]);
805 /*internal_shift_left (tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,tb->lnum[h]); */
806 child_pos -= tb->lnum[h];
807 } else if (tb->lnum[h] > child_pos + insert_num) {
808 /* all new items fall into L[h] */
809 internal_shift_left(INTERNAL_SHIFT_FROM_S_TO_L, tb, h,
810 tb->lnum[h] - insert_num);
811 /* internal_shift_left(tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,
812 tb->lnum[h]-insert_num);
814 /* insert insert_num keys and node-pointers into L[h] */
815 bi.tb = tb;
816 bi.bi_bh = tb->L[h];
817 bi.bi_parent = tb->FL[h];
818 bi.bi_position = get_left_neighbor_position(tb, h);
819 internal_insert_childs(&bi,
820 /*tb->L[h], tb->S[h-1]->b_next */
821 n + child_pos + 1,
822 insert_num, insert_key,
823 insert_ptr);
825 insert_num = 0;
826 } else {
827 struct disk_child *dc;
829 /* some items fall into L[h] or CFL[h], but some don't fall */
830 internal_shift1_left(tb, h, child_pos + 1);
831 /* calculate number of new items that fall into L[h] */
832 k = tb->lnum[h] - child_pos - 1;
833 bi.tb = tb;
834 bi.bi_bh = tb->L[h];
835 bi.bi_parent = tb->FL[h];
836 bi.bi_position = get_left_neighbor_position(tb, h);
837 internal_insert_childs(&bi,
838 /*tb->L[h], tb->S[h-1]->b_next, */
839 n + child_pos + 1, k,
840 insert_key, insert_ptr);
842 replace_lkey(tb, h, insert_key + k);
844 /* replace the first node-ptr in S[h] by node-ptr to insert_ptr[k] */
845 dc = B_N_CHILD(tbSh, 0);
846 put_dc_size(dc,
847 MAX_CHILD_SIZE(insert_ptr[k]) -
848 B_FREE_SPACE(insert_ptr[k]));
849 put_dc_block_number(dc, insert_ptr[k]->b_blocknr);
851 do_balance_mark_internal_dirty(tb, tbSh, 0);
853 k++;
854 insert_key += k;
855 insert_ptr += k;
856 insert_num -= k;
857 child_pos = 0;
860 /* tb->lnum[h] > 0 */
861 if (tb->rnum[h] > 0) {
862 /*shift rnum[h] items from S[h] to the right neighbor R[h] */
863 /* check how many of new items fall into R or CFR after shifting */
864 n = B_NR_ITEMS(tbSh); /* number of items in S[h] */
865 if (n - tb->rnum[h] >= child_pos)
866 /* new items fall into S[h] */
867 /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],tb->rnum[h]); */
868 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
869 tb->rnum[h]);
870 else if (n + insert_num - tb->rnum[h] < child_pos) {
871 /* all new items fall into R[h] */
872 /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],
873 tb->rnum[h] - insert_num); */
874 internal_shift_right(INTERNAL_SHIFT_FROM_S_TO_R, tb, h,
875 tb->rnum[h] - insert_num);
877 /* insert insert_num keys and node-pointers into R[h] */
878 bi.tb = tb;
879 bi.bi_bh = tb->R[h];
880 bi.bi_parent = tb->FR[h];
881 bi.bi_position = get_right_neighbor_position(tb, h);
882 internal_insert_childs(&bi,
883 /*tb->R[h],tb->S[h-1]->b_next */
884 child_pos - n - insert_num +
885 tb->rnum[h] - 1,
886 insert_num, insert_key,
887 insert_ptr);
888 insert_num = 0;
889 } else {
890 struct disk_child *dc;
892 /* one of the items falls into CFR[h] */
893 internal_shift1_right(tb, h, n - child_pos + 1);
894 /* calculate number of new items that fall into R[h] */
895 k = tb->rnum[h] - n + child_pos - 1;
896 bi.tb = tb;
897 bi.bi_bh = tb->R[h];
898 bi.bi_parent = tb->FR[h];
899 bi.bi_position = get_right_neighbor_position(tb, h);
900 internal_insert_childs(&bi,
901 /*tb->R[h], tb->R[h]->b_child, */
902 0, k, insert_key + 1,
903 insert_ptr + 1);
905 replace_rkey(tb, h, insert_key + insert_num - k - 1);
907 /* replace the first node-ptr in R[h] by node-ptr insert_ptr[insert_num-k-1] */
908 dc = B_N_CHILD(tb->R[h], 0);
909 put_dc_size(dc,
910 MAX_CHILD_SIZE(insert_ptr
911 [insert_num - k - 1]) -
912 B_FREE_SPACE(insert_ptr
913 [insert_num - k - 1]));
914 put_dc_block_number(dc,
915 insert_ptr[insert_num - k -
916 1]->b_blocknr);
918 do_balance_mark_internal_dirty(tb, tb->R[h], 0);
920 insert_num -= (k + 1);
924 /** Fill new node that appears instead of S[h] **/
925 RFALSE(tb->blknum[h] > 2, "blknum can not be > 2 for internal level");
926 RFALSE(tb->blknum[h] < 0, "blknum can not be < 0");
928 if (!tb->blknum[h]) { /* node S[h] is empty now */
929 RFALSE(!tbSh, "S[h] is equal NULL");
931 /* do what is needed for buffer thrown from tree */
932 reiserfs_invalidate_buffer(tb, tbSh);
933 return order;
936 if (!tbSh) {
937 /* create new root */
938 struct disk_child *dc;
939 struct buffer_head *tbSh_1 = PATH_H_PBUFFER(tb->tb_path, h - 1);
940 struct block_head *blkh;
942 if (tb->blknum[h] != 1)
943 reiserfs_panic(NULL,
944 "balance_internal: One new node required for creating the new root");
945 /* S[h] = empty buffer from the list FEB. */
946 tbSh = get_FEB(tb);
947 blkh = B_BLK_HEAD(tbSh);
948 set_blkh_level(blkh, h + 1);
950 /* Put the unique node-pointer to S[h] that points to S[h-1]. */
952 dc = B_N_CHILD(tbSh, 0);
953 put_dc_block_number(dc, tbSh_1->b_blocknr);
954 put_dc_size(dc,
955 (MAX_CHILD_SIZE(tbSh_1) - B_FREE_SPACE(tbSh_1)));
957 tb->insert_size[h] -= DC_SIZE;
958 set_blkh_free_space(blkh, blkh_free_space(blkh) - DC_SIZE);
960 do_balance_mark_internal_dirty(tb, tbSh, 0);
962 /*&&&&&&&&&&&&&&&&&&&&&&&& */
963 check_internal(tbSh);
964 /*&&&&&&&&&&&&&&&&&&&&&&&& */
966 /* put new root into path structure */
967 PATH_OFFSET_PBUFFER(tb->tb_path, ILLEGAL_PATH_ELEMENT_OFFSET) =
968 tbSh;
970 /* Change root in structure super block. */
971 PUT_SB_ROOT_BLOCK(tb->tb_sb, tbSh->b_blocknr);
972 PUT_SB_TREE_HEIGHT(tb->tb_sb, SB_TREE_HEIGHT(tb->tb_sb) + 1);
973 do_balance_mark_sb_dirty(tb, REISERFS_SB(tb->tb_sb)->s_sbh, 1);
976 if (tb->blknum[h] == 2) {
977 int snum;
978 struct buffer_info dest_bi, src_bi;
980 /* S_new = free buffer from list FEB */
981 S_new = get_FEB(tb);
983 set_blkh_level(B_BLK_HEAD(S_new), h + 1);
985 dest_bi.tb = tb;
986 dest_bi.bi_bh = S_new;
987 dest_bi.bi_parent = NULL;
988 dest_bi.bi_position = 0;
989 src_bi.tb = tb;
990 src_bi.bi_bh = tbSh;
991 src_bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h);
992 src_bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
994 n = B_NR_ITEMS(tbSh); /* number of items in S[h] */
995 snum = (insert_num + n + 1) / 2;
996 if (n - snum >= child_pos) {
997 /* new items don't fall into S_new */
998 /* store the delimiting key for the next level */
999 /* new_insert_key = (n - snum)'th key in S[h] */
1000 memcpy(&new_insert_key, B_N_PDELIM_KEY(tbSh, n - snum),
1001 KEY_SIZE);
1002 /* last parameter is del_par */
1003 internal_move_pointers_items(&dest_bi, &src_bi,
1004 LAST_TO_FIRST, snum, 0);
1005 /* internal_move_pointers_items(S_new, tbSh, LAST_TO_FIRST, snum, 0); */
1006 } else if (n + insert_num - snum < child_pos) {
1007 /* all new items fall into S_new */
1008 /* store the delimiting key for the next level */
1009 /* new_insert_key = (n + insert_item - snum)'th key in S[h] */
1010 memcpy(&new_insert_key,
1011 B_N_PDELIM_KEY(tbSh, n + insert_num - snum),
1012 KEY_SIZE);
1013 /* last parameter is del_par */
1014 internal_move_pointers_items(&dest_bi, &src_bi,
1015 LAST_TO_FIRST,
1016 snum - insert_num, 0);
1017 /* internal_move_pointers_items(S_new,tbSh,1,snum - insert_num,0); */
1019 /* insert insert_num keys and node-pointers into S_new */
1020 internal_insert_childs(&dest_bi,
1021 /*S_new,tb->S[h-1]->b_next, */
1022 child_pos - n - insert_num +
1023 snum - 1,
1024 insert_num, insert_key,
1025 insert_ptr);
1027 insert_num = 0;
1028 } else {
1029 struct disk_child *dc;
1031 /* some items fall into S_new, but some don't fall */
1032 /* last parameter is del_par */
1033 internal_move_pointers_items(&dest_bi, &src_bi,
1034 LAST_TO_FIRST,
1035 n - child_pos + 1, 1);
1036 /* internal_move_pointers_items(S_new,tbSh,1,n - child_pos + 1,1); */
1037 /* calculate number of new items that fall into S_new */
1038 k = snum - n + child_pos - 1;
1040 internal_insert_childs(&dest_bi, /*S_new, */ 0, k,
1041 insert_key + 1, insert_ptr + 1);
1043 /* new_insert_key = insert_key[insert_num - k - 1] */
1044 memcpy(&new_insert_key, insert_key + insert_num - k - 1,
1045 KEY_SIZE);
1046 /* replace first node-ptr in S_new by node-ptr to insert_ptr[insert_num-k-1] */
1048 dc = B_N_CHILD(S_new, 0);
1049 put_dc_size(dc,
1050 (MAX_CHILD_SIZE
1051 (insert_ptr[insert_num - k - 1]) -
1052 B_FREE_SPACE(insert_ptr
1053 [insert_num - k - 1])));
1054 put_dc_block_number(dc,
1055 insert_ptr[insert_num - k -
1056 1]->b_blocknr);
1058 do_balance_mark_internal_dirty(tb, S_new, 0);
1060 insert_num -= (k + 1);
1062 /* new_insert_ptr = node_pointer to S_new */
1063 new_insert_ptr = S_new;
1065 RFALSE(!buffer_journaled(S_new) || buffer_journal_dirty(S_new)
1066 || buffer_dirty(S_new), "cm-00001: bad S_new (%b)",
1067 S_new);
1069 // S_new is released in unfix_nodes
1072 n = B_NR_ITEMS(tbSh); /*number of items in S[h] */
1074 if (0 <= child_pos && child_pos <= n && insert_num > 0) {
1075 bi.tb = tb;
1076 bi.bi_bh = tbSh;
1077 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, h);
1078 bi.bi_position = PATH_H_POSITION(tb->tb_path, h + 1);
1079 internal_insert_childs(&bi, /*tbSh, */
1080 /* ( tb->S[h-1]->b_parent == tb->S[h] ) ? tb->S[h-1]->b_next : tb->S[h]->b_child->b_next, */
1081 child_pos, insert_num, insert_key,
1082 insert_ptr);
1085 memcpy(new_insert_key_addr, &new_insert_key, KEY_SIZE);
1086 insert_ptr[0] = new_insert_ptr;
1088 return order;