2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997, 1998
5 * Sleepycat Software. All rights reserved.
8 * Copyright (c) 1990, 1993, 1994
9 * Margo Seltzer. All rights reserved.
12 * Copyright (c) 1990, 1993, 1994
13 * The Regents of the University of California. All rights reserved.
15 * This code is derived from software contributed to Berkeley by
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
26 * 3. All advertising materials mentioning features or use of this software
27 * must display the following acknowledgement:
28 * This product includes software developed by the University of
29 * California, Berkeley and its contributors.
30 * 4. Neither the name of the University nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50 static const char sccsid
[] = "@(#)hash_page.c 10.55 (Sleepycat) 1/3/99";
57 * Page manipulation for hashing package.
69 #ifndef NO_SYSTEM_INCLUDES
70 #include <sys/types.h>
80 static int __ham_lock_bucket
__P((DBC
*, db_lockmode_t
));
83 static void __account_page(DB
*, db_pgno_t
, int);
87 * PUBLIC: int __ham_item __P((DBC *, db_lockmode_t));
100 hcp
= (HASH_CURSOR
*)dbc
->internal
;
102 if (F_ISSET(hcp
, H_DELETED
))
104 F_CLR(hcp
, H_OK
| H_NOMORE
);
106 /* Check if we need to get a page for this cursor. */
107 if ((ret
= __ham_get_cpage(dbc
, mode
)) != 0)
110 /* Check if we are looking for space in which to insert an item. */
111 if (hcp
->seek_size
&& hcp
->seek_found_page
== PGNO_INVALID
112 && hcp
->seek_size
< P_FREESPACE(hcp
->pagep
))
113 hcp
->seek_found_page
= hcp
->pgno
;
115 /* Check if we need to go on to the next page. */
116 if (F_ISSET(hcp
, H_ISDUP
) && hcp
->dpgno
== PGNO_INVALID
)
118 * ISDUP is set, and offset is at the beginning of the datum.
119 * We need to grab the length of the datum, then set the datum
120 * pointer to be the beginning of the datum.
122 memcpy(&hcp
->dup_len
,
123 HKEYDATA_DATA(H_PAIRDATA(hcp
->pagep
, hcp
->bndx
)) +
124 hcp
->dup_off
, sizeof(db_indx_t
));
125 else if (F_ISSET(hcp
, H_ISDUP
)) {
126 /* Make sure we're not about to run off the page. */
127 if (hcp
->dpagep
== NULL
&& (ret
= __ham_get_page(dbp
,
128 hcp
->dpgno
, &hcp
->dpagep
)) != 0)
131 if (hcp
->dndx
>= NUM_ENT(hcp
->dpagep
)) {
132 if (NEXT_PGNO(hcp
->dpagep
) == PGNO_INVALID
) {
133 if (F_ISSET(hcp
, H_DUPONLY
)) {
135 F_SET(hcp
, H_NOMORE
);
138 if ((ret
= __ham_put_page(dbp
,
139 hcp
->dpagep
, 0)) != 0)
143 hcp
->dpgno
= PGNO_INVALID
;
144 hcp
->dndx
= NDX_INVALID
;
146 } else if ((ret
= __ham_next_cpage(dbc
,
147 NEXT_PGNO(hcp
->dpagep
), 0, H_ISDUP
)) != 0)
152 if (hcp
->bndx
>= (db_indx_t
)H_NUMPAIRS(hcp
->pagep
)) {
153 /* Fetch next page. */
154 if (NEXT_PGNO(hcp
->pagep
) == PGNO_INVALID
) {
155 F_SET(hcp
, H_NOMORE
);
156 if (hcp
->dpagep
!= NULL
&&
157 (ret
= __ham_put_page(dbp
, hcp
->dpagep
, 0)) != 0)
159 hcp
->dpgno
= PGNO_INVALID
;
160 return (DB_NOTFOUND
);
162 next_pgno
= NEXT_PGNO(hcp
->pagep
);
164 if ((ret
= __ham_next_cpage(dbc
, next_pgno
, 0, 0)) != 0)
173 * PUBLIC: int __ham_item_reset __P((DBC *));
176 __ham_item_reset(dbc
)
185 hcp
= (HASH_CURSOR
*)dbc
->internal
;
186 if (hcp
->pagep
!= NULL
)
187 ret
= __ham_put_page(dbp
, hcp
->pagep
, 0);
188 if (ret
== 0 && hcp
->dpagep
!= NULL
)
189 ret
= __ham_put_page(dbp
, hcp
->dpagep
, 0);
191 __ham_item_init(hcp
);
196 * PUBLIC: void __ham_item_init __P((HASH_CURSOR *));
203 * If this cursor still holds any locks, we must
204 * release them if we are not running with transactions.
206 if (hcp
->lock
&& hcp
->dbc
->txn
== NULL
)
207 (void)lock_put(hcp
->dbc
->dbp
->dbenv
->lk_info
, hcp
->lock
);
210 * The following fields must *not* be initialized here
211 * because they may have meaning across inits.
212 * hlock, hdr, split_buf, stats
214 hcp
->bucket
= BUCKET_INVALID
;
215 hcp
->lbucket
= BUCKET_INVALID
;
218 hcp
->pgno
= PGNO_INVALID
;
219 hcp
->bndx
= NDX_INVALID
;
221 hcp
->dpgno
= PGNO_INVALID
;
222 hcp
->dndx
= NDX_INVALID
;
227 hcp
->seek_found_page
= PGNO_INVALID
;
232 * PUBLIC: int __ham_item_done __P((DBC *, int));
235 __ham_item_done(dbc
, dirty
)
244 hcp
= (HASH_CURSOR
*)dbc
->internal
;
248 ret
= __ham_put_page(dbp
, hcp
->pagep
,
249 dirty
&& hcp
->dpagep
== NULL
);
253 t_ret
= __ham_put_page(dbp
, hcp
->dpagep
, dirty
);
256 if (ret
== 0 && t_ret
!= 0)
260 * We don't throw out the page number since we might want to
261 * continue getting on this page.
263 return (ret
!= 0 ? ret
: t_ret
);
267 * Returns the last item in a bucket.
269 * PUBLIC: int __ham_item_last __P((DBC *, db_lockmode_t));
272 __ham_item_last(dbc
, mode
)
279 hcp
= (HASH_CURSOR
*)dbc
->internal
;
280 if ((ret
= __ham_item_reset(dbc
)) != 0)
283 hcp
->bucket
= hcp
->hdr
->max_bucket
;
285 return (__ham_item_prev(dbc
, mode
));
289 * PUBLIC: int __ham_item_first __P((DBC *, db_lockmode_t));
292 __ham_item_first(dbc
, mode
)
299 hcp
= (HASH_CURSOR
*)dbc
->internal
;
300 if ((ret
= __ham_item_reset(dbc
)) != 0)
304 return (__ham_item_next(dbc
, mode
));
309 * Returns a pointer to key/data pair on a page. In the case of
310 * bigkeys, just returns the page number and index of the bigkey
313 * PUBLIC: int __ham_item_prev __P((DBC *, db_lockmode_t));
316 __ham_item_prev(dbc
, mode
)
326 hcp
= (HASH_CURSOR
*)dbc
->internal
;
328 * There are N cases for backing up in a hash file.
329 * Case 1: In the middle of a page, no duplicates, just dec the index.
330 * Case 2: In the middle of a duplicate set, back up one.
331 * Case 3: At the beginning of a duplicate set, get out of set and
332 * back up to next key.
333 * Case 4: At the beginning of a page; go to previous page.
334 * Case 5: At the beginning of a bucket; go to prev bucket.
336 F_CLR(hcp
, H_OK
| H_NOMORE
| H_DELETED
);
339 * First handle the duplicates. Either you'll get the key here
340 * or you'll exit the duplicate set and drop into the code below
341 * to handle backing up through keys.
343 if (F_ISSET(hcp
, H_ISDUP
)) {
344 if (hcp
->dpgno
== PGNO_INVALID
) {
345 /* Duplicates are on-page. */
346 if (hcp
->dup_off
!= 0)
347 if ((ret
= __ham_get_cpage(dbc
, mode
)) != 0)
352 memcpy(&h
->dup_len
, HKEYDATA_DATA(
353 H_PAIRDATA(h
->pagep
, h
->bndx
))
354 + h
->dup_off
- sizeof(db_indx_t
),
357 DUP_SIZE(hcp
->dup_len
);
359 return (__ham_item(dbc
, mode
));
361 } else if (hcp
->dndx
> 0) { /* Duplicates are off-page. */
363 return (__ham_item(dbc
, mode
));
364 } else if ((ret
= __ham_get_cpage(dbc
, mode
)) != 0)
366 else if (PREV_PGNO(hcp
->dpagep
) == PGNO_INVALID
) {
367 if (F_ISSET(hcp
, H_DUPONLY
)) {
369 F_SET(hcp
, H_NOMORE
);
372 F_CLR(hcp
, H_ISDUP
); /* End of dups */
373 hcp
->dpgno
= PGNO_INVALID
;
374 if (hcp
->dpagep
!= NULL
)
375 (void)__ham_put_page(dbp
,
379 } else if ((ret
= __ham_next_cpage(dbc
,
380 PREV_PGNO(hcp
->dpagep
), 0, H_ISDUP
)) != 0)
383 hcp
->dndx
= NUM_ENT(hcp
->pagep
) - 1;
384 return (__ham_item(dbc
, mode
));
389 * If we get here, we are not in a duplicate set, and just need
390 * to back up the cursor. There are still three cases:
391 * midpage, beginning of page, beginning of bucket.
394 if (F_ISSET(hcp
, H_DUPONLY
)) {
396 F_SET(hcp
, H_NOMORE
);
400 if (hcp
->bndx
== 0) { /* Beginning of page. */
401 if ((ret
= __ham_get_cpage(dbc
, mode
)) != 0)
403 hcp
->pgno
= PREV_PGNO(hcp
->pagep
);
404 if (hcp
->pgno
== PGNO_INVALID
) {
405 /* Beginning of bucket. */
406 F_SET(hcp
, H_NOMORE
);
407 return (DB_NOTFOUND
);
409 __ham_next_cpage(dbc
, hcp
->pgno
, 0, 0)) != 0)
412 hcp
->bndx
= H_NUMPAIRS(hcp
->pagep
);
416 * Either we've got the cursor set up to be decremented, or we
417 * have to find the end of a bucket.
419 if (hcp
->bndx
== NDX_INVALID
) {
420 if (hcp
->pagep
== NULL
)
421 next_pgno
= BUCKET_TO_PAGE(hcp
, hcp
->bucket
);
426 if ((ret
= __ham_next_cpage(dbc
, next_pgno
, 0, 0)) != 0)
428 got_page
: next_pgno
= NEXT_PGNO(hcp
->pagep
);
429 hcp
->bndx
= H_NUMPAIRS(hcp
->pagep
);
430 } while (next_pgno
!= PGNO_INVALID
);
432 if (hcp
->bndx
== 0) {
433 /* Bucket was empty. */
434 F_SET(hcp
, H_NOMORE
);
435 return (DB_NOTFOUND
);
441 return (__ham_item(dbc
, mode
));
445 * Sets the cursor to the next key/data pair on a page.
447 * PUBLIC: int __ham_item_next __P((DBC *, db_lockmode_t));
450 __ham_item_next(dbc
, mode
)
456 hcp
= (HASH_CURSOR
*)dbc
->internal
;
458 * Deleted on-page duplicates are a weird case. If we delete the last
459 * one, then our cursor is at the very end of a duplicate set and
460 * we actually need to go on to the next key.
462 if (F_ISSET(hcp
, H_DELETED
)) {
463 if (hcp
->bndx
!= NDX_INVALID
&&
464 F_ISSET(hcp
, H_ISDUP
) &&
465 hcp
->dpgno
== PGNO_INVALID
&&
466 hcp
->dup_tlen
== hcp
->dup_off
) {
467 if (F_ISSET(hcp
, H_DUPONLY
)) {
469 F_SET(hcp
, H_NOMORE
);
473 hcp
->dpgno
= PGNO_INVALID
;
476 } else if (!F_ISSET(hcp
, H_ISDUP
) &&
477 F_ISSET(hcp
, H_DUPONLY
)) {
479 F_SET(hcp
, H_NOMORE
);
482 F_CLR(hcp
, H_DELETED
);
483 } else if (hcp
->bndx
== NDX_INVALID
) {
485 hcp
->dpgno
= PGNO_INVALID
;
487 } else if (F_ISSET(hcp
, H_ISDUP
) && hcp
->dpgno
!= PGNO_INVALID
)
489 else if (F_ISSET(hcp
, H_ISDUP
)) {
490 if (hcp
->dup_off
+ DUP_SIZE(hcp
->dup_len
) >=
491 hcp
->dup_tlen
&& F_ISSET(hcp
, H_DUPONLY
)) {
493 F_SET(hcp
, H_NOMORE
);
497 hcp
->dup_off
+= DUP_SIZE(hcp
->dup_len
);
498 if (hcp
->dup_off
>= hcp
->dup_tlen
) {
500 hcp
->dpgno
= PGNO_INVALID
;
503 } else if (F_ISSET(hcp
, H_DUPONLY
)) {
505 F_SET(hcp
, H_NOMORE
);
510 return (__ham_item(dbc
, mode
));
514 * PUBLIC: void __ham_putitem __P((PAGE *p, const DBT *, int));
516 * This is a little bit sleazy in that we're overloading the meaning
517 * of the H_OFFPAGE type here. When we recover deletes, we have the
518 * entire entry instead of having only the DBT, so we'll pass type
519 * H_OFFPAGE to mean, "copy the whole entry" as opposed to constructing
520 * an H_KEYDATA around it.
523 __ham_putitem(p
, dbt
, type
)
532 /* Put the item element on the page. */
533 if (type
== H_OFFPAGE
) {
534 off
= HOFFSET(p
) - dbt
->size
;
535 HOFFSET(p
) = p
->inp
[n
] = off
;
536 memcpy(P_ENTRY(p
, n
), dbt
->data
, dbt
->size
);
538 off
= HOFFSET(p
) - HKEYDATA_SIZE(dbt
->size
);
539 HOFFSET(p
) = p
->inp
[n
] = off
;
540 PUT_HKEYDATA(P_ENTRY(p
, n
), dbt
->data
, dbt
->size
, type
);
543 /* Adjust page info. */
548 * PUBLIC: void __ham_reputpair
549 * PUBLIC: __P((PAGE *p, u_int32_t, u_int32_t, const DBT *, const DBT *));
551 * This is a special case to restore a key/data pair to its original
552 * location during recovery. We are guaranteed that the pair fits
553 * on the page and is not the last pair on the page (because if it's
554 * the last pair, the normal insert works).
557 __ham_reputpair(p
, psize
, ndx
, key
, data
)
559 u_int32_t psize
, ndx
;
560 const DBT
*key
, *data
;
562 db_indx_t i
, movebytes
, newbytes
;
565 /* First shuffle the existing items up on the page. */
567 (ndx
== 0 ? psize
: p
->inp
[H_DATAINDEX(ndx
- 1)]) - HOFFSET(p
);
568 newbytes
= key
->size
+ data
->size
;
569 from
= (u_int8_t
*)p
+ HOFFSET(p
);
570 memmove(from
- newbytes
, from
, movebytes
);
573 * Adjust the indices and move them up 2 spaces. Note that we
574 * have to check the exit condition inside the loop just in case
575 * we are dealing with index 0 (db_indx_t's are unsigned).
577 for (i
= NUM_ENT(p
) - 1; ; i
-- ) {
578 p
->inp
[i
+ 2] = p
->inp
[i
] - newbytes
;
579 if (i
== H_KEYINDEX(ndx
))
583 /* Put the key and data on the page. */
584 p
->inp
[H_KEYINDEX(ndx
)] =
585 (ndx
== 0 ? psize
: p
->inp
[H_DATAINDEX(ndx
- 1)]) - key
->size
;
586 p
->inp
[H_DATAINDEX(ndx
)] = p
->inp
[H_KEYINDEX(ndx
)] - data
->size
;
587 memcpy(P_ENTRY(p
, H_KEYINDEX(ndx
)), key
->data
, key
->size
);
588 memcpy(P_ENTRY(p
, H_DATAINDEX(ndx
)), data
->data
, data
->size
);
590 /* Adjust page info. */
591 HOFFSET(p
) -= newbytes
;
597 * PUBLIC: int __ham_del_pair __P((DBC *, int));
600 __ham_del_pair(dbc
, reclaim_page
)
606 DBT data_dbt
, key_dbt
;
608 DB_LSN new_lsn
, *n_lsn
, tmp_lsn
;
611 db_pgno_t chg_pgno
, pgno
;
615 hcp
= (HASH_CURSOR
*)dbc
->internal
;
619 if (hcp
->pagep
== NULL
&&
620 (ret
= __ham_get_page(dbp
, hcp
->pgno
, &hcp
->pagep
)) != 0)
626 * We optimize for the normal case which is when neither the key nor
627 * the data are large. In this case, we write a single log record
628 * and do the delete. If either is large, we'll call __big_delete
629 * to remove the big item and then update the page to remove the
630 * entry referring to the big item.
633 if (HPAGE_PTYPE(H_PAIRKEY(p
, ndx
)) == H_OFFPAGE
) {
634 memcpy(&pgno
, HOFFPAGE_PGNO(P_ENTRY(p
, H_KEYINDEX(ndx
))),
636 ret
= __db_doff(dbc
, pgno
, __ham_del_page
);
640 switch (HPAGE_PTYPE(H_PAIRDATA(p
, ndx
))) {
643 HOFFPAGE_PGNO(P_ENTRY(p
, H_DATAINDEX(ndx
))),
645 ret
= __db_doff(dbc
, pgno
, __ham_del_page
);
649 HOFFDUP_PGNO(P_ENTRY(p
, H_DATAINDEX(ndx
))),
651 ret
= __db_ddup(dbc
, pgno
, __ham_del_page
);
656 * If we delete a pair that is/was a duplicate, then
657 * we had better clear the flag so that we update the
658 * cursor appropriately.
667 /* Now log the delete off this page. */
668 if (DB_LOGGING(dbc
)) {
669 key_dbt
.data
= P_ENTRY(p
, H_KEYINDEX(ndx
));
671 LEN_HITEM(p
, hcp
->hdr
->pagesize
, H_KEYINDEX(ndx
));
672 data_dbt
.data
= P_ENTRY(p
, H_DATAINDEX(ndx
));
674 LEN_HITEM(p
, hcp
->hdr
->pagesize
, H_DATAINDEX(ndx
));
676 if ((ret
= __ham_insdel_log(dbenv
->lg_info
,
677 dbc
->txn
, &new_lsn
, 0, DELPAIR
,
678 dbp
->log_fileid
, PGNO(p
), (u_int32_t
)ndx
,
679 &LSN(p
), &key_dbt
, &data_dbt
)) != 0)
682 /* Move lsn onto page. */
686 __ham_dpair(dbp
, p
, ndx
);
689 * If we are locking, we will not maintain this, because it is
691 * XXX perhaps we can retain incremental numbers and apply them
694 if (!F_ISSET(dbp
, DB_AM_LOCKING
))
698 * If we need to reclaim the page, then check if the page is empty.
699 * There are two cases. If it's empty and it's not the first page
700 * in the bucket (i.e., the bucket page) then we can simply remove
701 * it. If it is the first chain in the bucket, then we need to copy
702 * the second page into it and remove the second page.
704 if (reclaim_page
&& NUM_ENT(p
) == 0 && PREV_PGNO(p
) == PGNO_INVALID
&&
705 NEXT_PGNO(p
) != PGNO_INVALID
) {
706 PAGE
*n_pagep
, *nn_pagep
;
710 * First page in chain is empty and we know that there
711 * are more pages in the chain.
714 __ham_get_page(dbp
, NEXT_PGNO(p
), &n_pagep
)) != 0)
717 if (NEXT_PGNO(n_pagep
) != PGNO_INVALID
) {
719 __ham_get_page(dbp
, NEXT_PGNO(n_pagep
),
721 (void) __ham_put_page(dbp
, n_pagep
, 0);
726 if (DB_LOGGING(dbc
)) {
727 key_dbt
.data
= n_pagep
;
728 key_dbt
.size
= hcp
->hdr
->pagesize
;
729 if ((ret
= __ham_copypage_log(dbenv
->lg_info
,
730 dbc
->txn
, &new_lsn
, 0, dbp
->log_fileid
, PGNO(p
),
731 &LSN(p
), PGNO(n_pagep
), &LSN(n_pagep
),
733 NEXT_PGNO(n_pagep
) == PGNO_INVALID
? NULL
:
734 &LSN(nn_pagep
), &key_dbt
)) != 0)
737 /* Move lsn onto page. */
738 LSN(p
) = new_lsn
; /* Structure assignment. */
739 LSN(n_pagep
) = new_lsn
;
740 if (NEXT_PGNO(n_pagep
) != PGNO_INVALID
)
741 LSN(nn_pagep
) = new_lsn
;
743 if (NEXT_PGNO(n_pagep
) != PGNO_INVALID
) {
744 PREV_PGNO(nn_pagep
) = PGNO(p
);
745 (void)__ham_put_page(dbp
, nn_pagep
, 1);
750 memcpy(p
, n_pagep
, hcp
->hdr
->pagesize
);
753 PREV_PGNO(p
) = PGNO_INVALID
;
756 * Cursor is advanced to the beginning of the next page.
760 F_SET(hcp
, H_DELETED
);
762 if ((ret
= __ham_dirty_page(dbp
, p
)) != 0 ||
763 (ret
= __ham_del_page(dbc
, n_pagep
)) != 0)
765 } else if (reclaim_page
&&
766 NUM_ENT(p
) == 0 && PREV_PGNO(p
) != PGNO_INVALID
) {
767 PAGE
*n_pagep
, *p_pagep
;
770 __ham_get_page(dbp
, PREV_PGNO(p
), &p_pagep
)) != 0)
773 if (NEXT_PGNO(p
) != PGNO_INVALID
) {
774 if ((ret
= __ham_get_page(dbp
,
775 NEXT_PGNO(p
), &n_pagep
)) != 0) {
776 (void)__ham_put_page(dbp
, p_pagep
, 0);
779 n_lsn
= &LSN(n_pagep
);
785 NEXT_PGNO(p_pagep
) = NEXT_PGNO(p
);
787 PREV_PGNO(n_pagep
) = PGNO(p_pagep
);
789 if (DB_LOGGING(dbc
)) {
790 if ((ret
= __ham_newpage_log(dbenv
->lg_info
,
791 dbc
->txn
, &new_lsn
, 0, DELOVFL
,
792 dbp
->log_fileid
, PREV_PGNO(p
), &LSN(p_pagep
),
793 PGNO(p
), &LSN(p
), NEXT_PGNO(p
), n_lsn
)) != 0)
796 /* Move lsn onto page. */
797 LSN(p_pagep
) = new_lsn
; /* Structure assignment. */
799 LSN(n_pagep
) = new_lsn
;
802 hcp
->pgno
= NEXT_PGNO(p
);
805 * Since we are about to delete the cursor page and we have
806 * just moved the cursor, we need to make sure that the
807 * old page pointer isn't left hanging around in the cursor.
811 ret
= __ham_del_page(dbc
, p
);
812 if ((tret
= __ham_put_page(dbp
, p_pagep
, 1)) != 0 &&
815 if (n_pagep
!= NULL
&&
816 (tret
= __ham_put_page(dbp
, n_pagep
, 1)) != 0 &&
823 * Mark item deleted so that we don't try to return it, and
824 * so that we update the cursor correctly on the next call
827 F_SET(hcp
, H_DELETED
);
828 chg_pgno
= hcp
->pgno
;
829 ret
= __ham_dirty_page(dbp
, p
);
831 __ham_c_update(hcp
, chg_pgno
, 0, 0, 0);
834 * Since we just deleted a pair from the master page, anything
835 * in hcp->dpgno should be cleared.
837 hcp
->dpgno
= PGNO_INVALID
;
845 * Given the key data indicated by the cursor, replace part/all of it
846 * according to the fields in the dbt.
848 * PUBLIC: int __ham_replpair __P((DBC *, DBT *, u_int32_t));
851 __ham_replpair(dbc
, dbt
, make_dup
)
858 DBT old_dbt
, tdata
, tmp
;
860 int32_t change
; /* XXX: Possible overflow. */
862 int is_big
, ret
, type
;
863 u_int8_t
*beg
, *dest
, *end
, *hk
, *src
;
866 * Big item replacements are handled in generic code.
867 * Items that fit on the current page fall into 4 classes.
868 * 1. On-page element, same size
869 * 2. On-page element, new is bigger (fits)
870 * 3. On-page element, new is bigger (does not fit)
871 * 4. On-page element, old is bigger
872 * Numbers 1, 2, and 4 are essentially the same (and should
873 * be the common case). We handle case 3 as a delete and
877 hcp
= (HASH_CURSOR
*)dbc
->internal
;
880 * We need to compute the number of bytes that we are adding or
881 * removing from the entry. Normally, we can simply substract
882 * the number of bytes we are replacing (dbt->dlen) from the
883 * number of bytes we are inserting (dbt->size). However, if
884 * we are doing a partial put off the end of a record, then this
885 * formula doesn't work, because we are essentially adding
888 change
= dbt
->size
- dbt
->dlen
;
890 hk
= H_PAIRDATA(hcp
->pagep
, hcp
->bndx
);
891 is_big
= HPAGE_PTYPE(hk
) == H_OFFPAGE
;
894 memcpy(&len
, HOFFPAGE_TLEN(hk
), sizeof(u_int32_t
));
896 len
= LEN_HKEYDATA(hcp
->pagep
,
897 dbp
->pgsize
, H_DATAINDEX(hcp
->bndx
));
899 if (dbt
->doff
+ dbt
->dlen
> len
)
900 change
+= dbt
->doff
+ dbt
->dlen
- len
;
903 if (change
> (int32_t)P_FREESPACE(hcp
->pagep
) || is_big
) {
905 * Case 3 -- two subcases.
906 * A. This is not really a partial operation, but an overwrite.
907 * Simple del and add works.
908 * B. This is a partial and we need to construct the data that
909 * we are really inserting (yuck).
910 * In both cases, we need to grab the key off the page (in
911 * some cases we could do this outside of this routine; for
912 * cleanliness we do it here. If you happen to be on a big
913 * key, this could be a performance hit).
916 F_SET(&tmp
, DB_DBT_MALLOC
| DB_DBT_INTERNAL
);
918 __db_ret(dbp
, hcp
->pagep
, H_KEYINDEX(hcp
->bndx
),
919 &tmp
, &dbc
->rkey
.data
, &dbc
->rkey
.size
)) != 0)
922 if (dbt
->doff
== 0 && dbt
->dlen
== len
) {
923 ret
= __ham_del_pair(dbc
, 0);
925 ret
= __ham_add_el(dbc
, &tmp
, dbt
, H_KEYDATA
);
926 } else { /* Case B */
927 type
= HPAGE_PTYPE(hk
) != H_OFFPAGE
?
928 HPAGE_PTYPE(hk
) : H_KEYDATA
;
930 F_SET(&tdata
, DB_DBT_MALLOC
| DB_DBT_INTERNAL
);
932 if ((ret
= __db_ret(dbp
, hcp
->pagep
,
933 H_DATAINDEX(hcp
->bndx
), &tdata
, &dbc
->rdata
.data
,
934 &dbc
->rdata
.size
)) != 0)
937 /* Now we can delete the item. */
938 if ((ret
= __ham_del_pair(dbc
, 0)) != 0) {
939 __os_free(tdata
.data
, tdata
.size
);
943 /* Now shift old data around to make room for new. */
945 if ((ret
= __os_realloc(&tdata
.data
,
946 tdata
.size
+ change
)) != 0)
948 memset((u_int8_t
*)tdata
.data
+ tdata
.size
,
951 end
= (u_int8_t
*)tdata
.data
+ tdata
.size
;
953 src
= (u_int8_t
*)tdata
.data
+ dbt
->doff
+ dbt
->dlen
;
954 if (src
< end
&& tdata
.size
> dbt
->doff
+ dbt
->dlen
) {
955 len
= tdata
.size
- dbt
->doff
- dbt
->dlen
;
957 memmove(dest
, src
, len
);
959 memcpy((u_int8_t
*)tdata
.data
+ dbt
->doff
,
960 dbt
->data
, dbt
->size
);
961 tdata
.size
+= change
;
963 /* Now add the pair. */
964 ret
= __ham_add_el(dbc
, &tmp
, &tdata
, type
);
965 __os_free(tdata
.data
, tdata
.size
);
967 err
: __os_free(tmp
.data
, tmp
.size
);
972 * Set up pointer into existing data. Do it before the log
973 * message so we can use it inside of the log setup.
975 beg
= HKEYDATA_DATA(H_PAIRDATA(hcp
->pagep
, hcp
->bndx
));
979 * If we are going to have to move bytes at all, figure out
980 * all the parameters here. Then log the call before moving
983 if (DB_LOGGING(dbc
)) {
985 old_dbt
.size
= dbt
->dlen
;
986 if ((ret
= __ham_replace_log(dbp
->dbenv
->lg_info
,
987 dbc
->txn
, &new_lsn
, 0, dbp
->log_fileid
, PGNO(hcp
->pagep
),
988 (u_int32_t
)H_DATAINDEX(hcp
->bndx
), &LSN(hcp
->pagep
),
989 (u_int32_t
)dbt
->doff
, &old_dbt
, dbt
, make_dup
)) != 0)
992 LSN(hcp
->pagep
) = new_lsn
; /* Structure assignment. */
995 __ham_onpage_replace(hcp
->pagep
, dbp
->pgsize
,
996 (u_int32_t
)H_DATAINDEX(hcp
->bndx
), (int32_t)dbt
->doff
, change
, dbt
);
1002 * Replace data on a page with new data, possibly growing or shrinking what's
1003 * there. This is called on two different occasions. On one (from replpair)
1004 * we are interested in changing only the data. On the other (from recovery)
1005 * we are replacing the entire data (header and all) with a new element. In
1006 * the latter case, the off argument is negative.
1007 * pagep: the page that we're changing
1008 * ndx: page index of the element that is growing/shrinking.
1009 * off: Offset at which we are beginning the replacement.
1010 * change: the number of bytes (+ or -) that the element is growing/shrinking.
1011 * dbt: the new data that gets written at beg.
1012 * PUBLIC: void __ham_onpage_replace __P((PAGE *, size_t, u_int32_t, int32_t,
1013 * PUBLIC: int32_t, DBT *));
1016 __ham_onpage_replace(pagep
, pgsize
, ndx
, off
, change
, dbt
)
1026 u_int8_t
*src
, *dest
;
1031 src
= (u_int8_t
*)(pagep
) + HOFFSET(pagep
);
1033 len
= pagep
->inp
[ndx
] - HOFFSET(pagep
);
1034 else if ((u_int32_t
)off
>= LEN_HKEYDATA(pagep
, pgsize
, ndx
)) {
1035 len
= HKEYDATA_DATA(P_ENTRY(pagep
, ndx
)) +
1036 LEN_HKEYDATA(pagep
, pgsize
, ndx
) - src
;
1039 len
= (HKEYDATA_DATA(P_ENTRY(pagep
, ndx
)) + off
) - src
;
1040 dest
= src
- change
;
1041 memmove(dest
, src
, len
);
1043 memset(dest
+ len
, 0, change
);
1045 /* Now update the indices. */
1046 for (i
= ndx
; i
< NUM_ENT(pagep
); i
++)
1047 pagep
->inp
[i
] -= change
;
1048 HOFFSET(pagep
) -= change
;
1051 memcpy(HKEYDATA_DATA(P_ENTRY(pagep
, ndx
)) + off
,
1052 dbt
->data
, dbt
->size
);
1054 memcpy(P_ENTRY(pagep
, ndx
), dbt
->data
, dbt
->size
);
1058 * PUBLIC: int __ham_split_page __P((DBC *, u_int32_t, u_int32_t));
1061 __ham_split_page(dbc
, obucket
, nbucket
)
1063 u_int32_t obucket
, nbucket
;
1070 PAGE
**pp
, *old_pagep
, *temp_pagep
, *new_pagep
;
1072 db_pgno_t bucket_pgno
, next_pgno
;
1073 u_int32_t big_len
, len
;
1078 hcp
= (HASH_CURSOR
*)dbc
->internal
;
1080 temp_pagep
= old_pagep
= new_pagep
= NULL
;
1082 bucket_pgno
= BUCKET_TO_PAGE(hcp
, obucket
);
1083 if ((ret
= __ham_get_page(dbp
, bucket_pgno
, &old_pagep
)) != 0)
1085 if ((ret
= __ham_new_page(dbp
, BUCKET_TO_PAGE(hcp
, nbucket
), P_HASH
,
1089 temp_pagep
= hcp
->split_buf
;
1090 memcpy(temp_pagep
, old_pagep
, hcp
->hdr
->pagesize
);
1092 if (DB_LOGGING(dbc
)) {
1093 page_dbt
.size
= hcp
->hdr
->pagesize
;
1094 page_dbt
.data
= old_pagep
;
1095 if ((ret
= __ham_splitdata_log(dbenv
->lg_info
,
1096 dbc
->txn
, &new_lsn
, 0, dbp
->log_fileid
, SPLITOLD
,
1097 PGNO(old_pagep
), &page_dbt
, &LSN(old_pagep
))) != 0)
1101 P_INIT(old_pagep
, hcp
->hdr
->pagesize
, PGNO(old_pagep
), PGNO_INVALID
,
1102 PGNO_INVALID
, 0, P_HASH
);
1104 if (DB_LOGGING(dbc
))
1105 LSN(old_pagep
) = new_lsn
; /* Structure assignment. */
1110 while (temp_pagep
!= NULL
) {
1111 for (n
= 0; n
< (db_indx_t
)H_NUMPAIRS(temp_pagep
); n
++) {
1113 __db_ret(dbp
, temp_pagep
, H_KEYINDEX(n
),
1114 &key
, &big_buf
, &big_len
)) != 0)
1117 if (__ham_call_hash(hcp
, key
.data
, key
.size
)
1124 * Figure out how many bytes we need on the new
1125 * page to store the key/data pair.
1128 len
= LEN_HITEM(temp_pagep
, hcp
->hdr
->pagesize
,
1130 LEN_HITEM(temp_pagep
, hcp
->hdr
->pagesize
,
1132 2 * sizeof(db_indx_t
);
1134 if (P_FREESPACE(*pp
) < len
) {
1135 if (DB_LOGGING(dbc
)) {
1136 page_dbt
.size
= hcp
->hdr
->pagesize
;
1137 page_dbt
.data
= *pp
;
1138 if ((ret
= __ham_splitdata_log(
1139 dbenv
->lg_info
, dbc
->txn
,
1140 &new_lsn
, 0, dbp
->log_fileid
,
1141 SPLITNEW
, PGNO(*pp
), &page_dbt
,
1147 __ham_add_ovflpage(dbc
, *pp
, 1, pp
)) != 0)
1150 __ham_copy_item(dbp
->pgsize
,
1151 temp_pagep
, H_KEYINDEX(n
), *pp
);
1152 __ham_copy_item(dbp
->pgsize
,
1153 temp_pagep
, H_DATAINDEX(n
), *pp
);
1155 next_pgno
= NEXT_PGNO(temp_pagep
);
1157 /* Clear temp_page; if it's a link overflow page, free it. */
1158 if (PGNO(temp_pagep
) != bucket_pgno
&& (ret
=
1159 __ham_del_page(dbc
, temp_pagep
)) != 0)
1162 if (next_pgno
== PGNO_INVALID
)
1165 __ham_get_page(dbp
, next_pgno
, &temp_pagep
)) != 0)
1168 if (temp_pagep
!= NULL
&& DB_LOGGING(dbc
)) {
1169 page_dbt
.size
= hcp
->hdr
->pagesize
;
1170 page_dbt
.data
= temp_pagep
;
1171 if ((ret
= __ham_splitdata_log(dbenv
->lg_info
,
1172 dbc
->txn
, &new_lsn
, 0, dbp
->log_fileid
,
1173 SPLITOLD
, PGNO(temp_pagep
),
1174 &page_dbt
, &LSN(temp_pagep
))) != 0)
1176 LSN(temp_pagep
) = new_lsn
;
1179 if (big_buf
!= NULL
)
1180 __os_free(big_buf
, big_len
);
1183 * If the original bucket spanned multiple pages, then we've got
1184 * a pointer to a page that used to be on the bucket chain. It
1185 * should be deleted.
1187 if (temp_pagep
!= NULL
&& PGNO(temp_pagep
) != bucket_pgno
&&
1188 (ret
= __ham_del_page(dbc
, temp_pagep
)) != 0)
1192 * Write new buckets out.
1194 if (DB_LOGGING(dbc
)) {
1195 page_dbt
.size
= hcp
->hdr
->pagesize
;
1196 page_dbt
.data
= old_pagep
;
1197 if ((ret
= __ham_splitdata_log(dbenv
->lg_info
,
1198 dbc
->txn
, &new_lsn
, 0, dbp
->log_fileid
,
1199 SPLITNEW
, PGNO(old_pagep
),
1200 &page_dbt
, &LSN(old_pagep
))) != 0)
1202 LSN(old_pagep
) = new_lsn
;
1204 page_dbt
.data
= new_pagep
;
1205 if ((ret
= __ham_splitdata_log(dbenv
->lg_info
,
1206 dbc
->txn
, &new_lsn
, 0, dbp
->log_fileid
,
1207 SPLITNEW
, PGNO(new_pagep
), &page_dbt
, &LSN(new_pagep
))) != 0)
1209 LSN(new_pagep
) = new_lsn
;
1211 ret
= __ham_put_page(dbp
, old_pagep
, 1);
1212 if ((tret
= __ham_put_page(dbp
, new_pagep
, 1)) != 0 &&
1217 err
: if (old_pagep
!= NULL
)
1218 (void)__ham_put_page(dbp
, old_pagep
, 1);
1219 if (new_pagep
!= NULL
)
1220 (void)__ham_put_page(dbp
, new_pagep
, 1);
1221 if (temp_pagep
!= NULL
&& PGNO(temp_pagep
) != bucket_pgno
)
1222 (void)__ham_put_page(dbp
, temp_pagep
, 1);
1228 * Add the given pair to the page. The page in question may already be
1229 * held (i.e. it was already gotten). If it is, then the page is passed
1230 * in via the pagep parameter. On return, pagep will contain the page
1231 * to which we just added something. This allows us to link overflow
1232 * pages and return the new page having correctly put the last page.
1234 * PUBLIC: int __ham_add_el __P((DBC *, const DBT *, const DBT *, int));
1237 __ham_add_el(dbc
, key
, val
, type
)
1239 const DBT
*key
, *val
;
1244 const DBT
*pkey
, *pdata
;
1245 DBT key_dbt
, data_dbt
;
1247 HOFFPAGE doff
, koff
;
1248 db_pgno_t next_pgno
;
1249 u_int32_t data_size
, key_size
, pairsize
, rectype
;
1250 int do_expand
, is_keybig
, is_databig
, ret
;
1251 int key_type
, data_type
;
1254 hcp
= (HASH_CURSOR
*)dbc
->internal
;
1257 if (hcp
->pagep
== NULL
&& (ret
= __ham_get_page(dbp
,
1258 hcp
->seek_found_page
!= PGNO_INVALID
? hcp
->seek_found_page
:
1259 hcp
->pgno
, &hcp
->pagep
)) != 0)
1262 key_size
= HKEYDATA_PSIZE(key
->size
);
1263 data_size
= HKEYDATA_PSIZE(val
->size
);
1264 is_keybig
= ISBIG(hcp
, key
->size
);
1265 is_databig
= ISBIG(hcp
, val
->size
);
1267 key_size
= HOFFPAGE_PSIZE
;
1269 data_size
= HOFFPAGE_PSIZE
;
1271 pairsize
= key_size
+ data_size
;
1273 /* Advance to first page in chain with room for item. */
1274 while (H_NUMPAIRS(hcp
->pagep
) && NEXT_PGNO(hcp
->pagep
) !=
1277 * This may not be the end of the chain, but the pair may fit
1278 * anyway. Check if it's a bigpair that fits or a regular
1281 if (P_FREESPACE(hcp
->pagep
) >= pairsize
)
1283 next_pgno
= NEXT_PGNO(hcp
->pagep
);
1285 __ham_next_cpage(dbc
, next_pgno
, 0, 0)) != 0)
1290 * Check if we need to allocate a new page.
1292 if (P_FREESPACE(hcp
->pagep
) < pairsize
) {
1294 if ((ret
= __ham_add_ovflpage(dbc
,
1295 hcp
->pagep
, 1, &hcp
->pagep
)) != 0)
1297 hcp
->pgno
= PGNO(hcp
->pagep
);
1303 hcp
->bndx
= H_NUMPAIRS(hcp
->pagep
);
1304 F_CLR(hcp
, H_DELETED
);
1306 koff
.type
= H_OFFPAGE
;
1307 UMRW(koff
.unused
[0]);
1308 UMRW(koff
.unused
[1]);
1309 UMRW(koff
.unused
[2]);
1310 if ((ret
= __db_poff(dbc
,
1311 key
, &koff
.pgno
, __ham_overflow_page
)) != 0)
1313 koff
.tlen
= key
->size
;
1314 key_dbt
.data
= &koff
;
1315 key_dbt
.size
= sizeof(koff
);
1317 key_type
= H_OFFPAGE
;
1320 key_type
= H_KEYDATA
;
1324 doff
.type
= H_OFFPAGE
;
1325 UMRW(doff
.unused
[0]);
1326 UMRW(doff
.unused
[1]);
1327 UMRW(doff
.unused
[2]);
1328 if ((ret
= __db_poff(dbc
,
1329 val
, &doff
.pgno
, __ham_overflow_page
)) != 0)
1331 doff
.tlen
= val
->size
;
1332 data_dbt
.data
= &doff
;
1333 data_dbt
.size
= sizeof(doff
);
1335 data_type
= H_OFFPAGE
;
1341 if (DB_LOGGING(dbc
)) {
1344 rectype
|= PAIR_DATAMASK
;
1346 rectype
|= PAIR_KEYMASK
;
1348 if ((ret
= __ham_insdel_log(dbp
->dbenv
->lg_info
,
1349 dbc
->txn
, &new_lsn
, 0, rectype
,
1350 dbp
->log_fileid
, PGNO(hcp
->pagep
),
1351 (u_int32_t
)H_NUMPAIRS(hcp
->pagep
),
1352 &LSN(hcp
->pagep
), pkey
, pdata
)) != 0)
1355 /* Move lsn onto page. */
1356 LSN(hcp
->pagep
) = new_lsn
; /* Structure assignment. */
1359 __ham_putitem(hcp
->pagep
, pkey
, key_type
);
1360 __ham_putitem(hcp
->pagep
, pdata
, data_type
);
1363 * For splits, we are going to update item_info's page number
1364 * field, so that we can easily return to the same page the
1365 * next time we come in here. For other operations, this shouldn't
1366 * matter, since odds are this is the last thing that happens before
1367 * we return to the user program.
1369 hcp
->pgno
= PGNO(hcp
->pagep
);
1372 * XXX Maybe keep incremental numbers here
1374 if (!F_ISSET(dbp
, DB_AM_LOCKING
))
1377 if (do_expand
|| (hcp
->hdr
->ffactor
!= 0 &&
1378 (u_int32_t
)H_NUMPAIRS(hcp
->pagep
) > hcp
->hdr
->ffactor
))
1379 F_SET(hcp
, H_EXPAND
);
1385 * Special __putitem call used in splitting -- copies one entry to
1386 * another. Works for all types of hash entries (H_OFFPAGE, H_KEYDATA,
1387 * H_DUPLICATE, H_OFFDUP). Since we log splits at a high level, we
1388 * do not need to do any logging here.
1390 * PUBLIC: void __ham_copy_item __P((size_t, PAGE *, u_int32_t, PAGE *));
1393 __ham_copy_item(pgsize
, src_page
, src_ndx
, dest_page
)
1403 * Copy the key and data entries onto this new page.
1405 src
= P_ENTRY(src_page
, src_ndx
);
1407 /* Set up space on dest. */
1408 len
= LEN_HITEM(src_page
, pgsize
, src_ndx
);
1409 HOFFSET(dest_page
) -= len
;
1410 dest_page
->inp
[NUM_ENT(dest_page
)] = HOFFSET(dest_page
);
1411 dest
= P_ENTRY(dest_page
, NUM_ENT(dest_page
));
1412 NUM_ENT(dest_page
)++;
1414 memcpy(dest
, src
, len
);
1420 * pointer on success
1423 * PUBLIC: int __ham_add_ovflpage __P((DBC *, PAGE *, int, PAGE **));
1426 __ham_add_ovflpage(dbc
, pagep
, release
, pp
)
1439 hcp
= (HASH_CURSOR
*)dbc
->internal
;
1441 if ((ret
= __ham_overflow_page(dbc
, P_HASH
, &new_pagep
)) != 0)
1444 if (DB_LOGGING(dbc
)) {
1445 if ((ret
= __ham_newpage_log(dbp
->dbenv
->lg_info
,
1446 dbc
->txn
, &new_lsn
, 0, PUTOVFL
,
1447 dbp
->log_fileid
, PGNO(pagep
), &LSN(pagep
),
1448 PGNO(new_pagep
), &LSN(new_pagep
), PGNO_INVALID
, NULL
)) != 0)
1451 /* Move lsn onto page. */
1452 LSN(pagep
) = LSN(new_pagep
) = new_lsn
;
1454 NEXT_PGNO(pagep
) = PGNO(new_pagep
);
1455 PREV_PGNO(new_pagep
) = PGNO(pagep
);
1458 ret
= __ham_put_page(dbp
, pagep
, 1);
1460 hcp
->stats
.hash_overflows
++;
1467 * PUBLIC: int __ham_new_page __P((DB *, u_int32_t, u_int32_t, PAGE **));
1470 __ham_new_page(dbp
, addr
, type
, pp
)
1472 u_int32_t addr
, type
;
1478 if ((ret
= memp_fget(dbp
->mpf
,
1479 &addr
, DB_MPOOL_CREATE
, &pagep
)) != 0)
1482 /* This should not be necessary because page-in should do it. */
1483 P_INIT(pagep
, dbp
->pgsize
, addr
, PGNO_INVALID
, PGNO_INVALID
, 0, type
);
1490 * PUBLIC: int __ham_del_page __P((DBC *, PAGE *));
1493 __ham_del_page(dbc
, pagep
)
1503 hcp
= (HASH_CURSOR
*)dbc
->internal
;
1505 DIRTY_META(dbp
, hcp
, ret
);
1508 __db_err(dbp
->dbenv
,
1509 "free_ovflpage: unable to lock meta data page %s\n",
1512 * If we are going to return an error, then we should free
1513 * the page, so it doesn't stay pinned forever.
1515 (void)__ham_put_page(dbp
, pagep
, 0);
1519 if (DB_LOGGING(dbc
)) {
1520 if ((ret
= __ham_newpgno_log(dbp
->dbenv
->lg_info
,
1521 dbc
->txn
, &new_lsn
, 0, DELPGNO
,
1522 dbp
->log_fileid
, PGNO(pagep
), hcp
->hdr
->last_freed
,
1523 (u_int32_t
)TYPE(pagep
), NEXT_PGNO(pagep
), P_INVALID
,
1524 &LSN(pagep
), &hcp
->hdr
->lsn
)) != 0)
1527 hcp
->hdr
->lsn
= new_lsn
;
1528 LSN(pagep
) = new_lsn
;
1535 __pgno
= pagep
->pgno
;
1537 memset(pagep
, 0xdb, dbp
->pgsize
);
1538 pagep
->pgno
= __pgno
;
1542 TYPE(pagep
) = P_INVALID
;
1543 NEXT_PGNO(pagep
) = hcp
->hdr
->last_freed
;
1544 hcp
->hdr
->last_freed
= PGNO(pagep
);
1546 return (__ham_put_page(dbp
, pagep
, 1));
1551 * PUBLIC: int __ham_put_page __P((DB *, PAGE *, int32_t));
1554 __ham_put_page(dbp
, pagep
, is_dirty
)
1560 __account_page(dbp
, ((BKT
*)((char *)pagep
- sizeof(BKT
)))->pgno
, -1);
1562 return (memp_fput(dbp
->mpf
, pagep
, (is_dirty
? DB_MPOOL_DIRTY
: 0)));
1566 * __ham_dirty_page --
1567 * Mark a page dirty.
1569 * PUBLIC: int __ham_dirty_page __P((DB *, PAGE *));
1572 __ham_dirty_page(dbp
, pagep
)
1576 return (memp_fset(dbp
->mpf
, pagep
, DB_MPOOL_DIRTY
));
1580 * PUBLIC: int __ham_get_page __P((DB *, db_pgno_t, PAGE **));
1583 __ham_get_page(dbp
, addr
, pagep
)
1590 ret
= memp_fget(dbp
->mpf
, &addr
, DB_MPOOL_CREATE
, pagep
);
1593 __account_page(dbp
, addr
, 1);
1599 * PUBLIC: int __ham_overflow_page
1600 * PUBLIC: __P((DBC *, u_int32_t, PAGE **));
1603 __ham_overflow_page(dbc
, type
, pp
)
1610 DB_LSN
*lsnp
, new_lsn
;
1612 db_pgno_t new_addr
, next_free
, newalloc_flag
;
1613 u_int32_t offset
, splitnum
;
1618 hcp
= (HASH_CURSOR
*)dbc
->internal
;
1619 DIRTY_META(dbp
, hcp
, ret
);
1624 * This routine is split up into two parts. First we have
1625 * to figure out the address of the new page that we are
1626 * allocating. Then we have to log the allocation. Only
1627 * after the log do we get to complete allocation of the
1630 new_addr
= hcp
->hdr
->last_freed
;
1631 if (new_addr
!= PGNO_INVALID
) {
1632 if ((ret
= __ham_get_page(dbp
, new_addr
, &p
)) != 0)
1634 next_free
= NEXT_PGNO(p
);
1638 splitnum
= hcp
->hdr
->ovfl_point
;
1639 hcp
->hdr
->spares
[splitnum
]++;
1640 offset
= hcp
->hdr
->spares
[splitnum
] -
1641 (splitnum
? hcp
->hdr
->spares
[splitnum
- 1] : 0);
1642 new_addr
= PGNO_OF(hcp
, hcp
->hdr
->ovfl_point
, offset
);
1643 if (new_addr
> MAX_PAGES(hcp
)) {
1644 __db_err(dbp
->dbenv
, "hash: out of file pages");
1645 hcp
->hdr
->spares
[splitnum
]--;
1648 next_free
= PGNO_INVALID
;
1654 if (DB_LOGGING(dbc
)) {
1655 if ((ret
= __ham_newpgno_log(dbp
->dbenv
->lg_info
,
1656 dbc
->txn
, &new_lsn
, 0, ALLOCPGNO
,
1657 dbp
->log_fileid
, new_addr
, next_free
,
1658 0, newalloc_flag
, type
, lsnp
, &hcp
->hdr
->lsn
)) != 0)
1661 hcp
->hdr
->lsn
= new_lsn
;
1667 /* We just took something off the free list, initialize it. */
1668 hcp
->hdr
->last_freed
= next_free
;
1669 P_INIT(p
, hcp
->hdr
->pagesize
, PGNO(p
), PGNO_INVALID
,
1670 PGNO_INVALID
, 0, (u_int8_t
)type
);
1672 /* Get the new page. */
1673 if ((ret
= __ham_new_page(dbp
, new_addr
, type
, &p
)) != 0)
1676 if (DB_LOGGING(dbc
))
1685 * PUBLIC: #ifdef DEBUG
1686 * PUBLIC: db_pgno_t __bucket_to_page __P((HASH_CURSOR *, db_pgno_t));
1690 __bucket_to_page(hcp
, n
)
1698 ret_val
+= hcp
->hdr
->spares
[__db_log2(n
+ 1) - 1];
1704 * Create a bunch of overflow pages at the current split point.
1705 * PUBLIC: void __ham_init_ovflpages __P((DBC *));
1708 __ham_init_ovflpages(dbc
)
1715 db_pgno_t last_pgno
, new_pgno
;
1716 u_int32_t i
, curpages
, numpages
;
1719 hcp
= (HASH_CURSOR
*)dbc
->internal
;
1721 curpages
= hcp
->hdr
->spares
[hcp
->hdr
->ovfl_point
] -
1722 hcp
->hdr
->spares
[hcp
->hdr
->ovfl_point
- 1];
1723 numpages
= hcp
->hdr
->ovfl_point
+ 1 - curpages
;
1725 last_pgno
= hcp
->hdr
->last_freed
;
1726 new_pgno
= PGNO_OF(hcp
, hcp
->hdr
->ovfl_point
, curpages
+ 1);
1727 if (DB_LOGGING(dbc
)) {
1728 (void)__ham_ovfl_log(dbp
->dbenv
->lg_info
,
1729 dbc
->txn
, &new_lsn
, 0, dbp
->log_fileid
, new_pgno
,
1730 numpages
, last_pgno
, hcp
->hdr
->ovfl_point
, &hcp
->hdr
->lsn
);
1731 hcp
->hdr
->lsn
= new_lsn
;
1735 hcp
->hdr
->spares
[hcp
->hdr
->ovfl_point
] += numpages
;
1736 for (i
= numpages
; i
> 0; i
--) {
1737 if (__ham_new_page(dbp
,
1738 PGNO_OF(hcp
, hcp
->hdr
->ovfl_point
, curpages
+ i
),
1739 P_INVALID
, &p
) != 0)
1742 NEXT_PGNO(p
) = last_pgno
;
1743 last_pgno
= PGNO(p
);
1744 (void)__ham_put_page(dbp
, p
, 1);
1746 hcp
->hdr
->last_freed
= last_pgno
;
1750 * PUBLIC: int __ham_get_cpage __P((DBC *, db_lockmode_t));
1753 __ham_get_cpage(dbc
, mode
)
1762 hcp
= (HASH_CURSOR
*)dbc
->internal
;
1765 * There are three cases with respect to buckets and locks. If there
1766 * is no lock held, then if we are locking, we should get the lock.
1767 * If there is a lock held and it's for the current bucket, we don't
1768 * need to do anything. If there is a lock, but it's for a different
1769 * bucket, then we need to release and get.
1771 if (F_ISSET(dbp
, DB_AM_LOCKING
)) {
1772 if (hcp
->lock
!= 0 && hcp
->lbucket
!= hcp
->bucket
) {
1774 * If this is the original lock, don't release it,
1775 * because we may need to restore it upon exit.
1777 if (dbc
->txn
== NULL
&&
1778 !F_ISSET(hcp
, H_ORIGINAL
) && (ret
=
1779 lock_put(dbp
->dbenv
->lk_info
, hcp
->lock
)) != 0)
1781 F_CLR(hcp
, H_ORIGINAL
);
1784 if (hcp
->lock
== 0 && (ret
= __ham_lock_bucket(dbc
, mode
)) != 0)
1786 hcp
->lbucket
= hcp
->bucket
;
1789 if (hcp
->pagep
== NULL
) {
1790 if (hcp
->pgno
== PGNO_INVALID
) {
1791 hcp
->pgno
= BUCKET_TO_PAGE(hcp
, hcp
->bucket
);
1796 __ham_get_page(dbp
, hcp
->pgno
, &hcp
->pagep
)) != 0)
1800 if (hcp
->dpgno
!= PGNO_INVALID
&& hcp
->dpagep
== NULL
)
1802 __ham_get_page(dbp
, hcp
->dpgno
, &hcp
->dpagep
)) != 0)
1808 * Get a new page at the cursor, putting the last page if necessary.
1809 * If the flag is set to H_ISDUP, then we are talking about the
1810 * duplicate page, not the main page.
1812 * PUBLIC: int __ham_next_cpage __P((DBC *, db_pgno_t, int, u_int32_t));
1815 __ham_next_cpage(dbc
, pgno
, dirty
, flags
)
1827 hcp
= (HASH_CURSOR
*)dbc
->internal
;
1828 if (LF_ISSET(H_ISDUP
) && hcp
->dpagep
!= NULL
&&
1829 (ret
= __ham_put_page(dbp
, hcp
->dpagep
, dirty
)) != 0)
1831 else if (!LF_ISSET(H_ISDUP
) && hcp
->pagep
!= NULL
&&
1832 (ret
= __ham_put_page(dbp
, hcp
->pagep
, dirty
)) != 0)
1835 if ((ret
= __ham_get_page(dbp
, pgno
, &p
)) != 0)
1838 if (LF_ISSET(H_ISDUP
)) {
1852 * __ham_lock_bucket --
1853 * Get the lock on a particular bucket.
1856 __ham_lock_bucket(dbc
, mode
)
1863 hcp
= (HASH_CURSOR
*)dbc
->internal
;
1864 dbc
->lock
.pgno
= (db_pgno_t
)(hcp
->bucket
);
1865 if (dbc
->txn
== NULL
)
1866 ret
= lock_get(dbc
->dbp
->dbenv
->lk_info
, dbc
->locker
, 0,
1867 &dbc
->lock_dbt
, mode
, &hcp
->lock
);
1869 ret
= lock_tget(dbc
->dbp
->dbenv
->lk_info
, dbc
->txn
, 0,
1870 &dbc
->lock_dbt
, mode
, &hcp
->lock
);
1872 return (ret
< 0 ? EAGAIN
: ret
);
1877 * Delete a pair on a page, paying no attention to what the pair
1878 * represents. The caller is responsible for freeing up duplicates
1879 * or offpage entries that might be referenced by this pair.
1881 * PUBLIC: void __ham_dpair __P((DB *, PAGE *, u_int32_t));
1884 __ham_dpair(dbp
, p
, pndx
)
1890 u_int8_t
*dest
, *src
;
1893 * Compute "delta", the amount we have to shift all of the
1894 * offsets. To find the delta, we just need to calculate
1895 * the size of the pair of elements we are removing.
1897 delta
= H_PAIRSIZE(p
, dbp
->pgsize
, pndx
);
1900 * The hard case: we want to remove something other than
1901 * the last item on the page. We need to shift data and
1904 if ((db_indx_t
)pndx
!= H_NUMPAIRS(p
) - 1) {
1906 * Move the data: src is the first occupied byte on
1907 * the page. (Length is delta.)
1909 src
= (u_int8_t
*)p
+ HOFFSET(p
);
1912 * Destination is delta bytes beyond src. This might
1913 * be an overlapping copy, so we have to use memmove.
1916 memmove(dest
, src
, p
->inp
[H_DATAINDEX(pndx
)] - HOFFSET(p
));
1919 /* Adjust the offsets. */
1920 for (n
= (db_indx_t
)pndx
; n
< (db_indx_t
)(H_NUMPAIRS(p
) - 1); n
++) {
1921 p
->inp
[H_KEYINDEX(n
)] = p
->inp
[H_KEYINDEX(n
+1)] + delta
;
1922 p
->inp
[H_DATAINDEX(n
)] = p
->inp
[H_DATAINDEX(n
+1)] + delta
;
1925 /* Adjust page metadata. */
1926 HOFFSET(p
) = HOFFSET(p
) + delta
;
1927 NUM_ENT(p
) = NUM_ENT(p
) - 2;