2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997, 1998
5 * Sleepycat Software. All rights reserved.
8 * Copyright (c) 1990, 1993, 1994, 1995, 1996
9 * Keith Bostic. All rights reserved.
12 * Copyright (c) 1990, 1993
13 * The Regents of the University of California. All rights reserved.
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 * 3. All advertising materials mentioning features or use of this software
24 * must display the following acknowledgement:
25 * This product includes software developed by the University of
26 * California, Berkeley and its contributors.
27 * 4. Neither the name of the University nor the names of its contributors
28 * may be used to endorse or promote products derived from this software
29 * without specific prior written permission.
31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 static const char sccsid
[] = "@(#)bt_rsearch.c 10.21 (Sleepycat) 12/2/98";
50 #ifndef NO_SYSTEM_INCLUDES
51 #include <sys/types.h>
60 * Search a btree for a record number.
62 * PUBLIC: int __bam_rsearch __P((DBC *, db_recno_t *, u_int32_t, int, int *));
65 __bam_rsearch(dbc
, recnop
, flags
, stop
, exactp
)
79 db_recno_t i
, recno
, total
;
88 * There are several ways we search a btree tree. The flags argument
89 * specifies if we're acquiring read or write locks and if we are
90 * locking pairs of pages. In addition, if we're adding or deleting
91 * an item, we have to lock the entire tree, regardless. See btree.h
94 * If write-locking pages, we need to know whether or not to acquire a
95 * write lock on a page before getting it. This depends on how deep it
96 * is in tree, which we don't know until we acquire the root page. So,
97 * if we need to lock the root page we may have to upgrade it later,
98 * because we won't get the correct lock initially.
100 * Retrieve the root page.
103 stack
= LF_ISSET(S_STACK
);
104 if ((ret
= __bam_lget(dbc
,
105 0, pg
, stack
? DB_LOCK_WRITE
: DB_LOCK_READ
, &lock
)) != 0)
107 if ((ret
= memp_fget(dbp
->mpf
, &pg
, 0, &h
)) != 0) {
108 (void)__BT_LPUT(dbc
, lock
);
113 * Decide if we need to save this page; if we do, write lock it.
114 * We deliberately don't lock-couple on this call. If the tree
115 * is tiny, i.e., one page, and two threads are busily updating
116 * the root page, we're almost guaranteed deadlocks galore, as
117 * each one gets a read lock and then blocks the other's attempt
121 ((LF_ISSET(S_PARENT
) && (u_int8_t
)(stop
+ 1) >= h
->level
) ||
122 (LF_ISSET(S_WRITE
) && h
->level
== LEAFLEVEL
))) {
123 (void)memp_fput(dbp
->mpf
, h
, 0);
124 (void)__BT_LPUT(dbc
, lock
);
125 if ((ret
= __bam_lget(dbc
, 0, pg
, DB_LOCK_WRITE
, &lock
)) != 0)
127 if ((ret
= memp_fget(dbp
->mpf
, &pg
, 0, &h
)) != 0) {
128 (void)__BT_LPUT(dbc
, lock
);
135 * If appending to the tree, set the record number now -- we have the
138 * Delete only deletes exact matches, read only returns exact matches.
139 * Note, this is different from __bam_search(), which returns non-exact
142 * The record may not exist. We can only return the correct location
143 * for the record immediately after the last record in the tree, so do
147 if (LF_ISSET(S_APPEND
)) {
149 *recnop
= recno
= total
+ 1;
156 if (!LF_ISSET(S_PAST_EOF
) || recno
> total
+ 1) {
157 (void)memp_fput(dbp
->mpf
, h
, 0);
158 (void)__BT_LPUT(dbc
, lock
);
159 return (DB_NOTFOUND
);
166 * Record numbers in the tree are 0-based, but the recno is
167 * 1-based. All of the calculations below have to take this
176 * There may be logically deleted records on the page,
177 * walk the page correcting for them. The record may
178 * not exist if there are enough deleted records in the
181 if (recno
<= (db_recno_t
)NUM_ENT(h
) / P_INDX
)
182 for (i
= recno
- 1;; --i
) {
183 if (B_DISSET(GET_BKEYDATA(h
,
184 i
* P_INDX
+ O_INDX
)->type
))
189 if (recno
> (db_recno_t
)NUM_ENT(h
) / P_INDX
) {
191 if (!LF_ISSET(S_PAST_EOF
) || recno
>
192 (db_recno_t
)(NUM_ENT(h
) / P_INDX
+ 1)) {
199 /* Correct from 1-based to 0-based for a page offset. */
201 BT_STK_ENTER(cp
, h
, recno
* P_INDX
, lock
, ret
);
204 for (indx
= 0, top
= NUM_ENT(h
);;) {
205 bi
= GET_BINTERNAL(h
, indx
);
206 if (++indx
== top
|| total
+ bi
->nrecs
>= recno
)
215 /* Correct from 1-based to 0-based for a page offset. */
217 BT_STK_ENTER(cp
, h
, recno
, lock
, ret
);
220 for (indx
= 0, top
= NUM_ENT(h
);;) {
221 ri
= GET_RINTERNAL(h
, indx
);
222 if (++indx
== top
|| total
+ ri
->nrecs
>= recno
)
229 return (__db_pgfmt(dbp
, h
->pgno
));
234 /* Return if this is the lowest page wanted. */
235 if (LF_ISSET(S_PARENT
) && stop
== h
->level
) {
236 BT_STK_ENTER(cp
, h
, indx
, lock
, ret
);
239 BT_STK_PUSH(cp
, h
, indx
, lock
, ret
);
244 __bam_lget(dbc
, 0, pg
, DB_LOCK_WRITE
, &lock
)) != 0)
248 * Decide if we want to return a pointer to the next
249 * page in the stack. If we do, write lock it and
252 if ((LF_ISSET(S_PARENT
) &&
253 (u_int8_t
)(stop
+ 1) >= (u_int8_t
)(h
->level
- 1)) ||
254 (h
->level
- 1) == LEAFLEVEL
)
257 (void)memp_fput(dbp
->mpf
, h
, 0);
260 __bam_lget(dbc
, 1, pg
, stack
&& LF_ISSET(S_WRITE
) ?
261 DB_LOCK_WRITE
: DB_LOCK_READ
, &lock
)) != 0)
265 if ((ret
= memp_fget(dbp
->mpf
, &pg
, 0, &h
)) != 0)
271 __bam_stkrel(dbc
, 0);
277 * Adjust the tree after adding or deleting a record.
279 * PUBLIC: int __bam_adjust __P((DBC *, int32_t));
282 __bam_adjust(dbc
, adjust
)
295 /* Update the record counts for the tree. */
296 for (epg
= cp
->sp
; epg
<= cp
->csp
; ++epg
) {
298 if (TYPE(h
) == P_IBTREE
|| TYPE(h
) == P_IRECNO
) {
299 if (DB_LOGGING(dbc
) &&
300 (ret
= __bam_cadjust_log(dbp
->dbenv
->lg_info
,
301 dbc
->txn
, &LSN(h
), 0, dbp
->log_fileid
,
302 PGNO(h
), &LSN(h
), (u_int32_t
)epg
->indx
,
306 if (TYPE(h
) == P_IBTREE
)
307 GET_BINTERNAL(h
, epg
->indx
)->nrecs
+= adjust
;
309 GET_RINTERNAL(h
, epg
->indx
)->nrecs
+= adjust
;
311 if (PGNO(h
) == PGNO_ROOT
)
312 RE_NREC_ADJ(h
, adjust
);
314 if ((ret
= memp_fset(dbp
->mpf
, h
, DB_MPOOL_DIRTY
)) != 0)
323 * Return the number of records in the tree.
325 * PUBLIC: int __bam_nrecs __P((DBC *, db_recno_t *));
328 __bam_nrecs(dbc
, rep
)
341 if ((ret
= __bam_lget(dbc
, 0, pgno
, DB_LOCK_READ
, &lock
)) != 0)
343 if ((ret
= memp_fget(dbp
->mpf
, &pgno
, 0, &h
)) != 0)
348 (void)memp_fput(dbp
->mpf
, h
, 0);
349 (void)__BT_TLPUT(dbc
, lock
);
356 * Return the number of records below a page.
358 * PUBLIC: db_recno_t __bam_total __P((PAGE *));
372 /* Check for logically deleted records. */
373 for (indx
= 0; indx
< top
; indx
+= P_INDX
)
374 if (!B_DISSET(GET_BKEYDATA(h
, indx
+ O_INDX
)->type
))
378 for (indx
= 0; indx
< top
; indx
+= O_INDX
)
379 nrecs
+= GET_BINTERNAL(h
, indx
)->nrecs
;
385 for (indx
= 0; indx
< top
; indx
+= O_INDX
)
386 nrecs
+= GET_RINTERNAL(h
, indx
)->nrecs
;