2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997, 1998
5 * Sleepycat Software. All rights reserved.
8 * Copyright (c) 1990, 1993, 1994, 1995, 1996
9 * Keith Bostic. All rights reserved.
12 * Copyright (c) 1990, 1993, 1994, 1995
13 * The Regents of the University of California. All rights reserved.
15 * This code is derived from software contributed to Berkeley by
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
26 * 3. All advertising materials mentioning features or use of this software
27 * must display the following acknowledgement:
28 * This product includes software developed by the University of
29 * California, Berkeley and its contributors.
30 * 4. Neither the name of the University nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
/* Revision identification string for this file (bt_open.c, revision 10.27). */
50 static const char sccsid
[] = "@(#)bt_open.c 10.27 (Sleepycat) 5/6/98";
54 * Implementation of btree access method for 4.4BSD.
56 * The design here was originally based on that of the btree access method
57 * used in the Postgres database system at UC Berkeley. This implementation
58 * is wholly independent of the Postgres code.
61 #ifndef NO_SYSTEM_INCLUDES
62 #include <sys/types.h>
/*
 * Forward declarations for the file-local helpers called below.  Both return
 * int: __bam_keyalloc takes the BTREE, __bam_setmeta takes the DB handle and
 * the BTREE.
 */
73 static int __bam_keyalloc
__P((BTREE
*));
74 static int __bam_setmeta
__P((DB
*, BTREE
*));
80 * PUBLIC: int __bam_open __P((DB *, DBTYPE, DB_INFO *));
/*
 * __bam_open --
 *	Set up the btree-specific state for a DB handle: allocate the BTREE
 *	structure, check and copy the caller's DB_INFO settings (or apply
 *	the defaults), install the access-method function pointers on dbp,
 *	compute the overflow threshold, and call __bam_setmeta for the tree.
 *
 * dbp:    DB handle being opened.
 * type:   access method type; DB_RECNO selects the recno variants below.
 * dbinfo: user settings; may be NULL.
 *
 * NOTE(review): this span is a lossy extraction -- the embedded original line
 * numbers skip, so the declarations, braces, `else` keywords and the
 * return/goto statements that follow each failing test are not visible here.
 */
83 __bam_open(dbp
, type
, dbinfo
)
91 /* Allocate the btree internal structure. */
92 if ((t
= (BTREE
*)__db_calloc(1, sizeof(BTREE
))) == NULL
)
/*
 * Start with an empty stack: bt_sp and bt_csp point at the base of bt_stack,
 * bt_esp points one element past its end.
 */
95 t
->bt_sp
= t
->bt_csp
= t
->bt_stack
;
96 t
->bt_esp
= t
->bt_stack
+ sizeof(t
->bt_stack
) / sizeof(t
->bt_stack
[0]);
/*
 * Recno handles, and handles with DB_BT_RECNUM set, get the key return
 * buffer allocated up front by __bam_keyalloc.
 */
98 if ((type
== DB_RECNO
|| F_ISSET(dbp
, DB_BT_RECNUM
)) &&
99 (ret
= __bam_keyalloc(t
)) != 0)
103 * Intention is to make sure all of the user's selections are okay
104 * here and then use them without checking.
106 if (dbinfo
!= NULL
) {
107 /* Minimum number of keys per page. */
/* Zero means "use DEFMINKEYPAGE". */
108 if (dbinfo
->bt_minkey
== 0)
109 t
->bt_minkey
= DEFMINKEYPAGE
;
/*
 * A value below 2 is tested for here; the consequence is not visible
 * (presumably a jump to the einval label below -- TODO confirm).
 */
111 if (dbinfo
->bt_minkey
< 2)
113 t
->bt_minkey
= dbinfo
->bt_minkey
;
116 /* Maximum number of keys per page. */
/*
 * Same pattern as bt_minkey: zero and "< 1" are tested separately; what
 * each test does is on lines not visible in this span.
 */
117 if (dbinfo
->bt_maxkey
== 0)
120 if (dbinfo
->bt_maxkey
< 1)
122 t
->bt_maxkey
= dbinfo
->bt_maxkey
;
126 * If no comparison, use default comparison. If no comparison
127 * and no prefix, use default prefix. (We can't default the
128 * prefix if the user supplies a comparison routine; shortening
129 * the keys may break their comparison algorithm.)
131 t
->bt_compare
= dbinfo
->bt_compare
== NULL
?
132 __bam_defcmp
: dbinfo
->bt_compare
;
/*
 * bt_prefix ends up NULL only when the user supplied a comparison routine
 * but no prefix routine.
 */
133 t
->bt_prefix
= dbinfo
->bt_prefix
== NULL
?
134 (dbinfo
->bt_compare
== NULL
?
135 __bam_defpfx
: NULL
) : dbinfo
->bt_prefix
;
/*
 * Defaults: presumably the dbinfo == NULL branch (the `else` itself is not
 * visible in this span).
 */
137 t
->bt_minkey
= DEFMINKEYPAGE
;
138 t
->bt_compare
= __bam_defcmp
;
139 t
->bt_prefix
= __bam_defpfx
;
142 /* Initialize the remaining fields of the DB. */
/* Install the btree implementations of the DB handle's operations. */
145 dbp
->cursor
= __bam_cursor
;
146 dbp
->del
= __bam_delete
;
147 dbp
->get
= __bam_get
;
148 dbp
->put
= __bam_put
;
149 dbp
->stat
= __bam_stat
;
150 dbp
->sync
= __bam_sync
;
153 * The btree data structure requires that at least two key/data pairs
154 * can fit on a page, but other than that there's no fixed requirement.
155 * Translate the minimum number of items into the bytes a key/data pair
156 * can use before being placed on an overflow page. We calculate for
157 * the worst possible alignment by assuming every item requires the
158 * maximum alignment for padding.
160 * Recno uses the btree bt_ovflsize value -- it's close enough.
/*
 * Usable page bytes divided by (bt_minkey * P_INDX), minus the size of an
 * empty BKEYDATA item and one ALIGN(1, 4) unit.
 */
162 t
->bt_ovflsize
= (dbp
->pgsize
- P_OVERHEAD
) / (t
->bt_minkey
* P_INDX
)
163 - (BKEYDATA_PSIZE(0) + ALIGN(1, 4));
165 /* Create a root page if new tree. */
166 if ((ret
= __bam_setmeta(dbp
, t
)) != 0)
/*
 * Invalid-argument exit: sets ret to EINVAL.  Presumably falls through into
 * the err cleanup below -- TODO confirm against the full file.
 */
171 einval
: ret
= EINVAL
;
/*
 * Error cleanup: release the key return buffer if one was allocated, then
 * the BTREE structure itself.
 */
173 err
: if (t
!= NULL
) {
174 /* If we allocated room for key/data return, discard it. */
175 if (t
->bt_rkey
.data
!= NULL
)
176 __db_free(t
->bt_rkey
.data
);
178 FREE(t
, sizeof(BTREE
));
185 * Create a BTREE handle for a threaded DB handle.
187 * PUBLIC: int __bam_bdup __P((DB *, DB *));
/*
 * __bam_bdup --
 *	Build a fresh BTREE structure configured from an existing one (ot):
 *	allocate it zeroed, reset its stack pointers, allocate the key return
 *	buffer when the original handle is DB_RECNO or has DB_BT_RECNUM set,
 *	then copy the key limits, comparison/prefix routines, overflow size
 *	and the bt_recno pointer.
 *
 * orig: the existing DB handle whose type and flags are consulted.
 * new:  the second DB handle argument; how it is used is not visible here.
 *
 * NOTE(review): lossy extraction -- where `ot` is obtained, where `t` is
 * attached to `new`, and the error/return statements are on lines not
 * visible in this span.
 */
190 __bam_bdup(orig
, new)
198 if ((t
= (BTREE
*)__db_calloc(1, sizeof(*t
))) == NULL
)
203 * Ignore the cursor queue, only the first DB has attached cursors.
/* Empty stack: same initialization as in __bam_open. */
206 t
->bt_sp
= t
->bt_csp
= t
->bt_stack
;
207 t
->bt_esp
= t
->bt_stack
+ sizeof(t
->bt_stack
) / sizeof(t
->bt_stack
[0]);
/* The key return buffer is per-BTREE, so it is allocated rather than copied. */
209 if ((orig
->type
== DB_RECNO
|| F_ISSET(orig
, DB_BT_RECNUM
)) &&
210 (ret
= __bam_keyalloc(t
)) != 0) {
/* Copy the tree configuration from the original BTREE. */
215 t
->bt_maxkey
= ot
->bt_maxkey
;
216 t
->bt_minkey
= ot
->bt_minkey
;
217 t
->bt_compare
= ot
->bt_compare
;
218 t
->bt_prefix
= ot
->bt_prefix
;
219 t
->bt_ovflsize
= ot
->bt_ovflsize
;
223 * The entire RECNO structure is shared. If it breaks, the application
224 * was misusing it to start with.
/* Pointer copy only: both BTREEs refer to the same bt_recno object. */
226 t
->bt_recno
= ot
->bt_recno
;
235 * Allocate return memory for recno keys.
242 * Recno keys are always the same size, and we don't want to have
243 * to check for space on each return. Allocate it now.
/*
 * NOTE(review): presumably the body of __bam_keyalloc -- the function's name
 * line is not visible in this span.  Allocates sizeof(db_recno_t) bytes for
 * t->bt_rkey.data and records that size in t->bt_rkey.ulen.  What happens
 * when the allocation fails is on a line not visible here.
 */
245 if ((t
->bt_rkey
.data
= (void *)__db_malloc(sizeof(db_recno_t
))) == NULL
)
247 t
->bt_rkey
.ulen
= sizeof(db_recno_t
);
253 * Check (and optionally create) a tree.
/*
 * __bam_setmeta --
 *	Fetch the metadata page (with DB_MPOOL_CREATE) under a write lock.
 *	If its magic number is already set, adopt the stored maxkey/minkey
 *	and release the page and the lock.  Otherwise initialize the
 *	metadata page and a root page, put both back marked dirty, sync the
 *	file and release both locks.
 *
 * dbp: DB handle supplying page size, type, flags, file id and mpool file.
 * t:   BTREE whose bt_maxkey/bt_minkey are read from, or written to, the
 *      metadata page.
 *
 * NOTE(review): lossy extraction -- the `if ((ret =` openers of several
 * calls, the error returns and the final return are not visible here.
 */
256 __bam_setmeta(dbp
, t
)
262 DB_LOCK metalock
, rootlock
;
266 /* Get, and optionally create the metadata page. */
267 pgno
= PGNO_METADATA
;
269 __bam_lget(dbp
, 0, PGNO_METADATA
, DB_LOCK_WRITE
, &metalock
)) != 0)
/* If the page fetch fails, the metadata lock is released again. */
272 __bam_pget(dbp
, (PAGE
**)&meta
, &pgno
, DB_MPOOL_CREATE
)) != 0) {
273 (void)__BT_LPUT(dbp
, metalock
);
278 * If the magic number is correct, we're not creating the tree.
279 * Correct any fields that may not be right. Note, all of the
280 * local flags were set by db_open(3).
/*
 * Note the test is only "non-zero", not a comparison against
 * DB_BTREEMAGIC.  The stored key limits override whatever the caller set.
 */
282 if (meta
->magic
!= 0) {
283 t
->bt_maxkey
= meta
->maxkey
;
284 t
->bt_minkey
= meta
->minkey
;
/* Page is put back with flags 0 (not dirtied); then drop the lock. */
286 (void)memp_fput(dbp
->mpf
, (PAGE
*)meta
, 0);
287 (void)__BT_LPUT(dbp
, metalock
);
291 /* Initialize the tree structure metadata information. */
/* Zero the whole BTMETA first so unset fields read as 0. */
292 memset(meta
, 0, sizeof(BTMETA
));
294 meta
->pgno
= PGNO_METADATA
;
295 meta
->magic
= DB_BTREEMAGIC
;
296 meta
->version
= DB_BTREEVERSION
;
297 meta
->pagesize
= dbp
->pgsize
;
298 meta
->maxkey
= t
->bt_maxkey
;
299 meta
->minkey
= t
->bt_minkey
;
300 meta
->free
= PGNO_INVALID
;
/* Record the handle's type and flags as BTM_* flags on the metadata page. */
301 if (dbp
->type
== DB_RECNO
)
302 F_SET(meta
, BTM_RECNO
);
303 if (F_ISSET(dbp
, DB_AM_DUP
))
304 F_SET(meta
, BTM_DUP
);
305 if (F_ISSET(dbp
, DB_RE_FIXEDLEN
))
306 F_SET(meta
, BTM_FIXEDLEN
);
307 if (F_ISSET(dbp
, DB_BT_RECNUM
))
308 F_SET(meta
, BTM_RECNUM
);
309 if (F_ISSET(dbp
, DB_RE_RENUMBER
))
310 F_SET(meta
, BTM_RENUMBER
);
/* Copy DB_FILE_ID_LEN bytes of dbp->lock.fileid into meta->uid. */
311 memcpy(meta
->uid
, dbp
->lock
.fileid
, DB_FILE_ID_LEN
);
313 /* Create and initialize a root page. */
316 __bam_lget(dbp
, 0, PGNO_ROOT
, DB_LOCK_WRITE
, &rootlock
)) != 0)
/*
 * If the root page fetch fails, the root lock is released.
 * NOTE(review): whether the metadata page and metalock are also released on
 * this path is not visible here -- verify against the full file.
 */
318 if ((ret
= __bam_pget(dbp
, &root
, &pgno
, DB_MPOOL_CREATE
)) != 0) {
319 (void)__BT_LPUT(dbp
, rootlock
);
/* Page type is P_LRECNO for recno handles, P_LBTREE otherwise. */
322 P_INIT(root
, dbp
->pgsize
, PGNO_ROOT
, PGNO_INVALID
,
323 PGNO_INVALID
, 1, dbp
->type
== DB_RECNO
? P_LRECNO
: P_LBTREE
);
326 /* Release the metadata and root pages. */
/* Both pages are put back with DB_MPOOL_DIRTY since they were just written. */
327 if ((ret
= memp_fput(dbp
->mpf
, (PAGE
*)meta
, DB_MPOOL_DIRTY
)) != 0)
329 if ((ret
= memp_fput(dbp
->mpf
, root
, DB_MPOOL_DIRTY
)) != 0)
333 * Flush the metadata and root pages to disk -- since the user can't
334 * transaction protect open, the pages have to exist during recovery.
337 * It's not useful to return not-yet-flushed here -- convert it to
/*
 * DB_INCOMPLETE from memp_fsync is special-cased; the value it is converted
 * to is on a line not visible in this span.
 */
340 if ((ret
= memp_fsync(dbp
->mpf
)) == DB_INCOMPLETE
)
343 /* Release the locks. */
/* Lock-release results are deliberately ignored via the (void) casts. */
344 (void)__BT_LPUT(dbp
, metalock
);
345 (void)__BT_LPUT(dbp
, rootlock
);