2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1997, 1998
5 * Sleepycat Software. All rights reserved.
11 static const char sccsid
[] = "@(#)bt_recno.c 10.53 (Sleepycat) 12/11/98";
14 #ifndef NO_SYSTEM_INCLUDES
15 #include <sys/types.h>
31 static int __ram_add
__P((DBC
*, db_recno_t
*, DBT
*, u_int32_t
, u_int32_t
));
32 static int __ram_delete
__P((DB
*, DB_TXN
*, DBT
*, u_int32_t
));
33 static int __ram_fmap
__P((DBC
*, db_recno_t
));
34 static int __ram_i_delete
__P((DBC
*));
35 static int __ram_put
__P((DB
*, DB_TXN
*, DBT
*, DBT
*, u_int32_t
));
36 static int __ram_source
__P((DB
*, RECNO
*, const char *));
37 static int __ram_sync
__P((DB
*, u_int32_t
));
38 static int __ram_update
__P((DBC
*, db_recno_t
, int));
39 static int __ram_vmap
__P((DBC
*, db_recno_t
));
40 static int __ram_writeback
__P((DBC
*));
43 * In recno, there are two meanings to the on-page "deleted" flag. If we're
44 * re-numbering records, it means the record was implicitly created. We skip
45 * over implicitly created records if doing a cursor "next" or "prev", and
46 * return DB_KEYEMPTY if they're explicitly requested. If not re-numbering
47 * records, it means that the record was implicitly created, or was deleted.
48 * We skip over implicitly created or deleted records if doing a cursor "next"
49 * or "prev", and return DB_KEYEMPTY if they're explicitly requested.
51 * If we're re-numbering records, then we have to detect in the cursor that
52 * a record was deleted, and adjust the cursor as necessary on the next get.
53 * If we're not re-numbering records, then we can detect that a record has
54 * been deleted by looking at the actual on-page record, so we completely
55 * ignore the cursor's delete flag. This is different from the B+tree code,
56 * which also maintains whether the cursor references a deleted record in the
57 * cursor, and which doesn't always check the on-page value.
/*
 * CD_SET, CD_CLR, CD_ISSET --
 *	Set, clear and test the C_DELETED flag on a cursor.  All three only
 *	act when DB_RE_RENUMBER is set on the DB handle; otherwise CD_SET
 *	and CD_CLR do nothing and CD_ISSET evaluates to false.
 *
 *	The statement macros are wrapped in do { } while (0) so that
 *	"CD_SET(dbp, cp);" is a single statement and remains safe in an
 *	unbraced if/else.
 */
#define	CD_SET(dbp, cp) do {						\
	if (F_ISSET(dbp, DB_RE_RENUMBER))				\
		F_SET(cp, C_DELETED);					\
} while (0)
#define	CD_CLR(dbp, cp) do {						\
	if (F_ISSET(dbp, DB_RE_RENUMBER))				\
		F_CLR(cp, C_DELETED);					\
} while (0)
#define	CD_ISSET(dbp, cp)						\
	(F_ISSET(dbp, DB_RE_RENUMBER) && F_ISSET(cp, C_DELETED))
72 * Recno open function.
74 * PUBLIC: int __ram_open __P((DB *, DB_INFO *));
77 __ram_open(dbp
, dbinfo
)
86 /* Allocate and initialize the private btree structure. */
87 if ((ret
= __os_calloc(1, sizeof(BTREE
), &t
)) != 0)
90 __bam_setovflsize(dbp
);
92 /* Allocate and initialize the private recno structure. */
93 if ((ret
= __os_calloc(1, sizeof(*rp
), &rp
)) != 0)
95 /* Link in the private recno structure. */
99 * Intention is to make sure all of the user's selections are okay
100 * here and then use them without checking.
102 if (dbinfo
== NULL
) {
106 F_SET(rp
, RECNO_EOF
);
109 * If the user specified a source tree, open it and map it in.
112 * We don't complain if the user specified transactions or
113 * threads. It's possible to make it work, but you'd better
114 * know what you're doing!
116 if (dbinfo
->re_source
== NULL
) {
118 F_SET(rp
, RECNO_EOF
);
121 __ram_source(dbp
, rp
, dbinfo
->re_source
)) != 0)
125 /* Copy delimiter, length and padding values. */
127 F_ISSET(dbp
, DB_RE_DELIMITER
) ? dbinfo
->re_delim
: '\n';
128 rp
->re_pad
= F_ISSET(dbp
, DB_RE_PAD
) ? dbinfo
->re_pad
: ' ';
130 if (F_ISSET(dbp
, DB_RE_FIXEDLEN
)) {
131 if ((rp
->re_len
= dbinfo
->re_len
) == 0) {
133 "record length must be greater than 0");
141 /* Initialize the remaining fields/methods of the DB. */
142 dbp
->am_close
= __ram_close
;
143 dbp
->del
= __ram_delete
;
144 dbp
->put
= __ram_put
;
145 dbp
->stat
= __bam_stat
;
146 dbp
->sync
= __ram_sync
;
148 /* Start up the tree. */
149 if ((ret
= __bam_read_root(dbp
)) != 0)
152 /* Set the overflow page size. */
153 __bam_setovflsize(dbp
);
155 /* If we're snapshotting an underlying source file, do it now. */
156 if (dbinfo
!= NULL
&& F_ISSET(dbinfo
, DB_SNAPSHOT
)) {
157 /* Allocate a cursor. */
158 if ((ret
= dbp
->cursor(dbp
, NULL
, &dbc
, 0)) != 0)
161 /* Do the snapshot. */
162 if ((ret
= __ram_update(dbc
,
163 DB_MAX_RECORDS
, 0)) != 0 && ret
== DB_NOTFOUND
)
166 /* Discard the cursor. */
167 if ((t_ret
= dbc
->c_close(dbc
)) != 0 && ret
== 0)
176 err
: /* If we mmap'd a source file, discard it. */
177 if (rp
->re_smap
!= NULL
)
178 (void)__db_unmapfile(rp
->re_smap
, rp
->re_msize
);
180 /* If we opened a source file, discard it. */
182 (void)__os_close(rp
->re_fd
);
183 if (rp
->re_source
!= NULL
)
184 __os_freestr(rp
->re_source
);
186 __os_free(rp
, sizeof(*rp
));
193 * Recno db->del function.
196 __ram_delete(dbp
, txn
, key
, flags
)
209 /* Check for invalid flags. */
210 if ((ret
= __db_delchk(dbp
,
211 key
, flags
, F_ISSET(dbp
, DB_AM_RDONLY
))) != 0)
214 /* Acquire a cursor. */
215 if ((ret
= dbp
->cursor(dbp
, txn
, &dbc
, DB_WRITELOCK
)) != 0)
218 DEBUG_LWRITE(dbc
, txn
, "ram_delete", key
, NULL
, flags
);
220 /* Check the user's record number and fill in as necessary. */
221 if ((ret
= __ram_getno(dbc
, key
, &recno
, 0)) != 0)
227 ret
= __ram_i_delete(dbc
);
229 /* Release the cursor. */
230 err
: if ((t_ret
= dbc
->c_close(dbc
)) != 0 && ret
== 0)
238 * Internal version of recno delete, called by __ram_delete and __ram_c_del.
252 int exact
, ret
, stack
;
260 * If this is CDB and this isn't a write cursor, then it's an error.
261 * If it is a write cursor, but we don't yet hold the write lock, then
262 * we need to upgrade to the write lock.
264 if (F_ISSET(dbp
, DB_AM_CDB
)) {
265 /* Make sure it's a valid update cursor. */
266 if (!F_ISSET(dbc
, DBC_RMW
| DBC_WRITER
))
269 if (F_ISSET(dbc
, DBC_RMW
) &&
270 (ret
= lock_get(dbp
->dbenv
->lk_info
, dbc
->locker
,
271 DB_LOCK_UPGRADE
, &dbc
->lock_dbt
, DB_LOCK_WRITE
,
276 /* Search the tree for the key; delete only deletes exact matches. */
277 if ((ret
= __bam_rsearch(dbc
, &cp
->recno
, S_DELETE
, 1, &exact
)) != 0)
286 indx
= cp
->csp
->indx
;
289 * If re-numbering records, the on-page deleted flag can only mean
290 * that this record was implicitly created. Applications aren't
291 * permitted to delete records they never created, return an error.
293 * If not re-numbering records, the on-page deleted flag means that
294 * this record was implicitly created, or, was deleted at some time.
295 * The former is an error because applications aren't permitted to
296 * delete records they never created, the latter is an error because
297 * if the record was "deleted", we could never have found it.
299 if (B_DISSET(GET_BKEYDATA(h
, indx
)->type
)) {
304 if (F_ISSET(dbp
, DB_RE_RENUMBER
)) {
305 /* Delete the item, adjust the counts, adjust the cursors. */
306 if ((ret
= __bam_ditem(dbc
, h
, indx
)) != 0)
308 __bam_adjust(dbc
, -1);
309 __ram_ca(dbp
, cp
->recno
, CA_DELETE
);
312 * If the page is empty, delete it. The whole tree is locked
313 * so there are no preparations to make.
315 if (NUM_ENT(h
) == 0 && h
->pgno
!= PGNO_ROOT
) {
317 ret
= __bam_dpages(dbc
);
320 /* Use a delete/put pair to replace the record with a marker. */
321 if ((ret
= __bam_ditem(dbc
, h
, indx
)) != 0)
324 B_TSET(bk
.type
, B_KEYDATA
, 1);
326 memset(&hdr
, 0, sizeof(hdr
));
328 hdr
.size
= SSZA(BKEYDATA
, data
);
329 memset(&data
, 0, sizeof(data
));
330 data
.data
= (char *)"";
332 if ((ret
= __db_pitem(dbc
,
333 h
, indx
, BKEYDATA_SIZE(0), &hdr
, &data
)) != 0)
336 F_SET(t
->recno
, RECNO_MODIFIED
);
339 __bam_stkrel(dbc
, 0);
341 /* If we upgraded the CDB lock upon entry; downgrade it now. */
342 if (F_ISSET(dbp
, DB_AM_CDB
) && F_ISSET(dbc
, DBC_RMW
))
343 (void)__lock_downgrade(dbp
->dbenv
->lk_info
, dbc
->mylock
,
350 * Recno db->put function.
353 __ram_put(dbp
, txn
, key
, data
, flags
)
365 /* Check for invalid flags. */
366 if ((ret
= __db_putchk(dbp
,
367 key
, data
, flags
, F_ISSET(dbp
, DB_AM_RDONLY
), 0)) != 0)
370 /* Allocate a cursor. */
371 if ((ret
= dbp
->cursor(dbp
, txn
, &dbc
, DB_WRITELOCK
)) != 0)
374 DEBUG_LWRITE(dbc
, txn
, "ram_put", key
, data
, flags
);
377 * If we're appending to the tree, make sure we've read in all of
378 * the backing source file. Otherwise, check the user's record
379 * number and fill in as necessary.
381 ret
= flags
== DB_APPEND
?
382 __ram_update(dbc
, DB_MAX_RECORDS
, 0) :
383 __ram_getno(dbc
, key
, &recno
, 1);
385 /* Add the record. */
387 ret
= __ram_add(dbc
, &recno
, data
, flags
, 0);
389 /* Discard the cursor. */
390 if ((t_ret
= dbc
->c_close(dbc
)) != 0 && ret
== 0)
393 /* Return the record number if we're appending to the tree. */
394 if (ret
== 0 && flags
== DB_APPEND
)
395 *(db_recno_t
*)key
->data
= recno
;
402 * Recno db->sync function.
405 __ram_sync(dbp
, flags
)
413 * Sync the underlying btree.
416 * We don't need to do a panic check or flags check, the "real"
417 * sync function does all that for us.
419 if ((ret
= __db_sync(dbp
, flags
)) != 0)
422 /* Allocate a cursor. */
423 if ((ret
= dbp
->cursor(dbp
, NULL
, &dbc
, 0)) != 0)
426 DEBUG_LWRITE(dbc
, NULL
, "ram_sync", NULL
, NULL
, flags
);
428 /* Copy back the backing source file. */
429 ret
= __ram_writeback(dbc
);
431 /* Discard the cursor. */
432 if ((t_ret
= dbc
->c_close(dbc
)) != 0 && ret
== 0)
440 * Recno db->close function.
442 * PUBLIC: int __ram_close __P((DB *));
450 rp
= ((BTREE
*)dbp
->internal
)->recno
;
452 /* Close any underlying mmap region. */
453 if (rp
->re_smap
!= NULL
)
454 (void)__db_unmapfile(rp
->re_smap
, rp
->re_msize
);
456 /* Close any backing source file descriptor. */
458 (void)__os_close(rp
->re_fd
);
460 /* Free any backing source file name. */
461 if (rp
->re_source
!= NULL
)
462 __os_freestr(rp
->re_source
);
464 /* Free allocated memory. */
465 __os_free(rp
, sizeof(RECNO
));
466 ((BTREE
*)dbp
->internal
)->recno
= NULL
;
468 /* Close the underlying btree. */
469 return (__bam_close(dbp
));
474 * Recno cursor->c_del function.
476 * PUBLIC: int __ram_c_del __P((DBC *, u_int32_t));
479 __ram_c_del(dbc
, flags
)
492 /* Check for invalid flags. */
493 if ((ret
= __db_cdelchk(dbp
, flags
,
494 F_ISSET(dbp
, DB_AM_RDONLY
), cp
->recno
!= RECNO_OOB
)) != 0)
497 DEBUG_LWRITE(dbc
, dbc
->txn
, "ram_c_del", NULL
, NULL
, flags
);
500 * If we are running CDB, this had better be either a write
501 * cursor or an immediate writer.
503 if (F_ISSET(dbp
, DB_AM_CDB
))
504 if (!F_ISSET(dbc
, DBC_RMW
| DBC_WRITER
))
508 * The semantics of cursors during delete are as follows: if record
509 * numbers are mutable (DB_RE_RENUMBER is set), deleting a record
510 * causes the cursor to automatically point to the record immediately
511 * following. In this case it is possible to use a single cursor for
512 * repeated delete operations, without intervening operations.
514 * If record numbers are not mutable, then records are replaced with
515 * a marker containing a delete flag. If the record referenced by
516 * this cursor has already been deleted, we will detect that as part
517 * of the delete operation, and fail.
519 return (__ram_i_delete(dbc
));
524 * Recno cursor->c_get function.
526 * PUBLIC: int __ram_c_get __P((DBC *, DBT *, DBT *, u_int32_t));
529 __ram_c_get(dbc
, key
, data
, flags
)
538 int exact
, ret
, stack
, tmp_rmw
;
545 /* Check for invalid flags. */
546 if ((ret
= __db_cgetchk(dbc
->dbp
,
547 key
, data
, flags
, cp
->recno
!= RECNO_OOB
)) != 0)
550 /* Clear OR'd in additional bits so we can check for flag equality. */
552 if (LF_ISSET(DB_RMW
)) {
553 if (!F_ISSET(dbp
, DB_AM_CDB
)) {
560 DEBUG_LREAD(dbc
, dbc
->txn
, "ram_c_get",
561 flags
== DB_SET
|| flags
== DB_SET_RANGE
? key
: NULL
, NULL
, flags
);
563 /* Initialize the cursor for a new retrieval. */
566 retry
: /* Update the record number. */
571 * If record numbers are mutable: if we just deleted a record,
572 * there is no action necessary, we return the record following
573 * the deleted item by virtue of renumbering the tree.
578 * If record numbers are mutable: if we just deleted a record,
579 * we have to avoid incrementing the record number so that we
580 * return the right record by virtue of renumbering the tree.
582 if (CD_ISSET(dbp
, cp
))
585 if (cp
->recno
!= RECNO_OOB
) {
595 if (cp
->recno
!= RECNO_OOB
) {
596 if (cp
->recno
== 1) {
606 if (((ret
= __ram_update(dbc
,
607 DB_MAX_RECORDS
, 0)) != 0) && ret
!= DB_NOTFOUND
)
609 if ((ret
= __bam_nrecs(dbc
, &cp
->recno
)) != 0)
611 if (cp
->recno
== 0) {
618 if ((ret
= __ram_getno(dbc
, key
, &cp
->recno
, 0)) != 0)
623 /* Return the key if the user didn't give us one. */
624 if (flags
!= DB_SET
&& flags
!= DB_SET_RANGE
&&
625 (ret
= __db_retcopy(key
, &cp
->recno
, sizeof(cp
->recno
),
626 &dbc
->rkey
.data
, &dbc
->rkey
.ulen
, dbp
->db_malloc
)) != 0)
629 /* Search the tree for the record. */
630 if ((ret
= __bam_rsearch(dbc
, &cp
->recno
,
631 F_ISSET(dbc
, DBC_RMW
) ? S_FIND_WR
: S_FIND
, 1, &exact
)) != 0)
639 indx
= cp
->csp
->indx
;
642 * If re-numbering records, the on-page deleted flag means this record
643 * was implicitly created. If not re-numbering records, the on-page
644 * deleted flag means this record was implicitly created, or, it was
645 * deleted at some time. Regardless, we skip such records if doing
646 * cursor next/prev operations, and fail if the application requested them explicitly.
649 if (B_DISSET(GET_BKEYDATA(h
, indx
)->type
)) {
650 if (flags
== DB_NEXT
|| flags
== DB_PREV
) {
651 (void)__bam_stkrel(dbc
, 0);
658 /* Return the data item. */
659 if ((ret
= __db_ret(dbp
,
660 h
, indx
, data
, &dbc
->rdata
.data
, &dbc
->rdata
.ulen
)) != 0)
663 /* The cursor was reset, no further delete adjustment is necessary. */
667 (void)__bam_stkrel(dbc
, 0);
669 /* Release temporary lock upgrade. */
681 * Recno cursor->c_put function.
683 * PUBLIC: int __ram_c_put __P((DBC *, DBT *, DBT *, u_int32_t));
686 __ram_c_put(dbc
, key
, data
, flags
)
701 if ((ret
= __db_cputchk(dbc
->dbp
, key
, data
, flags
,
702 F_ISSET(dbc
->dbp
, DB_AM_RDONLY
), cp
->recno
!= RECNO_OOB
)) != 0)
705 DEBUG_LWRITE(dbc
, dbc
->txn
, "ram_c_put", NULL
, data
, flags
);
708 * If we are running CDB, this had better be either a write
709 * cursor or an immediate writer. If it's a regular writer,
710 * that means we have an IWRITE lock and we need to upgrade
711 * it to a write lock.
713 if (F_ISSET(dbp
, DB_AM_CDB
)) {
714 if (!F_ISSET(dbc
, DBC_RMW
| DBC_WRITER
))
717 if (F_ISSET(dbc
, DBC_RMW
) &&
718 (ret
= lock_get(dbp
->dbenv
->lk_info
, dbc
->locker
,
719 DB_LOCK_UPGRADE
, &dbc
->lock_dbt
, DB_LOCK_WRITE
,
724 /* Initialize the cursor for a new retrieval. */
728 * To split, we need a valid key for the page. Since it's a cursor,
729 * we have to build one.
731 * The split code discards all short-term locks and stack pages.
734 split
: arg
= &cp
->recno
;
735 if ((ret
= __bam_split(dbc
, arg
)) != 0)
739 if ((ret
= __bam_rsearch(dbc
, &cp
->recno
, S_INSERT
, 1, &exact
)) != 0)
745 if ((ret
= __bam_iitem(dbc
, &cp
->csp
->page
,
746 &cp
->csp
->indx
, key
, data
, flags
, 0)) == DB_NEEDSPLIT
) {
747 if ((ret
= __bam_stkrel(dbc
, 0)) != 0)
751 if ((ret
= __bam_stkrel(dbc
, 0)) != 0)
756 /* Adjust the cursors. */
757 __ram_ca(dbp
, cp
->recno
, CA_IAFTER
);
759 /* Set this cursor to reference the new record. */
760 cp
->recno
= copy
.recno
+ 1;
763 /* Adjust the cursors. */
764 __ram_ca(dbp
, cp
->recno
, CA_IBEFORE
);
766 /* Set this cursor to reference the new record. */
767 cp
->recno
= copy
.recno
;
771 /* The cursor was reset, no further delete adjustment is necessary. */
774 err
: if (F_ISSET(dbp
, DB_AM_CDB
) && F_ISSET(dbc
, DBC_RMW
))
775 (void)__lock_downgrade(dbp
->dbenv
->lk_info
, dbc
->mylock
,
788 * PUBLIC: void __ram_ca __P((DB *, db_recno_t, ca_recno_arg));
791 __ram_ca(dbp
, recno
, op
)
800 * Adjust the cursors. See the comment in __bam_ca_delete().
803 for (dbc
= TAILQ_FIRST(&dbp
->active_queue
);
804 dbc
!= NULL
; dbc
= TAILQ_NEXT(dbc
, links
)) {
808 if (recno
> cp
->recno
)
810 if (recno
== cp
->recno
)
814 if (recno
> cp
->recno
)
818 if (recno
>= cp
->recno
)
823 DB_THREAD_UNLOCK(dbp
);
828 * Check the user's record number, and make sure we've seen it.
830 * PUBLIC: int __ram_getno __P((DBC *, const DBT *, db_recno_t *, int));
833 __ram_getno(dbc
, key
, rep
, can_create
)
844 /* Check the user's record number. */
845 if ((recno
= *(db_recno_t
*)key
->data
) == 0) {
846 __db_err(dbp
->dbenv
, "illegal record number of 0");
853 * Btree can neither create records nor read them in. Recno can
854 * do both, see if we can find the record.
856 return (dbp
->type
== DB_RECNO
?
857 __ram_update(dbc
, recno
, can_create
) : 0);
862 * Ensure the tree has records up to and including the specified one.
865 __ram_update(dbc
, recno
, can_create
)
881 * If we can't create records and we've read the entire backing input file, there is nothing more to read.
884 if (!can_create
&& F_ISSET(rp
, RECNO_EOF
))
888 * If we haven't seen this record yet, try to get it from the original backing source file.
891 if ((ret
= __bam_nrecs(dbc
, &nrecs
)) != 0)
893 if (!F_ISSET(rp
, RECNO_EOF
) && recno
> nrecs
) {
894 if ((ret
= rp
->re_irec(dbc
, recno
)) != 0)
896 if ((ret
= __bam_nrecs(dbc
, &nrecs
)) != 0)
901 * If we can create records, create empty ones up to the requested record number.
904 if (!can_create
|| recno
<= nrecs
+ 1)
909 dbc
->rdata
.flags
= 0;
910 if (F_ISSET(dbp
, DB_RE_FIXEDLEN
)) {
911 if (dbc
->rdata
.ulen
< rp
->re_len
) {
913 __os_realloc(&dbc
->rdata
.data
, rp
->re_len
)) != 0) {
915 dbc
->rdata
.data
= NULL
;
918 dbc
->rdata
.ulen
= rp
->re_len
;
920 dbc
->rdata
.size
= rp
->re_len
;
921 memset(dbc
->rdata
.data
, rp
->re_pad
, rp
->re_len
);
925 while (recno
> ++nrecs
)
926 if ((ret
= __ram_add(dbc
,
927 &nrecs
, &dbc
->rdata
, 0, BI_DELETED
)) != 0)
934 * Load information about the backing file.
937 __ram_source(dbp
, rp
, fname
)
943 u_int32_t bytes
, mbytes
, oflags
;
948 * The caller has full responsibility for cleaning up on error --
949 * (it has to anyway, in case it fails after this routine succeeds).
951 if ((ret
= __db_appname(dbp
->dbenv
,
952 DB_APP_DATA
, NULL
, fname
, 0, NULL
, &rp
->re_source
)) != 0)
955 oflags
= F_ISSET(dbp
, DB_AM_RDONLY
) ? DB_RDONLY
: 0;
957 __db_open(rp
->re_source
, oflags
, oflags
, 0, &rp
->re_fd
)) != 0) {
958 __db_err(dbp
->dbenv
, "%s: %s", rp
->re_source
, strerror(ret
));
964 * We'd like to test to see if the file is too big to mmap. Since we
965 * don't know what size or type off_t's or size_t's are, or the largest
966 * unsigned integral type is, or what random insanity the local C
967 * compiler will perpetrate, doing the comparison in a portable way is
968 * flatly impossible. Hope that mmap fails if the file is too large.
970 if ((ret
= __os_ioinfo(rp
->re_source
,
971 rp
->re_fd
, &mbytes
, &bytes
, NULL
)) != 0) {
972 __db_err(dbp
->dbenv
, "%s: %s", rp
->re_source
, strerror(ret
));
975 if (mbytes
== 0 && bytes
== 0) {
976 F_SET(rp
, RECNO_EOF
);
980 size
= mbytes
* MEGABYTE
+ bytes
;
981 if ((ret
= __db_mapfile(rp
->re_source
,
982 rp
->re_fd
, (size_t)size
, 1, &rp
->re_smap
)) != 0)
984 rp
->re_cmap
= rp
->re_smap
;
985 rp
->re_emap
= (u_int8_t
*)rp
->re_smap
+ (rp
->re_msize
= size
);
986 rp
->re_irec
= F_ISSET(dbp
, DB_RE_FIXEDLEN
) ? __ram_fmap
: __ram_vmap
;
992 * Rewrite the backing file.
1004 u_int8_t delim
, *pad
;
1007 rp
= ((BTREE
*)dbp
->internal
)->recno
;
1009 /* If the file wasn't modified, we're done. */
1010 if (!F_ISSET(rp
, RECNO_MODIFIED
))
1013 /* If there's no backing source file, we're done. */
1014 if (rp
->re_source
== NULL
) {
1015 F_CLR(rp
, RECNO_MODIFIED
);
1020 * Read any remaining records into the tree.
1023 * This is why we can't support transactions when applications specify
1024 * backing (re_source) files. At this point we have to read in the
1025 * rest of the records from the file so that we can write all of the
1026 * records back out again, which could modify a page for which we'd
1027 * have to log changes and which we don't have locked. This could be
1028 * partially fixed by taking a snapshot of the entire file during the
1029 * db_open(), or, since db_open() isn't transaction protected, as part
1030 * of the first DB operation. But, if a checkpoint occurs then, the
1031 * part of the log holding the copy of the file could be discarded, and
1032 * that would make it impossible to recover in the face of disaster.
1033 * This could all probably be fixed, but it would require transaction
1034 * protecting the backing source file, i.e. mpool would have to know
1035 * about it, and we don't want to go there.
1038 __ram_update(dbc
, DB_MAX_RECORDS
, 0)) != 0 && ret
!= DB_NOTFOUND
)
1043 * Close any underlying mmap region. This is required for Windows NT
1044 * (4.0, Service Pack 2) -- if the file is still mapped, the following truncating open will fail.
1047 if (rp
->re_smap
!= NULL
) {
1048 (void)__db_unmapfile(rp
->re_smap
, rp
->re_msize
);
1052 /* Get rid of any backing file descriptor, just on general principles. */
1053 if (rp
->re_fd
!= -1) {
1054 (void)__os_close(rp
->re_fd
);
1058 /* Open the file, truncating it. */
1059 if ((ret
= __db_open(rp
->re_source
,
1060 DB_SEQUENTIAL
| DB_TRUNCATE
,
1061 DB_SEQUENTIAL
| DB_TRUNCATE
, 0, &fd
)) != 0) {
1062 __db_err(dbp
->dbenv
, "%s: %s", rp
->re_source
, strerror(ret
));
1067 * We step through the records, writing each one out. Use the record
1068 * number and the dbp->get() function, instead of a cursor, so we find
1069 * and write out "deleted" or non-existent records.
1071 memset(&key
, 0, sizeof(key
));
1072 memset(&data
, 0, sizeof(data
));
1073 key
.size
= sizeof(db_recno_t
);
1077 * We'll need the delimiter if we're doing variable-length records,
1078 * and the pad character if we're doing fixed-length records.
1080 delim
= rp
->re_delim
;
1081 if (F_ISSET(dbp
, DB_RE_FIXEDLEN
)) {
1082 if ((ret
= __os_malloc(rp
->re_len
, NULL
, &pad
)) != 0)
1084 memset(pad
, rp
->re_pad
, rp
->re_len
);
1086 COMPQUIET(pad
, NULL
);
1087 for (keyno
= 1;; ++keyno
) {
1088 switch (ret
= dbp
->get(dbp
, NULL
, &key
, &data
, 0)) {
1091 __os_write(fd
, data
.data
, data
.size
, &nw
)) != 0)
1093 if (nw
!= (ssize_t
)data
.size
) {
1099 if (F_ISSET(dbp
, DB_RE_FIXEDLEN
)) {
1101 __os_write(fd
, pad
, rp
->re_len
, &nw
)) != 0)
1103 if (nw
!= (ssize_t
)rp
->re_len
) {
1113 if (!F_ISSET(dbp
, DB_RE_FIXEDLEN
)) {
1114 if ((ret
= __os_write(fd
, &delim
, 1, &nw
)) != 0)
1124 done
: /* Close the file descriptor. */
1125 if ((t_ret
= __os_close(fd
)) != 0 || ret
== 0)
1129 F_CLR(rp
, RECNO_MODIFIED
);
1135 * Get fixed length records from a file.
1138 __ram_fmap(dbc
, top
)
1147 u_int8_t
*sp
, *ep
, *p
;
1150 if ((ret
= __bam_nrecs(dbc
, &recno
)) != 0)
1154 rp
= ((BTREE
*)(dbp
->internal
))->recno
;
1156 if (dbc
->rdata
.ulen
< rp
->re_len
) {
1157 if ((ret
= __os_realloc(&dbc
->rdata
.data
, rp
->re_len
)) != 0) {
1158 dbc
->rdata
.ulen
= 0;
1159 dbc
->rdata
.data
= NULL
;
1162 dbc
->rdata
.ulen
= rp
->re_len
;
1165 memset(&data
, 0, sizeof(data
));
1166 data
.data
= dbc
->rdata
.data
;
1167 data
.size
= rp
->re_len
;
1169 sp
= (u_int8_t
*)rp
->re_cmap
;
1170 ep
= (u_int8_t
*)rp
->re_emap
;
1171 while (recno
< top
) {
1173 F_SET(rp
, RECNO_EOF
);
1174 return (DB_NOTFOUND
);
1177 for (p
= dbc
->rdata
.data
;
1178 sp
< ep
&& len
> 0; *p
++ = *sp
++, --len
)
1182 * Another process may have read this record from the input
1183 * file and stored it into the database already, in which
1184 * case we don't need to repeat that operation. We detect
1185 * this by checking if the last record we've read is greater than
1186 * or equal to the number of records in the database.
1189 * We should just do a seek, since the records are fixed length.
1192 if (rp
->re_last
>= recno
) {
1194 memset(p
, rp
->re_pad
, len
);
1197 if ((ret
= __ram_add(dbc
, &recno
, &data
, 0, 0)) != 0)
1208 * Get variable length records from a file.
1211 __ram_vmap(dbc
, top
)
1221 rp
= ((BTREE
*)(dbc
->dbp
->internal
))->recno
;
1223 if ((ret
= __bam_nrecs(dbc
, &recno
)) != 0)
1226 memset(&data
, 0, sizeof(data
));
1228 delim
= rp
->re_delim
;
1230 sp
= (u_int8_t
*)rp
->re_cmap
;
1231 ep
= (u_int8_t
*)rp
->re_emap
;
1232 while (recno
< top
) {
1234 F_SET(rp
, RECNO_EOF
);
1235 return (DB_NOTFOUND
);
1237 for (data
.data
= sp
; sp
< ep
&& *sp
!= delim
; ++sp
)
1241 * Another process may have read this record from the input
1242 * file and stored it into the database already, in which
1243 * case we don't need to repeat that operation. We detect
1244 * this by checking if the last record we've read is greater than
1245 * or equal to the number of records in the database.
1247 if (rp
->re_last
>= recno
) {
1248 data
.size
= sp
- (u_int8_t
*)data
.data
;
1250 if ((ret
= __ram_add(dbc
, &recno
, &data
, 0, 0)) != 0)
1262 * Add records into the tree.
1265 __ram_add(dbc
, recnop
, data
, flags
, bi_flags
)
1269 u_int32_t flags
, bi_flags
;
1276 int exact
, isdeleted
, ret
, stack
;
1281 retry
: /* Find the slot for insertion. */
1282 if ((ret
= __bam_rsearch(dbc
, recnop
,
1283 S_INSERT
| (flags
== DB_APPEND
? S_APPEND
: 0), 1, &exact
)) != 0)
1286 indx
= cp
->csp
->indx
;
1290 * If re-numbering records, the on-page deleted flag means this record
1291 * was implicitly created. If not re-numbering records, the on-page
1292 * deleted flag means this record was implicitly created, or, it was
1293 * deleted at some time.
1295 * If DB_NOOVERWRITE is set and the item already exists in the tree,
1296 * return an error unless the item was either marked for deletion or
1297 * only implicitly created.
1301 bk
= GET_BKEYDATA(h
, indx
);
1302 if (B_DISSET(bk
->type
))
1305 if (flags
== DB_NOOVERWRITE
) {
1312 * Select the arguments for __bam_iitem() and do the insert. If the
1313 * key is an exact match, or we're replacing the data item with a
1314 * new data item, replace the current item. If the key isn't an exact
1315 * match, we're inserting a new key/data pair, before the search location.
1318 switch (ret
= __bam_iitem(dbc
,
1319 &h
, &indx
, NULL
, data
, exact
? DB_CURRENT
: DB_BEFORE
, bi_flags
)) {
1322 * Don't adjust anything.
1324 * If we inserted a record, no cursors need adjusting because
1325 * the only new record it's possible to insert is at the very
1326 * end of the tree. The necessary adjustments to the internal
1327 * page counts were made by __bam_iitem().
1329 * If we overwrote a record, no cursors need adjusting because
1330 * future DBcursor->get calls will simply return the underlying
1331 * record (there's no adjustment made for the DB_CURRENT flag
1332 * when a cursor get operation immediately follows a cursor
1333 * delete operation, and the normal adjustment for the DB_NEXT
1334 * flag is still correct).
1338 /* Discard the stack of pages and split the page. */
1339 (void)__bam_stkrel(dbc
, 0);
1342 if ((ret
= __bam_split(dbc
, recnop
)) != 0)
1353 __bam_stkrel(dbc
, 0);