Sync usage with man page.
[netbsd-mini2440.git] / lib / libc / db / btree / storage.c
blob963d16c066c910e9bc014e476720d71089284dc8
1 /*-
2 * Copyright (c) 1990 The Regents of the University of California.
3 * All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * Mike Olson.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
37 #if defined(LIBC_SCCS) && !defined(lint)
38 static char sccsid[] = "@(#)storage.c 5.2 (Berkeley) 2/22/91";
39 #endif /* LIBC_SCCS and not lint */
41 #include <sys/param.h>
42 #include <db.h>
43 #include <errno.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <unistd.h>
47 #include "btree.h"
50 * BT_GETPAGE -- Make pgno the current page of the btree.
52 * This routine is just a wrapper that decides whether to call the
53 * memory or disk-based routine to do the work.
55 * Parameters:
56 * t -- btree in which to get page
57 * pgno -- page number to get
59 * Returns:
60 * RET_SUCCESS or RET_ERROR.
63 int
64 _bt_getpage(t, pgno)
65 BTREE_P t;
66 pgno_t pgno;
68 #ifdef DEBUG
69 if (pgno == P_NONE)
70 _punt();
71 #endif /* DEBUG */
73 /* see if we can get away without doing any work */
74 if (t->bt_curpage != (BTHEADER *) NULL) {
75 if (t->bt_curpage->h_pgno == pgno)
76 return (RET_SUCCESS);
79 if (t->bt_fname == (char *) NULL)
80 return (_bt_getmpage(t, pgno));
81 else
82 return (_bt_getdpage(t, pgno));
86 * _BT_GETMPAGE -- Make pgno the current page of the btree.
88 * This routine gets pages for in-memory btrees.
90 * Parameters:
91 * t -- btree in which to get page
92 * pgno -- page number to get
94 * Returns:
95 * RET_SUCCESS or RET_ERROR.
98 int
99 _bt_getmpage(t, pgno)
100 register BTREE_P t;
101 pgno_t pgno;
103 int htindex;
104 BTHEADER *h;
105 HTBUCKET *b;
107 if (t->bt_curpage == (BTHEADER *) NULL) {
108 if (pgno != P_ROOT) {
109 errno = EBADF;
110 return (RET_ERROR);
113 t->bt_npages++;
114 h = (BTHEADER *) malloc((unsigned) t->bt_psize);
115 if (h == (BTHEADER *) NULL)
116 return (RET_ERROR);
118 h->h_pgno = P_ROOT;
119 h->h_flags = F_LEAF;
120 h->h_lower = (index_t)
121 (((char *) &(h->h_linp[0])) - ((char *) h));
122 h->h_upper = t->bt_psize;
123 h->h_prevpg = h->h_nextpg = P_NONE;
125 t->bt_curpage = h;
127 /* get the root page into the hash table */
128 if (_bt_write(t, h, RELEASE) == RET_ERROR)
129 return (RET_ERROR);
132 htindex = HASHKEY(pgno);
134 for (b = t->bt_s.bt_ht[htindex];
135 b != (HTBUCKET *) NULL;
136 b = b->ht_next) {
137 if (b->ht_pgno == pgno) {
138 t->bt_curpage = b->ht_page;
139 return (RET_SUCCESS);
142 return (RET_ERROR);
146 * _BT_GETDPAGE -- Make pgno the current page of the btree.
148 * This routine gets pages for disk btrees.
150 * Because disk btree pages must be readable across machine architectures,
151 * the btree code writes integers out in network format. This routine
152 * converts them back to host format before returning the page.
154 * Parameters:
155 * t -- btree in which to get page
156 * pgno -- page number to get
158 * Returns:
159 * RET_SUCCESS, RET_ERROR.
163 _bt_getdpage(t, pgno)
164 register BTREE_P t;
165 pgno_t pgno;
167 BTHEADER *h;
168 char *cache;
169 long pos;
170 int n, nbytes;
172 /* if we have an lru cache, let the cache code do the work */
173 if (ISCACHE(t)) {
174 cache = t->bt_s.bt_d.d_cache;
176 /* release the old page */
177 if (t->bt_curpage != (BTHEADER *) NULL) {
178 pgno_t opgno = t->bt_curpage->h_pgno;
179 t->bt_curpage->h_flags &= ~F_DIRTY;
181 if (lruwrite(cache, (int) opgno) < 0)
182 return (RET_ERROR);
184 if (lrurelease(cache, (int) opgno) < 0)
185 return (RET_ERROR);
188 if (pgno > t->bt_npages) {
189 if ((h = (BTHEADER *) lrugetnew(cache, (int)pgno, &nbytes))
190 == (BTHEADER *) NULL)
191 return (RET_ERROR);
192 t->bt_npages = pgno;
193 } else {
194 if ((h = (BTHEADER *) lruget(cache, (int)pgno, &nbytes))
195 == (BTHEADER *) NULL)
196 return (RET_ERROR);
199 /* init this page, if necessary */
200 if (nbytes == 0) {
201 h->h_pgno = pgno;
202 h->h_flags = F_LEAF;
203 h->h_lower = (index_t)
204 (((char *) &(h->h_linp[0])) - ((char *) h));
205 h->h_upper = t->bt_psize;
206 h->h_prevpg = h->h_nextpg = P_NONE;
209 t->bt_curpage = h;
211 return (RET_SUCCESS);
214 /* sync the current page, if necessary */
215 if (t->bt_curpage != (BTHEADER *) NULL) {
216 if (t->bt_curpage->h_flags & F_DIRTY)
217 if (_bt_write(t, t->bt_curpage, RELEASE) == RET_ERROR)
218 return (RET_ERROR);
219 } else {
220 if (t->bt_npages == 0)
221 t->bt_npages = 1;
223 /* if no current page, get space for one */
224 if ((t->bt_curpage = (BTHEADER *) malloc((unsigned) t->bt_psize))
225 == (BTHEADER *) NULL) {
226 return (RET_ERROR);
230 n = t->bt_psize;
231 pos = (long) (pgno * n);
233 /* seek to correct location in file */
234 if (lseek(t->bt_s.bt_d.d_fd, pos, L_SET) != pos) {
235 return (RET_ERROR);
238 /* read the page */
239 if ((nbytes = read(t->bt_s.bt_d.d_fd, t->bt_curpage, n)) < n) {
242 * If we didn't get a full page, we must have gotten no
243 * data at all -- in which case we're trying to read a
244 * root page that doesn't exist yet. This is the only
245 * case in which this is okay. If this happens, construct
246 * an empty root page by hand.
248 if (nbytes != 0 || pgno != P_ROOT) {
249 errno = EBADF;
250 return (RET_ERROR);
253 h = (BTHEADER *) t->bt_curpage;
254 h->h_pgno = pgno;
255 h->h_flags = F_LEAF;
256 h->h_lower = (index_t)
257 (((char *) &(h->h_linp[0])) - ((char *) h));
258 h->h_upper = t->bt_psize;
259 h->h_prevpg = h->h_nextpg = P_NONE;
260 } else
261 (void) _bt_pgin(t->bt_curpage, (char *) t->bt_lorder);
263 return (RET_SUCCESS);
267 * _BT_PGOUT, _BT_PGIN -- Convert host-specific number layout to/from
268 * the host-independent format stored on disk.
270 * Parameters:
271 * h -- page to convert
272 * _lorder -- byte order for pages (stored as a char * in the
273 * cache, and passed around as a magic cookie).
275 * Returns:
276 * RET_SUCCESS (lru code requires a return value).
278 * Side Effects:
279 * Layout of tree metadata on the page is changed in place.
281 * Warnings:
282 * Everywhere else in the code, the types pgno_t and index_t
283 * are opaque. These two routines know what they really
284 * are.
288 _bt_pgout(h, _lorder)
289 BTHEADER *h;
290 char *_lorder;
292 int i;
293 int top;
294 int lorder;
295 DATUM *d;
296 IDATUM *id;
297 size_t chain;
299 lorder = (int) _lorder;
300 if (lorder == BYTE_ORDER)
301 return (RET_SUCCESS);
303 if (h->h_flags & F_LEAF) {
304 if (h->h_flags & F_CONT) {
305 if (h->h_prevpg == P_NONE) {
306 size_t longsz;
308 (void) bcopy((char *) &(h->h_linp[0]),
309 (char *) &longsz,
310 sizeof(longsz));
311 BLSWAP(longsz);
312 (void) bcopy((char *) &longsz,
313 (char *) &(h->h_linp[0]),
314 sizeof(longsz));
316 } else {
317 top = NEXTINDEX(h);
318 for (i = 0; i < top; i++) {
319 d = (DATUM *) GETDATUM(h, i);
320 if (d->d_flags & D_BIGKEY) {
321 (void) bcopy((char *) &(d->d_bytes[0]),
322 (char *) &chain,
323 sizeof(chain));
324 BLSWAP(chain);
325 (void) bcopy((char *) &chain,
326 (char *) &(d->d_bytes[0]),
327 sizeof(chain));
329 if (d->d_flags & D_BIGDATA) {
330 (void) bcopy((char *) &(d->d_bytes[d->d_ksize]),
331 (char *) &chain,
332 sizeof(chain));
333 BLSWAP(chain);
334 (void) bcopy((char *) &chain,
335 (char *) &(d->d_bytes[d->d_ksize]),
336 sizeof(chain));
338 BLSWAP(d->d_dsize);
339 BLSWAP(d->d_ksize);
340 BLSWAP(d->d_flags);
341 BLSWAP(h->h_linp[i]);
344 } else {
345 top = NEXTINDEX(h);
346 for (i = 0; i < top; i++) {
347 id = (IDATUM *) GETDATUM(h, i);
348 BLSWAP(id->i_size);
349 BLSWAP(id->i_pgno);
350 BLSWAP(id->i_flags);
351 if (id->i_flags & D_BIGKEY) {
352 (void) bcopy((char *) &(id->i_bytes[0]),
353 (char *) &chain,
354 sizeof(chain));
355 BLSWAP(chain);
356 (void) bcopy((char *) &chain,
357 (char *) &(id->i_bytes[0]),
358 sizeof(chain));
360 BLSWAP(h->h_linp[i]);
363 BLSWAP(h->h_flags);
364 BLSWAP(h->h_pgno);
365 BLSWAP(h->h_prevpg);
366 BLSWAP(h->h_nextpg);
367 BLSWAP(h->h_lower);
368 BLSWAP(h->h_upper);
370 return (RET_SUCCESS);
374 _bt_pgin(h, _lorder)
375 BTHEADER *h;
376 char *_lorder;
378 int i;
379 int top;
380 int lorder;
381 DATUM *d;
382 IDATUM *id;
383 size_t chain;
386 * If btree pages are to be stored in the host byte order, don't
387 * bother swapping.
389 lorder = (int) _lorder;
390 if (lorder == BYTE_ORDER)
391 return (RET_SUCCESS);
393 BLSWAP(h->h_upper);
394 BLSWAP(h->h_lower);
395 BLSWAP(h->h_nextpg);
396 BLSWAP(h->h_prevpg);
397 BLSWAP(h->h_pgno);
398 BLSWAP(h->h_flags);
400 if (h->h_flags & F_LEAF) {
401 if (h->h_flags & F_CONT) {
402 if (h->h_prevpg == P_NONE) {
403 size_t longsz;
405 (void) bcopy((char *) &(h->h_linp[0]),
406 (char *) &longsz,
407 sizeof(longsz));
408 BLSWAP(longsz);
409 (void) bcopy((char *) &longsz,
410 (char *) &(h->h_linp[0]),
411 sizeof(longsz));
413 } else {
414 top = NEXTINDEX(h);
415 for (i = 0; i < top; i++) {
416 BLSWAP(h->h_linp[i]);
417 d = (DATUM *) GETDATUM(h, i);
418 BLSWAP(d->d_dsize);
419 BLSWAP(d->d_ksize);
420 BLSWAP(d->d_flags);
421 if (d->d_flags & D_BIGKEY) {
422 (void) bcopy((char *) &(d->d_bytes[0]),
423 (char *) &chain,
424 sizeof(chain));
425 BLSWAP(chain);
426 (void) bcopy((char *) &chain,
427 (char *) &(d->d_bytes[0]),
428 sizeof(chain));
430 if (d->d_flags & D_BIGDATA) {
431 (void) bcopy((char *) &(d->d_bytes[d->d_ksize]),
432 (char *) &chain,
433 sizeof(chain));
434 BLSWAP(chain);
435 (void) bcopy((char *) &chain,
436 (char *) &(d->d_bytes[d->d_ksize]),
437 sizeof(chain));
441 } else {
442 top = NEXTINDEX(h);
443 for (i = 0; i < top; i++) {
444 BLSWAP(h->h_linp[i]);
445 id = (IDATUM *) GETDATUM(h, i);
446 BLSWAP(id->i_size);
447 BLSWAP(id->i_pgno);
448 BLSWAP(id->i_flags);
449 if (id->i_flags & D_BIGKEY) {
450 (void) bcopy((char *) &(id->i_bytes[0]),
451 (char *) &chain,
452 sizeof(chain));
453 BLSWAP(chain);
454 (void) bcopy((char *) &chain,
455 (char *) &(id->i_bytes[0]),
456 sizeof(chain));
460 return (RET_SUCCESS);
464 * _BT_ALLOCPG -- allocate a new page in the btree.
466 * This is called when we split a page, to get space to do the split.
467 * For disk btrees, these pages are released when the split is done.
468 * For memory btrees, they are not.
470 * Parameters:
471 * t -- tree in which to do split
473 * Returns:
474 * Pointer to the newly-allocated page
477 BTHEADER *
478 _bt_allocpg(t)
479 BTREE_P t;
481 BTHEADER *h = t->bt_curpage;
482 BTHEADER *nh;
483 int nbytes;
485 /* if we have a cache, let the cache code do the work */
486 if (ISDISK(t) && ISCACHE(t)) {
487 nh = (BTHEADER *) lrugetnew(t->bt_s.bt_d.d_cache,
488 (int) (t->bt_npages + 1),
489 &nbytes);
490 } else {
491 nh = (BTHEADER *) malloc((unsigned) t->bt_psize);
494 if (nh != (BTHEADER *) NULL) {
495 nh->h_pgno = nh->h_prevpg = nh->h_nextpg = P_NONE;
496 nh->h_flags = h->h_flags;
497 nh->h_lower = (index_t)
498 (((char *) &(nh->h_linp[0])) - ((char *) nh));
499 nh->h_upper = t->bt_psize;
502 return (nh);
506 * _BT_WRITE -- Write a specific page to a btree file.
508 * Parameters:
509 * t -- btree to get the page
510 * h -- page to write
511 * relflag -- (int) this page may/may not be released
513 * Returns:
514 * RET_SUCCESS, RET_ERROR.
516 * Side Effects:
517 * Writes a metadata page if none has been written yet.
521 _bt_write(t, h, relflag)
522 BTREE_P t;
523 BTHEADER *h;
524 int relflag;
526 long pos;
527 int htindex;
528 HTBUCKET *b;
529 char *cache;
530 pgno_t pgno;
532 h->h_flags &= ~F_DIRTY;
533 if (ISDISK(t)) {
535 /* if we haven't done so yet, write the metadata */
536 if (!(t->bt_flags & BTF_METAOK)) {
537 if (_bt_wrtmeta(t) == RET_ERROR)
538 return (RET_ERROR);
541 pgno = h->h_pgno;
544 /* if we have a cache, let the cache code do the work */
545 if ((cache = t->bt_s.bt_d.d_cache) != (char *) NULL) {
546 if (lruwrite(cache, (int) pgno) < 0)
547 return (RET_ERROR);
548 if (relflag && lrurelease(cache, (int) pgno) < 0)
549 return (RET_ERROR);
551 } else {
552 (void) _bt_pgout(h, (char *) t->bt_lorder);
553 /* now write the current page */
554 pos = (long) (pgno * t->bt_psize);
555 if (lseek(t->bt_s.bt_d.d_fd, pos, L_SET) != pos)
556 return (RET_ERROR);
557 if (write(t->bt_s.bt_d.d_fd, (char *) h, (int) t->bt_psize)
558 < t->bt_psize)
559 return (RET_ERROR);
560 if (!relflag)
561 (void) _bt_pgin(h, (char *) t->bt_lorder);
563 } else {
564 /* in-memory btree */
565 htindex = HASHKEY(h->h_pgno);
567 /* see if we need to overwrite existing entry */
568 for (b = t->bt_s.bt_ht[htindex];
569 b != (HTBUCKET *) NULL;
570 b = b->ht_next) {
571 if (b->ht_pgno == h->h_pgno) {
572 b->ht_page = h;
573 return (RET_SUCCESS);
577 /* new entry, write it */
578 b = (HTBUCKET *) malloc((unsigned) sizeof (HTBUCKET));
579 if (b == (HTBUCKET *) NULL)
580 return (RET_ERROR);
582 b->ht_pgno = h->h_pgno;
583 b->ht_page = h;
584 b->ht_next = t->bt_s.bt_ht[htindex];
585 t->bt_s.bt_ht[htindex] = b;
587 return (RET_SUCCESS);
591 * _BT_RELEASE -- Release a locked-down cache page
593 * During page splits, we want to force pages out to the cache
594 * before we actually release them, in some cases. This routine
595 * releases such a page when it is no longer needed.
597 * Parameters:
598 * t -- btree in which to release page
599 * h -- page to release
601 * Returns:
602 * RET_SUCCESS, RET_ERROR.
604 * Side Effects:
605 * None.
609 _bt_release(t, h)
610 BTREE_P t;
611 BTHEADER *h;
613 if (ISDISK(t) && ISCACHE(t)) {
614 if (lrurelease(t->bt_s.bt_d.d_cache, (int) (h->h_pgno)) < 0)
615 return (RET_ERROR);
617 return (RET_SUCCESS);
621 * _BT_WRTMETA -- Write metadata to the btree.
623 * Parameters:
624 * t -- tree to which to write
626 * Returns:
627 * RET_SUCCESS, RET_ERROR.
631 _bt_wrtmeta(t)
632 BTREE_P t;
634 BTMETA m;
636 if (lseek(t->bt_s.bt_d.d_fd, 0l, L_SET) != 0l)
637 return (RET_ERROR);
639 /* lorder has to be in host-independent format */
640 m.m_lorder = (u_long) htonl((long) t->bt_lorder);
642 m.m_magic = BTREEMAGIC;
643 m.m_version = BTREEVERSION;
644 m.m_psize = t->bt_psize;
645 m.m_free = t->bt_free;
646 m.m_flags = t->bt_flags & BTF_NODUPS;
648 if (t->bt_lorder != BYTE_ORDER) {
649 BLSWAP(m.m_magic);
650 BLSWAP(m.m_version);
651 BLSWAP(m.m_psize);
652 BLSWAP(m.m_free);
653 BLSWAP(m.m_flags);
656 if (write(t->bt_s.bt_d.d_fd, (char *) &m, sizeof(m))
657 != sizeof(m)) {
658 return (RET_ERROR);
661 t->bt_flags |= BTF_METAOK;
663 return (RET_SUCCESS);