kernel: scheduling fix for ARM
[minix.git] / lib / libc / db / btree / bt_seq.c
blob58f25a065f136966d8c5b113654bc1497525aa65
1 /* $NetBSD: bt_seq.c,v 1.17 2008/09/11 12:58:00 joerg Exp $ */
3 /*-
4 * Copyright (c) 1990, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
7 * This code is derived from software contributed to Berkeley by
8 * Mike Olson.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
35 #if HAVE_NBTOOL_CONFIG_H
36 #include "nbtool_config.h"
37 #endif
39 #include <sys/cdefs.h>
40 __RCSID("$NetBSD: bt_seq.c,v 1.17 2008/09/11 12:58:00 joerg Exp $");
42 #include "namespace.h"
43 #include <sys/types.h>
45 #include <assert.h>
46 #include <errno.h>
47 #include <stddef.h>
48 #include <stdio.h>
49 #include <stdlib.h>
51 #include <db.h>
52 #include "btree.h"
54 static int __bt_first(BTREE *, const DBT *, EPG *, int *);
55 static int __bt_seqadv(BTREE *, EPG *, int);
56 static int __bt_seqset(BTREE *, EPG *, DBT *, int);
59 * Sequential scan support.
61 * The tree can be scanned sequentially, starting from either end of the
62 * tree or from any specific key. A scan request before any scanning is
63 * done is initialized as starting from the least node.
67 * __bt_seq --
68 * Btree sequential scan interface.
70 * Parameters:
71 * dbp: pointer to access method
72 * key: key for positioning and return value
73 * data: data return value
74 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV.
76 * Returns:
77 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
79 int
80 __bt_seq(const DB *dbp, DBT *key, DBT *data, u_int flags)
82 BTREE *t;
83 EPG e;
84 int status;
86 t = dbp->internal;
88 /* Toss any page pinned across calls. */
89 if (t->bt_pinned != NULL) {
90 mpool_put(t->bt_mp, t->bt_pinned, 0);
91 t->bt_pinned = NULL;
95 * If scan unitialized as yet, or starting at a specific record, set
96 * the scan to a specific key. Both __bt_seqset and __bt_seqadv pin
97 * the page the cursor references if they're successful.
99 switch (flags) {
100 case R_NEXT:
101 case R_PREV:
102 if (F_ISSET(&t->bt_cursor, CURS_INIT)) {
103 status = __bt_seqadv(t, &e, (int)flags);
104 break;
106 /* FALLTHROUGH */
107 case R_FIRST:
108 case R_LAST:
109 case R_CURSOR:
110 status = __bt_seqset(t, &e, key, (int)flags);
111 break;
112 default:
113 errno = EINVAL;
114 return (RET_ERROR);
117 if (status == RET_SUCCESS) {
118 __bt_setcur(t, e.page->pgno, (u_int)e.index);
120 status =
121 __bt_ret(t, &e, key, &t->bt_rkey, data, &t->bt_rdata, 0);
124 * If the user is doing concurrent access, we copied the
125 * key/data, toss the page.
127 if (F_ISSET(t, B_DB_LOCK))
128 mpool_put(t->bt_mp, e.page, 0);
129 else
130 t->bt_pinned = e.page;
132 return (status);
136 * __bt_seqset --
137 * Set the sequential scan to a specific key.
139 * Parameters:
140 * t: tree
141 * ep: storage for returned key
142 * key: key for initial scan position
143 * flags: R_CURSOR, R_FIRST, R_LAST, R_NEXT, R_PREV
145 * Side effects:
146 * Pins the page the cursor references.
148 * Returns:
149 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
151 static int
152 __bt_seqset(BTREE *t, EPG *ep, DBT *key, int flags)
154 PAGE *h;
155 pgno_t pg;
156 int exact;
159 * Find the first, last or specific key in the tree and point the
160 * cursor at it. The cursor may not be moved until a new key has
161 * been found.
163 switch (flags) {
164 case R_CURSOR: /* Keyed scan. */
166 * Find the first instance of the key or the smallest key
167 * which is greater than or equal to the specified key.
169 if (key->data == NULL || key->size == 0) {
170 errno = EINVAL;
171 return (RET_ERROR);
173 return (__bt_first(t, key, ep, &exact));
174 case R_FIRST: /* First record. */
175 case R_NEXT:
176 /* Walk down the left-hand side of the tree. */
177 for (pg = P_ROOT;;) {
178 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
179 return (RET_ERROR);
181 /* Check for an empty tree. */
182 if (NEXTINDEX(h) == 0) {
183 mpool_put(t->bt_mp, h, 0);
184 return (RET_SPECIAL);
187 if (h->flags & (P_BLEAF | P_RLEAF))
188 break;
189 pg = GETBINTERNAL(h, 0)->pgno;
190 mpool_put(t->bt_mp, h, 0);
192 ep->page = h;
193 ep->index = 0;
194 break;
195 case R_LAST: /* Last record. */
196 case R_PREV:
197 /* Walk down the right-hand side of the tree. */
198 for (pg = P_ROOT;;) {
199 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
200 return (RET_ERROR);
202 /* Check for an empty tree. */
203 if (NEXTINDEX(h) == 0) {
204 mpool_put(t->bt_mp, h, 0);
205 return (RET_SPECIAL);
208 if (h->flags & (P_BLEAF | P_RLEAF))
209 break;
210 pg = GETBINTERNAL(h, NEXTINDEX(h) - 1)->pgno;
211 mpool_put(t->bt_mp, h, 0);
214 ep->page = h;
215 ep->index = NEXTINDEX(h) - 1;
216 break;
218 return (RET_SUCCESS);
222 * __bt_seqadvance --
223 * Advance the sequential scan.
225 * Parameters:
226 * t: tree
227 * flags: R_NEXT, R_PREV
229 * Side effects:
230 * Pins the page the new key/data record is on.
232 * Returns:
233 * RET_ERROR, RET_SUCCESS or RET_SPECIAL if there's no next key.
235 static int
236 __bt_seqadv(BTREE *t, EPG *ep, int flags)
238 CURSOR *c;
239 PAGE *h;
240 indx_t idx = 0; /* pacify gcc */
241 pgno_t pg;
242 int exact;
245 * There are a couple of states that we can be in. The cursor has
246 * been initialized by the time we get here, but that's all we know.
248 c = &t->bt_cursor;
251 * The cursor was deleted where there weren't any duplicate records,
252 * so the key was saved. Find out where that key would go in the
253 * current tree. It doesn't matter if the returned key is an exact
254 * match or not -- if it's an exact match, the record was added after
255 * the delete so we can just return it. If not, as long as there's
256 * a record there, return it.
258 if (F_ISSET(c, CURS_ACQUIRE))
259 return (__bt_first(t, &c->key, ep, &exact));
261 /* Get the page referenced by the cursor. */
262 if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL)
263 return (RET_ERROR);
266 * Find the next/previous record in the tree and point the cursor at
267 * it. The cursor may not be moved until a new key has been found.
269 switch (flags) {
270 case R_NEXT: /* Next record. */
272 * The cursor was deleted in duplicate records, and moved
273 * forward to a record that has yet to be returned. Clear
274 * that flag, and return the record.
276 if (F_ISSET(c, CURS_AFTER))
277 goto usecurrent;
278 idx = c->pg.index;
279 if (++idx == NEXTINDEX(h)) {
280 pg = h->nextpg;
281 mpool_put(t->bt_mp, h, 0);
282 if (pg == P_INVALID)
283 return (RET_SPECIAL);
284 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
285 return (RET_ERROR);
286 idx = 0;
288 break;
289 case R_PREV: /* Previous record. */
291 * The cursor was deleted in duplicate records, and moved
292 * backward to a record that has yet to be returned. Clear
293 * that flag, and return the record.
295 if (F_ISSET(c, CURS_BEFORE)) {
296 usecurrent: F_CLR(c, CURS_AFTER | CURS_BEFORE);
297 ep->page = h;
298 ep->index = c->pg.index;
299 return (RET_SUCCESS);
301 idx = c->pg.index;
302 if (idx == 0) {
303 pg = h->prevpg;
304 mpool_put(t->bt_mp, h, 0);
305 if (pg == P_INVALID)
306 return (RET_SPECIAL);
307 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
308 return (RET_ERROR);
309 idx = NEXTINDEX(h) - 1;
310 } else
311 --idx;
312 break;
315 ep->page = h;
316 ep->index = idx;
317 return (RET_SUCCESS);
321 * __bt_first --
322 * Find the first entry.
324 * Parameters:
325 * t: the tree
326 * key: the key
327 * erval: return EPG
328 * exactp: pointer to exact match flag
330 * Returns:
331 * The first entry in the tree greater than or equal to key,
332 * or RET_SPECIAL if no such key exists.
334 static int
335 __bt_first(BTREE *t, const DBT *key, EPG *erval, int *exactp)
337 PAGE *h;
338 EPG *ep, save;
339 pgno_t pg;
342 * Find any matching record; __bt_search pins the page.
344 * If it's an exact match and duplicates are possible, walk backwards
345 * in the tree until we find the first one. Otherwise, make sure it's
346 * a valid key (__bt_search may return an index just past the end of a
347 * page) and return it.
349 if ((ep = __bt_search(t, key, exactp)) == NULL)
350 return (0);
351 if (*exactp) {
352 if (F_ISSET(t, B_NODUPS)) {
353 *erval = *ep;
354 return (RET_SUCCESS);
358 * Walk backwards, as long as the entry matches and there are
359 * keys left in the tree. Save a copy of each match in case
360 * we go too far.
362 save = *ep;
363 h = ep->page;
364 do {
365 if (save.page->pgno != ep->page->pgno) {
366 mpool_put(t->bt_mp, save.page, 0);
367 save = *ep;
368 } else
369 save.index = ep->index;
372 * Don't unpin the page the last (or original) match
373 * was on, but make sure it's unpinned if an error
374 * occurs.
376 if (ep->index == 0) {
377 if (h->prevpg == P_INVALID)
378 break;
379 if (h->pgno != save.page->pgno)
380 mpool_put(t->bt_mp, h, 0);
381 if ((h = mpool_get(t->bt_mp,
382 h->prevpg, 0)) == NULL)
383 return (RET_ERROR);
384 ep->page = h;
385 ep->index = NEXTINDEX(h);
387 --ep->index;
388 } while (__bt_cmp(t, key, ep) == 0);
391 * Reach here with the last page that was looked at pinned,
392 * which may or may not be the same as the last (or original)
393 * match page. If it's not useful, release it.
395 if (h->pgno != save.page->pgno)
396 mpool_put(t->bt_mp, h, 0);
398 *erval = save;
399 return (RET_SUCCESS);
402 /* If at the end of a page, find the next entry. */
403 if (ep->index == NEXTINDEX(ep->page)) {
404 h = ep->page;
405 pg = h->nextpg;
406 mpool_put(t->bt_mp, h, 0);
407 if (pg == P_INVALID)
408 return (RET_SPECIAL);
409 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
410 return (RET_ERROR);
411 ep->index = 0;
412 ep->page = h;
414 *erval = *ep;
415 return (RET_SUCCESS);
419 * __bt_setcur --
420 * Set the cursor to an entry in the tree.
422 * Parameters:
423 * t: the tree
424 * pgno: page number
425 * idx: page index
427 void
428 __bt_setcur(BTREE *t, pgno_t pgno, u_int idx)
430 /* Lose any already deleted key. */
431 if (t->bt_cursor.key.data != NULL) {
432 free(t->bt_cursor.key.data);
433 t->bt_cursor.key.size = 0;
434 t->bt_cursor.key.data = NULL;
436 F_CLR(&t->bt_cursor, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE);
438 /* Update the cursor. */
439 t->bt_cursor.pg.pgno = pgno;
440 t->bt_cursor.pg.index = idx;
441 F_SET(&t->bt_cursor, CURS_INIT);