2 * Copyright (c) 1990 The Regents of the University of California.
5 * This code is derived from software contributed to Berkeley by
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 #if defined(LIBC_SCCS) && !defined(lint)
38 static char sccsid
[] = "@(#)big.c 5.2 (Berkeley) 2/22/91";
39 #endif /* LIBC_SCCS and not lint */
41 #include <sys/types.h>
48 * _BT_GETBIG -- Get big data from indirect pages.
50 * This routine chases indirect blocks for the big object at the
51 * specified page to a buffer, and return the address of the buffer.
54 * t -- btree with the indirect blocks
55 * pgno -- page number that starts the chain
56 * p -- (char **) to get the address of the buffer containing
58 * sz -- pointer to an int to get the size of the instantiated
62 * RET_SUCCESS, RET_ERROR.
69 _bt_getbig(t
, pgno
, p
, sz
)
79 char *top
, *from
, *where
;
81 save
= t
->bt_curpage
->h_pgno
;
82 if (_bt_getpage(t
, pgno
) == RET_ERROR
)
87 bcopy((char *) &(h
->h_linp
[0]),
89 (size_t) sizeof(nbytes
));
91 if ((*p
= (char *) malloc(nbytes
)) == (char *) NULL
)
95 from
= ((char *) (&h
->h_linp
[0])) + sizeof(nbytes
);
96 top
= ((char *) h
) + t
->bt_psize
;
98 /* need more space for data? */
103 nhere
= (int) (top
- from
);
104 if (nhere
> nbytes
) {
105 (void) bcopy(from
, where
, (size_t) nbytes
);
108 (void) bcopy(from
, where
, nhere
);
111 if (_bt_getpage(t
, h
->h_nextpg
) == RET_ERROR
)
114 top
= ((char *) h
) + t
->bt_psize
;
115 from
= (char *) &(h
->h_linp
[0]);
119 if (_bt_getpage(t
, save
) == RET_ERROR
)
122 return (RET_SUCCESS
);
126 * _BT_DELINDIR -- Delete a chain of indirect blocks from the btree.
128 * When a large item is deleted from the tree, this routine puts the
129 * space that it occupied onto the free list for later reuse. The
130 * bt_free entry in the btree structure points at the head of this list.
131 * This value is also stored on disk in the btree's metadata.
134 * t -- btree from which to delete pages
135 * chain -- page number that starts the chain.
138 * RET_SUCCESS, RET_ERROR.
141 * Invalidates the current on-disk version of the btree's
142 * metadata (if any), and updates the free list appropriately.
146 _bt_delindir(t
, chain
)
156 if (_bt_getpage(t
, chain
) == RET_ERROR
)
160 * If some internal node is pointing at this chain, don't
164 if (t
->bt_curpage
->h_flags
& F_PRESERVE
) {
165 if (_bt_getpage(t
, save
) == RET_ERROR
)
167 return (RET_SUCCESS
);
170 /* if there's nothing on the free list, this is easy... */
171 if (t
->bt_free
== P_NONE
) {
174 oldfree
= t
->bt_free
;
176 /* find the end of the data chain for the deleted datum */
179 if (_bt_getpage(t
, chain
) == RET_ERROR
)
182 if (h
->h_nextpg
!= P_NONE
)
184 } while (h
->h_nextpg
!= P_NONE
);
186 /* link freed pages into free list */
187 h
->h_nextpg
= oldfree
;
188 if (_bt_write(t
, h
, RELEASE
) == RET_ERROR
)
190 if (_bt_getpage(t
, oldfree
) == RET_ERROR
)
194 if (_bt_write(t
, h
, RELEASE
) == RET_ERROR
)
198 /* restore the tree's current page pointer */
199 if (_bt_getpage(t
, save
) == RET_ERROR
)
202 /* we have trashed the tree metadata; rewrite it later */
203 t
->bt_flags
&= ~BTF_METAOK
;
205 return (RET_SUCCESS
);
209 * _BT_INDIRECT -- Write a series of indirect pages for big objects.
211 * A chain of indirect pages looks like
213 * +-------------------+ +---------------------+
214 * |hdr|size| | |hdr| |
215 * +---+----+ | +---+ |
216 * | ... data ... | | ... data ... | ...
218 * +-------------------+ +---------------------+
220 * where hdr is a standard btree page header (with the indirect bit
221 * set), size on the first page is the real size of the datum, and
222 * data are bytes of the datum, split across as many pages as necessary.
223 * Indirect pages are chained together with the h_prevpg and h_nextpg
224 * entries of the page header struct.
226 * A single DBT is written per chain, so space on the last page is
229 * We return the page number of the start of the chain.
231 * When a big object is deleted from a tree, the space that it occupied
232 * is placed on a free list for later reuse. This routine checks that
233 * free list before allocating new pages to the big datum being inserted.
236 * t -- btree in which to store indirect blocks
237 * data -- DBT with the big datum in it
238 * pgno -- place to put the starting page number of the chain
241 * RET_SUCCESS, RET_ERROR.
244 * Current page is changed on return.
248 _bt_indirect(t
, data
, pgno
)
262 /* get set for first page in chain */
265 from
= (char *) data
->data
;
267 /* if there are blocks on the free list, use them first */
268 if ((*pgno
= t
->bt_free
) == P_NONE
) {
269 if ((cur
= _bt_allocpg(t
)) == (BTHEADER
*) NULL
)
273 *pgno
= cur
->h_pgno
= ++(t
->bt_npages
);
275 if (_bt_getpage(t
, *pgno
) == RET_ERROR
)
281 cur
->h_flags
= F_CONT
|F_LEAF
;
282 (void) bcopy((char *) &dsize
, (char *) &cur
->h_linp
[0], sizeof(size_t));
283 where
= ((char *) (&cur
->h_linp
[0])) + sizeof(size_t);
285 /* fill and write pages in the chain */
289 top
= ((char *) cur
) + t
->bt_psize
;
290 cur
->h_prevpg
= prev
;
291 nextchn
= cur
->h_nextpg
;
292 nhere
= (int) (top
- where
);
294 if (nhere
>= dsize
) {
295 (void) bcopy(from
, where
, (int) dsize
);
296 cur
->h_nextpg
= P_NONE
;
299 (void) bcopy(from
, where
, (int) nhere
);
302 if (nextchn
== P_NONE
)
303 cur
->h_nextpg
= t
->bt_npages
+ 1;
307 /* current page is ready to go; write it out */
308 if (_bt_write(t
, cur
, RELEASE
) == RET_ERROR
)
311 /* free the current page, if appropriate */
312 if (ISDISK(t
) && !ISCACHE(t
) && !ischain
) {
313 (void) free ((char *) cur
);
320 /* allocate another page */
321 if (nextchn
== P_NONE
) {
322 if ((cur
= _bt_allocpg(t
)) == (BTHEADER
*) NULL
)
325 cur
->h_pgno
= ++(t
->bt_npages
);
327 if (_bt_getpage(t
, nextchn
) == RET_ERROR
)
332 cur
->h_flags
= F_LEAF
| F_CONT
;
334 where
= (char *) (&cur
->h_linp
[0]);
337 /* if we used pages from the free list, record changes to it */
338 if (*pgno
== t
->bt_free
) {
339 t
->bt_free
= nextchn
;
340 t
->bt_flags
&= ~BTF_METAOK
;
343 return (RET_SUCCESS
);
347 * _BT_MARKCHAIN -- Mark a chain of pages as used by an internal node.
349 * Chains of indirect blocks pointed to by leaf nodes get reclaimed
350 * when the item that points to them gets deleted. Chains pointed
351 * to by internal nodes never get deleted. This routine marks a
352 * chain as pointed to by an internal node.
355 * t -- tree in which to mark
356 * chain -- number of first page in chain
359 * RET_SUCCESS, RET_ERROR.
366 _bt_markchain(t
, chain
)
372 save
= t
->bt_curpage
->h_pgno
;
374 if (_bt_getpage(t
, chain
) == RET_ERROR
)
377 t
->bt_curpage
->h_flags
|= (F_DIRTY
|F_PRESERVE
);
379 if (_bt_getpage(t
, save
) == RET_ERROR
)
382 return (RET_SUCCESS
);