/*-------------------------------------------------------------------------
 *
 * nbtsort.c
 *		Build a btree from sorted input by loading leaf pages sequentially.
 *
 * NOTES
 *
 * We use tuplesort.c to sort the given index tuples into order.
 * Then we scan the index tuples in order and build the btree pages
 * for each level.  We load source tuples into leaf-level pages.
 * Whenever we fill a page at one level, we add a link to it to its
 * parent level (starting a new parent level if necessary).  When
 * done, we write out each final page on each level, adding it to
 * its parent level.  When we have only one page on a level, it must be
 * the root -- it can be attached to the btree metapage and we are done.
 *
 * This code is moderately slow (~10% slower) compared to the regular
 * btree (insertion) build code on sorted or well-clustered data.  On
 * random data, however, the insertion build code is unusable -- the
 * difference on a 60MB heap is a factor of 15 because the random
 * probes into the btree thrash the buffer pool.  (NOTE: the above
 * "10%" estimate is probably obsolete, since it refers to an old and
 * not very good external sort implementation that used to exist in
 * this module.  tuplesort.c is almost certainly faster.)
 *
 * It is not wise to pack the pages entirely full, since then *any*
 * insertion would cause a split (and not only of the leaf page; the need
 * for a split would cascade right up the tree).  The steady-state load
 * factor for btrees is usually estimated at 70%.  We choose to pack leaf
 * pages to the user-controllable fill factor (default 90%) while upper pages
 * are always packed to 70%.  This gives us reasonable density (there aren't
 * many upper pages if the keys are reasonable-size) without risking a lot of
 * cascading splits during early insertions.
 *
 * Formerly the index pages being built were kept in shared buffers, but
 * that is of no value (since other backends have no interest in them yet)
 * and it created locking problems for CHECKPOINT, because the upper-level
 * pages were held exclusive-locked for long periods.  Now we just build
 * the pages in local memory and smgrwrite or smgrextend them as we finish
 * them.  They will need to be re-read into shared buffers on first use after
 * the build finishes.
 *
 * Since the index will never be used unless it is completely built,
 * from a crash-recovery point of view there is no need to WAL-log the
 * steps of the build.  After completing the index build, we can just sync
 * the whole file to disk using smgrimmedsync() before exiting this module.
 * This can be seen to be sufficient for crash recovery by considering that
 * it's effectively equivalent to what would happen if a CHECKPOINT occurred
 * just after the index build.  However, it is clearly not sufficient if the
 * DBA is using the WAL log for PITR or replication purposes, since another
 * machine would not be able to reconstruct the index from WAL.  Therefore,
 * we log the completed index pages to WAL if and only if WAL archiving is
 * active.
 *
 * This code isn't concerned about the FSM at all.  The caller is responsible
 * for initializing that.
 *
 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/heapam.h"
#include "access/nbtree.h"
#include "miscadmin.h"
#include "storage/smgr.h"
#include "utils/rel.h"
#include "utils/tuplesort.h"
/*
 * Status record for spooling/sorting phase.  (Note we may have two of
 * these due to the special requirements for uniqueness-checking with
 * dead tuples.)
 */
struct BTSpool					/* typedef appears in nbtree.h */
{
	Tuplesortstate *sortstate;	/* state data for tuplesort.c */
	Relation	index;
	bool		isunique;
};
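
/*
 * Why two spools (a reading of the caller's logic in nbtree.c, outside this
 * file): for a unique index, entries whose heap tuples are not known to be
 * alive are diverted into a second spool that is sorted without uniqueness
 * enforcement, so dead rows cannot raise spurious "duplicate key" errors;
 * _bt_load() below merges the two sorted streams back into one tree.
 */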
/*
 * Status record for a btree page being built.  We have one of these
 * for each active tree level.
 *
 * The reason we need to store a copy of the minimum key is that we'll
 * need to propagate it to the parent node when this page is linked
 * into its parent.  However, if the page is not a leaf page, the first
 * entry on the page doesn't need to contain a key, so we will not have
 * stored the key itself on the page.  (You might think we could skip
 * copying the minimum key on leaf pages, but actually we must have a
 * writable copy anyway because we'll poke the page's address into it
 * before passing it up to the parent...)
 */
typedef struct BTPageState
{
	Page		btps_page;		/* workspace for page building */
	BlockNumber btps_blkno;		/* block # to write this page at */
	IndexTuple	btps_minkey;	/* copy of minimum key (first item) on page */
	OffsetNumber btps_lastoff;	/* last item offset loaded */
	uint32		btps_level;		/* tree level (0 = leaf) */
	Size		btps_full;		/* "full" if less than this much free space */
	struct BTPageState *btps_next;	/* link to parent level, if any */
} BTPageState;
/*
 * Overall status record for index writing phase.
 */
typedef struct BTWriteState
{
	Relation	index;
	bool		btws_use_wal;	/* dump pages to WAL? */
	BlockNumber btws_pages_alloced;		/* # pages allocated */
	BlockNumber btws_pages_written;		/* # pages written out */
	Page		btws_zeropage;	/* workspace for filling zeroes */
} BTWriteState;
static Page _bt_blnewpage(uint32 level);
static BTPageState *_bt_pagestate(BTWriteState *wstate, uint32 level);
static void _bt_slideleft(Page page);
static void _bt_sortaddtup(Page page, Size itemsize,
			   IndexTuple itup, OffsetNumber itup_off);
static void _bt_buildadd(BTWriteState *wstate, BTPageState *state,
			 IndexTuple itup);
static void _bt_uppershutdown(BTWriteState *wstate, BTPageState *state);
static void _bt_load(BTWriteState *wstate,
		 BTSpool *btspool, BTSpool *btspool2);
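

/*
 * Interface routines
 *
 * Rough sketch (not code from this file) of how the routines below are
 * driven by the index build code elsewhere (btbuild in nbtree.c): create a
 * spool, feed it every index tuple, then sort and build, then clean up:
 *
 *		BTSpool    *spool = _bt_spoolinit(index, isunique, false);
 *
 *		... one _bt_spool(itup, spool) call per index entry ...
 *
 *		_bt_leafbuild(spool, NULL);		(second argument is the optional
 *										 dead-tuple spool, see above)
 *		_bt_spooldestroy(spool);
 */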
/*
 * create and initialize a spool structure
 */
BTSpool *
_bt_spoolinit(Relation index, bool isunique, bool isdead)
{
	BTSpool    *btspool = (BTSpool *) palloc0(sizeof(BTSpool));
	int			btKbytes;

	btspool->index = index;
	btspool->isunique = isunique;

	/*
	 * We size the sort area as maintenance_work_mem rather than work_mem to
	 * speed index creation.  This should be OK since a single backend can't
	 * run multiple index creations in parallel.  Note that creation of a
	 * unique index actually requires two BTSpool objects.  We expect that the
	 * second one (for dead tuples) won't get very full, so we give it only
	 * work_mem.
	 */
	btKbytes = isdead ? work_mem : maintenance_work_mem;
	btspool->sortstate = tuplesort_begin_index_btree(index, isunique,
													 btKbytes, false);

	return btspool;
}
/*
 * clean up a spool structure and its substructures.
 */
void
_bt_spooldestroy(BTSpool *btspool)
{
	tuplesort_end(btspool->sortstate);
	pfree(btspool);
}
/*
 * spool an index entry into the sort file.
 */
void
_bt_spool(IndexTuple itup, BTSpool *btspool)
{
	tuplesort_putindextuple(btspool->sortstate, itup);
}
/*
 * given a spool loaded by successive calls to _bt_spool,
 * create an entire btree.
 */
void
_bt_leafbuild(BTSpool *btspool, BTSpool *btspool2)
{
	BTWriteState wstate;

#ifdef BTREE_BUILD_STATS
	if (log_btree_build_stats)
	{
		ShowUsage("BTREE BUILD (Spool) STATISTICS");
		ResetUsage();
	}
#endif   /* BTREE_BUILD_STATS */

	tuplesort_performsort(btspool->sortstate);
	if (btspool2)
		tuplesort_performsort(btspool2->sortstate);

	wstate.index = btspool->index;

	/*
	 * We need to log index creation in WAL iff WAL archiving is enabled AND
	 * it's not a temp index.
	 */
	wstate.btws_use_wal = XLogArchivingActive() && !wstate.index->rd_istemp;

	/* reserve the metapage */
	wstate.btws_pages_alloced = BTREE_METAPAGE + 1;
	wstate.btws_pages_written = 0;
	wstate.btws_zeropage = NULL;	/* until needed */

	_bt_load(&wstate, btspool, btspool2);
}
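
/*
 * Note on the metapage reservation above: BTREE_METAPAGE is block 0, so
 * starting btws_pages_alloced at BTREE_METAPAGE + 1 makes the first tree
 * page land at block 1.  The metapage itself is not written here; it is
 * constructed and written last, by _bt_uppershutdown(), once the root is
 * known, which is what finally stamps the index with a valid magic number.
 */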
/*
 * allocate workspace for a new, clean btree page, not linked to any siblings.
 */
static Page
_bt_blnewpage(uint32 level)
{
	Page		page;
	BTPageOpaque opaque;

	page = (Page) palloc(BLCKSZ);

	/* Zero the page and set up standard page header info */
	_bt_pageinit(page, BLCKSZ);

	/* Initialize BT opaque state */
	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
	opaque->btpo_prev = opaque->btpo_next = P_NONE;
	opaque->btpo.level = level;
	opaque->btpo_flags = (level > 0) ? 0 : BTP_LEAF;
	opaque->btpo_cycleid = 0;

	/* Make the P_HIKEY line pointer appear allocated */
	((PageHeader) page)->pd_lower += sizeof(ItemIdData);

	return page;
}
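
/*
 * The pd_lower bump above makes line pointer 1 (P_HIKEY) look allocated
 * without storing any tuple for it: every non-rightmost page will later
 * need a high key in that slot, so data items are loaded starting at
 * P_FIRSTKEY.  If a page turns out to be the rightmost of its level,
 * _bt_slideleft() removes the unused reservation at shutdown time.
 */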
/*
 * emit a completed btree page, and release the working storage.
 */
static void
_bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
{
	/* Ensure rd_smgr is open (could have been closed by relcache flush!) */
	RelationOpenSmgr(wstate->index);

	/* XLOG stuff */
	if (wstate->btws_use_wal)
	{
		/* We use the heap NEWPAGE record type for this */
		log_newpage(&wstate->index->rd_node, MAIN_FORKNUM, blkno, page);
	}
	else
	{
		/* Leave the page LSN zero if not WAL-logged, but set TLI anyway */
		PageSetTLI(page, ThisTimeLineID);
	}

	/*
	 * If we have to write pages nonsequentially, fill in the space with
	 * zeroes until we come back and overwrite.  This is not logically
	 * necessary on standard Unix filesystems (unwritten space will read as
	 * zeroes anyway), but it should help to avoid fragmentation.  The dummy
	 * pages aren't WAL-logged though.
	 */
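	/*
	 * (Why writes can be nonsequential at all: a page receives its block
	 * number from btws_pages_alloced as soon as it is started, but is only
	 * written once it is finished.  An upper-level page still in progress can
	 * therefore hold a lower block number than leaf pages that finish after
	 * it was started, leaving gaps we zero-fill here; the metapage, block 0,
	 * is the first such gap, since it is written last of all.)
	 */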
	while (blkno > wstate->btws_pages_written)
	{
		if (!wstate->btws_zeropage)
			wstate->btws_zeropage = (Page) palloc0(BLCKSZ);
		smgrextend(wstate->index->rd_smgr, MAIN_FORKNUM,
				   wstate->btws_pages_written++,
				   (char *) wstate->btws_zeropage,
				   true);
	}

	/*
	 * Now write the page.  We say isTemp = true even if it's not a temp
	 * index, because there's no need for smgr to schedule an fsync for this
	 * write; we'll do it ourselves before ending the build.
	 */
	if (blkno == wstate->btws_pages_written)
	{
		/* extending the file... */
		smgrextend(wstate->index->rd_smgr, MAIN_FORKNUM, blkno,
				   (char *) page, true);
		wstate->btws_pages_written++;
	}
	else
	{
		/* overwriting a block we zero-filled before */
		smgrwrite(wstate->index->rd_smgr, MAIN_FORKNUM, blkno,
				  (char *) page, true);
	}

	pfree(page);
}
/*
 * allocate and initialize a new BTPageState.  the returned structure
 * is suitable for immediate use by _bt_buildadd.
 */
static BTPageState *
_bt_pagestate(BTWriteState *wstate, uint32 level)
{
	BTPageState *state = (BTPageState *) palloc0(sizeof(BTPageState));

	/* create initial page for level */
	state->btps_page = _bt_blnewpage(level);

	/* and assign it a page position */
	state->btps_blkno = wstate->btws_pages_alloced++;

	state->btps_minkey = NULL;
	/* initialize lastoff so first item goes into P_FIRSTKEY */
	state->btps_lastoff = P_HIKEY;
	state->btps_level = level;
	/* set "full" threshold based on level.  See notes at head of file. */
	if (level > 0)
		state->btps_full = (BLCKSZ * (100 - BTREE_NONLEAF_FILLFACTOR) / 100);
	else
		state->btps_full = RelationGetTargetPageFreeSpace(wstate->index,
												   BTREE_DEFAULT_FILLFACTOR);
	/* no parent level, yet */
	state->btps_next = NULL;

	return state;
}
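
/*
 * Worked example for the "full" thresholds chosen above (assuming the
 * default 8192-byte BLCKSZ): an upper-level page is treated as full once
 * its free space drops below 8192 * (100 - 70) / 100 = 2457 bytes, i.e. it
 * is packed to about 70%; a leaf page uses the relation's fillfactor
 * (default 90%), giving a threshold of about 819 bytes of free space.
 */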
/*
 * slide an array of ItemIds back one slot (from P_FIRSTKEY to
 * P_HIKEY, overwriting P_HIKEY).  we need to do this when we discover
 * that we have built an ItemId array in what has turned out to be a
 * P_RIGHTMOST page.
 */
static void
_bt_slideleft(Page page)
{
	OffsetNumber off;
	OffsetNumber maxoff;
	ItemId		previi;
	ItemId		thisii;

	if (!PageIsEmpty(page))
	{
		maxoff = PageGetMaxOffsetNumber(page);
		previi = PageGetItemId(page, P_HIKEY);
		for (off = P_FIRSTKEY; off <= maxoff; off = OffsetNumberNext(off))
		{
			thisii = PageGetItemId(page, off);
			*previi = *thisii;
			previi = thisii;
		}
		((PageHeader) page)->pd_lower -= sizeof(ItemIdData);
	}
}
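
/*
 * Illustration of the slide: before, offset 1 (P_HIKEY) is the reserved but
 * unused slot and offsets 2..N point at the data items; afterwards the same
 * items sit at offsets 1..N-1 and the surplus line pointer is given back by
 * shrinking pd_lower.  A rightmost page has no high key, so its first line
 * pointer must be a real data item, which is what nbtsearch.c expects.
 */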
/*
 * Add an item to a page being built.
 *
 * The main difference between this routine and a bare PageAddItem call
 * is that this code knows that the leftmost data item on a non-leaf
 * btree page doesn't need to have a key.  Therefore, it strips such
 * items down to just the item header.
 *
 * This is almost like nbtinsert.c's _bt_pgaddtup(), but we can't use
 * that because it assumes that P_RIGHTMOST() will return the correct
 * answer for the page.  Here, we don't know yet if the page will be
 * rightmost.  Offset P_FIRSTKEY is always the first data key.
 */
static void
_bt_sortaddtup(Page page,
			   Size itemsize,
			   IndexTuple itup,
			   OffsetNumber itup_off)
{
	BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
	IndexTupleData trunctuple;

	if (!P_ISLEAF(opaque) && itup_off == P_FIRSTKEY)
	{
		trunctuple = *itup;
		trunctuple.t_info = sizeof(IndexTupleData);
		itup = &trunctuple;
		itemsize = sizeof(IndexTupleData);
	}

	if (PageAddItem(page, (Item) itup, itemsize, itup_off,
					false, false) == InvalidOffsetNumber)
		elog(ERROR, "failed to add item to the index page");
}
/*----------
 * Add an item to a disk page from the sort output.
 *
 * We must be careful to observe the page layout conventions of nbtsearch.c:
 * - rightmost pages start data items at P_HIKEY instead of at P_FIRSTKEY.
 * - on non-leaf pages, the key portion of the first item need not be
 *	 stored, we should store only the link.
 *
 * A leaf page being built looks like:
 *
 * +----------------+---------------------------------+
 * | PageHeaderData | linp0 linp1 linp2 ...           |
 * +-----------+----+---------------------------------+
 * | ... linpN |                                      |
 * +-----------+--------------------------------------+
 * |     ^ last                                       |
 * |                                                  |
 * +-------------+------------------------------------+
 * |             | itemN ...                          |
 * +-------------+------------------+-----------------+
 * |          ... item3 item2 item1 | "special space" |
 * +--------------------------------+-----------------+
 *
 * Contrast this with the diagram in bufpage.h; note the mismatch
 * between linps and items.  This is because we reserve linp0 as a
 * placeholder for the pointer to the "high key" item; when we have
 * filled up the page, we will set linp0 to point to itemN and clear
 * linpN.  On the other hand, if we find this is the last (rightmost)
 * page, we leave the items alone and slide the linp array over.
 *
 * 'last' pointer indicates the last offset added to the page.
 *----------
 */
static void
_bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup)
{
	Page		npage;
	BlockNumber nblkno;
	OffsetNumber last_off;
	Size		pgspc;
	Size		itupsz;

	/*
	 * This is a handy place to check for cancel interrupts during the btree
	 * load phase of index creation.
	 */
	CHECK_FOR_INTERRUPTS();

	npage = state->btps_page;
	nblkno = state->btps_blkno;
	last_off = state->btps_lastoff;

	pgspc = PageGetFreeSpace(npage);
	itupsz = IndexTupleDSize(*itup);
	itupsz = MAXALIGN(itupsz);
	/*
	 * Check whether the item can fit on a btree page at all.  (Eventually, we
	 * ought to try to apply TOAST methods if not.)  We actually need to be
	 * able to fit three items on every page, so restrict any one item to 1/3
	 * the per-page available space.  Note that at this point, itupsz doesn't
	 * include the ItemId.
	 *
	 * NOTE: similar code appears in _bt_insertonpg() to defend against
	 * oversize items being inserted into an already-existing index.  But
	 * during creation of an index, we don't go through there.
	 */
	if (itupsz > BTMaxItemSize(npage))
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("index row size %lu exceeds btree maximum, %lu",
						(unsigned long) itupsz,
						(unsigned long) BTMaxItemSize(npage)),
				 errhint("Values larger than 1/3 of a buffer page cannot be indexed.\n"
						 "Consider a function index of an MD5 hash of the value, "
						 "or use full text indexing.")));
	/*
	 * Check to see if page is "full".  It's definitely full if the item won't
	 * fit.  Otherwise, compare to the target freespace derived from the
	 * fillfactor.  However, we must put at least two items on each page, so
	 * disregard fillfactor if we don't have that many.
	 */
	if (pgspc < itupsz || (pgspc < state->btps_full && last_off > P_FIRSTKEY))
	{
		/*
		 * Finish off the page and write it out.
		 */
		Page		opage = npage;
		BlockNumber oblkno = nblkno;
		ItemId		ii;
		ItemId		hii;
		IndexTuple	oitup;

		/* Create new page of same level */
		npage = _bt_blnewpage(state->btps_level);

		/* and assign it a page position */
		nblkno = wstate->btws_pages_alloced++;

		/*
		 * We copy the last item on the page into the new page, and then
		 * rearrange the old page so that the 'last item' becomes its high key
		 * rather than a true data item.  There had better be at least two
		 * items on the page already, else the page would be empty of useful
		 * data.
		 */
		Assert(last_off > P_FIRSTKEY);
		ii = PageGetItemId(opage, last_off);
		oitup = (IndexTuple) PageGetItem(opage, ii);
		_bt_sortaddtup(npage, ItemIdGetLength(ii), oitup, P_FIRSTKEY);

		/*
		 * Move 'last' into the high key position on opage
		 */
		hii = PageGetItemId(opage, P_HIKEY);
		*hii = *ii;
		ItemIdSetUnused(ii);	/* redundant */
		((PageHeader) opage)->pd_lower -= sizeof(ItemIdData);
		/*
		 * Link the old page into its parent, using its minimum key.  If we
		 * don't have a parent, we have to create one; this adds a new btree
		 * level.
		 */
		if (state->btps_next == NULL)
			state->btps_next = _bt_pagestate(wstate, state->btps_level + 1);

		Assert(state->btps_minkey != NULL);
		ItemPointerSet(&(state->btps_minkey->t_tid), oblkno, P_HIKEY);
		_bt_buildadd(wstate, state->btps_next, state->btps_minkey);
		pfree(state->btps_minkey);
		/*
		 * Save a copy of the minimum key for the new page.  We have to copy
		 * it off the old page, not the new one, in case we are not at leaf
		 * level.
		 */
		state->btps_minkey = CopyIndexTuple(oitup);

		/*
		 * Set the sibling links for both pages.
		 */
		{
			BTPageOpaque oopaque = (BTPageOpaque) PageGetSpecialPointer(opage);
			BTPageOpaque nopaque = (BTPageOpaque) PageGetSpecialPointer(npage);

			oopaque->btpo_next = nblkno;
			nopaque->btpo_prev = oblkno;
			nopaque->btpo_next = P_NONE;	/* redundant */
		}

		/*
		 * Write out the old page.  We never need to touch it again, so we can
		 * free the opage workspace too.
		 */
		_bt_blwritepage(wstate, opage, oblkno);

		/*
		 * Reset last_off to point to new page
		 */
		last_off = P_FIRSTKEY;
	}
	/*
	 * If the new item is the first for its page, stash a copy for later.  Note
	 * this will only happen for the first item on a level; on later pages,
	 * the first item for a page is copied from the prior page in the code
	 * above.
	 */
	if (last_off == P_HIKEY)
	{
		Assert(state->btps_minkey == NULL);
		state->btps_minkey = CopyIndexTuple(itup);
	}

	/*
	 * Add the new item into the current page.
	 */
	last_off = OffsetNumberNext(last_off);
	_bt_sortaddtup(npage, itupsz, itup, last_off);

	state->btps_page = npage;
	state->btps_blkno = nblkno;
	state->btps_lastoff = last_off;
}
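
/*
 * Note how a "split" during the build propagates upward: the recursive
 * _bt_buildadd() call on state->btps_next above inserts the finished page's
 * minimum key one level up, and if that page fills as well, the same code
 * runs again another level higher, creating new levels on demand through
 * _bt_pagestate().  Only the finished (left-hand) page of each level is
 * written out immediately; the page still being filled stays in local memory.
 */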
/*
 * Finish writing out the completed btree.
 */
static void
_bt_uppershutdown(BTWriteState *wstate, BTPageState *state)
{
	BTPageState *s;
	BlockNumber rootblkno = P_NONE;
	uint32		rootlevel = 0;
	Page		metapage;

	/*
	 * Each iteration of this loop completes one more level of the tree.
	 */
	for (s = state; s != NULL; s = s->btps_next)
	{
		BlockNumber blkno;
		BTPageOpaque opaque;

		blkno = s->btps_blkno;
		opaque = (BTPageOpaque) PageGetSpecialPointer(s->btps_page);
		/*
		 * We have to link the last page on this level to somewhere.
		 *
		 * If we're at the top, it's the root, so attach it to the metapage.
		 * Otherwise, add an entry for it to its parent using its minimum key.
		 * This may cause the last page of the parent level to split, but
		 * that's not a problem -- we haven't gotten to it yet.
		 */
		if (s->btps_next == NULL)
		{
			opaque->btpo_flags |= BTP_ROOT;
			rootblkno = blkno;
			rootlevel = s->btps_level;
		}
		else
		{
			Assert(s->btps_minkey != NULL);
			ItemPointerSet(&(s->btps_minkey->t_tid), blkno, P_HIKEY);
			_bt_buildadd(wstate, s->btps_next, s->btps_minkey);
			pfree(s->btps_minkey);
			s->btps_minkey = NULL;
		}
		/*
		 * This is the rightmost page, so the ItemId array needs to be slid
		 * back one slot.  Then we can dump out the page.
		 */
		_bt_slideleft(s->btps_page);
		_bt_blwritepage(wstate, s->btps_page, s->btps_blkno);
		s->btps_page = NULL;	/* writepage freed the workspace */
	}

	/*
	 * As the last step in the process, construct the metapage and make it
	 * point to the new root (unless we had no data at all, in which case it's
	 * set to point to "P_NONE").  This changes the index to the "valid" state
	 * by filling in a valid magic number in the metapage.
	 */
	metapage = (Page) palloc(BLCKSZ);
	_bt_initmetapage(metapage, rootblkno, rootlevel);
	_bt_blwritepage(wstate, metapage, BTREE_METAPAGE);
}
/*
 * Read tuples in correct sort order from tuplesort, and load them into
 * btree pages.
 */
static void
_bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
{
	BTPageState *state = NULL;
	bool		merge = (btspool2 != NULL);
	IndexTuple	itup,
				itup2 = NULL;
	bool		should_free,
				should_free2,
				load1;
	TupleDesc	tupdes = RelationGetDescr(wstate->index);
	int			i,
				keysz = RelationGetNumberOfAttributes(wstate->index);
	ScanKey		indexScanKey = NULL;
	if (merge)
	{
		/*
		 * Another BTSpool for dead tuples exists.  Now we have to merge
		 * btspool and btspool2.
		 */

		/* the preparation of merge */
		itup = tuplesort_getindextuple(btspool->sortstate,
									   true, &should_free);
		itup2 = tuplesort_getindextuple(btspool2->sortstate,
										true, &should_free2);
		indexScanKey = _bt_mkscankey_nodata(wstate->index);

		for (;;)
		{
			load1 = true;		/* load BTSpool next ? */
			if (itup2 == NULL)
			{
				if (itup == NULL)
					break;
			}
			else if (itup != NULL)
			{
				for (i = 1; i <= keysz; i++)
				{
					ScanKey		entry;
					Datum		attrDatum1,
								attrDatum2;
					bool		isNull1,
								isNull2;
					int32		compare;

					entry = indexScanKey + i - 1;
					attrDatum1 = index_getattr(itup, i, tupdes, &isNull1);
					attrDatum2 = index_getattr(itup2, i, tupdes, &isNull2);
					if (isNull1)
					{
						if (isNull2)
							compare = 0;		/* NULL "=" NULL */
						else if (entry->sk_flags & SK_BT_NULLS_FIRST)
							compare = -1;		/* NULL "<" NOT_NULL */
						else
							compare = 1;		/* NULL ">" NOT_NULL */
					}
					else if (isNull2)
					{
						if (entry->sk_flags & SK_BT_NULLS_FIRST)
							compare = 1;		/* NOT_NULL ">" NULL */
						else
							compare = -1;		/* NOT_NULL "<" NULL */
					}
					else
					{
						compare =
							DatumGetInt32(FunctionCall2(&entry->sk_func,
														attrDatum1,
														attrDatum2));

						if (entry->sk_flags & SK_BT_DESC)
							compare = -compare;
					}
					if (compare > 0)
					{
						load1 = false;
						break;
					}
					else if (compare < 0)
						break;
				}
			}
			else
				load1 = false;
			/* When we see first tuple, create first index page */
			if (state == NULL)
				state = _bt_pagestate(wstate, 0);

			if (load1)
			{
				_bt_buildadd(wstate, state, itup);
				if (should_free)
					pfree(itup);
				itup = tuplesort_getindextuple(btspool->sortstate,
											   true, &should_free);
			}
			else
			{
				_bt_buildadd(wstate, state, itup2);
				if (should_free2)
					pfree(itup2);
				itup2 = tuplesort_getindextuple(btspool2->sortstate,
												true, &should_free2);
			}
		}
		_bt_freeskey(indexScanKey);
	}
	else
	{
		/* merge is unnecessary */
		while ((itup = tuplesort_getindextuple(btspool->sortstate,
											   true, &should_free)) != NULL)
		{
			/* When we see first tuple, create first index page */
			if (state == NULL)
				state = _bt_pagestate(wstate, 0);

			_bt_buildadd(wstate, state, itup);
			if (should_free)
				pfree(itup);
		}
	}
	/* Close down final pages and write the metapage */
	_bt_uppershutdown(wstate, state);

	/*
	 * If the index isn't temp, we must fsync it down to disk before it's safe
	 * to commit the transaction.  (For a temp index we don't care since the
	 * index will be uninteresting after a crash anyway.)
	 *
	 * It's obvious that we must do this when not WAL-logging the build.  It's
	 * less obvious that we have to do it even if we did WAL-log the index
	 * pages.  The reason is that since we're building outside shared buffers,
	 * a CHECKPOINT occurring during the build has no way to flush the
	 * previously written data to disk (indeed it won't know the index even
	 * exists).  A crash later on would replay WAL from the checkpoint,
	 * therefore it wouldn't replay our earlier WAL entries.  If we do not
	 * fsync those pages here, they might still not be on disk when the crash
	 * occurs.
	 */
	if (!wstate->index->rd_istemp)
	{
		RelationOpenSmgr(wstate->index);
		smgrimmedsync(wstate->index->rd_smgr, MAIN_FORKNUM);
	}
}