1 /*-------------------------------------------------------------------------
4 * page utilities routines for the postgres inverted index access method.
7 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
12 *-------------------------------------------------------------------------
17 #include "access/gin.h"
18 #include "storage/bufmgr.h"
19 #include "utils/rel.h"
22 compareItemPointers(ItemPointer a
, ItemPointer b
)
24 if (GinItemPointerGetBlockNumber(a
) == GinItemPointerGetBlockNumber(b
))
26 if (GinItemPointerGetOffsetNumber(a
) == GinItemPointerGetOffsetNumber(b
))
28 return (GinItemPointerGetOffsetNumber(a
) > GinItemPointerGetOffsetNumber(b
)) ? 1 : -1;
31 return (GinItemPointerGetBlockNumber(a
) > GinItemPointerGetBlockNumber(b
)) ? 1 : -1;
35 * Merge two ordered arrays of itempointers, eliminating any duplicates.
36 * Returns the number of items in the result.
37 * Caller is responsible that there is enough space at *dst.
40 MergeItemPointers(ItemPointerData
*dst
,
41 ItemPointerData
*a
, uint32 na
,
42 ItemPointerData
*b
, uint32 nb
)
44 ItemPointerData
*dptr
= dst
;
45 ItemPointerData
*aptr
= a
,
48 while (aptr
- a
< na
&& bptr
- b
< nb
)
50 int cmp
= compareItemPointers(aptr
, bptr
);
56 /* we want only one copy of the identical items */
74 * Checks, should we move to right link...
75 * Compares inserting itemp pointer with right bound of current page
78 dataIsMoveRight(GinBtree btree
, Page page
)
80 ItemPointer iptr
= GinDataPageGetRightBound(page
);
82 if (GinPageRightMost(page
))
85 return (compareItemPointers(btree
->items
+ btree
->curitem
, iptr
) > 0) ? TRUE
: FALSE
;
89 * Find correct PostingItem in non-leaf page. It supposed that page
90 * correctly chosen and searching value SHOULD be on page
93 dataLocateItem(GinBtree btree
, GinBtreeStack
*stack
)
98 PostingItem
*pitem
= NULL
;
100 Page page
= BufferGetPage(stack
->buffer
);
102 Assert(!GinPageIsLeaf(page
));
103 Assert(GinPageIsData(page
));
107 stack
->off
= FirstOffsetNumber
;
108 stack
->predictNumber
*= GinPageGetOpaque(page
)->maxoff
;
109 return btree
->getLeftMostPage(btree
, page
);
112 low
= FirstOffsetNumber
;
113 maxoff
= high
= GinPageGetOpaque(page
)->maxoff
;
120 OffsetNumber mid
= low
+ ((high
- low
) / 2);
122 pitem
= (PostingItem
*) GinDataPageGetItem(page
, mid
);
127 * Right infinity, page already correctly chosen with a help of
133 pitem
= (PostingItem
*) GinDataPageGetItem(page
, mid
);
134 result
= compareItemPointers(btree
->items
+ btree
->curitem
, &(pitem
->key
));
140 return PostingItemGetBlockNumber(pitem
);
148 Assert(high
>= FirstOffsetNumber
&& high
<= maxoff
);
151 pitem
= (PostingItem
*) GinDataPageGetItem(page
, high
);
152 return PostingItemGetBlockNumber(pitem
);
156 * Searches correct position for value on leaf page.
157 * Page should be correctly chosen.
158 * Returns true if value found on page.
161 dataLocateLeafItem(GinBtree btree
, GinBtreeStack
*stack
)
163 Page page
= BufferGetPage(stack
->buffer
);
168 Assert(GinPageIsLeaf(page
));
169 Assert(GinPageIsData(page
));
173 stack
->off
= FirstOffsetNumber
;
177 low
= FirstOffsetNumber
;
178 high
= GinPageGetOpaque(page
)->maxoff
;
182 stack
->off
= FirstOffsetNumber
;
190 OffsetNumber mid
= low
+ ((high
- low
) / 2);
192 result
= compareItemPointers(btree
->items
+ btree
->curitem
, (ItemPointer
) GinDataPageGetItem(page
, mid
));
210 * Finds links to blkno on non-leaf page, returns
211 * offset of PostingItem
214 dataFindChildPtr(GinBtree btree
, Page page
, BlockNumber blkno
, OffsetNumber storedOff
)
217 maxoff
= GinPageGetOpaque(page
)->maxoff
;
220 Assert(!GinPageIsLeaf(page
));
221 Assert(GinPageIsData(page
));
223 /* if page isn't changed, we returns storedOff */
224 if (storedOff
>= FirstOffsetNumber
&& storedOff
<= maxoff
)
226 pitem
= (PostingItem
*) GinDataPageGetItem(page
, storedOff
);
227 if (PostingItemGetBlockNumber(pitem
) == blkno
)
231 * we hope, that needed pointer goes to right. It's true if there
234 for (i
= storedOff
+ 1; i
<= maxoff
; i
++)
236 pitem
= (PostingItem
*) GinDataPageGetItem(page
, i
);
237 if (PostingItemGetBlockNumber(pitem
) == blkno
)
241 maxoff
= storedOff
- 1;
245 for (i
= FirstOffsetNumber
; i
<= maxoff
; i
++)
247 pitem
= (PostingItem
*) GinDataPageGetItem(page
, i
);
248 if (PostingItemGetBlockNumber(pitem
) == blkno
)
252 return InvalidOffsetNumber
;
256 * returns blkno of leftmost child
259 dataGetLeftMostPage(GinBtree btree
, Page page
)
263 Assert(!GinPageIsLeaf(page
));
264 Assert(GinPageIsData(page
));
265 Assert(GinPageGetOpaque(page
)->maxoff
>= FirstOffsetNumber
);
267 pitem
= (PostingItem
*) GinDataPageGetItem(page
, FirstOffsetNumber
);
268 return PostingItemGetBlockNumber(pitem
);
272 * add ItemPointer or PostingItem to page. data should point to
273 * correct value! depending on leaf or non-leaf page
276 GinDataPageAddItem(Page page
, void *data
, OffsetNumber offset
)
278 OffsetNumber maxoff
= GinPageGetOpaque(page
)->maxoff
;
281 if (offset
== InvalidOffsetNumber
)
283 ptr
= GinDataPageGetItem(page
, maxoff
+ 1);
287 ptr
= GinDataPageGetItem(page
, offset
);
288 if (maxoff
+ 1 - offset
!= 0)
289 memmove(ptr
+ GinSizeOfItem(page
), ptr
, (maxoff
- offset
+ 1) * GinSizeOfItem(page
));
291 memcpy(ptr
, data
, GinSizeOfItem(page
));
293 GinPageGetOpaque(page
)->maxoff
++;
297 * Deletes posting item from non-leaf page
300 PageDeletePostingItem(Page page
, OffsetNumber offset
)
302 OffsetNumber maxoff
= GinPageGetOpaque(page
)->maxoff
;
304 Assert(!GinPageIsLeaf(page
));
305 Assert(offset
>= FirstOffsetNumber
&& offset
<= maxoff
);
307 if (offset
!= maxoff
)
308 memmove(GinDataPageGetItem(page
, offset
), GinDataPageGetItem(page
, offset
+ 1),
309 sizeof(PostingItem
) * (maxoff
- offset
));
311 GinPageGetOpaque(page
)->maxoff
--;
315 * checks space to install new value,
316 * item pointer never deletes!
319 dataIsEnoughSpace(GinBtree btree
, Buffer buf
, OffsetNumber off
)
321 Page page
= BufferGetPage(buf
);
323 Assert(GinPageIsData(page
));
324 Assert(!btree
->isDelete
);
326 if (GinPageIsLeaf(page
))
328 if (GinPageRightMost(page
) && off
> GinPageGetOpaque(page
)->maxoff
)
330 if ((btree
->nitem
- btree
->curitem
) * sizeof(ItemPointerData
) <= GinDataPageGetFreeSpace(page
))
333 else if (sizeof(ItemPointerData
) <= GinDataPageGetFreeSpace(page
))
336 else if (sizeof(PostingItem
) <= GinDataPageGetFreeSpace(page
))
343 * In case of previous split update old child blkno to
345 * item pointer never deletes!
348 dataPrepareData(GinBtree btree
, Page page
, OffsetNumber off
)
350 BlockNumber ret
= InvalidBlockNumber
;
352 Assert(GinPageIsData(page
));
354 if (!GinPageIsLeaf(page
) && btree
->rightblkno
!= InvalidBlockNumber
)
356 PostingItem
*pitem
= (PostingItem
*) GinDataPageGetItem(page
, off
);
358 PostingItemSetBlockNumber(pitem
, btree
->rightblkno
);
359 ret
= btree
->rightblkno
;
362 btree
->rightblkno
= InvalidBlockNumber
;
368 * Places keys to page and fills WAL record. In case leaf page and
369 * build mode puts all ItemPointers to page.
372 dataPlaceToPage(GinBtree btree
, Buffer buf
, OffsetNumber off
, XLogRecData
**prdata
)
374 Page page
= BufferGetPage(buf
);
375 static XLogRecData rdata
[3];
376 int sizeofitem
= GinSizeOfItem(page
);
377 static ginxlogInsert data
;
381 Assert(GinPageIsData(page
));
383 data
.updateBlkno
= dataPrepareData(btree
, page
, off
);
385 data
.node
= btree
->index
->rd_node
;
386 data
.blkno
= BufferGetBlockNumber(buf
);
389 data
.isDelete
= FALSE
;
391 data
.isLeaf
= GinPageIsLeaf(page
) ? TRUE
: FALSE
;
394 * Prevent full page write if child's split occurs. That is needed to
395 * remove incomplete splits while replaying WAL
397 * data.updateBlkno contains new block number (of newly created right
398 * page) for recently splited page.
400 if (data
.updateBlkno
== InvalidBlockNumber
)
402 rdata
[0].buffer
= buf
;
403 rdata
[0].buffer_std
= FALSE
;
404 rdata
[0].data
= NULL
;
406 rdata
[0].next
= &rdata
[1];
410 rdata
[cnt
].buffer
= InvalidBuffer
;
411 rdata
[cnt
].data
= (char *) &data
;
412 rdata
[cnt
].len
= sizeof(ginxlogInsert
);
413 rdata
[cnt
].next
= &rdata
[cnt
+ 1];
416 rdata
[cnt
].buffer
= InvalidBuffer
;
417 rdata
[cnt
].data
= (GinPageIsLeaf(page
)) ? ((char *) (btree
->items
+ btree
->curitem
)) : ((char *) &(btree
->pitem
));
418 rdata
[cnt
].len
= sizeofitem
;
419 rdata
[cnt
].next
= NULL
;
421 if (GinPageIsLeaf(page
))
423 if (GinPageRightMost(page
) && off
> GinPageGetOpaque(page
)->maxoff
)
425 /* usually, create index... */
426 uint32 savedPos
= btree
->curitem
;
428 while (btree
->curitem
< btree
->nitem
)
430 GinDataPageAddItem(page
, btree
->items
+ btree
->curitem
, off
);
434 data
.nitem
= btree
->curitem
- savedPos
;
435 rdata
[cnt
].len
= sizeofitem
* data
.nitem
;
439 GinDataPageAddItem(page
, btree
->items
+ btree
->curitem
, off
);
444 GinDataPageAddItem(page
, &(btree
->pitem
), off
);
448 * split page and fills WAL record. original buffer(lbuf) leaves untouched,
449 * returns shadow page of lbuf filled new data. In leaf page and build mode puts all
450 * ItemPointers to pages. Also, in build mode splits data by way to full fulled
454 dataSplitPage(GinBtree btree
, Buffer lbuf
, Buffer rbuf
, OffsetNumber off
, XLogRecData
**prdata
)
456 static ginxlogSplit data
;
457 static XLogRecData rdata
[4];
458 static char vector
[2 * BLCKSZ
];
460 OffsetNumber separator
;
462 Page lpage
= PageGetTempPageCopy(BufferGetPage(lbuf
));
463 ItemPointerData oldbound
= *GinDataPageGetRightBound(lpage
);
464 int sizeofitem
= GinSizeOfItem(lpage
);
465 OffsetNumber maxoff
= GinPageGetOpaque(lpage
)->maxoff
;
466 Page rpage
= BufferGetPage(rbuf
);
467 Size pageSize
= PageGetPageSize(lpage
);
471 GinInitPage(rpage
, GinPageGetOpaque(lpage
)->flags
, pageSize
);
472 freeSpace
= GinDataPageGetFreeSpace(rpage
);
475 data
.leftChildBlkno
= (GinPageIsLeaf(lpage
)) ?
476 InvalidOffsetNumber
: PostingItemGetBlockNumber(&(btree
->pitem
));
477 data
.updateBlkno
= dataPrepareData(btree
, lpage
, off
);
479 memcpy(vector
, GinDataPageGetItem(lpage
, FirstOffsetNumber
),
480 maxoff
* sizeofitem
);
482 if (GinPageIsLeaf(lpage
) && GinPageRightMost(lpage
) && off
> GinPageGetOpaque(lpage
)->maxoff
)
485 while (btree
->curitem
< btree
->nitem
&& maxoff
* sizeof(ItemPointerData
) < 2 * (freeSpace
- sizeof(ItemPointerData
)))
487 memcpy(vector
+ maxoff
* sizeof(ItemPointerData
), btree
->items
+ btree
->curitem
,
488 sizeof(ItemPointerData
));
496 ptr
= vector
+ (off
- 1) * sizeofitem
;
497 if (maxoff
+ 1 - off
!= 0)
498 memmove(ptr
+ sizeofitem
, ptr
, (maxoff
- off
+ 1) * sizeofitem
);
499 if (GinPageIsLeaf(lpage
))
501 memcpy(ptr
, btree
->items
+ btree
->curitem
, sizeofitem
);
505 memcpy(ptr
, &(btree
->pitem
), sizeofitem
);
511 * we suppose that during index creation table scaned from begin to end,
512 * so ItemPointers are monotonically increased..
514 if (btree
->isBuild
&& GinPageRightMost(lpage
))
515 separator
= freeSpace
/ sizeofitem
;
517 separator
= maxoff
/ 2;
519 GinInitPage(rpage
, GinPageGetOpaque(lpage
)->flags
, pageSize
);
520 GinInitPage(lpage
, GinPageGetOpaque(rpage
)->flags
, pageSize
);
522 memcpy(GinDataPageGetItem(lpage
, FirstOffsetNumber
), vector
, separator
* sizeofitem
);
523 GinPageGetOpaque(lpage
)->maxoff
= separator
;
524 memcpy(GinDataPageGetItem(rpage
, FirstOffsetNumber
),
525 vector
+ separator
* sizeofitem
, (maxoff
- separator
) * sizeofitem
);
526 GinPageGetOpaque(rpage
)->maxoff
= maxoff
- separator
;
528 PostingItemSetBlockNumber(&(btree
->pitem
), BufferGetBlockNumber(lbuf
));
529 if (GinPageIsLeaf(lpage
))
530 btree
->pitem
.key
= *(ItemPointerData
*) GinDataPageGetItem(lpage
,
531 GinPageGetOpaque(lpage
)->maxoff
);
533 btree
->pitem
.key
= ((PostingItem
*) GinDataPageGetItem(lpage
,
534 GinPageGetOpaque(lpage
)->maxoff
))->key
;
535 btree
->rightblkno
= BufferGetBlockNumber(rbuf
);
537 /* set up right bound for left page */
538 bound
= GinDataPageGetRightBound(lpage
);
539 *bound
= btree
->pitem
.key
;
541 /* set up right bound for right page */
542 bound
= GinDataPageGetRightBound(rpage
);
545 data
.node
= btree
->index
->rd_node
;
546 data
.rootBlkno
= InvalidBlockNumber
;
547 data
.lblkno
= BufferGetBlockNumber(lbuf
);
548 data
.rblkno
= BufferGetBlockNumber(rbuf
);
549 data
.separator
= separator
;
552 data
.isLeaf
= GinPageIsLeaf(lpage
) ? TRUE
: FALSE
;
553 data
.isRootSplit
= FALSE
;
554 data
.rightbound
= oldbound
;
556 rdata
[0].buffer
= InvalidBuffer
;
557 rdata
[0].data
= (char *) &data
;
558 rdata
[0].len
= sizeof(ginxlogSplit
);
559 rdata
[0].next
= &rdata
[1];
561 rdata
[1].buffer
= InvalidBuffer
;
562 rdata
[1].data
= vector
;
563 rdata
[1].len
= MAXALIGN(maxoff
* sizeofitem
);
564 rdata
[1].next
= NULL
;
570 * Fills new root by right bound values from child.
571 * Also called from ginxlog, should not use btree
574 dataFillRoot(GinBtree btree
, Buffer root
, Buffer lbuf
, Buffer rbuf
)
576 Page page
= BufferGetPage(root
),
577 lpage
= BufferGetPage(lbuf
),
578 rpage
= BufferGetPage(rbuf
);
582 li
.key
= *GinDataPageGetRightBound(lpage
);
583 PostingItemSetBlockNumber(&li
, BufferGetBlockNumber(lbuf
));
584 GinDataPageAddItem(page
, &li
, InvalidOffsetNumber
);
586 ri
.key
= *GinDataPageGetRightBound(rpage
);
587 PostingItemSetBlockNumber(&ri
, BufferGetBlockNumber(rbuf
));
588 GinDataPageAddItem(page
, &ri
, InvalidOffsetNumber
);
592 prepareDataScan(GinBtree btree
, Relation index
)
594 memset(btree
, 0, sizeof(GinBtreeData
));
595 btree
->index
= index
;
596 btree
->isMoveRight
= dataIsMoveRight
;
597 btree
->findChildPage
= dataLocateItem
;
598 btree
->findItem
= dataLocateLeafItem
;
599 btree
->findChildPtr
= dataFindChildPtr
;
600 btree
->getLeftMostPage
= dataGetLeftMostPage
;
601 btree
->isEnoughSpace
= dataIsEnoughSpace
;
602 btree
->placeToPage
= dataPlaceToPage
;
603 btree
->splitPage
= dataSplitPage
;
604 btree
->fillRoot
= dataFillRoot
;
606 btree
->searchMode
= FALSE
;
607 btree
->isDelete
= FALSE
;
608 btree
->fullScan
= FALSE
;
609 btree
->isBuild
= FALSE
;
613 prepareScanPostingTree(Relation index
, BlockNumber rootBlkno
, bool searchMode
)
615 GinPostingTreeScan
*gdi
= (GinPostingTreeScan
*) palloc0(sizeof(GinPostingTreeScan
));
617 prepareDataScan(&gdi
->btree
, index
);
619 gdi
->btree
.searchMode
= searchMode
;
620 gdi
->btree
.fullScan
= searchMode
;
622 gdi
->stack
= ginPrepareFindLeafPage(&gdi
->btree
, rootBlkno
);
628 * Inserts array of item pointers, may execute several tree scan (very rare)
631 insertItemPointer(GinPostingTreeScan
*gdi
, ItemPointerData
*items
, uint32 nitem
)
633 BlockNumber rootBlkno
= gdi
->stack
->blkno
;
635 gdi
->btree
.items
= items
;
636 gdi
->btree
.nitem
= nitem
;
637 gdi
->btree
.curitem
= 0;
639 while (gdi
->btree
.curitem
< gdi
->btree
.nitem
)
642 gdi
->stack
= ginPrepareFindLeafPage(&gdi
->btree
, rootBlkno
);
644 gdi
->stack
= ginFindLeafPage(&gdi
->btree
, gdi
->stack
);
646 if (gdi
->btree
.findItem(&(gdi
->btree
), gdi
->stack
))
649 * gdi->btree.items[gdi->btree.curitem] already exists in index
651 gdi
->btree
.curitem
++;
652 LockBuffer(gdi
->stack
->buffer
, GIN_UNLOCK
);
653 freeGinBtreeStack(gdi
->stack
);
656 ginInsertValue(&(gdi
->btree
), gdi
->stack
);
663 scanBeginPostingTree(GinPostingTreeScan
*gdi
)
665 gdi
->stack
= ginFindLeafPage(&gdi
->btree
, gdi
->stack
);
666 return gdi
->stack
->buffer
;