/*-------------------------------------------------------------------------
 *
 * spgvacuum.c
 *	  vacuum for SP-GiST
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *		src/backend/access/spgist/spgvacuum.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/genam.h"
#include "access/spgist_private.h"
#include "access/spgxlog.h"
#include "access/transam.h"
#include "access/xloginsert.h"
#include "catalog/storage_xlog.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
#include "utils/snapmgr.h"
/* Entry in pending-list of TIDs we need to revisit */
typedef struct spgVacPendingItem
{
	ItemPointerData tid;		/* redirection target to visit */
	bool		done;			/* have we dealt with this? */
	struct spgVacPendingItem *next; /* list link */
} spgVacPendingItem;
/* Local state for vacuum operations */
typedef struct spgBulkDeleteState
{
	/* Parameters passed in to spgvacuumscan */
	IndexVacuumInfo *info;
	IndexBulkDeleteResult *stats;
	IndexBulkDeleteCallback callback;
	void	   *callback_state;

	/* Additional working state */
	SpGistState spgstate;		/* for SPGiST operations that need one */
	spgVacPendingItem *pendingList; /* TIDs we need to (re)visit */
	TransactionId myXmin;		/* for detecting newly-added redirects */
	BlockNumber lastFilledBlock;	/* last non-deletable block */
} spgBulkDeleteState;
/*
 * Add TID to pendingList, but only if not already present.
 *
 * Note that new items are always appended at the end of the list; this
 * ensures that scans of the list don't miss items added during the scan.
 */
static void
spgAddPendingTID(spgBulkDeleteState *bds, ItemPointer tid)
{
	spgVacPendingItem *pitem;
	spgVacPendingItem **listLink;

	/* search the list for pre-existing entry */
	listLink = &bds->pendingList;
	while (*listLink != NULL)
	{
		pitem = *listLink;
		if (ItemPointerEquals(tid, &pitem->tid))
			return;				/* already in list, do nothing */
		listLink = &pitem->next;
	}

	/* not there, so append new entry */
	pitem = (spgVacPendingItem *) palloc(sizeof(spgVacPendingItem));
	pitem->tid = *tid;
	pitem->done = false;
	pitem->next = NULL;
	*listLink = pitem;
}
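
/*
 * Note: the linear duplicate search in spgAddPendingTID is cheap in
 * practice, because spgvacuumscan drains the pending list after every page
 * it visits, keeping the list short; a fancier lookup structure seems
 * unnecessary.
 */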
/*
 * Clear pendingList
 */
static void
spgClearPendingList(spgBulkDeleteState *bds)
{
	spgVacPendingItem *pitem;
	spgVacPendingItem *nitem;

	for (pitem = bds->pendingList; pitem != NULL; pitem = nitem)
	{
		nitem = pitem->next;
		/* All items in list should have been dealt with */
		Assert(pitem->done);
		pfree(pitem);
	}
	bds->pendingList = NULL;
}
/*
 * Vacuum a regular (non-root) leaf page
 *
 * We must delete tuples that are targeted for deletion by the VACUUM,
 * but not move any tuples that are referenced by outside links; we assume
 * those are the ones that are heads of chains.
 *
 * If we find a REDIRECT that was made by a concurrently-running transaction,
 * we must add its target TID to pendingList.  (We don't try to visit the
 * target immediately, first because we don't want VACUUM locking more than
 * one buffer at a time, and second because the duplicate-filtering logic
 * in spgAddPendingTID is useful to ensure we can't get caught in an infinite
 * loop in the face of continuous concurrent insertions.)
 *
 * If forPending is true, we are examining the page as a consequence of
 * chasing a redirect link, not as part of the normal sequential scan.
 * We still vacuum the page normally, but we don't increment the stats
 * about live tuples; else we'd double-count those tuples, since the page
 * has been or will be visited in the sequential scan as well.
 */
static void
vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
			   bool forPending)
{
	Page		page = BufferGetPage(buffer);
	spgxlogVacuumLeaf xlrec;
	OffsetNumber toDead[MaxIndexTuplesPerPage];
	OffsetNumber toPlaceholder[MaxIndexTuplesPerPage];
	OffsetNumber moveSrc[MaxIndexTuplesPerPage];
	OffsetNumber moveDest[MaxIndexTuplesPerPage];
	OffsetNumber chainSrc[MaxIndexTuplesPerPage];
	OffsetNumber chainDest[MaxIndexTuplesPerPage];
	OffsetNumber predecessor[MaxIndexTuplesPerPage + 1];
	bool		deletable[MaxIndexTuplesPerPage + 1];
	int			nDeletable;
	OffsetNumber i,
				max = PageGetMaxOffsetNumber(page);

	memset(predecessor, 0, sizeof(predecessor));
	memset(deletable, 0, sizeof(deletable));
	nDeletable = 0;

	/* Scan page, identify tuples to delete, accumulate stats */
	for (i = FirstOffsetNumber; i <= max; i++)
	{
		SpGistLeafTuple lt;

		lt = (SpGistLeafTuple) PageGetItem(page,
										   PageGetItemId(page, i));
		if (lt->tupstate == SPGIST_LIVE)
		{
			Assert(ItemPointerIsValid(&lt->heapPtr));

			if (bds->callback(&lt->heapPtr, bds->callback_state))
			{
				bds->stats->tuples_removed += 1;
				deletable[i] = true;
				nDeletable++;
			}
			else
			{
				if (!forPending)
					bds->stats->num_index_tuples += 1;
			}

			/* Form predecessor map, too */
			if (SGLT_GET_NEXTOFFSET(lt) != InvalidOffsetNumber)
			{
				/* paranoia about corrupted chain links */
				if (SGLT_GET_NEXTOFFSET(lt) < FirstOffsetNumber ||
					SGLT_GET_NEXTOFFSET(lt) > max ||
					predecessor[SGLT_GET_NEXTOFFSET(lt)] != InvalidOffsetNumber)
					elog(ERROR, "inconsistent tuple chain links in page %u of index \"%s\"",
						 BufferGetBlockNumber(buffer),
						 RelationGetRelationName(index));
				predecessor[SGLT_GET_NEXTOFFSET(lt)] = i;
			}
		}
		else if (lt->tupstate == SPGIST_REDIRECT)
		{
			SpGistDeadTuple dt = (SpGistDeadTuple) lt;

			Assert(SGLT_GET_NEXTOFFSET(dt) == InvalidOffsetNumber);
			Assert(ItemPointerIsValid(&dt->pointer));

			/*
			 * Add target TID to pending list if the redirection could have
			 * happened since VACUUM started.
			 *
			 * Note: we could make a tighter test by seeing if the xid is
			 * "running" according to the active snapshot; but snapmgr.c
			 * doesn't currently export a suitable API, and it's not entirely
			 * clear that a tighter test is worth the cycles anyway.
			 */
			if (TransactionIdFollowsOrEquals(dt->xid, bds->myXmin))
				spgAddPendingTID(bds, &dt->pointer);
		}
		else
		{
			Assert(SGLT_GET_NEXTOFFSET(lt) == InvalidOffsetNumber);
		}
	}

	if (nDeletable == 0)
		return;					/* nothing more to do */
	/*----------
	 * Figure out exactly what we have to do.  We do this separately from
	 * actually modifying the page, mainly so that we have a representation
	 * that can be dumped into WAL and then the replay code can do exactly
	 * the same thing.  The output of this step consists of six arrays
	 * describing four kinds of operations, to be performed in this order:
	 *
	 * toDead[]: tuple numbers to be replaced with DEAD tuples
	 * toPlaceholder[]: tuple numbers to be replaced with PLACEHOLDER tuples
	 * moveSrc[]: tuple numbers that need to be relocated to another offset
	 * (replacing the tuple there) and then replaced with PLACEHOLDER tuples
	 * moveDest[]: new locations for moveSrc tuples
	 * chainSrc[]: tuple numbers whose chain links (nextOffset) need updates
	 * chainDest[]: new values of nextOffset for chainSrc members
	 *
	 * It's easiest to figure out what we have to do by processing tuple
	 * chains, so we iterate over all the tuples (not just the deletable
	 * ones!) to identify chain heads, then chase down each chain and make
	 * work item entries for deletable tuples within the chain.
	 *----------
	 */
	xlrec.nDead = xlrec.nPlaceholder = xlrec.nMove = xlrec.nChain = 0;
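
	/*
	 * Illustrative example (not part of the original comments): given a
	 * chain 1 -> 3 -> 5 -> 7 in which tuples 1 and 5 are deletable, the
	 * loop below produces moveSrc = {3}, moveDest = {1} (the first
	 * surviving tuple replaces the dead chain head), toPlaceholder = {5},
	 * and chainSrc = {1}, chainDest = {7} (the relocated tuple is linked
	 * past the gap).  toDead stays empty because at least one chain member
	 * survives.
	 */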
	for (i = FirstOffsetNumber; i <= max; i++)
	{
		SpGistLeafTuple head;
		bool		interveningDeletable;
		OffsetNumber prevLive;
		OffsetNumber j;

		head = (SpGistLeafTuple) PageGetItem(page,
											 PageGetItemId(page, i));
		if (head->tupstate != SPGIST_LIVE)
			continue;			/* can't be a chain member */
		if (predecessor[i] != 0)
			continue;			/* not a chain head */

		/* initialize ... */
		interveningDeletable = false;
		prevLive = deletable[i] ? InvalidOffsetNumber : i;

		/* scan down the chain ... */
		j = SGLT_GET_NEXTOFFSET(head);
		while (j != InvalidOffsetNumber)
		{
			SpGistLeafTuple lt;

			lt = (SpGistLeafTuple) PageGetItem(page,
											   PageGetItemId(page, j));
			if (lt->tupstate != SPGIST_LIVE)
			{
				/* all tuples in chain should be live */
				elog(ERROR, "unexpected SPGiST tuple state: %d",
					 lt->tupstate);
			}

			if (deletable[j])
			{
				/* This tuple should be replaced by a placeholder */
				toPlaceholder[xlrec.nPlaceholder] = j;
				xlrec.nPlaceholder++;
				/* previous live tuple's chain link will need an update */
				interveningDeletable = true;
			}
			else if (prevLive == InvalidOffsetNumber)
			{
				/*
				 * This is the first live tuple in the chain.  It has to move
				 * to the head position.
				 */
				moveSrc[xlrec.nMove] = j;
				moveDest[xlrec.nMove] = i;
				xlrec.nMove++;
				/* Chain updates will be applied after the move */
				prevLive = i;
				interveningDeletable = false;
			}
			else
			{
				/*
				 * Second or later live tuple.  Arrange to re-chain it to the
				 * previous live one, if there was a gap.
				 */
				if (interveningDeletable)
				{
					chainSrc[xlrec.nChain] = prevLive;
					chainDest[xlrec.nChain] = j;
					xlrec.nChain++;
				}
				prevLive = j;
				interveningDeletable = false;
			}

			j = SGLT_GET_NEXTOFFSET(lt);
		}

		if (prevLive == InvalidOffsetNumber)
		{
			/* The chain is entirely removable, so we need a DEAD tuple */
			toDead[xlrec.nDead] = i;
			xlrec.nDead++;
		}
		else if (interveningDeletable)
		{
			/* One or more deletions at end of chain, so close it off */
			chainSrc[xlrec.nChain] = prevLive;
			chainDest[xlrec.nChain] = InvalidOffsetNumber;
			xlrec.nChain++;
		}
	}

	/* sanity check ... */
	if (nDeletable != xlrec.nDead + xlrec.nPlaceholder + xlrec.nMove)
		elog(ERROR, "inconsistent counts of deletable tuples");
	/* Do the updates */
	START_CRIT_SECTION();

	spgPageIndexMultiDelete(&bds->spgstate, page,
							toDead, xlrec.nDead,
							SPGIST_DEAD, SPGIST_DEAD,
							InvalidBlockNumber, InvalidOffsetNumber);

	spgPageIndexMultiDelete(&bds->spgstate, page,
							toPlaceholder, xlrec.nPlaceholder,
							SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
							InvalidBlockNumber, InvalidOffsetNumber);
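
	/*
	 * Why DEAD rather than PLACEHOLDER for fully-emptied chains: the parent
	 * page's downlink still points at the chain-head offset, so that line
	 * pointer must stay occupied; a DEAD tuple keeps the slot reserved,
	 * whereas PLACEHOLDER slots may be reused.  (This rationale follows the
	 * SP-GiST README's description of tuple states; it is not spelled out
	 * at this spot in the original file.)
	 */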
	/*
	 * We implement the move step by swapping the line pointers of the source
	 * and target tuples, then replacing the newly-source tuples with
	 * placeholders.  This is perhaps unduly friendly with the page data
	 * representation, but it's fast and doesn't risk page overflow when a
	 * tuple to be relocated is large.
	 */
	for (i = 0; i < xlrec.nMove; i++)
	{
		ItemId		idSrc = PageGetItemId(page, moveSrc[i]);
		ItemId		idDest = PageGetItemId(page, moveDest[i]);
		ItemIdData	tmp;

		tmp = *idSrc;
		*idSrc = *idDest;
		*idDest = tmp;
	}

	spgPageIndexMultiDelete(&bds->spgstate, page,
							moveSrc, xlrec.nMove,
							SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
							InvalidBlockNumber, InvalidOffsetNumber);
	for (i = 0; i < xlrec.nChain; i++)
	{
		SpGistLeafTuple lt;

		lt = (SpGistLeafTuple) PageGetItem(page,
										   PageGetItemId(page, chainSrc[i]));
		Assert(lt->tupstate == SPGIST_LIVE);
		SGLT_SET_NEXTOFFSET(lt, chainDest[i]);
	}

	MarkBufferDirty(buffer);

	if (RelationNeedsWAL(index))
	{
		XLogRecPtr	recptr;

		XLogBeginInsert();

		STORE_STATE(&bds->spgstate, xlrec.stateSrc);

		XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumLeaf);
		/* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
		XLogRegisterData((char *) toDead, sizeof(OffsetNumber) * xlrec.nDead);
		XLogRegisterData((char *) toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder);
		XLogRegisterData((char *) moveSrc, sizeof(OffsetNumber) * xlrec.nMove);
		XLogRegisterData((char *) moveDest, sizeof(OffsetNumber) * xlrec.nMove);
		XLogRegisterData((char *) chainSrc, sizeof(OffsetNumber) * xlrec.nChain);
		XLogRegisterData((char *) chainDest, sizeof(OffsetNumber) * xlrec.nChain);

		XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);

		recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF);

		PageSetLSN(page, recptr);
	}

	END_CRIT_SECTION();
}
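
/*
 * The record logged above is replayed by spgRedoVacuumLeaf() in spgxlog.c,
 * which applies the same six arrays in the same order; computing the whole
 * work list before touching the page is what makes that symmetry possible.
 */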
/*
 * Vacuum a root page when it is also a leaf
 *
 * On the root, we just delete any dead leaf tuples; no fancy business
 */
static void
vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
{
	Page		page = BufferGetPage(buffer);
	spgxlogVacuumRoot xlrec;
	OffsetNumber toDelete[MaxIndexTuplesPerPage];
	OffsetNumber i,
				max = PageGetMaxOffsetNumber(page);

	xlrec.nDelete = 0;

	/* Scan page, identify tuples to delete, accumulate stats */
	for (i = FirstOffsetNumber; i <= max; i++)
	{
		SpGistLeafTuple lt;

		lt = (SpGistLeafTuple) PageGetItem(page,
										   PageGetItemId(page, i));
		if (lt->tupstate == SPGIST_LIVE)
		{
			Assert(ItemPointerIsValid(&lt->heapPtr));

			if (bds->callback(&lt->heapPtr, bds->callback_state))
			{
				bds->stats->tuples_removed += 1;
				toDelete[xlrec.nDelete] = i;
				xlrec.nDelete++;
			}
			else
			{
				bds->stats->num_index_tuples += 1;
			}
		}
		else
		{
			/* all tuples on root should be live */
			elog(ERROR, "unexpected SPGiST tuple state: %d",
				 lt->tupstate);
		}
	}

	if (xlrec.nDelete == 0)
		return;					/* nothing more to do */

	/* Do the update */
	START_CRIT_SECTION();

	/* The tuple numbers are in order, so we can use PageIndexMultiDelete */
	PageIndexMultiDelete(page, toDelete, xlrec.nDelete);

	MarkBufferDirty(buffer);

	if (RelationNeedsWAL(index))
	{
		XLogRecPtr	recptr;

		XLogBeginInsert();

		/* Prepare WAL record */
		STORE_STATE(&bds->spgstate, xlrec.stateSrc);

		XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRoot);
		/* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
		XLogRegisterData((char *) toDelete,
						 sizeof(OffsetNumber) * xlrec.nDelete);

		XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);

		recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT);

		PageSetLSN(page, recptr);
	}

	END_CRIT_SECTION();
}
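
/*
 * Plain PageIndexMultiDelete() is safe here, unlike on other leaf pages,
 * because nothing links to individual root tuples: the root has no parent
 * downlink, and leaf tuples placed on the root are never chained (see
 * addLeafTuple in spgdoinsert.c), so their offsets may change freely.
 */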
/*
 * Clean up redirect and placeholder tuples on the given page
 *
 * Redirect tuples can be marked placeholder once they're old enough.
 * Placeholder tuples can be removed if it won't change the offsets of
 * non-placeholder ones.
 *
 * Unlike the routines above, this works on both leaf and inner pages.
 */
static void
vacuumRedirectAndPlaceholder(Relation index, Buffer buffer)
{
	Page		page = BufferGetPage(buffer);
	SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
	OffsetNumber i,
				max = PageGetMaxOffsetNumber(page),
				firstPlaceholder = InvalidOffsetNumber;
	bool		hasNonPlaceholder = false;
	bool		hasUpdate = false;
	OffsetNumber itemToPlaceholder[MaxIndexTuplesPerPage];
	OffsetNumber itemnos[MaxIndexTuplesPerPage];
	spgxlogVacuumRedirect xlrec;
	GlobalVisState *vistest;

	xlrec.nToPlaceholder = 0;
	xlrec.snapshotConflictHorizon = InvalidTransactionId;

	/* XXX: providing heap relation would allow more pruning */
	vistest = GlobalVisTestFor(NULL);

	START_CRIT_SECTION();

	/*
	 * Scan backwards to convert old redirection tuples to placeholder tuples,
	 * and identify location of last non-placeholder tuple while at it.
	 */
	for (i = max;
		 i >= FirstOffsetNumber &&
		 (opaque->nRedirection > 0 || !hasNonPlaceholder);
		 i--)
	{
		SpGistDeadTuple dt;

		dt = (SpGistDeadTuple) PageGetItem(page, PageGetItemId(page, i));

		if (dt->tupstate == SPGIST_REDIRECT &&
			GlobalVisTestIsRemovableXid(vistest, dt->xid))
		{
			dt->tupstate = SPGIST_PLACEHOLDER;
			Assert(opaque->nRedirection > 0);
			opaque->nRedirection--;
			opaque->nPlaceholder++;

			/* remember newest XID among the removed redirects */
			if (!TransactionIdIsValid(xlrec.snapshotConflictHorizon) ||
				TransactionIdPrecedes(xlrec.snapshotConflictHorizon, dt->xid))
				xlrec.snapshotConflictHorizon = dt->xid;

			ItemPointerSetInvalid(&dt->pointer);

			itemToPlaceholder[xlrec.nToPlaceholder] = i;
			xlrec.nToPlaceholder++;

			hasUpdate = true;
		}

		if (dt->tupstate == SPGIST_PLACEHOLDER)
		{
			if (!hasNonPlaceholder)
				firstPlaceholder = i;
		}
		else
		{
			hasNonPlaceholder = true;
		}
	}

	/*
	 * Any placeholder tuples at the end of page can safely be removed.  We
	 * can't remove ones before the last non-placeholder, though, because we
	 * can't alter the offset numbers of non-placeholder tuples.
	 */
	if (firstPlaceholder != InvalidOffsetNumber)
	{
		/*
		 * We do not store this array to rdata because it's easy to recreate.
		 */
		for (i = firstPlaceholder; i <= max; i++)
			itemnos[i - firstPlaceholder] = i;

		i = max - firstPlaceholder + 1;
		Assert(opaque->nPlaceholder >= i);
		opaque->nPlaceholder -= i;

		/* The array is surely sorted, so can use PageIndexMultiDelete */
		PageIndexMultiDelete(page, itemnos, i);

		hasUpdate = true;
	}

	xlrec.firstPlaceholder = firstPlaceholder;

	if (hasUpdate)
		MarkBufferDirty(buffer);

	if (hasUpdate && RelationNeedsWAL(index))
	{
		XLogRecPtr	recptr;

		XLogBeginInsert();

		XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRedirect);
		XLogRegisterData((char *) itemToPlaceholder,
						 sizeof(OffsetNumber) * xlrec.nToPlaceholder);

		XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);

		recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT);

		PageSetLSN(page, recptr);
	}

	END_CRIT_SECTION();
}
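
/*
 * snapshotConflictHorizon travels in the WAL record so that a hot standby
 * replaying XLOG_SPGIST_VACUUM_REDIRECT can cancel queries whose snapshots
 * might still need to follow the removed redirects (see
 * spgRedoVacuumRedirect in spgxlog.c, which calls
 * ResolveRecoveryConflictWithSnapshot).
 */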
/*
 * Process one page during a bulkdelete scan
 */
static void
spgvacuumpage(spgBulkDeleteState *bds, BlockNumber blkno)
{
	Relation	index = bds->info->index;
	Buffer		buffer;
	Page		page;

	/* call vacuum_delay_point while not holding any buffer lock */
	vacuum_delay_point();

	buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
								RBM_NORMAL, bds->info->strategy);
	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
	page = (Page) BufferGetPage(buffer);

	if (PageIsNew(page))
	{
		/*
		 * We found an all-zero page, which could happen if the database
		 * crashed just after extending the file.  Recycle it.
		 */
	}
	else if (PageIsEmpty(page))
	{
		/* nothing to do */
	}
	else if (SpGistPageIsLeaf(page))
	{
		if (SpGistBlockIsRoot(blkno))
		{
			vacuumLeafRoot(bds, index, buffer);
			/* no need for vacuumRedirectAndPlaceholder */
		}
		else
		{
			vacuumLeafPage(bds, index, buffer, false);
			vacuumRedirectAndPlaceholder(index, buffer);
		}
	}
	else
	{
		/* inner page */
		vacuumRedirectAndPlaceholder(index, buffer);
	}

	/*
	 * The root pages must never be deleted, nor marked as available in FSM,
	 * because we don't want them ever returned by a search for a place to put
	 * a new tuple.  Otherwise, check for empty page, and make sure the FSM
	 * knows about it.
	 */
	if (!SpGistBlockIsRoot(blkno))
	{
		if (PageIsNew(page) || PageIsEmpty(page))
		{
			RecordFreeIndexPage(index, blkno);
			bds->stats->pages_deleted++;
		}
		else
		{
			SpGistSetLastUsedPage(index, buffer);
			bds->lastFilledBlock = blkno;
		}
	}

	UnlockReleaseBuffer(buffer);
}
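
/*
 * Note that spgvacuumpage holds at most one buffer lock at a time, and
 * vacuum_delay_point() runs before any buffer is locked.  The pending-list
 * mechanism exists precisely to preserve this: redirect targets found by
 * vacuumLeafPage are queued rather than visited while the lock is held.
 */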
/*
 * Process the pending-TID list between pages of the main scan
 */
static void
spgprocesspending(spgBulkDeleteState *bds)
{
	Relation	index = bds->info->index;
	spgVacPendingItem *pitem;
	spgVacPendingItem *nitem;
	BlockNumber blkno;
	Buffer		buffer;
	Page		page;

	for (pitem = bds->pendingList; pitem != NULL; pitem = pitem->next)
	{
		if (pitem->done)
			continue;			/* ignore already-done items */

		/* call vacuum_delay_point while not holding any buffer lock */
		vacuum_delay_point();

		/* examine the referenced page */
		blkno = ItemPointerGetBlockNumber(&pitem->tid);
		buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
									RBM_NORMAL, bds->info->strategy);
		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
		page = (Page) BufferGetPage(buffer);

		if (PageIsNew(page) || SpGistPageIsDeleted(page))
		{
			/* Probably shouldn't happen, but ignore it */
		}
		else if (SpGistPageIsLeaf(page))
		{
			if (SpGistBlockIsRoot(blkno))
			{
				/* this should definitely not happen */
				elog(ERROR, "redirection leads to root page of index \"%s\"",
					 RelationGetRelationName(index));
			}

			/* deal with any deletable tuples */
			vacuumLeafPage(bds, index, buffer, true);
			/* might as well do this while we are here */
			vacuumRedirectAndPlaceholder(index, buffer);

			SpGistSetLastUsedPage(index, buffer);

			/*
			 * We can mark as done not only this item, but any later ones
			 * pointing at the same page, since we vacuumed the whole page.
			 */
			pitem->done = true;
			for (nitem = pitem->next; nitem != NULL; nitem = nitem->next)
			{
				if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
					nitem->done = true;
			}
		}
		else
		{
			/*
			 * On an inner page, visit the referenced inner tuple and add all
			 * its downlinks to the pending list.  We might have pending items
			 * for more than one inner tuple on the same page (in fact this is
			 * pretty likely given the way space allocation works), so get
			 * them all while we are here.
			 */
			for (nitem = pitem; nitem != NULL; nitem = nitem->next)
			{
				if (nitem->done)
					continue;
				if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
				{
					OffsetNumber offset;
					SpGistInnerTuple innerTuple;

					offset = ItemPointerGetOffsetNumber(&nitem->tid);
					innerTuple = (SpGistInnerTuple) PageGetItem(page,
																PageGetItemId(page, offset));
					if (innerTuple->tupstate == SPGIST_LIVE)
					{
						SpGistNodeTuple node;
						int			i;

						SGITITERATE(innerTuple, i, node)
						{
							if (ItemPointerIsValid(&node->t_tid))
								spgAddPendingTID(bds, &node->t_tid);
						}
					}
					else if (innerTuple->tupstate == SPGIST_REDIRECT)
					{
						/* transfer attention to redirect point */
						spgAddPendingTID(bds,
										 &((SpGistDeadTuple) innerTuple)->pointer);
					}
					else
						elog(ERROR, "unexpected SPGiST tuple state: %d",
							 innerTuple->tupstate);

					nitem->done = true;
				}
			}
		}

		UnlockReleaseBuffer(buffer);
	}

	spgClearPendingList(bds);
}
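
/*
 * As noted in vacuumLeafPage's header comment, the duplicate filtering in
 * spgAddPendingTID is what keeps this loop from running forever in the
 * face of continuous concurrent insertions: a TID already present in the
 * list (done or not) is never queued a second time.
 */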
/*
 * Perform a bulkdelete scan
 */
static void
spgvacuumscan(spgBulkDeleteState *bds)
{
	Relation	index = bds->info->index;
	bool		needLock;
	BlockNumber num_pages,
				blkno;

	/* Finish setting up spgBulkDeleteState */
	initSpGistState(&bds->spgstate, index);
	bds->pendingList = NULL;
	bds->myXmin = GetActiveSnapshot()->xmin;
	bds->lastFilledBlock = SPGIST_LAST_FIXED_BLKNO;

	/*
	 * Reset counts that will be incremented during the scan; needed in case
	 * of multiple scans during a single VACUUM command
	 */
	bds->stats->estimated_count = false;
	bds->stats->num_index_tuples = 0;
	bds->stats->pages_deleted = 0;

	/* We can skip locking for new or temp relations */
	needLock = !RELATION_IS_LOCAL(index);

	/*
	 * The outer loop iterates over all index pages except the metapage, in
	 * physical order (we hope the kernel will cooperate in providing
	 * read-ahead for speed).  It is critical that we visit all leaf pages,
	 * including ones added after we start the scan, else we might fail to
	 * delete some deletable tuples.  See more extensive comments about this
	 * in btvacuumscan().
	 */
	blkno = SPGIST_METAPAGE_BLKNO + 1;
	for (;;)
	{
		/* Get the current relation length */
		if (needLock)
			LockRelationForExtension(index, ExclusiveLock);
		num_pages = RelationGetNumberOfBlocks(index);
		if (needLock)
			UnlockRelationForExtension(index, ExclusiveLock);

		/* Quit if we've scanned the whole relation */
		if (blkno >= num_pages)
			break;
		/* Iterate over pages, then loop back to recheck length */
		for (; blkno < num_pages; blkno++)
		{
			spgvacuumpage(bds, blkno);
			/* empty the pending-list after each page */
			if (bds->pendingList != NULL)
				spgprocesspending(bds);
		}
	}

	/* Propagate local lastUsedPages cache to metablock */
	SpGistUpdateMetaPage(index);

	/*
	 * If we found any empty pages (and recorded them in the FSM), then
	 * forcibly update the upper-level FSM pages to ensure that searchers can
	 * find them.  It's possible that the pages were also found during
	 * previous scans and so this is a waste of time, but it's cheap enough
	 * relative to scanning the index that it shouldn't matter much, and
	 * making sure that free pages are available sooner not later seems
	 * worthwhile.
	 *
	 * Note that if no empty pages exist, we don't bother vacuuming the FSM at
	 * all.
	 */
	if (bds->stats->pages_deleted > 0)
		IndexFreeSpaceMapVacuum(index);

	/*
	 * Truncate index if possible
	 *
	 * XXX disabled because it's unsafe due to possible concurrent inserts.
	 * We'd have to rescan the pages to make sure they're still empty, and it
	 * doesn't seem worth it.  Note that btree doesn't do this either.
	 *
	 * Another reason not to truncate is that it could invalidate the cached
	 * pages-with-freespace pointers in the metapage and other backends'
	 * relation caches, that is leave them pointing to nonexistent pages.
	 * Adding RelationGetNumberOfBlocks calls to protect the places that use
	 * those pointers would be unduly expensive.
	 */
#ifdef NOT_USED
	if (num_pages > bds->lastFilledBlock + 1)
	{
		BlockNumber lastBlock = num_pages - 1;

		num_pages = bds->lastFilledBlock + 1;
		RelationTruncate(index, num_pages);
		bds->stats->pages_removed += lastBlock - bds->lastFilledBlock;
		bds->stats->pages_deleted -= lastBlock - bds->lastFilledBlock;
	}
#endif							/* NOT_USED */

	/* Report final stats */
	bds->stats->num_pages = num_pages;
	bds->stats->pages_newly_deleted = bds->stats->pages_deleted;
	bds->stats->pages_free = bds->stats->pages_deleted;
}
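
/*
 * SP-GiST never physically removes pages during vacuum (the truncation
 * logic above is compiled out), so "deleted" pages are simply empty pages
 * recorded in the FSM; that is why pages_free and pages_newly_deleted are
 * both reported equal to pages_deleted.
 */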
/*
 * Bulk deletion of all index entries pointing to a set of heap tuples.
 * The set of target tuples is specified via a callback routine that tells
 * whether any given heap tuple (identified by ItemPointer) is being deleted.
 *
 * Result: a palloc'd struct containing statistical info for VACUUM displays.
 */
IndexBulkDeleteResult *
spgbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
			  IndexBulkDeleteCallback callback, void *callback_state)
{
	spgBulkDeleteState bds;

	/* allocate stats if first time through, else re-use existing struct */
	if (stats == NULL)
		stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
	bds.info = info;
	bds.stats = stats;
	bds.callback = callback;
	bds.callback_state = callback_state;

	spgvacuumscan(&bds);

	return stats;
}
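
/*
 * spgbulkdelete above and spgvacuumcleanup below are the ambulkdelete and
 * amvacuumcleanup entry points for SP-GiST, wired up via spghandler() in
 * spgutils.c.
 */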
/* Dummy callback to delete no tuples during spgvacuumcleanup */
static bool
dummy_callback(ItemPointer itemptr, void *state)
{
	return false;
}
/*
 * Post-VACUUM cleanup.
 *
 * Result: a palloc'd struct containing statistical info for VACUUM displays.
 */
IndexBulkDeleteResult *
spgvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
{
	spgBulkDeleteState bds;

	/* No-op in ANALYZE ONLY mode */
	if (info->analyze_only)
		return stats;

	/*
	 * We don't need to scan the index if there was a preceding bulkdelete
	 * pass.  Otherwise, make a pass that won't delete any live tuples, but
	 * might still accomplish useful stuff with redirect/placeholder cleanup
	 * and/or FSM housekeeping, and in any case will provide stats.
	 */
	if (stats == NULL)
	{
		stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
		bds.info = info;
		bds.stats = stats;
		bds.callback = dummy_callback;
		bds.callback_state = NULL;

		spgvacuumscan(&bds);
	}

	/*
	 * It's quite possible for us to be fooled by concurrent tuple moves into
	 * double-counting some index tuples, so disbelieve any total that exceeds
	 * the underlying heap's count ... if we know that accurately.  Otherwise
	 * this might just make matters worse.
	 */
	if (!info->estimated_count)
	{
		if (stats->num_index_tuples > info->num_heap_tuples)
			stats->num_index_tuples = info->num_heap_tuples;
	}

	return stats;
}