/*-------------------------------------------------------------------------
 *
 * spgvacuum.c
 *      vacuum for SP-GiST
 *
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *          src/backend/access/spgist/spgvacuum.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/genam.h"
#include "access/spgist_private.h"
#include "access/spgxlog.h"
#include "access/transam.h"
#include "access/xloginsert.h"
#include "catalog/storage_xlog.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
#include "utils/snapmgr.h"

/* Entry in pending-list of TIDs we need to revisit */
typedef struct spgVacPendingItem
{
    ItemPointerData tid;        /* redirection target to visit */
    bool        done;           /* have we dealt with this? */
    struct spgVacPendingItem *next; /* list link */
} spgVacPendingItem;

/* Local state for vacuum operations */
typedef struct spgBulkDeleteState
{
    /* Parameters passed in to spgvacuumscan */
    IndexVacuumInfo *info;
    IndexBulkDeleteResult *stats;
    IndexBulkDeleteCallback callback;
    void       *callback_state;

    /* Additional working state */
    SpGistState spgstate;       /* for SPGiST operations that need one */
    spgVacPendingItem *pendingList; /* TIDs we need to (re)visit */
    TransactionId myXmin;       /* for detecting newly-added redirects */
    BlockNumber lastFilledBlock;    /* last non-deletable block */
} spgBulkDeleteState;

/*
 * Add TID to pendingList, but only if not already present.
 *
 * Note that new items are always appended at the end of the list; this
 * ensures that scans of the list don't miss items added during the scan.
 */
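/*
 * (For instance, spgprocesspending() walks pendingList while calls made
 * during that walk append new entries behind the traversal point, so the
 * scan picks them up rather than skipping them.)
 */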
static void
spgAddPendingTID(spgBulkDeleteState *bds, ItemPointer tid)
{
    spgVacPendingItem *pitem;
    spgVacPendingItem **listLink;

    /* search the list for pre-existing entry */
    listLink = &bds->pendingList;
    while (*listLink != NULL)
    {
        pitem = *listLink;
        if (ItemPointerEquals(tid, &pitem->tid))
            return;             /* already in list, do nothing */
        listLink = &pitem->next;
    }
    /* not there, so append new entry */
    pitem = (spgVacPendingItem *) palloc(sizeof(spgVacPendingItem));
    pitem->tid = *tid;
    pitem->done = false;
    pitem->next = NULL;
    *listLink = pitem;
}

/*
 * Clear pendingList
 */
static void
spgClearPendingList(spgBulkDeleteState *bds)
{
    spgVacPendingItem *pitem;
    spgVacPendingItem *nitem;

    for (pitem = bds->pendingList; pitem != NULL; pitem = nitem)
    {
        nitem = pitem->next;
        /* All items in list should have been dealt with */
        Assert(pitem->done);
        pfree(pitem);
    }
    bds->pendingList = NULL;
}

/*
 * Vacuum a regular (non-root) leaf page
 *
 * We must delete tuples that are targeted for deletion by the VACUUM,
 * but not move any tuples that are referenced by outside links; we assume
 * those are the ones that are heads of chains.
 *
 * If we find a REDIRECT that was made by a concurrently-running transaction,
 * we must add its target TID to pendingList.  (We don't try to visit the
 * target immediately, first because we don't want VACUUM locking more than
 * one buffer at a time, and second because the duplicate-filtering logic
 * in spgAddPendingTID is useful to ensure we can't get caught in an infinite
 * loop in the face of continuous concurrent insertions.)
 *
 * If forPending is true, we are examining the page as a consequence of
 * chasing a redirect link, not as part of the normal sequential scan.
 * We still vacuum the page normally, but we don't increment the stats
 * about live tuples; else we'd double-count those tuples, since the page
 * has been or will be visited in the sequential scan as well.
 */
static void
vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
               bool forPending)
{
    Page        page = BufferGetPage(buffer);
    spgxlogVacuumLeaf xlrec;
    OffsetNumber toDead[MaxIndexTuplesPerPage];
    OffsetNumber toPlaceholder[MaxIndexTuplesPerPage];
    OffsetNumber moveSrc[MaxIndexTuplesPerPage];
    OffsetNumber moveDest[MaxIndexTuplesPerPage];
    OffsetNumber chainSrc[MaxIndexTuplesPerPage];
    OffsetNumber chainDest[MaxIndexTuplesPerPage];
    OffsetNumber predecessor[MaxIndexTuplesPerPage + 1];
    bool        deletable[MaxIndexTuplesPerPage + 1];
    int         nDeletable;
    OffsetNumber i,
                max = PageGetMaxOffsetNumber(page);

    memset(predecessor, 0, sizeof(predecessor));
    memset(deletable, 0, sizeof(deletable));
    nDeletable = 0;

    /* Scan page, identify tuples to delete, accumulate stats */
    for (i = FirstOffsetNumber; i <= max; i++)
    {
        SpGistLeafTuple lt;

        lt = (SpGistLeafTuple) PageGetItem(page,
                                           PageGetItemId(page, i));
        if (lt->tupstate == SPGIST_LIVE)
        {
            Assert(ItemPointerIsValid(&lt->heapPtr));

            if (bds->callback(&lt->heapPtr, bds->callback_state))
            {
                bds->stats->tuples_removed += 1;
                deletable[i] = true;
                nDeletable++;
            }
            else
            {
                if (!forPending)
                    bds->stats->num_index_tuples += 1;
            }

            /* Form predecessor map, too */
            if (SGLT_GET_NEXTOFFSET(lt) != InvalidOffsetNumber)
            {
                /* paranoia about corrupted chain links */
                if (SGLT_GET_NEXTOFFSET(lt) < FirstOffsetNumber ||
                    SGLT_GET_NEXTOFFSET(lt) > max ||
                    predecessor[SGLT_GET_NEXTOFFSET(lt)] != InvalidOffsetNumber)
                    elog(ERROR, "inconsistent tuple chain links in page %u of index \"%s\"",
                         BufferGetBlockNumber(buffer),
                         RelationGetRelationName(index));
                predecessor[SGLT_GET_NEXTOFFSET(lt)] = i;
            }
        }
        else if (lt->tupstate == SPGIST_REDIRECT)
        {
            SpGistDeadTuple dt = (SpGistDeadTuple) lt;

            Assert(SGLT_GET_NEXTOFFSET(dt) == InvalidOffsetNumber);
            Assert(ItemPointerIsValid(&dt->pointer));

            /*
             * Add target TID to pending list if the redirection could have
             * happened since VACUUM started.
             *
             * Note: we could make a tighter test by seeing if the xid is
             * "running" according to the active snapshot; but snapmgr.c
             * doesn't currently export a suitable API, and it's not entirely
             * clear that a tighter test is worth the cycles anyway.
             */
            if (TransactionIdFollowsOrEquals(dt->xid, bds->myXmin))
                spgAddPendingTID(bds, &dt->pointer);
        }
        else
        {
            Assert(SGLT_GET_NEXTOFFSET(lt) == InvalidOffsetNumber);
        }
    }

    if (nDeletable == 0)
        return;                 /* nothing more to do */

    /*----------
     * Figure out exactly what we have to do.  We do this separately from
     * actually modifying the page, mainly so that we have a representation
     * that can be dumped into WAL and then the replay code can do exactly
     * the same thing.  The output of this step consists of six arrays
     * describing four kinds of operations, to be performed in this order:
     *
     * toDead[]: tuple numbers to be replaced with DEAD tuples
     * toPlaceholder[]: tuple numbers to be replaced with PLACEHOLDER tuples
     * moveSrc[]: tuple numbers that need to be relocated to another offset
     * (replacing the tuple there) and then replaced with PLACEHOLDER tuples
     * moveDest[]: new locations for moveSrc tuples
     * chainSrc[]: tuple numbers whose chain links (nextOffset) need updates
     * chainDest[]: new values of nextOffset for chainSrc members
     *
     * It's easiest to figure out what we have to do by processing tuple
     * chains, so we iterate over all the tuples (not just the deletable
     * ones!) to identify chain heads, then chase down each chain and make
     * work item entries for deletable tuples within the chain.
     *----------
     */
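    /*
     * Worked example (hypothetical offsets): for a chain 2 -> 5 -> 7 -> 9
     * where tuples 2 and 7 are deletable, we emit moveSrc = {5},
     * moveDest = {2} (the first surviving tuple takes over the head slot, so
     * outside links to offset 2 stay valid), toPlaceholder = {7}, and
     * chainSrc = {2}, chainDest = {9} to splice out the gap left by 7.
     * Offsets 5 and 7 end up as placeholders.
     */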
    xlrec.nDead = xlrec.nPlaceholder = xlrec.nMove = xlrec.nChain = 0;

    for (i = FirstOffsetNumber; i <= max; i++)
    {
        SpGistLeafTuple head;
        bool        interveningDeletable;
        OffsetNumber prevLive;
        OffsetNumber j;

        head = (SpGistLeafTuple) PageGetItem(page,
                                             PageGetItemId(page, i));
        if (head->tupstate != SPGIST_LIVE)
            continue;           /* can't be a chain member */
        if (predecessor[i] != 0)
            continue;           /* not a chain head */

        /* initialize ... */
        interveningDeletable = false;
        prevLive = deletable[i] ? InvalidOffsetNumber : i;

        /* scan down the chain ... */
        j = SGLT_GET_NEXTOFFSET(head);
        while (j != InvalidOffsetNumber)
        {
            SpGistLeafTuple lt;

            lt = (SpGistLeafTuple) PageGetItem(page,
                                               PageGetItemId(page, j));
            if (lt->tupstate != SPGIST_LIVE)
            {
                /* all tuples in chain should be live */
                elog(ERROR, "unexpected SPGiST tuple state: %d",
                     lt->tupstate);
            }

            if (deletable[j])
            {
                /* This tuple should be replaced by a placeholder */
                toPlaceholder[xlrec.nPlaceholder] = j;
                xlrec.nPlaceholder++;
                /* previous live tuple's chain link will need an update */
                interveningDeletable = true;
            }
            else if (prevLive == InvalidOffsetNumber)
            {
                /*
                 * This is the first live tuple in the chain.  It has to move
                 * to the head position.
                 */
                moveSrc[xlrec.nMove] = j;
                moveDest[xlrec.nMove] = i;
                xlrec.nMove++;
                /* Chain updates will be applied after the move */
                prevLive = i;
                interveningDeletable = false;
            }
            else
            {
                /*
                 * Second or later live tuple.  Arrange to re-chain it to the
                 * previous live one, if there was a gap.
                 */
                if (interveningDeletable)
                {
                    chainSrc[xlrec.nChain] = prevLive;
                    chainDest[xlrec.nChain] = j;
                    xlrec.nChain++;
                }
                prevLive = j;
                interveningDeletable = false;
            }

            j = SGLT_GET_NEXTOFFSET(lt);
        }

        if (prevLive == InvalidOffsetNumber)
        {
            /* The chain is entirely removable, so we need a DEAD tuple */
            toDead[xlrec.nDead] = i;
            xlrec.nDead++;
        }
        else if (interveningDeletable)
        {
            /* One or more deletions at end of chain, so close it off */
            chainSrc[xlrec.nChain] = prevLive;
            chainDest[xlrec.nChain] = InvalidOffsetNumber;
            xlrec.nChain++;
        }
    }

    /* sanity check ... */
    if (nDeletable != xlrec.nDead + xlrec.nPlaceholder + xlrec.nMove)
        elog(ERROR, "inconsistent counts of deletable tuples");

    /* Do the updates */
    START_CRIT_SECTION();

    spgPageIndexMultiDelete(&bds->spgstate, page,
                            toDead, xlrec.nDead,
                            SPGIST_DEAD, SPGIST_DEAD,
                            InvalidBlockNumber, InvalidOffsetNumber);

    spgPageIndexMultiDelete(&bds->spgstate, page,
                            toPlaceholder, xlrec.nPlaceholder,
                            SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
                            InvalidBlockNumber, InvalidOffsetNumber);

    /*
     * We implement the move step by swapping the line pointers of the source
     * and target tuples, then replacing the newly-source tuples with
     * placeholders.  This is perhaps unduly friendly with the page data
     * representation, but it's fast and doesn't risk page overflow when a
     * tuple to be relocated is large.
     */
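    /*
     * (E.g., swapping the line pointers of offsets 5 and 2 makes the tuple
     * data formerly addressed by offset 5 addressable at offset 2, and vice
     * versa, without copying any tuple bytes; offset 5 is then overwritten
     * with a placeholder just below.)
     */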
    for (i = 0; i < xlrec.nMove; i++)
    {
        ItemId      idSrc = PageGetItemId(page, moveSrc[i]);
        ItemId      idDest = PageGetItemId(page, moveDest[i]);
        ItemIdData  tmp;

        tmp = *idSrc;
        *idSrc = *idDest;
        *idDest = tmp;
    }

    spgPageIndexMultiDelete(&bds->spgstate, page,
                            moveSrc, xlrec.nMove,
                            SPGIST_PLACEHOLDER, SPGIST_PLACEHOLDER,
                            InvalidBlockNumber, InvalidOffsetNumber);

    for (i = 0; i < xlrec.nChain; i++)
    {
        SpGistLeafTuple lt;

        lt = (SpGistLeafTuple) PageGetItem(page,
                                           PageGetItemId(page, chainSrc[i]));
        Assert(lt->tupstate == SPGIST_LIVE);
        SGLT_SET_NEXTOFFSET(lt, chainDest[i]);
    }

    MarkBufferDirty(buffer);

    if (RelationNeedsWAL(index))
    {
        XLogRecPtr  recptr;

        XLogBeginInsert();

        STORE_STATE(&bds->spgstate, xlrec.stateSrc);

        XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumLeaf);
        /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
        XLogRegisterData((char *) toDead, sizeof(OffsetNumber) * xlrec.nDead);
        XLogRegisterData((char *) toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder);
        XLogRegisterData((char *) moveSrc, sizeof(OffsetNumber) * xlrec.nMove);
        XLogRegisterData((char *) moveDest, sizeof(OffsetNumber) * xlrec.nMove);
        XLogRegisterData((char *) chainSrc, sizeof(OffsetNumber) * xlrec.nChain);
        XLogRegisterData((char *) chainDest, sizeof(OffsetNumber) * xlrec.nChain);

        XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);

        recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF);

        PageSetLSN(page, recptr);
    }

    END_CRIT_SECTION();
}

/*
 * Vacuum a root page when it is also a leaf
 *
 * On the root, we just delete any dead leaf tuples; no fancy business
 */
static void
vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
{
    Page        page = BufferGetPage(buffer);
    spgxlogVacuumRoot xlrec;
    OffsetNumber toDelete[MaxIndexTuplesPerPage];
    OffsetNumber i,
                max = PageGetMaxOffsetNumber(page);

    xlrec.nDelete = 0;

    /* Scan page, identify tuples to delete, accumulate stats */
    for (i = FirstOffsetNumber; i <= max; i++)
    {
        SpGistLeafTuple lt;

        lt = (SpGistLeafTuple) PageGetItem(page,
                                           PageGetItemId(page, i));
        if (lt->tupstate == SPGIST_LIVE)
        {
            Assert(ItemPointerIsValid(&lt->heapPtr));

            if (bds->callback(&lt->heapPtr, bds->callback_state))
            {
                bds->stats->tuples_removed += 1;
                toDelete[xlrec.nDelete] = i;
                xlrec.nDelete++;
            }
            else
            {
                bds->stats->num_index_tuples += 1;
            }
        }
        else
        {
            /* all tuples on root should be live */
            elog(ERROR, "unexpected SPGiST tuple state: %d",
                 lt->tupstate);
        }
    }

    if (xlrec.nDelete == 0)
        return;                 /* nothing more to do */

    /* Do the update */
    START_CRIT_SECTION();

    /* The tuple numbers are in order, so we can use PageIndexMultiDelete */
    PageIndexMultiDelete(page, toDelete, xlrec.nDelete);

    MarkBufferDirty(buffer);

    if (RelationNeedsWAL(index))
    {
        XLogRecPtr  recptr;

        XLogBeginInsert();

        /* Prepare WAL record */
        STORE_STATE(&bds->spgstate, xlrec.stateSrc);

        XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRoot);
        /* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
        XLogRegisterData((char *) toDelete,
                         sizeof(OffsetNumber) * xlrec.nDelete);

        XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);

        recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT);

        PageSetLSN(page, recptr);
    }

    END_CRIT_SECTION();
}

/*
 * Clean up redirect and placeholder tuples on the given page
 *
 * Redirect tuples can be marked placeholder once they're old enough.
 * Placeholder tuples can be removed if it won't change the offsets of
 * non-placeholder ones.
 *
 * Unlike the routines above, this works on both leaf and inner pages.
 */
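/*
 * (Lifecycle recap: a REDIRECT tuple carries the XID of the transaction that
 * created it; once that XID is old enough that no snapshot could still need
 * to follow the link, the tuple is downgraded to a PLACEHOLDER here, and
 * placeholders at the end of the page are physically deleted so their
 * offsets can be reused.)
 */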
static void
vacuumRedirectAndPlaceholder(Relation index, Buffer buffer)
{
    Page        page = BufferGetPage(buffer);
    SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
    OffsetNumber i,
                max = PageGetMaxOffsetNumber(page),
                firstPlaceholder = InvalidOffsetNumber;
    bool        hasNonPlaceholder = false;
    bool        hasUpdate = false;
    OffsetNumber itemToPlaceholder[MaxIndexTuplesPerPage];
    OffsetNumber itemnos[MaxIndexTuplesPerPage];
    spgxlogVacuumRedirect xlrec;
    GlobalVisState *vistest;

    xlrec.nToPlaceholder = 0;
    xlrec.snapshotConflictHorizon = InvalidTransactionId;

    /* XXX: providing heap relation would allow more pruning */
    vistest = GlobalVisTestFor(NULL);

    START_CRIT_SECTION();

    /*
     * Scan backwards to convert old redirection tuples to placeholder tuples,
     * and identify location of last non-placeholder tuple while at it.
     */
    for (i = max;
         i >= FirstOffsetNumber &&
         (opaque->nRedirection > 0 || !hasNonPlaceholder);
         i--)
    {
        SpGistDeadTuple dt;

        dt = (SpGistDeadTuple) PageGetItem(page, PageGetItemId(page, i));

        if (dt->tupstate == SPGIST_REDIRECT &&
            GlobalVisTestIsRemovableXid(vistest, dt->xid))
        {
            dt->tupstate = SPGIST_PLACEHOLDER;
            Assert(opaque->nRedirection > 0);
            opaque->nRedirection--;
            opaque->nPlaceholder++;

            /* remember newest XID among the removed redirects */
            if (!TransactionIdIsValid(xlrec.snapshotConflictHorizon) ||
                TransactionIdPrecedes(xlrec.snapshotConflictHorizon, dt->xid))
                xlrec.snapshotConflictHorizon = dt->xid;
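
            /*
             * (During hot-standby replay, this conflict horizon is used to
             * cancel queries whose snapshots might still need to follow the
             * removed redirects.)
             */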

            ItemPointerSetInvalid(&dt->pointer);

            itemToPlaceholder[xlrec.nToPlaceholder] = i;
            xlrec.nToPlaceholder++;

            hasUpdate = true;
        }

        if (dt->tupstate == SPGIST_PLACEHOLDER)
        {
            if (!hasNonPlaceholder)
                firstPlaceholder = i;
        }
        else
        {
            hasNonPlaceholder = true;
        }
    }

    /*
     * Any placeholder tuples at the end of page can safely be removed.  We
     * can't remove ones before the last non-placeholder, though, because we
     * can't alter the offset numbers of non-placeholder tuples.
     */
    if (firstPlaceholder != InvalidOffsetNumber)
    {
        /*
         * We do not store this array to rdata because it's easy to recreate.
         */
        for (i = firstPlaceholder; i <= max; i++)
            itemnos[i - firstPlaceholder] = i;

        i = max - firstPlaceholder + 1;
        Assert(opaque->nPlaceholder >= i);
        opaque->nPlaceholder -= i;

        /* The array is surely sorted, so can use PageIndexMultiDelete */
        PageIndexMultiDelete(page, itemnos, i);

        hasUpdate = true;
    }

    xlrec.firstPlaceholder = firstPlaceholder;

    if (hasUpdate)
        MarkBufferDirty(buffer);

    if (hasUpdate && RelationNeedsWAL(index))
    {
        XLogRecPtr  recptr;

        XLogBeginInsert();

        XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRedirect);
        XLogRegisterData((char *) itemToPlaceholder,
                         sizeof(OffsetNumber) * xlrec.nToPlaceholder);

        XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);

        recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT);

        PageSetLSN(page, recptr);
    }

    END_CRIT_SECTION();
}

/*
 * Process one page during a bulkdelete scan
 */
static void
spgvacuumpage(spgBulkDeleteState *bds, BlockNumber blkno)
{
    Relation    index = bds->info->index;
    Buffer      buffer;
    Page        page;

    /* call vacuum_delay_point while not holding any buffer lock */
    vacuum_delay_point();

    buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
                                RBM_NORMAL, bds->info->strategy);
    LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
    page = (Page) BufferGetPage(buffer);

    if (PageIsNew(page))
    {
        /*
         * We found an all-zero page, which could happen if the database
         * crashed just after extending the file.  Recycle it.
         */
    }
    else if (PageIsEmpty(page))
    {
        /* nothing to do */
    }
    else if (SpGistPageIsLeaf(page))
    {
        if (SpGistBlockIsRoot(blkno))
        {
            vacuumLeafRoot(bds, index, buffer);
            /* no need for vacuumRedirectAndPlaceholder */
        }
        else
        {
            vacuumLeafPage(bds, index, buffer, false);
            vacuumRedirectAndPlaceholder(index, buffer);
        }
    }
    else
    {
        /* inner page */
        vacuumRedirectAndPlaceholder(index, buffer);
    }

    /*
     * The root pages must never be deleted, nor marked as available in FSM,
     * because we don't want them ever returned by a search for a place to put
     * a new tuple.  Otherwise, check for empty page, and make sure the FSM
     * knows about it.
     */
    if (!SpGistBlockIsRoot(blkno))
    {
        if (PageIsNew(page) || PageIsEmpty(page))
        {
            RecordFreeIndexPage(index, blkno);
            bds->stats->pages_deleted++;
        }
        else
        {
            SpGistSetLastUsedPage(index, buffer);
            bds->lastFilledBlock = blkno;
        }
    }

    UnlockReleaseBuffer(buffer);
}

/*
 * Process the pending-TID list between pages of the main scan
 */
static void
spgprocesspending(spgBulkDeleteState *bds)
{
    Relation    index = bds->info->index;
    spgVacPendingItem *pitem;
    spgVacPendingItem *nitem;
    BlockNumber blkno;
    Buffer      buffer;
    Page        page;

    for (pitem = bds->pendingList; pitem != NULL; pitem = pitem->next)
    {
        if (pitem->done)
            continue;           /* ignore already-done items */

        /* call vacuum_delay_point while not holding any buffer lock */
        vacuum_delay_point();

        /* examine the referenced page */
        blkno = ItemPointerGetBlockNumber(&pitem->tid);
        buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
                                    RBM_NORMAL, bds->info->strategy);
        LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
        page = (Page) BufferGetPage(buffer);

        if (PageIsNew(page) || SpGistPageIsDeleted(page))
        {
            /* Probably shouldn't happen, but ignore it */
        }
        else if (SpGistPageIsLeaf(page))
        {
            if (SpGistBlockIsRoot(blkno))
            {
                /* this should definitely not happen */
                elog(ERROR, "redirection leads to root page of index \"%s\"",
                     RelationGetRelationName(index));
            }

            /* deal with any deletable tuples */
            vacuumLeafPage(bds, index, buffer, true);
            /* might as well do this while we are here */
            vacuumRedirectAndPlaceholder(index, buffer);

            SpGistSetLastUsedPage(index, buffer);

            /*
             * We can mark as done not only this item, but any later ones
             * pointing at the same page, since we vacuumed the whole page.
             */
            pitem->done = true;
            for (nitem = pitem->next; nitem != NULL; nitem = nitem->next)
            {
                if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
                    nitem->done = true;
            }
        }
        else
        {
            /*
             * On an inner page, visit the referenced inner tuple and add all
             * its downlinks to the pending list.  We might have pending items
             * for more than one inner tuple on the same page (in fact this is
             * pretty likely given the way space allocation works), so get
             * them all while we are here.
             */
            for (nitem = pitem; nitem != NULL; nitem = nitem->next)
            {
                if (nitem->done)
                    continue;
                if (ItemPointerGetBlockNumber(&nitem->tid) == blkno)
                {
                    OffsetNumber offset;
                    SpGistInnerTuple innerTuple;

                    offset = ItemPointerGetOffsetNumber(&nitem->tid);
                    innerTuple = (SpGistInnerTuple) PageGetItem(page,
                                                                PageGetItemId(page, offset));
                    if (innerTuple->tupstate == SPGIST_LIVE)
                    {
                        SpGistNodeTuple node;
                        int         i;

                        SGITITERATE(innerTuple, i, node)
                        {
                            if (ItemPointerIsValid(&node->t_tid))
                                spgAddPendingTID(bds, &node->t_tid);
                        }
                    }
                    else if (innerTuple->tupstate == SPGIST_REDIRECT)
                    {
                        /* transfer attention to redirect point */
                        spgAddPendingTID(bds,
                                         &((SpGistDeadTuple) innerTuple)->pointer);
                    }
                    else
                        elog(ERROR, "unexpected SPGiST tuple state: %d",
                             innerTuple->tupstate);

                    nitem->done = true;
                }
            }
        }

        UnlockReleaseBuffer(buffer);
    }

    spgClearPendingList(bds);
}

/*
 * Perform a bulkdelete scan
 */
static void
spgvacuumscan(spgBulkDeleteState *bds)
{
    Relation    index = bds->info->index;
    bool        needLock;
    BlockNumber num_pages,
                blkno;

    /* Finish setting up spgBulkDeleteState */
    initSpGistState(&bds->spgstate, index);
    bds->pendingList = NULL;
    bds->myXmin = GetActiveSnapshot()->xmin;
    bds->lastFilledBlock = SPGIST_LAST_FIXED_BLKNO;

    /*
     * Reset counts that will be incremented during the scan; needed in case
     * of multiple scans during a single VACUUM command
     */
    bds->stats->estimated_count = false;
    bds->stats->num_index_tuples = 0;
    bds->stats->pages_deleted = 0;

    /* We can skip locking for new or temp relations */
    needLock = !RELATION_IS_LOCAL(index);

    /*
     * The outer loop iterates over all index pages except the metapage, in
     * physical order (we hope the kernel will cooperate in providing
     * read-ahead for speed).  It is critical that we visit all leaf pages,
     * including ones added after we start the scan, else we might fail to
     * delete some deletable tuples.  See more extensive comments about this
     * in btvacuumscan().
     */
    blkno = SPGIST_METAPAGE_BLKNO + 1;
    for (;;)
    {
        /* Get the current relation length */
        if (needLock)
            LockRelationForExtension(index, ExclusiveLock);
        num_pages = RelationGetNumberOfBlocks(index);
        if (needLock)
            UnlockRelationForExtension(index, ExclusiveLock);

        /* Quit if we've scanned the whole relation */
        if (blkno >= num_pages)
            break;
        /* Iterate over pages, then loop back to recheck length */
        for (; blkno < num_pages; blkno++)
        {
            spgvacuumpage(bds, blkno);
            /* empty the pending-list after each page */
            if (bds->pendingList != NULL)
                spgprocesspending(bds);
        }
    }

    /* Propagate local lastUsedPages cache to metablock */
    SpGistUpdateMetaPage(index);

    /*
     * If we found any empty pages (and recorded them in the FSM), then
     * forcibly update the upper-level FSM pages to ensure that searchers can
     * find them.  It's possible that the pages were also found during
     * previous scans and so this is a waste of time, but it's cheap enough
     * relative to scanning the index that it shouldn't matter much, and
     * making sure that free pages are available sooner not later seems
     * worthwhile.
     *
     * Note that if no empty pages exist, we don't bother vacuuming the FSM at
     * all.
     */
    if (bds->stats->pages_deleted > 0)
        IndexFreeSpaceMapVacuum(index);

    /*
     * Truncate index if possible
     *
     * XXX disabled because it's unsafe due to possible concurrent inserts.
     * We'd have to rescan the pages to make sure they're still empty, and it
     * doesn't seem worth it.  Note that btree doesn't do this either.
     *
     * Another reason not to truncate is that it could invalidate the cached
     * pages-with-freespace pointers in the metapage and other backends'
     * relation caches, that is leave them pointing to nonexistent pages.
     * Adding RelationGetNumberOfBlocks calls to protect the places that use
     * those pointers would be unduly expensive.
     */
#ifdef NOT_USED
    if (num_pages > bds->lastFilledBlock + 1)
    {
        BlockNumber lastBlock = num_pages - 1;

        num_pages = bds->lastFilledBlock + 1;
        RelationTruncate(index, num_pages);
        bds->stats->pages_removed += lastBlock - bds->lastFilledBlock;
        bds->stats->pages_deleted -= lastBlock - bds->lastFilledBlock;
    }
#endif

    /* Report final stats */
    bds->stats->num_pages = num_pages;
    bds->stats->pages_newly_deleted = bds->stats->pages_deleted;
    bds->stats->pages_free = bds->stats->pages_deleted;
}

/*
 * Bulk deletion of all index entries pointing to a set of heap tuples.
 * The set of target tuples is specified via a callback routine that tells
 * whether any given heap tuple (identified by ItemPointer) is being deleted.
 *
 * Result: a palloc'd struct containing statistical info for VACUUM displays.
 */
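/*
 * (This is reached via the index AM's ambulkdelete entry point; spghandler()
 * in spgutils.c installs spgbulkdelete in the IndexAmRoutine.)
 */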
IndexBulkDeleteResult *
spgbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
              IndexBulkDeleteCallback callback, void *callback_state)
{
    spgBulkDeleteState bds;

    /* allocate stats if first time through, else re-use existing struct */
    if (stats == NULL)
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
    bds.info = info;
    bds.stats = stats;
    bds.callback = callback;
    bds.callback_state = callback_state;

    spgvacuumscan(&bds);

    return stats;
}

/* Dummy callback to delete no tuples during spgvacuumcleanup */
static bool
dummy_callback(ItemPointer itemptr, void *state)
{
    return false;
}

/*
 * Post-VACUUM cleanup.
 *
 * Result: a palloc'd struct containing statistical info for VACUUM displays.
 */
IndexBulkDeleteResult *
spgvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
{
    spgBulkDeleteState bds;

    /* No-op in ANALYZE ONLY mode */
    if (info->analyze_only)
        return stats;

    /*
     * We don't need to scan the index if there was a preceding bulkdelete
     * pass.  Otherwise, make a pass that won't delete any live tuples, but
     * might still accomplish useful stuff with redirect/placeholder cleanup
     * and/or FSM housekeeping, and in any case will provide stats.
     */
    if (stats == NULL)
    {
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
        bds.info = info;
        bds.stats = stats;
        bds.callback = dummy_callback;
        bds.callback_state = NULL;

        spgvacuumscan(&bds);
    }

    /*
     * It's quite possible for us to be fooled by concurrent tuple moves into
     * double-counting some index tuples, so disbelieve any total that exceeds
     * the underlying heap's count ... if we know that accurately.  Otherwise
     * this might just make matters worse.
     */
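    /*
     * (E.g., a tuple relocated from a page we already scanned to one we have
     * not reached yet would be counted twice.)
     */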
    if (!info->estimated_count)
    {
        if (stats->num_index_tuples > info->num_heap_tuples)
            stats->num_index_tuples = info->num_heap_tuples;
    }

    return stats;
}