/*
 * brin_revmap.c
 *		Range map for BRIN indexes
 *
 * The range map (revmap) is a translation structure for BRIN indexes: for each
 * page range there is one summary tuple, and its location is tracked by the
 * revmap.  Whenever a new tuple is inserted into a table that violates the
 * previously recorded summary values, a new tuple is inserted into the index
 * and the revmap is updated to point to it.
 *
 * The revmap is stored in the first pages of the index, immediately following
 * the metapage.  When the revmap needs to be expanded, all tuples on the
 * regular BRIN page at that block (if any) are moved out of the way.
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *		src/backend/access/brin/brin_revmap.c
 */
#include "postgres.h"

#include "access/brin_page.h"
#include "access/brin_pageops.h"
#include "access/brin_revmap.h"
#include "access/brin_tuple.h"
#include "access/brin_xlog.h"
#include "access/rmgr.h"
#include "access/xloginsert.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

/*
 * In revmap pages, each item stores an ItemPointerData.  These defines let one
 * find the logical revmap page number and index number of the revmap item for
 * the given heap block number.
 */
#define HEAPBLK_TO_REVMAP_BLK(pagesPerRange, heapBlk) \
	((heapBlk / pagesPerRange) / REVMAP_PAGE_MAXITEMS)
#define HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk) \
	((heapBlk / pagesPerRange) % REVMAP_PAGE_MAXITEMS)

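/*
 * A worked example with hypothetical numbers (assuming the default 8 kB
 * block size, for which REVMAP_PAGE_MAXITEMS works out to 1360): with
 * pagesPerRange = 128, heap block 300000 belongs to range
 * 300000 / 128 = 2343, so its revmap item lives on logical revmap page
 * 2343 / 1360 = 1 at item index 2343 % 1360 = 983.  Callers add 1 to the
 * logical page number to skip the metapage and obtain the physical block
 * (see revmap_get_blkno below).
 */
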
struct BrinRevmap
{
	Relation	rm_irel;
	BlockNumber rm_pagesPerRange;
	BlockNumber rm_lastRevmapPage;	/* cached from the metapage */
	Buffer		rm_metaBuf;
	Buffer		rm_currBuf;
};

/* typedef appears in brin_revmap.h */


static BlockNumber revmap_get_blkno(BrinRevmap *revmap,
									BlockNumber heapBlk);
static Buffer revmap_get_buffer(BrinRevmap *revmap, BlockNumber heapBlk);
static BlockNumber revmap_extend_and_get_blkno(BrinRevmap *revmap,
											   BlockNumber heapBlk);
static void revmap_physical_extend(BrinRevmap *revmap);

/*
 * Initialize an access object for a range map.  This must be freed by
 * brinRevmapTerminate when caller is done with it.
 */
BrinRevmap *
brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange)
{
	BrinRevmap *revmap;
	Buffer		meta;
	BrinMetaPageData *metadata;
	Page		page;

	meta = ReadBuffer(idxrel, BRIN_METAPAGE_BLKNO);
	LockBuffer(meta, BUFFER_LOCK_SHARE);
	page = BufferGetPage(meta);
	metadata = (BrinMetaPageData *) PageGetContents(page);

	revmap = palloc(sizeof(BrinRevmap));
	revmap->rm_irel = idxrel;
	revmap->rm_pagesPerRange = metadata->pagesPerRange;
	revmap->rm_lastRevmapPage = metadata->lastRevmapPage;
	revmap->rm_metaBuf = meta;
	revmap->rm_currBuf = InvalidBuffer;

	*pagesPerRange = metadata->pagesPerRange;

	LockBuffer(meta, BUFFER_LOCK_UNLOCK);

	return revmap;
}

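/*
 * A minimal sketch of the intended lifecycle (hypothetical caller code;
 * "idxrel" stands for any BRIN index relation the caller has already
 * opened and locked):
 *
 *		BlockNumber pagesPerRange;
 *		BrinRevmap *revmap;
 *
 *		revmap = brinRevmapInitialize(idxrel, &pagesPerRange);
 *		... look up or update revmap entries ...
 *		brinRevmapTerminate(revmap);
 */
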
/*
 * Release resources associated with a revmap access object.
 */
void
brinRevmapTerminate(BrinRevmap *revmap)
{
	ReleaseBuffer(revmap->rm_metaBuf);
	if (revmap->rm_currBuf != InvalidBuffer)
		ReleaseBuffer(revmap->rm_currBuf);
	pfree(revmap);
}

/*
 * Extend the revmap to cover the given heap block number.
 */
void
brinRevmapExtend(BrinRevmap *revmap, BlockNumber heapBlk)
{
	BlockNumber mapBlk PG_USED_FOR_ASSERTS_ONLY;

	mapBlk = revmap_extend_and_get_blkno(revmap, heapBlk);

	/* Ensure the buffer we got is in the expected range */
	Assert(mapBlk != InvalidBlockNumber &&
		   mapBlk != BRIN_METAPAGE_BLKNO &&
		   mapBlk <= revmap->rm_lastRevmapPage);
}

/*
 * Prepare to insert an entry into the revmap; the revmap buffer in which the
 * entry is to reside is locked and returned.  Most callers should call
 * brinRevmapExtend beforehand, as this routine does not extend the revmap if
 * it's not long enough.
 *
 * The returned buffer is also recorded in the revmap struct; finishing the
 * revmap (brinRevmapTerminate) releases the buffer, so the caller needn't do
 * it explicitly.
 */
Buffer
brinLockRevmapPageForUpdate(BrinRevmap *revmap, BlockNumber heapBlk)
{
	Buffer		rmBuf;

	rmBuf = revmap_get_buffer(revmap, heapBlk);
	LockBuffer(rmBuf, BUFFER_LOCK_EXCLUSIVE);

	return rmBuf;
}

/*
 * In the given revmap buffer (locked appropriately by caller), which is used
 * in a BRIN index of pagesPerRange pages per range, set the element
 * corresponding to heap block number heapBlk to the given TID.
 *
 * Once the operation is complete, the caller must update the LSN on the
 * returned buffer.
 *
 * This is used both in regular operation and during WAL replay.
 */
void
brinSetHeapBlockItemptr(Buffer buf, BlockNumber pagesPerRange,
						BlockNumber heapBlk, ItemPointerData tid)
{
	RevmapContents *contents;
	ItemPointerData *iptr;
	Page		page;

	/* The correct page should already be pinned and locked */
	page = BufferGetPage(buf);
	contents = (RevmapContents *) PageGetContents(page);
	iptr = (ItemPointerData *) contents->rm_tids;
	iptr += HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk);

	if (ItemPointerIsValid(&tid))
		ItemPointerSet(iptr,
					   ItemPointerGetBlockNumber(&tid),
					   ItemPointerGetOffsetNumber(&tid));
	else
		ItemPointerSetInvalid(iptr);
}

/*
 * Fetch the BrinTuple for a given heap block.
 *
 * The buffer containing the tuple is locked, and returned in *buf.  The
 * returned tuple points to the shared buffer and must not be freed; if caller
 * wants to use it after releasing the buffer lock, it must create its own
 * palloc'ed copy.  As an optimization, the caller can pass a pinned buffer
 * *buf on entry, which will avoid a pin-unpin cycle when the next tuple is on
 * the same page as a previous one.
 *
 * If no tuple is found for the given heap range, returns NULL.  In that case,
 * *buf might still be updated (and pin must be released by caller), but it's
 * not locked.
 *
 * The output tuple offset within the buffer is returned in *off, and its size
 * is returned in *size.
 */
BrinTuple *
brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk,
						 Buffer *buf, OffsetNumber *off, Size *size, int mode)
{
	Relation	idxRel = revmap->rm_irel;
	BlockNumber mapBlk;
	RevmapContents *contents;
	ItemPointerData *iptr;
	BlockNumber blk;
	Page		page;
	ItemId		lp;
	BrinTuple  *tup;
	ItemPointerData previptr;

	/* normalize the heap block number to be the first page in the range */
	heapBlk = (heapBlk / revmap->rm_pagesPerRange) * revmap->rm_pagesPerRange;
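	/*
	 * (For example, with a hypothetical pagesPerRange = 128, a request for
	 * heap block 200 is normalized to block 128, the first block of its
	 * range.)
	 */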
	/*
	 * Compute the revmap page number we need.  If Invalid is returned (i.e.,
	 * the revmap page hasn't been created yet), the requested page range is
	 * not summarized.
	 */
	mapBlk = revmap_get_blkno(revmap, heapBlk);
	if (mapBlk == InvalidBlockNumber)
	{
		*off = InvalidOffsetNumber;
		return NULL;
	}

	ItemPointerSetInvalid(&previptr);
	for (;;)
	{
		CHECK_FOR_INTERRUPTS();

		if (revmap->rm_currBuf == InvalidBuffer ||
			BufferGetBlockNumber(revmap->rm_currBuf) != mapBlk)
		{
			if (revmap->rm_currBuf != InvalidBuffer)
				ReleaseBuffer(revmap->rm_currBuf);

			Assert(mapBlk != InvalidBlockNumber);
			revmap->rm_currBuf = ReadBuffer(revmap->rm_irel, mapBlk);
		}

		LockBuffer(revmap->rm_currBuf, BUFFER_LOCK_SHARE);

		contents = (RevmapContents *)
			PageGetContents(BufferGetPage(revmap->rm_currBuf));
		iptr = contents->rm_tids;
		iptr += HEAPBLK_TO_REVMAP_INDEX(revmap->rm_pagesPerRange, heapBlk);

		if (!ItemPointerIsValid(iptr))
		{
			LockBuffer(revmap->rm_currBuf, BUFFER_LOCK_UNLOCK);
			return NULL;
		}

		/*
		 * Check the TID we got in a previous iteration, if any, and save the
		 * current TID we got from the revmap; if we loop, we can sanity-check
		 * that the next one we get is different.  Otherwise we might be stuck
		 * looping forever if the revmap is somehow badly broken.
		 */
		if (ItemPointerIsValid(&previptr) && ItemPointerEquals(&previptr, iptr))
			ereport(ERROR,
					(errcode(ERRCODE_INDEX_CORRUPTED),
					 errmsg_internal("corrupted BRIN index: inconsistent range map")));
		previptr = *iptr;

		blk = ItemPointerGetBlockNumber(iptr);
		*off = ItemPointerGetOffsetNumber(iptr);

		LockBuffer(revmap->rm_currBuf, BUFFER_LOCK_UNLOCK);

		/* Ok, got a pointer to where the BrinTuple should be.  Fetch it. */
		if (!BufferIsValid(*buf) || BufferGetBlockNumber(*buf) != blk)
		{
			if (BufferIsValid(*buf))
				ReleaseBuffer(*buf);
			*buf = ReadBuffer(idxRel, blk);
		}
		LockBuffer(*buf, mode);
		page = BufferGetPage(*buf);

		/* If we land on a revmap page, start over */
		if (BRIN_IS_REGULAR_PAGE(page))
		{
			/*
			 * If the offset number is greater than what's in the page, it's
			 * possible that the range was desummarized concurrently.  Just
			 * return NULL to handle that case.
			 */
			if (*off > PageGetMaxOffsetNumber(page))
			{
				LockBuffer(*buf, BUFFER_LOCK_UNLOCK);
				return NULL;
			}

			lp = PageGetItemId(page, *off);
			if (ItemIdIsUsed(lp))
			{
				tup = (BrinTuple *) PageGetItem(page, lp);

				if (tup->bt_blkno == heapBlk)
				{
					if (size)
						*size = ItemIdGetLength(lp);
					/* found it! */
					return tup;
				}
			}
		}

		/*
		 * No luck.  Assume that the revmap was updated concurrently.
		 */
		LockBuffer(*buf, BUFFER_LOCK_UNLOCK);
	}
	/* not reached, but keep compiler quiet */
	return NULL;
}

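/*
 * A sketch of the calling convention described above (hypothetical caller
 * code): keeping the same *buf across consecutive calls lets lookups that
 * land on the same index page reuse the pin.
 *
 *		Buffer		buf = InvalidBuffer;
 *		OffsetNumber off;
 *		Size		size;
 *		BrinTuple  *tup;
 *
 *		tup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, &size,
 *									   BUFFER_LOCK_SHARE);
 *		if (tup != NULL)
 *		{
 *			... copy or inspect tup, then drop the lock ...
 *			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
 *		}
 *		if (BufferIsValid(buf))
 *			ReleaseBuffer(buf);
 */
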
/*
 * Delete an index tuple, marking a page range as unsummarized.
 *
 * Index must be locked in ShareUpdateExclusiveLock mode.
 *
 * Return false if caller should retry.
 */
bool
brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk)
{
	BrinRevmap *revmap;
	BlockNumber pagesPerRange;
	RevmapContents *contents;
	ItemPointerData *iptr;
	ItemPointerData invalidIptr;
	BlockNumber revmapBlk;
	Buffer		revmapBuf;
	Buffer		regBuf;
	Page		revmapPg;
	Page		regPg;
	OffsetNumber revmapOffset;
	OffsetNumber regOffset;
	ItemId		lp;

	revmap = brinRevmapInitialize(idxrel, &pagesPerRange);

	revmapBlk = revmap_get_blkno(revmap, heapBlk);
	if (!BlockNumberIsValid(revmapBlk))
	{
		/* revmap page doesn't exist: range not summarized, we're done */
		brinRevmapTerminate(revmap);
		return true;
	}

	/* Lock the revmap page, obtain the index tuple pointer from it */
	revmapBuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
	revmapPg = BufferGetPage(revmapBuf);
	revmapOffset = HEAPBLK_TO_REVMAP_INDEX(revmap->rm_pagesPerRange, heapBlk);

	contents = (RevmapContents *) PageGetContents(revmapPg);
	iptr = contents->rm_tids;
	iptr += revmapOffset;

	if (!ItemPointerIsValid(iptr))
	{
		/* no index tuple: range not summarized, we're done */
		LockBuffer(revmapBuf, BUFFER_LOCK_UNLOCK);
		brinRevmapTerminate(revmap);
		return true;
	}

	regBuf = ReadBuffer(idxrel, ItemPointerGetBlockNumber(iptr));
	LockBuffer(regBuf, BUFFER_LOCK_EXCLUSIVE);
	regPg = BufferGetPage(regBuf);

	/* if this is no longer a regular page, tell caller to start over */
	if (!BRIN_IS_REGULAR_PAGE(regPg))
	{
		LockBuffer(revmapBuf, BUFFER_LOCK_UNLOCK);
		LockBuffer(regBuf, BUFFER_LOCK_UNLOCK);
		brinRevmapTerminate(revmap);
		return false;
	}

	regOffset = ItemPointerGetOffsetNumber(iptr);
	if (regOffset > PageGetMaxOffsetNumber(regPg))
		ereport(ERROR,
				(errcode(ERRCODE_INDEX_CORRUPTED),
				 errmsg("corrupted BRIN index: inconsistent range map")));

	lp = PageGetItemId(regPg, regOffset);
	if (!ItemIdIsUsed(lp))
		ereport(ERROR,
				(errcode(ERRCODE_INDEX_CORRUPTED),
				 errmsg("corrupted BRIN index: inconsistent range map")));

	/*
	 * Placeholder tuples only appear during unfinished summarization, and we
	 * hold ShareUpdateExclusiveLock, so this function cannot run concurrently
	 * with that.  So any placeholder tuples that exist are leftovers from a
	 * crashed or aborted summarization; remove them silently.
	 */

	START_CRIT_SECTION();

	ItemPointerSetInvalid(&invalidIptr);
	brinSetHeapBlockItemptr(revmapBuf, revmap->rm_pagesPerRange, heapBlk,
							invalidIptr);
	PageIndexTupleDeleteNoCompact(regPg, regOffset);
	/* XXX record free space in FSM? */

	MarkBufferDirty(regBuf);
	MarkBufferDirty(revmapBuf);

	if (RelationNeedsWAL(idxrel))
	{
		xl_brin_desummarize xlrec;
		XLogRecPtr	recptr;

		xlrec.pagesPerRange = revmap->rm_pagesPerRange;
		xlrec.heapBlk = heapBlk;
		xlrec.regOffset = regOffset;

		XLogBeginInsert();
		XLogRegisterData((char *) &xlrec, SizeOfBrinDesummarize);
		XLogRegisterBuffer(0, revmapBuf, 0);
		XLogRegisterBuffer(1, regBuf, REGBUF_STANDARD);
		recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_DESUMMARIZE);
		PageSetLSN(revmapPg, recptr);
		PageSetLSN(regPg, recptr);
	}

	END_CRIT_SECTION();

	UnlockReleaseBuffer(regBuf);
	LockBuffer(revmapBuf, BUFFER_LOCK_UNLOCK);
	brinRevmapTerminate(revmap);

	return true;
}

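/*
 * Since a false return means "retry", a caller would typically loop until
 * success, e.g. (a sketch, not taken from any particular caller):
 *
 *		do
 *		{
 *			CHECK_FOR_INTERRUPTS();
 *		} while (!brinRevmapDesummarizeRange(idxrel, heapBlk));
 */
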
/*
 * Given a heap block number, find the corresponding physical revmap block
 * number and return it.  If the revmap page hasn't been allocated yet, return
 * InvalidBlockNumber.
 */
static BlockNumber
revmap_get_blkno(BrinRevmap *revmap, BlockNumber heapBlk)
{
	BlockNumber targetblk;

	/* obtain revmap block number, skip 1 for metapage block */
	targetblk = HEAPBLK_TO_REVMAP_BLK(revmap->rm_pagesPerRange, heapBlk) + 1;

	/* Normal case: the revmap page is already allocated */
	if (targetblk <= revmap->rm_lastRevmapPage)
		return targetblk;

	return InvalidBlockNumber;
}

/*
 * Obtain and return a buffer containing the revmap page for the given heap
 * page.  The revmap must have been previously extended to cover that page.
 * The returned buffer is also recorded in the revmap struct; finishing the
 * revmap (brinRevmapTerminate) releases the buffer, so the caller needn't do
 * it explicitly.
 */
static Buffer
revmap_get_buffer(BrinRevmap *revmap, BlockNumber heapBlk)
{
	BlockNumber mapBlk;

	/* Translate the heap block number to physical index location. */
	mapBlk = revmap_get_blkno(revmap, heapBlk);

	if (mapBlk == InvalidBlockNumber)
		elog(ERROR, "revmap does not cover heap block %u", heapBlk);

	/* Ensure the buffer we got is in the expected range */
	Assert(mapBlk != BRIN_METAPAGE_BLKNO &&
		   mapBlk <= revmap->rm_lastRevmapPage);

	/*
	 * Obtain the buffer from which we need to read.  If we already have the
	 * correct buffer in our access struct, use that; otherwise, release that
	 * (if valid) and read the one we need.
	 */
	if (revmap->rm_currBuf == InvalidBuffer ||
		mapBlk != BufferGetBlockNumber(revmap->rm_currBuf))
	{
		if (revmap->rm_currBuf != InvalidBuffer)
			ReleaseBuffer(revmap->rm_currBuf);

		revmap->rm_currBuf = ReadBuffer(revmap->rm_irel, mapBlk);
	}

	return revmap->rm_currBuf;
}

/*
 * Given a heap block number, find the corresponding physical revmap block
 * number and return it.  If the revmap page hasn't been allocated yet, extend
 * the revmap until it is.
 */
static BlockNumber
revmap_extend_and_get_blkno(BrinRevmap *revmap, BlockNumber heapBlk)
{
	BlockNumber targetblk;

	/* obtain revmap block number, skip 1 for metapage block */
	targetblk = HEAPBLK_TO_REVMAP_BLK(revmap->rm_pagesPerRange, heapBlk) + 1;

	/* Extend the revmap, if necessary */
	while (targetblk > revmap->rm_lastRevmapPage)
	{
		CHECK_FOR_INTERRUPTS();
		revmap_physical_extend(revmap);
	}

	return targetblk;
}

/*
 * Try to extend the revmap by one page.  This might not happen for a number
 * of reasons; caller is expected to retry until the expected outcome is
 * obtained.
 */
static void
revmap_physical_extend(BrinRevmap *revmap)
{
	Buffer		buf;
	Page		page;
	Page		metapage;
	BrinMetaPageData *metadata;
	BlockNumber mapBlk;
	BlockNumber nblocks;
	Relation	irel = revmap->rm_irel;

	/*
	 * Lock the metapage.  This locks out concurrent extensions of the
	 * revmap, but note that we still need to grab the relation extension
	 * lock because another backend can extend the index with regular BRIN
	 * pages.
	 */
	LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_EXCLUSIVE);
	metapage = BufferGetPage(revmap->rm_metaBuf);
	metadata = (BrinMetaPageData *) PageGetContents(metapage);

	/*
	 * Check that our cached lastRevmapPage value was up-to-date; if it
	 * wasn't, update the cached copy and have caller start over.
	 */
	if (metadata->lastRevmapPage != revmap->rm_lastRevmapPage)
	{
		revmap->rm_lastRevmapPage = metadata->lastRevmapPage;
		LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK);
		return;
	}
	mapBlk = metadata->lastRevmapPage + 1;

	nblocks = RelationGetNumberOfBlocks(irel);
	if (mapBlk < nblocks)
	{
		buf = ReadBuffer(irel, mapBlk);
		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
		page = BufferGetPage(buf);
	}
	else
	{
		buf = ExtendBufferedRel(BMR_REL(irel), MAIN_FORKNUM, NULL,
								EB_LOCK_FIRST);
		if (BufferGetBlockNumber(buf) != mapBlk)
		{
			/*
			 * Very rare corner case: somebody extended the relation
			 * concurrently after we read its length.  If this happens, give
			 * up and have caller start over.  We will have to evacuate that
			 * page from under whoever is using it.
			 */
			LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK);
			UnlockReleaseBuffer(buf);
			return;
		}
		page = BufferGetPage(buf);
	}

	/* Check that it's a regular block (or an empty page) */
	if (!PageIsNew(page) && !BRIN_IS_REGULAR_PAGE(page))
		ereport(ERROR,
				(errcode(ERRCODE_INDEX_CORRUPTED),
				 errmsg("unexpected page type 0x%04X in BRIN index \"%s\" block %u",
						BrinPageType(page),
						RelationGetRelationName(irel),
						BufferGetBlockNumber(buf))));

	/* If the page is in use, evacuate it and restart */
	if (brin_start_evacuating_page(irel, buf))
	{
		LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK);
		brin_evacuate_page(irel, revmap->rm_pagesPerRange, revmap, buf);

		/* have caller start over */
		return;
	}

	/*
	 * Ok, we have now locked the metapage and the target block.
	 * Re-initialize the target block as a revmap page, and update the
	 * metapage.
	 */
	START_CRIT_SECTION();

	/* the rm_tids array is initialized to all invalid by PageInit */
	brin_page_init(page, BRIN_PAGETYPE_REVMAP);
	MarkBufferDirty(buf);

	metadata->lastRevmapPage = mapBlk;

	/*
	 * Set pd_lower just past the end of the metadata.  This is essential,
	 * because without doing so, metadata will be lost if xlog.c compresses
	 * the page.  (We must do this here because pre-v11 versions of PG did
	 * not set the metapage's pd_lower correctly, so a pg_upgraded index
	 * might contain the wrong value.)
	 */
	((PageHeader) metapage)->pd_lower =
		((char *) metadata + sizeof(BrinMetaPageData)) - (char *) metapage;

	MarkBufferDirty(revmap->rm_metaBuf);

	if (RelationNeedsWAL(revmap->rm_irel))
	{
		xl_brin_revmap_extend xlrec;
		XLogRecPtr	recptr;

		xlrec.targetBlk = mapBlk;

		XLogBeginInsert();
		XLogRegisterData((char *) &xlrec, SizeOfBrinRevmapExtend);
		XLogRegisterBuffer(0, revmap->rm_metaBuf, REGBUF_STANDARD);

		XLogRegisterBuffer(1, buf, REGBUF_WILL_INIT);

		recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_REVMAP_EXTEND);
		PageSetLSN(metapage, recptr);
		PageSetLSN(page, recptr);
	}

	END_CRIT_SECTION();

	LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK);

	UnlockReleaseBuffer(buf);
}