1 /*-------------------------------------------------------------------------
4 * POSTGRES free space map for quickly finding free space in relations
7 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/storage/freespace/freespace.c
16 * Free Space Map keeps track of the amount of free space on pages, and
17 * allows quickly searching for a page with enough free space. The FSM is
18 * stored in a dedicated relation fork of all heap relations, and those
19 * index access methods that need it (see also indexfsm.c). See README for
22 *-------------------------------------------------------------------------
26 #include "access/htup_details.h"
27 #include "access/xloginsert.h"
28 #include "access/xlogutils.h"
29 #include "miscadmin.h"
30 #include "storage/freespace.h"
31 #include "storage/fsm_internals.h"
32 #include "storage/smgr.h"
33 #include "utils/rel.h"
37 * We use just one byte to store the amount of free space on a page, so we
38 * divide the amount of free space a page can have into 256 different
39 * categories. The highest category, 255, represents a page with at least
40 * MaxFSMRequestSize bytes of free space, and the second highest category
41 * represents the range from 254 * FSM_CAT_STEP, inclusive, to
42 * MaxFSMRequestSize, exclusive.
44 * MaxFSMRequestSize depends on the architecture and BLCKSZ, but assuming
45 * default 8k BLCKSZ, and that MaxFSMRequestSize is 8164 bytes, the
46 * categories look like this:
57 * The reason that MaxFSMRequestSize is special is that if MaxFSMRequestSize
58 * isn't equal to a range boundary, a page with exactly MaxFSMRequestSize
59 * bytes of free space wouldn't satisfy a request for MaxFSMRequestSize
60 * bytes. If there isn't more than MaxFSMRequestSize bytes of free space on a
61 * completely empty page, that would mean that we could never satisfy a
62 * request of exactly MaxFSMRequestSize bytes.
64 #define FSM_CATEGORIES 256
65 #define FSM_CAT_STEP (BLCKSZ / FSM_CATEGORIES)
66 #define MaxFSMRequestSize MaxHeapTupleSize
69 * Depth of the on-disk tree. We need to be able to address 2^32-1 blocks,
70 * and 1626 is the smallest number that satisfies X^3 >= 2^32-1. Likewise,
71 * 256 is the smallest number that satisfies X^4 >= 2^32-1. In practice,
72 * this means that 4096 bytes is the smallest BLCKSZ that we can get away
73 * with a 3-level tree, and 512 is the smallest we support.
75 #define FSM_TREE_DEPTH ((SlotsPerFSMPage >= 1626) ? 3 : 4)
77 #define FSM_ROOT_LEVEL (FSM_TREE_DEPTH - 1)
78 #define FSM_BOTTOM_LEVEL 0
81 * The internal FSM routines work on a logical addressing scheme. Each
82 * level of the tree can be thought of as a separately addressable file.
86 int level
; /* level */
87 int logpageno
; /* page number within the level */
90 /* Address of the root page. */
91 static const FSMAddress FSM_ROOT_ADDRESS
= {FSM_ROOT_LEVEL
, 0};
93 /* functions to navigate the tree */
94 static FSMAddress
fsm_get_child(FSMAddress parent
, uint16 slot
);
95 static FSMAddress
fsm_get_parent(FSMAddress child
, uint16
*slot
);
96 static FSMAddress
fsm_get_location(BlockNumber heapblk
, uint16
*slot
);
97 static BlockNumber
fsm_get_heap_blk(FSMAddress addr
, uint16 slot
);
98 static BlockNumber
fsm_logical_to_physical(FSMAddress addr
);
100 static Buffer
fsm_readbuf(Relation rel
, FSMAddress addr
, bool extend
);
101 static Buffer
fsm_extend(Relation rel
, BlockNumber fsm_nblocks
);
103 /* functions to convert amount of free space to a FSM category */
104 static uint8
fsm_space_avail_to_cat(Size avail
);
105 static uint8
fsm_space_needed_to_cat(Size needed
);
106 static Size
fsm_space_cat_to_avail(uint8 cat
);
108 /* workhorse functions for various operations */
109 static int fsm_set_and_search(Relation rel
, FSMAddress addr
, uint16 slot
,
110 uint8 newValue
, uint8 minValue
);
111 static BlockNumber
fsm_search(Relation rel
, uint8 min_cat
);
112 static uint8
fsm_vacuum_page(Relation rel
, FSMAddress addr
,
113 BlockNumber start
, BlockNumber end
,
115 static bool fsm_does_block_exist(Relation rel
, BlockNumber blknumber
);
118 /******** Public API ********/
121 * GetPageWithFreeSpace - try to find a page in the given relation with
122 * at least the specified amount of free space.
124 * If successful, return the block number; if not, return InvalidBlockNumber.
126 * The caller must be prepared for the possibility that the returned page
127 * will turn out to have too little space available by the time the caller
128 * gets a lock on it. In that case, the caller should report the actual
129 * amount of free space available on that page and then try again (see
130 * RecordAndGetPageWithFreeSpace). If InvalidBlockNumber is returned,
131 * extend the relation.
133 * This can trigger FSM updates if any FSM entry is found to point to a block
134 * past the end of the relation.
137 GetPageWithFreeSpace(Relation rel
, Size spaceNeeded
)
139 uint8 min_cat
= fsm_space_needed_to_cat(spaceNeeded
);
141 return fsm_search(rel
, min_cat
);
145 * RecordAndGetPageWithFreeSpace - update info about a page and try again.
147 * We provide this combo form to save some locking overhead, compared to
148 * separate RecordPageWithFreeSpace + GetPageWithFreeSpace calls. There's
149 * also some effort to return a page close to the old page; if there's a
150 * page with enough free space on the same FSM page where the old one page
151 * is located, it is preferred.
154 RecordAndGetPageWithFreeSpace(Relation rel
, BlockNumber oldPage
,
155 Size oldSpaceAvail
, Size spaceNeeded
)
157 int old_cat
= fsm_space_avail_to_cat(oldSpaceAvail
);
158 int search_cat
= fsm_space_needed_to_cat(spaceNeeded
);
163 /* Get the location of the FSM byte representing the heap block */
164 addr
= fsm_get_location(oldPage
, &slot
);
166 search_slot
= fsm_set_and_search(rel
, addr
, slot
, old_cat
, search_cat
);
169 * If fsm_set_and_search found a suitable new block, return that.
170 * Otherwise, search as usual.
172 if (search_slot
!= -1)
174 BlockNumber blknum
= fsm_get_heap_blk(addr
, search_slot
);
177 * Check that the blknum is actually in the relation. Don't try to
178 * update the FSM in that case, just fall back to the other case
180 if (fsm_does_block_exist(rel
, blknum
))
183 return fsm_search(rel
, search_cat
);
187 * RecordPageWithFreeSpace - update info about a page.
189 * Note that if the new spaceAvail value is higher than the old value stored
190 * in the FSM, the space might not become visible to searchers until the next
191 * FreeSpaceMapVacuum call, which updates the upper level pages.
194 RecordPageWithFreeSpace(Relation rel
, BlockNumber heapBlk
, Size spaceAvail
)
196 int new_cat
= fsm_space_avail_to_cat(spaceAvail
);
200 /* Get the location of the FSM byte representing the heap block */
201 addr
= fsm_get_location(heapBlk
, &slot
);
203 fsm_set_and_search(rel
, addr
, slot
, new_cat
, 0);
207 * XLogRecordPageWithFreeSpace - like RecordPageWithFreeSpace, for use in
211 XLogRecordPageWithFreeSpace(RelFileLocator rlocator
, BlockNumber heapBlk
,
214 int new_cat
= fsm_space_avail_to_cat(spaceAvail
);
221 /* Get the location of the FSM byte representing the heap block */
222 addr
= fsm_get_location(heapBlk
, &slot
);
223 blkno
= fsm_logical_to_physical(addr
);
225 /* If the page doesn't exist already, extend */
226 buf
= XLogReadBufferExtended(rlocator
, FSM_FORKNUM
, blkno
,
227 RBM_ZERO_ON_ERROR
, InvalidBuffer
);
228 LockBuffer(buf
, BUFFER_LOCK_EXCLUSIVE
);
230 page
= BufferGetPage(buf
);
232 PageInit(page
, BLCKSZ
, 0);
234 if (fsm_set_avail(page
, slot
, new_cat
))
235 MarkBufferDirtyHint(buf
, false);
236 UnlockReleaseBuffer(buf
);
240 * GetRecordedFreeSpace - return the amount of free space on a particular page,
241 * according to the FSM.
244 GetRecordedFreeSpace(Relation rel
, BlockNumber heapBlk
)
251 /* Get the location of the FSM byte representing the heap block */
252 addr
= fsm_get_location(heapBlk
, &slot
);
254 buf
= fsm_readbuf(rel
, addr
, false);
255 if (!BufferIsValid(buf
))
257 cat
= fsm_get_avail(BufferGetPage(buf
), slot
);
260 return fsm_space_cat_to_avail(cat
);
264 * FreeSpaceMapPrepareTruncateRel - prepare for truncation of a relation.
266 * nblocks is the new size of the heap.
268 * Return the number of blocks of new FSM.
269 * If it's InvalidBlockNumber, there is nothing to truncate;
270 * otherwise the caller is responsible for calling smgrtruncate()
271 * to truncate the FSM pages, and FreeSpaceMapVacuumRange()
272 * to update upper-level pages in the FSM.
275 FreeSpaceMapPrepareTruncateRel(Relation rel
, BlockNumber nblocks
)
277 BlockNumber new_nfsmblocks
;
278 FSMAddress first_removed_address
;
279 uint16 first_removed_slot
;
283 * If no FSM has been created yet for this relation, there's nothing to
286 if (!smgrexists(RelationGetSmgr(rel
), FSM_FORKNUM
))
287 return InvalidBlockNumber
;
289 /* Get the location in the FSM of the first removed heap block */
290 first_removed_address
= fsm_get_location(nblocks
, &first_removed_slot
);
293 * Zero out the tail of the last remaining FSM page. If the slot
294 * representing the first removed heap block is at a page boundary, as the
295 * first slot on the FSM page that first_removed_address points to, we can
296 * just truncate that page altogether.
298 if (first_removed_slot
> 0)
300 buf
= fsm_readbuf(rel
, first_removed_address
, false);
301 if (!BufferIsValid(buf
))
302 return InvalidBlockNumber
; /* nothing to do; the FSM was already
304 LockBuffer(buf
, BUFFER_LOCK_EXCLUSIVE
);
306 /* NO EREPORT(ERROR) from here till changes are logged */
307 START_CRIT_SECTION();
309 fsm_truncate_avail(BufferGetPage(buf
), first_removed_slot
);
312 * This change is non-critical, because fsm_does_block_exist() would
313 * stop us from returning a truncated-away block. However, since this
314 * may remove up to SlotsPerFSMPage slots, it's nice to avoid the cost
315 * of that many fsm_does_block_exist() rejections. Use a full
316 * MarkBufferDirty(), not MarkBufferDirtyHint().
318 MarkBufferDirty(buf
);
321 * WAL-log like MarkBufferDirtyHint() might have done, just to avoid
322 * differing from the rest of the file in this respect. This is
323 * optional; see README mention of full page images. XXX consider
324 * XLogSaveBufferForHint() for even closer similarity.
326 * A higher-level operation calls us at WAL replay. If we crash
327 * before the XLOG_SMGR_TRUNCATE flushes to disk, main fork length has
328 * not changed, and our fork remains valid. If we crash after that
329 * flush, redo will return here.
331 if (!InRecovery
&& RelationNeedsWAL(rel
) && XLogHintBitIsNeeded())
332 log_newpage_buffer(buf
, false);
336 UnlockReleaseBuffer(buf
);
338 new_nfsmblocks
= fsm_logical_to_physical(first_removed_address
) + 1;
342 new_nfsmblocks
= fsm_logical_to_physical(first_removed_address
);
343 if (smgrnblocks(RelationGetSmgr(rel
), FSM_FORKNUM
) <= new_nfsmblocks
)
344 return InvalidBlockNumber
; /* nothing to do; the FSM was already
348 return new_nfsmblocks
;
352 * FreeSpaceMapVacuum - update upper-level pages in the rel's FSM
354 * We assume that the bottom-level pages have already been updated with
355 * new free-space information.
358 FreeSpaceMapVacuum(Relation rel
)
362 /* Recursively scan the tree, starting at the root */
363 (void) fsm_vacuum_page(rel
, FSM_ROOT_ADDRESS
,
364 (BlockNumber
) 0, InvalidBlockNumber
,
369 * FreeSpaceMapVacuumRange - update upper-level pages in the rel's FSM
371 * As above, but assume that only heap pages between start and end-1 inclusive
372 * have new free-space information, so update only the upper-level slots
373 * covering that block range. end == InvalidBlockNumber is equivalent to
374 * "all the rest of the relation".
377 FreeSpaceMapVacuumRange(Relation rel
, BlockNumber start
, BlockNumber end
)
381 /* Recursively scan the tree, starting at the root */
383 (void) fsm_vacuum_page(rel
, FSM_ROOT_ADDRESS
, start
, end
, &dummy
);
386 /******** Internal routines ********/
389 * Return category corresponding x bytes of free space
392 fsm_space_avail_to_cat(Size avail
)
396 Assert(avail
< BLCKSZ
);
398 if (avail
>= MaxFSMRequestSize
)
401 cat
= avail
/ FSM_CAT_STEP
;
404 * The highest category, 255, is reserved for MaxFSMRequestSize bytes or
414 * Return the lower bound of the range of free space represented by given
418 fsm_space_cat_to_avail(uint8 cat
)
420 /* The highest category represents exactly MaxFSMRequestSize bytes. */
422 return MaxFSMRequestSize
;
424 return cat
* FSM_CAT_STEP
;
428 * Which category does a page need to have, to accommodate x bytes of data?
429 * While fsm_space_avail_to_cat() rounds down, this needs to round up.
432 fsm_space_needed_to_cat(Size needed
)
436 /* Can't ask for more space than the highest category represents */
437 if (needed
> MaxFSMRequestSize
)
438 elog(ERROR
, "invalid FSM request size %zu", needed
);
443 cat
= (needed
+ FSM_CAT_STEP
- 1) / FSM_CAT_STEP
;
452 * Returns the physical block number of a FSM page
455 fsm_logical_to_physical(FSMAddress addr
)
462 * Calculate the logical page number of the first leaf page below the
465 leafno
= addr
.logpageno
;
466 for (l
= 0; l
< addr
.level
; l
++)
467 leafno
*= SlotsPerFSMPage
;
469 /* Count upper level nodes required to address the leaf page */
471 for (l
= 0; l
< FSM_TREE_DEPTH
; l
++)
474 leafno
/= SlotsPerFSMPage
;
478 * If the page we were asked for wasn't at the bottom level, subtract the
479 * additional lower level pages we counted above.
483 /* Turn the page count into 0-based block number */
488 * Return the FSM location corresponding to given heap block.
491 fsm_get_location(BlockNumber heapblk
, uint16
*slot
)
495 addr
.level
= FSM_BOTTOM_LEVEL
;
496 addr
.logpageno
= heapblk
/ SlotsPerFSMPage
;
497 *slot
= heapblk
% SlotsPerFSMPage
;
503 * Return the heap block number corresponding to given location in the FSM.
506 fsm_get_heap_blk(FSMAddress addr
, uint16 slot
)
508 Assert(addr
.level
== FSM_BOTTOM_LEVEL
);
509 return ((unsigned int) addr
.logpageno
) * SlotsPerFSMPage
+ slot
;
513 * Given a logical address of a child page, get the logical page number of
514 * the parent, and the slot within the parent corresponding to the child.
517 fsm_get_parent(FSMAddress child
, uint16
*slot
)
521 Assert(child
.level
< FSM_ROOT_LEVEL
);
523 parent
.level
= child
.level
+ 1;
524 parent
.logpageno
= child
.logpageno
/ SlotsPerFSMPage
;
525 *slot
= child
.logpageno
% SlotsPerFSMPage
;
531 * Given a logical address of a parent page and a slot number, get the
532 * logical address of the corresponding child page.
535 fsm_get_child(FSMAddress parent
, uint16 slot
)
539 Assert(parent
.level
> FSM_BOTTOM_LEVEL
);
541 child
.level
= parent
.level
- 1;
542 child
.logpageno
= parent
.logpageno
* SlotsPerFSMPage
+ slot
;
550 * If the page doesn't exist, InvalidBuffer is returned, or if 'extend' is
551 * true, the FSM file is extended.
554 fsm_readbuf(Relation rel
, FSMAddress addr
, bool extend
)
556 BlockNumber blkno
= fsm_logical_to_physical(addr
);
558 SMgrRelation reln
= RelationGetSmgr(rel
);
561 * If we haven't cached the size of the FSM yet, check it first. Also
562 * recheck if the requested block seems to be past end, since our cached
563 * value might be stale. (We send smgr inval messages on truncation, but
566 if (reln
->smgr_cached_nblocks
[FSM_FORKNUM
] == InvalidBlockNumber
||
567 blkno
>= reln
->smgr_cached_nblocks
[FSM_FORKNUM
])
569 /* Invalidate the cache so smgrnblocks asks the kernel. */
570 reln
->smgr_cached_nblocks
[FSM_FORKNUM
] = InvalidBlockNumber
;
571 if (smgrexists(reln
, FSM_FORKNUM
))
572 smgrnblocks(reln
, FSM_FORKNUM
);
574 reln
->smgr_cached_nblocks
[FSM_FORKNUM
] = 0;
578 * For reading we use ZERO_ON_ERROR mode, and initialize the page if
579 * necessary. The FSM information is not accurate anyway, so it's better
580 * to clear corrupt pages than error out. Since the FSM changes are not
581 * WAL-logged, the so-called torn page problem on crash can lead to pages
582 * with corrupt headers, for example.
584 * We use the same path below to initialize pages when extending the
585 * relation, as a concurrent extension can end up with vm_extend()
586 * returning an already-initialized page.
588 if (blkno
>= reln
->smgr_cached_nblocks
[FSM_FORKNUM
])
591 buf
= fsm_extend(rel
, blkno
+ 1);
593 return InvalidBuffer
;
596 buf
= ReadBufferExtended(rel
, FSM_FORKNUM
, blkno
, RBM_ZERO_ON_ERROR
, NULL
);
599 * Initializing the page when needed is trickier than it looks, because of
600 * the possibility of multiple backends doing this concurrently, and our
601 * desire to not uselessly take the buffer lock in the normal path where
602 * the page is OK. We must take the lock to initialize the page, so
603 * recheck page newness after we have the lock, in case someone else
604 * already did it. Also, because we initially check PageIsNew with no
605 * lock, it's possible to fall through and return the buffer while someone
606 * else is still initializing the page (i.e., we might see pd_upper as set
607 * but other page header fields are still zeroes). This is harmless for
608 * callers that will take a buffer lock themselves, but some callers
609 * inspect the page without any lock at all. The latter is OK only so
610 * long as it doesn't depend on the page header having correct contents.
611 * Current usage is safe because PageGetContents() does not require that.
613 if (PageIsNew(BufferGetPage(buf
)))
615 LockBuffer(buf
, BUFFER_LOCK_EXCLUSIVE
);
616 if (PageIsNew(BufferGetPage(buf
)))
617 PageInit(BufferGetPage(buf
), BLCKSZ
, 0);
618 LockBuffer(buf
, BUFFER_LOCK_UNLOCK
);
624 * Ensure that the FSM fork is at least fsm_nblocks long, extending
625 * it if necessary with empty pages. And by empty, I mean pages filled
626 * with zeros, meaning there's no free space.
629 fsm_extend(Relation rel
, BlockNumber fsm_nblocks
)
631 return ExtendBufferedRelTo(BMR_REL(rel
), FSM_FORKNUM
, NULL
,
632 EB_CREATE_FORK_IF_NEEDED
|
639 * Set value in given FSM page and slot.
641 * If minValue > 0, the updated page is also searched for a page with at
642 * least minValue of free space. If one is found, its slot number is
643 * returned, -1 otherwise.
646 fsm_set_and_search(Relation rel
, FSMAddress addr
, uint16 slot
,
647 uint8 newValue
, uint8 minValue
)
653 buf
= fsm_readbuf(rel
, addr
, true);
654 LockBuffer(buf
, BUFFER_LOCK_EXCLUSIVE
);
656 page
= BufferGetPage(buf
);
658 if (fsm_set_avail(page
, slot
, newValue
))
659 MarkBufferDirtyHint(buf
, false);
663 /* Search while we still hold the lock */
664 newslot
= fsm_search_avail(buf
, minValue
,
665 addr
.level
== FSM_BOTTOM_LEVEL
,
669 UnlockReleaseBuffer(buf
);
675 * Search the tree for a heap page with at least min_cat of free space
678 fsm_search(Relation rel
, uint8 min_cat
)
681 FSMAddress addr
= FSM_ROOT_ADDRESS
;
689 /* Read the FSM page. */
690 buf
= fsm_readbuf(rel
, addr
, false);
692 /* Search within the page */
693 if (BufferIsValid(buf
))
695 LockBuffer(buf
, BUFFER_LOCK_SHARE
);
696 slot
= fsm_search_avail(buf
, min_cat
,
697 (addr
.level
== FSM_BOTTOM_LEVEL
),
701 max_avail
= fsm_get_max_avail(BufferGetPage(buf
));
702 UnlockReleaseBuffer(buf
);
706 /* Keep the pin for possible update below */
707 LockBuffer(buf
, BUFFER_LOCK_UNLOCK
);
716 * Descend the tree, or return the found block if we're at the
719 if (addr
.level
== FSM_BOTTOM_LEVEL
)
721 BlockNumber blkno
= fsm_get_heap_blk(addr
, slot
);
724 if (fsm_does_block_exist(rel
, blkno
))
731 * Block is past the end of the relation. Update FSM, and
732 * restart from root. The usual "advancenext" behavior is
733 * pessimal for this rare scenario, since every later slot is
734 * unusable in the same way. We could zero all affected slots
735 * on the same FSM page, but don't bet on the benefits of that
736 * optimization justifying its compiled code bulk.
738 page
= BufferGetPage(buf
);
739 LockBuffer(buf
, BUFFER_LOCK_EXCLUSIVE
);
740 fsm_set_avail(page
, slot
, 0);
741 MarkBufferDirtyHint(buf
, false);
742 UnlockReleaseBuffer(buf
);
743 if (restarts
++ > 10000) /* same rationale as below */
744 return InvalidBlockNumber
;
745 addr
= FSM_ROOT_ADDRESS
;
751 addr
= fsm_get_child(addr
, slot
);
753 else if (addr
.level
== FSM_ROOT_LEVEL
)
756 * At the root, failure means there's no page with enough free
757 * space in the FSM. Give up.
759 return InvalidBlockNumber
;
767 * At lower level, failure can happen if the value in the upper-
768 * level node didn't reflect the value on the lower page. Update
769 * the upper node, to avoid falling into the same trap again, and
772 * There's a race condition here, if another backend updates this
773 * page right after we release it, and gets the lock on the parent
774 * page before us. We'll then update the parent page with the now
775 * stale information we had. It's OK, because it should happen
776 * rarely, and will be fixed by the next vacuum.
778 parent
= fsm_get_parent(addr
, &parentslot
);
779 fsm_set_and_search(rel
, parent
, parentslot
, max_avail
, 0);
782 * If the upper pages are badly out of date, we might need to loop
783 * quite a few times, updating them as we go. Any inconsistencies
784 * should eventually be corrected and the loop should end. Looping
785 * indefinitely is nevertheless scary, so provide an emergency
788 if (restarts
++ > 10000)
789 return InvalidBlockNumber
;
791 /* Start search all over from the root */
792 addr
= FSM_ROOT_ADDRESS
;
799 * Recursive guts of FreeSpaceMapVacuum
801 * Examine the FSM page indicated by addr, as well as its children, updating
802 * upper-level nodes that cover the heap block range from start to end-1.
803 * (It's okay if end is beyond the actual end of the map.)
804 * Return the maximum freespace value on this page.
806 * If addr is past the end of the FSM, set *eof_p to true and return 0.
808 * This traverses the tree in depth-first order. The tree is stored
809 * physically in depth-first order, so this should be pretty I/O efficient.
812 fsm_vacuum_page(Relation rel
, FSMAddress addr
,
813 BlockNumber start
, BlockNumber end
,
820 /* Read the page if it exists, or return EOF */
821 buf
= fsm_readbuf(rel
, addr
, false);
822 if (!BufferIsValid(buf
))
830 page
= BufferGetPage(buf
);
833 * If we're above the bottom level, recurse into children, and fix the
834 * information stored about them at this level.
836 if (addr
.level
> FSM_BOTTOM_LEVEL
)
838 FSMAddress fsm_start
,
840 uint16 fsm_start_slot
,
848 * Compute the range of slots we need to update on this page, given
849 * the requested range of heap blocks to consider. The first slot to
850 * update is the one covering the "start" block, and the last slot is
851 * the one covering "end - 1". (Some of this work will be duplicated
852 * in each recursive call, but it's cheap enough to not worry about.)
854 fsm_start
= fsm_get_location(start
, &fsm_start_slot
);
855 fsm_end
= fsm_get_location(end
- 1, &fsm_end_slot
);
857 while (fsm_start
.level
< addr
.level
)
859 fsm_start
= fsm_get_parent(fsm_start
, &fsm_start_slot
);
860 fsm_end
= fsm_get_parent(fsm_end
, &fsm_end_slot
);
862 Assert(fsm_start
.level
== addr
.level
);
864 if (fsm_start
.logpageno
== addr
.logpageno
)
865 start_slot
= fsm_start_slot
;
866 else if (fsm_start
.logpageno
> addr
.logpageno
)
867 start_slot
= SlotsPerFSMPage
; /* shouldn't get here... */
871 if (fsm_end
.logpageno
== addr
.logpageno
)
872 end_slot
= fsm_end_slot
;
873 else if (fsm_end
.logpageno
> addr
.logpageno
)
874 end_slot
= SlotsPerFSMPage
- 1;
876 end_slot
= -1; /* shouldn't get here... */
878 for (slot
= start_slot
; slot
<= end_slot
; slot
++)
882 CHECK_FOR_INTERRUPTS();
884 /* After we hit end-of-file, just clear the rest of the slots */
886 child_avail
= fsm_vacuum_page(rel
, fsm_get_child(addr
, slot
),
892 /* Update information about the child */
893 if (fsm_get_avail(page
, slot
) != child_avail
)
895 LockBuffer(buf
, BUFFER_LOCK_EXCLUSIVE
);
896 fsm_set_avail(page
, slot
, child_avail
);
897 MarkBufferDirtyHint(buf
, false);
898 LockBuffer(buf
, BUFFER_LOCK_UNLOCK
);
903 /* Now get the maximum value on the page, to return to caller */
904 max_avail
= fsm_get_max_avail(page
);
907 * Reset the next slot pointer. This encourages the use of low-numbered
908 * pages, increasing the chances that a later vacuum can truncate the
909 * relation. We don't bother with a lock here, nor with marking the page
910 * dirty if it wasn't already, since this is just a hint.
912 ((FSMPage
) PageGetContents(page
))->fp_next_slot
= 0;
921 * Check whether a block number is past the end of the relation. This can
922 * happen after WAL replay, if the FSM reached disk but newly-extended pages
923 * it refers to did not.
926 fsm_does_block_exist(Relation rel
, BlockNumber blknumber
)
928 SMgrRelation smgr
= RelationGetSmgr(rel
);
931 * If below the cached nblocks, the block surely exists. Otherwise, we
932 * face a trade-off. We opt to compare to a fresh nblocks, incurring
933 * lseek() overhead. The alternative would be to assume the block does
934 * not exist, but that would cause FSM to set zero space available for
935 * blocks that main fork extension just recorded.
937 return ((BlockNumberIsValid(smgr
->smgr_cached_nblocks
[MAIN_FORKNUM
]) &&
938 blknumber
< smgr
->smgr_cached_nblocks
[MAIN_FORKNUM
]) ||
939 blknumber
< RelationGetNumberOfBlocks(rel
));