From 97cd1e801f283d951a35a739781ee6242ab40482 Mon Sep 17 00:00:00 2001 From: heikki Date: Fri, 31 Oct 2008 19:40:27 +0000 Subject: [PATCH] Update FSM on WAL replay. This is a bit limited; the FSM is only updated on non-full-page-image WAL records, and quite arbitrarily, only if there's less than 20% free space on the page after the insert/update (not on HOT updates, though). The 20% cutoff should avoid most of the overhead, when replaying a bulk insertion, for example, while ensuring that pages that are full are marked as full in the FSM. This is mostly to avoid the nasty worst-case scenario, where you replay from a PITR archive, and the FSM information in the base backup is really out of date. If there were a lot of pages that the outdated FSM claims to have free space, but don't actually have any, the first unlucky inserter after the recovery would traverse through all those pages, just to find out that they're full. We didn't have this problem with the old FSM implementation, because we simply threw the FSM information away on a non-clean shutdown. 
--- src/backend/access/heap/heapam.c | 63 ++++++++++++++++++++++++++++--- src/backend/storage/freespace/freespace.c | 30 +++++++++++++++ src/include/storage/freespace.h | 2 + 3 files changed, 89 insertions(+), 6 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index f6584e9b53..a9eabeb7be 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -54,6 +54,7 @@ #include "miscadmin.h" #include "pgstat.h" #include "storage/bufmgr.h" +#include "storage/freespace.h" #include "storage/lmgr.h" #include "storage/procarray.h" #include "storage/smgr.h" @@ -4022,6 +4023,7 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record, bool clean_move) int nredirected; int ndead; int nunused; + Size freespace; if (record->xl_info & XLR_BKP_BLOCK_1) return; @@ -4053,6 +4055,8 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record, bool clean_move) nowunused, nunused, clean_move); + freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */ + /* * Note: we don't worry about updating the page's prunability hints. * At worst this will cause an extra prune cycle to occur soon. @@ -4062,6 +4066,15 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record, bool clean_move) PageSetTLI(page, ThisTimeLineID); MarkBufferDirty(buffer); UnlockReleaseBuffer(buffer); + + /* + * Update the FSM as well. + * + * XXX: We don't get here if the page was restored from full page image. + * We don't bother to update the FSM in that case, it doesn't need to be + * totally accurate anyway. 
+ */ + XLogRecordPageWithFreeSpace(xlrec->node, xlrec->block, freespace); } static void @@ -4205,15 +4218,17 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record) HeapTupleHeader htup; xl_heap_header xlhdr; uint32 newlen; + Size freespace; + BlockNumber blkno; if (record->xl_info & XLR_BKP_BLOCK_1) return; + blkno = ItemPointerGetBlockNumber(&(xlrec->target.tid)); + if (record->xl_info & XLOG_HEAP_INIT_PAGE) { - buffer = XLogReadBuffer(xlrec->target.node, - ItemPointerGetBlockNumber(&(xlrec->target.tid)), - true); + buffer = XLogReadBuffer(xlrec->target.node, blkno, true); Assert(BufferIsValid(buffer)); page = (Page) BufferGetPage(buffer); @@ -4221,9 +4236,7 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record) } else { - buffer = XLogReadBuffer(xlrec->target.node, - ItemPointerGetBlockNumber(&(xlrec->target.tid)), - false); + buffer = XLogReadBuffer(xlrec->target.node, blkno, false); if (!BufferIsValid(buffer)) return; page = (Page) BufferGetPage(buffer); @@ -4261,10 +4274,25 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record) offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true); if (offnum == InvalidOffsetNumber) elog(PANIC, "heap_insert_redo: failed to add tuple"); + + freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */ + PageSetLSN(page, lsn); PageSetTLI(page, ThisTimeLineID); MarkBufferDirty(buffer); UnlockReleaseBuffer(buffer); + + /* + * If the page is running low on free space, update the FSM as well. + * Arbitrarily, our definition of "low" is less than 20%. We can't do + * much better than that without knowing the fill-factor for the table. + * + * XXX: We don't get here if the page was restored from full page image. + * We don't bother to update the FSM in that case, it doesn't need to be + * totally accurate anyway. 
+ */ + if (freespace < BLCKSZ / 5) + XLogRecordPageWithFreeSpace(xlrec->target.node, blkno, freespace); } /* @@ -4289,6 +4317,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool move, bool hot_update) xl_heap_header xlhdr; int hsize; uint32 newlen; + Size freespace; if (record->xl_info & XLR_BKP_BLOCK_1) { @@ -4446,10 +4475,32 @@ newsame:; offnum = PageAddItem(page, (Item) htup, newlen, offnum, true, true); if (offnum == InvalidOffsetNumber) elog(PANIC, "heap_update_redo: failed to add tuple"); + + freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */ + PageSetLSN(page, lsn); PageSetTLI(page, ThisTimeLineID); MarkBufferDirty(buffer); UnlockReleaseBuffer(buffer); + + /* + * If the page is running low on free space, update the FSM as well. + * Arbitrarily, our definition of "low" is less than 20%. We can't do + * much better than that without knowing the fill-factor for the table. + * + * However, don't update the FSM on HOT updates, because after crash + * recovery, either the old or the new tuple will certainly be dead and + * prunable. After pruning, the page will have roughly as much free space + * as it did before the update, assuming the new tuple is about the same + * size as the old one. + * + * XXX: We don't get here if the page was restored from full page image. + * We don't bother to update the FSM in that case, it doesn't need to be + * totally accurate anyway. 
+ */ + if (!hot_update && freespace < BLCKSZ / 5) + XLogRecordPageWithFreeSpace(xlrec->target.node, + ItemPointerGetBlockNumber(&(xlrec->newtid)), freespace); } static void diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c index 171fe63af5..4949cf6d8e 100644 --- a/src/backend/storage/freespace/freespace.c +++ b/src/backend/storage/freespace/freespace.c @@ -203,6 +203,36 @@ RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail) } /* + * XLogRecordPageWithFreeSpace - like RecordPageWithFreeSpace, for use in + * WAL replay + */ +void +XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk, + Size spaceAvail) +{ + int new_cat = fsm_space_avail_to_cat(spaceAvail); + FSMAddress addr; + uint16 slot; + BlockNumber blkno; + Buffer buf; + Page page; + + /* Get the location of the FSM byte representing the heap block */ + addr = fsm_get_location(heapBlk, &slot); + blkno = fsm_logical_to_physical(addr); + + /* If the page doesn't exist already, extend */ + buf = XLogReadBufferExtended(rnode, FSM_FORKNUM, blkno, RBM_ZERO_ON_ERROR); + page = BufferGetPage(buf); + if (PageIsNew(page)) + PageInit(page, BLCKSZ, 0); + + if (fsm_set_avail(page, slot, new_cat)) + MarkBufferDirty(buf); + UnlockReleaseBuffer(buf); +} + +/* * GetRecordedFreePage - return the amount of free space on a particular page, * according to the FSM. 
*/ diff --git a/src/include/storage/freespace.h b/src/include/storage/freespace.h index 7a1664f0ed..e17a8d5d2d 100644 --- a/src/include/storage/freespace.h +++ b/src/include/storage/freespace.h @@ -27,6 +27,8 @@ extern BlockNumber RecordAndGetPageWithFreeSpace(Relation rel, Size spaceNeeded); extern void RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail); +extern void XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk, + Size spaceAvail); extern void FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks); extern void FreeSpaceMapVacuum(Relation rel); -- 2.11.4.GIT