/*-------------------------------------------------------------------------
 *
 * Concurrent ("lazy") vacuuming.
 *
 * The major space usage for LAZY VACUUM is storage for the array of dead
 * tuple TIDs, with the next biggest need being storage for per-disk-page
 * free space info.  We want to ensure we can vacuum even the very largest
 * relations with finite memory space usage.  To do that, we set upper bounds
 * on the number of tuples and pages we will keep track of at once.
 *
 * We are willing to use at most maintenance_work_mem memory space to keep
 * track of dead tuples.  We initially allocate an array of TIDs of that size,
 * with an upper limit that depends on table size (this limit ensures we don't
 * allocate a huge area uselessly for vacuuming small tables).  If the array
 * threatens to overflow, we suspend the heap scan phase and perform a pass of
 * index cleanup and page compaction, then resume the heap scan with an empty
 * TID array.
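 *
 * (For instance, at maintenance_work_mem = 16MB the 6-byte ItemPointerData
 * entries allow roughly 2.8 million dead-tuple TIDs to be remembered per
 * index-cleanup cycle; exact figures depend on configuration.)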
 *
 * We can limit the storage for page free space to MaxFSMPages entries,
 * since that's the most the free space map will be willing to remember
 * anyway.  If the relation has fewer than that many pages with free space,
 * life is easy: just build an array of per-page info.  If it has more,
 * we store the free space info as a heap ordered by amount of free space,
 * so that we can discard the pages with least free space to ensure we never
 * have more than MaxFSMPages entries in all.  The surviving page entries
 * are passed to the free space map at conclusion of the scan.
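 *
 * (The array thus acts as a bounded min-heap keyed on free space: once it is
 * full, a new page is admitted only by evicting the entry with the least
 * free space, so the pages with the most free space survive.)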
 *
 * If we're processing a table with no indexes, we can just vacuum each page
 * as we go; there's no need to save up multiple tuples to minimize the number
 * of index scans performed.  So we don't use maintenance_work_mem memory for
 * the TID array, just enough to hold as many heap tuples as fit on one page.
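 *
 * (See lazy_scan_heap: with nindexes == 0 each page is vacuumed via
 * lazy_vacuum_page as soon as it has been scanned, and lazy_space_alloc
 * sizes the dead-tuple array to only MaxHeapTuplesPerPage entries.)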
 *
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/genam.h"
#include "access/heapam.h"
#include "access/transam.h"
#include "commands/dbcommands.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/lmgr.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/pg_rusage.h"
#include "utils/tqual.h"

/*
 * Space/time tradeoff parameters: do these need to be user-tunable?
 *
 * To consider truncating the relation, we want there to be at least
 * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
 * is less) potentially-freeable pages.
 */
#define REL_TRUNCATE_MINIMUM	1000
#define REL_TRUNCATE_FRACTION	16
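
/*
 * For example, a 64000-page relation (about 500MB with 8kB blocks) is only
 * considered for truncation once at least Min(1000, 64000/16 = 4000) = 1000
 * of its trailing pages appear to be freeable.
 */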

/*
 * Guesstimation of number of dead tuples per page.  This is used to
 * provide an upper limit to memory allocated when vacuuming small
 * tables.
 */
#define LAZY_ALLOC_TUPLES		MaxHeapTuplesPerPage
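
/*
 * This is deliberately the worst case (every line pointer on a page dead);
 * lazy_space_alloc multiplies it by the number of blocks to cap the size of
 * the dead-TID array for small tables.
 */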

typedef struct LVRelStats
{
	/* hasindex = true means two-pass strategy; false means one-pass */
	bool		hasindex;
	/* Overall statistics about rel */
	BlockNumber rel_pages;
	double		rel_tuples;
	BlockNumber pages_removed;
	double		tuples_deleted;
	BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
	Size		threshold;		/* minimum interesting free space */
	/* List of TIDs of tuples we intend to delete */
	/* NB: this list is ordered by TID address */
	int			num_dead_tuples;	/* current # of entries */
	int			max_dead_tuples;	/* # slots allocated in array */
	ItemPointer dead_tuples;		/* array of ItemPointerData */
	/* Array or heap of per-page info about free space */
	/* We use a simple array until it fills up, then convert to heap */
	bool		fs_is_heap;			/* are we using heap organization? */
	int			num_free_pages;		/* current # of entries */
	int			max_free_pages;		/* # slots allocated in array */
	FSMPageData *free_pages;		/* array or heap of blkno/avail */
	BlockNumber tot_free_pages;		/* total pages with >= threshold space */
	int			num_index_scans;
} LVRelStats;

/* A few variables that don't seem worth passing around as parameters */
static int	elevel = -1;

static TransactionId OldestXmin;
static TransactionId FreezeLimit;

static BufferAccessStrategy vac_strategy;

/* non-export function prototypes */
static void lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
			   Relation *Irel, int nindexes);
static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
static void lazy_vacuum_index(Relation indrel,
				  IndexBulkDeleteResult **stats,
				  LVRelStats *vacrelstats);
static void lazy_cleanup_index(Relation indrel,
				   IndexBulkDeleteResult *stats,
				   LVRelStats *vacrelstats);
static int	lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
				 int tupindex, LVRelStats *vacrelstats);
static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
static BlockNumber count_nondeletable_pages(Relation onerel,
						 LVRelStats *vacrelstats);
static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks);
static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
					   ItemPointer itemptr);
static void lazy_record_free_space(LVRelStats *vacrelstats,
					   BlockNumber page, Size avail);
static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
static void lazy_update_fsm(Relation onerel, LVRelStats *vacrelstats);
static int	vac_cmp_itemptr(const void *left, const void *right);
static int	vac_cmp_page_spaces(const void *left, const void *right);


/*
 *	lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation
 *
 *		This routine vacuums a single heap, cleans out its indexes, and
 *		updates its relpages and reltuples statistics.
 *
 *		At entry, we have already established a transaction and opened
 *		and locked the relation.
 */
void
lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
				BufferAccessStrategy bstrategy)
{
	LVRelStats *vacrelstats;
	Relation   *Irel;
	int			nindexes;
	BlockNumber possibly_freeable;
	PGRUsage	ru0;
	TimestampTz starttime = 0;

	pg_rusage_init(&ru0);

	/* measure elapsed time iff autovacuum logging requires it */
	if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
		starttime = GetCurrentTimestamp();

	if (vacstmt->verbose)
		elevel = INFO;
	else
		elevel = DEBUG2;

	vac_strategy = bstrategy;

	vacuum_set_xid_limits(vacstmt->freeze_min_age, onerel->rd_rel->relisshared,
						  &OldestXmin, &FreezeLimit);

	vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));

	/* Set threshold for interesting free space = average request size */
	/* XXX should we scale it up or down?  Adjust vacuum.c too, if so */
	vacrelstats->threshold = GetAvgFSMRequestSize(&onerel->rd_node);

	vacrelstats->num_index_scans = 0;

	/* Open all indexes of the relation */
	vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
	vacrelstats->hasindex = (nindexes > 0);

	/* Do the vacuuming */
	lazy_scan_heap(onerel, vacrelstats, Irel, nindexes);

	/* Done with indexes */
	vac_close_indexes(nindexes, Irel, NoLock);

	/*
	 * Optionally truncate the relation.
	 *
	 * Don't even think about it unless we have a shot at releasing a goodly
	 * number of pages.  Otherwise, the time taken isn't worth it.
	 */
	possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
	if (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
		possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION)
		lazy_truncate_heap(onerel, vacrelstats);

	/* Update shared free space map with final free space info */
	lazy_update_fsm(onerel, vacrelstats);

	if (vacrelstats->tot_free_pages > MaxFSMPages)
		ereport(WARNING,
				(errmsg("relation \"%s.%s\" contains more than \"max_fsm_pages\" pages with useful free space",
						get_namespace_name(RelationGetNamespace(onerel)),
						RelationGetRelationName(onerel)),
				 /* Only suggest VACUUM FULL if > 20% free */
				 (vacrelstats->tot_free_pages > vacrelstats->rel_pages * 0.20) ?
				 errhint("Consider using VACUUM FULL on this relation or increasing the configuration parameter \"max_fsm_pages\".") :
				 errhint("Consider increasing the configuration parameter \"max_fsm_pages\".")));

	/* Update statistics in pg_class */
	vac_update_relstats(RelationGetRelid(onerel),
						vacrelstats->rel_pages,
						vacrelstats->rel_tuples,
						vacrelstats->hasindex,
						FreezeLimit);

	/* report results to the stats collector, too */
	pgstat_report_vacuum(RelationGetRelid(onerel), onerel->rd_rel->relisshared,
						 vacstmt->analyze, vacrelstats->rel_tuples);

	/* and log the action if appropriate */
	if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
	{
		if (Log_autovacuum_min_duration == 0 ||
			TimestampDifferenceExceeds(starttime, GetCurrentTimestamp(),
									   Log_autovacuum_min_duration))
			ereport(LOG,
					(errmsg("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n"
							"pages: %d removed, %d remain\n"
							"tuples: %.0f removed, %.0f remain\n"
							"system usage: %s",
							get_database_name(MyDatabaseId),
							get_namespace_name(RelationGetNamespace(onerel)),
							RelationGetRelationName(onerel),
							vacrelstats->num_index_scans,
							vacrelstats->pages_removed, vacrelstats->rel_pages,
							vacrelstats->tuples_deleted, vacrelstats->rel_tuples,
							pg_rusage_show(&ru0))));
	}
}

/*
 *	lazy_scan_heap() -- scan an open heap relation
 *
 *		This routine sets commit status bits, builds lists of dead tuples
 *		and pages with free space, and calculates statistics on the number
 *		of live tuples in the heap.  When done, or when we run low on space
 *		for dead-tuple TIDs, invoke vacuuming of indexes and heap.
 *
 *		If there are no indexes then we just vacuum each dirty page as we
 *		process it, since there's no point in gathering many tuples.
 */
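/*
 * Per-page outline, as implemented below: prune HOT chains, collect dead
 * item pointers into dead_tuples[], freeze old tuples (logging the freezes
 * to WAL), vacuum the page immediately if the table has no indexes, and
 * record the page's free space unless lazy_vacuum_heap will revisit it.
 */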
static void
lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
			   Relation *Irel, int nindexes)
{
	BlockNumber nblocks,
				blkno;
	HeapTupleData tuple;
	char	   *relname;
	BlockNumber empty_pages,
				vacuumed_pages;
	double		num_tuples,
				tups_vacuumed,
				nkeep,
				nunused;
	IndexBulkDeleteResult **indstats;
	int			i;
	PGRUsage	ru0;

	pg_rusage_init(&ru0);

	relname = RelationGetRelationName(onerel);
	ereport(elevel,
			(errmsg("vacuuming \"%s.%s\"",
					get_namespace_name(RelationGetNamespace(onerel)),
					relname)));

	empty_pages = vacuumed_pages = 0;
	num_tuples = tups_vacuumed = nkeep = nunused = 0;

	indstats = (IndexBulkDeleteResult **)
		palloc0(nindexes * sizeof(IndexBulkDeleteResult *));

	nblocks = RelationGetNumberOfBlocks(onerel);
	vacrelstats->rel_pages = nblocks;
	vacrelstats->nonempty_pages = 0;

	lazy_space_alloc(vacrelstats, nblocks);

	for (blkno = 0; blkno < nblocks; blkno++)
	{
		Buffer		buf;
		Page		page;
		OffsetNumber offnum,
					maxoff;
		bool		tupgone,
					hastup;
		int			prev_dead_count;
		OffsetNumber frozen[MaxOffsetNumber];
		int			nfrozen;

		vacuum_delay_point();

		/*
		 * If we are close to overrunning the available space for dead-tuple
		 * TIDs, pause and do a cycle of vacuuming before we tackle this page.
		 */
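		/*
		 * (MaxHeapTuplesPerPage is the most dead tuples a single heap page
		 * can contribute, so leaving at least that many free slots
		 * guarantees the current page cannot overflow the array.)
		 */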
		if ((vacrelstats->max_dead_tuples - vacrelstats->num_dead_tuples) < MaxHeapTuplesPerPage &&
			vacrelstats->num_dead_tuples > 0)
		{
			/* Remove index entries */
			for (i = 0; i < nindexes; i++)
				lazy_vacuum_index(Irel[i],
								  &indstats[i],
								  vacrelstats);
			/* Remove tuples from heap */
			lazy_vacuum_heap(onerel, vacrelstats);
			/* Forget the now-vacuumed tuples, and press on */
			vacrelstats->num_dead_tuples = 0;
			vacrelstats->num_index_scans++;
		}

		buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);

		/* We need buffer cleanup lock so that we can prune HOT chains. */
		LockBufferForCleanup(buf);

		page = BufferGetPage(buf);

		if (PageIsNew(page))
		{
			/*
			 * An all-zeroes page could be left over if a backend extends the
			 * relation but crashes before initializing the page.  Reclaim
			 * such pages for use.
			 *
			 * We have to be careful here because we could be looking at a
			 * page that someone has just added to the relation and not yet
			 * been able to initialize (see RelationGetBufferForTuple).  To
			 * protect against that, release the buffer lock, grab the
			 * relation extension lock momentarily, and re-lock the buffer.
			 * If the page is still uninitialized by then, it must be left
			 * over from a crashed backend, and we can initialize it.
			 *
			 * We don't really need the relation lock when this is a new or
			 * temp relation, but it's probably not worth the code space to
			 * check that, since this surely isn't a critical path.
			 *
			 * Note: the comparable code in vacuum.c need not worry because
			 * it's got exclusive lock on the whole relation.
			 */
			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
			LockRelationForExtension(onerel, ExclusiveLock);
			UnlockRelationForExtension(onerel, ExclusiveLock);
			LockBufferForCleanup(buf);
			if (PageIsNew(page))
			{
				ereport(WARNING,
						(errmsg("relation \"%s\" page %u is uninitialized --- fixing",
								relname, blkno)));
				PageInit(page, BufferGetPageSize(buf), 0);
				empty_pages++;
				lazy_record_free_space(vacrelstats, blkno,
									   PageGetHeapFreeSpace(page));
			}
			MarkBufferDirty(buf);
			UnlockReleaseBuffer(buf);
			continue;
		}

		if (PageIsEmpty(page))
		{
			empty_pages++;
			lazy_record_free_space(vacrelstats, blkno,
								   PageGetHeapFreeSpace(page));
			UnlockReleaseBuffer(buf);
			continue;
		}

		/*
		 * Prune all HOT-update chains in this page.
		 *
		 * We count tuples removed by the pruning step as removed by VACUUM.
		 */
		tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin,
										 false, false);

		/*
		 * Now scan the page to collect vacuumable items and check for tuples
		 * requiring freezing.
		 */
		nfrozen = 0;
		hastup = false;
		prev_dead_count = vacrelstats->num_dead_tuples;
		maxoff = PageGetMaxOffsetNumber(page);
		for (offnum = FirstOffsetNumber;
			 offnum <= maxoff;
			 offnum = OffsetNumberNext(offnum))
		{
			ItemId		itemid;

			itemid = PageGetItemId(page, offnum);

			/* Unused items require no processing, but we count 'em */
			if (!ItemIdIsUsed(itemid))
			{
				nunused += 1;
				continue;
			}

			/* Redirect items mustn't be touched */
			if (ItemIdIsRedirected(itemid))
			{
				hastup = true;	/* this page won't be truncatable */
				continue;
			}

			ItemPointerSet(&(tuple.t_self), blkno, offnum);

			/*
			 * DEAD item pointers are to be vacuumed normally; but we don't
			 * count them in tups_vacuumed, else we'd be double-counting (at
			 * least in the common case where heap_page_prune() just freed up
			 * a non-HOT tuple).
			 */
			if (ItemIdIsDead(itemid))
			{
				lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
				continue;
			}

			Assert(ItemIdIsNormal(itemid));

			tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
			tuple.t_len = ItemIdGetLength(itemid);

			tupgone = false;

			switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf))
			{
				case HEAPTUPLE_DEAD:

					/*
					 * Ordinarily, DEAD tuples would have been removed by
					 * heap_page_prune(), but it's possible that the tuple
					 * state changed since heap_page_prune() looked.  In
					 * particular an INSERT_IN_PROGRESS tuple could have
					 * changed to DEAD if the inserter aborted.  So this
					 * cannot be considered an error condition.
					 *
					 * If the tuple is HOT-updated then it must only be
					 * removed by a prune operation; so we keep it just as if
					 * it were RECENTLY_DEAD.  Also, if it's a heap-only
					 * tuple, we choose to keep it, because it'll be a lot
					 * cheaper to get rid of it in the next pruning pass than
					 * to treat it like an indexed tuple.
					 */
					if (HeapTupleIsHotUpdated(&tuple) ||
						HeapTupleIsHeapOnly(&tuple))
						nkeep += 1;
					else
						tupgone = true; /* we can delete the tuple */
					break;
				case HEAPTUPLE_LIVE:
					/* Tuple is good --- but let's do some validity checks */
					if (onerel->rd_rel->relhasoids &&
						!OidIsValid(HeapTupleGetOid(&tuple)))
						elog(WARNING, "relation \"%s\" TID %u/%u: OID is invalid",
							 relname, blkno, offnum);
					break;
				case HEAPTUPLE_RECENTLY_DEAD:

					/*
					 * If tuple is recently deleted then we must not remove it
					 * from relation.
					 */
					nkeep += 1;
					break;
				case HEAPTUPLE_INSERT_IN_PROGRESS:
					/* This is an expected case during concurrent vacuum */
					break;
				case HEAPTUPLE_DELETE_IN_PROGRESS:
					/* This is an expected case during concurrent vacuum */
					break;
				default:
					elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
					break;
			}

			if (tupgone)
			{
				lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
				tups_vacuumed += 1;
			}
			else
			{
				num_tuples += 1;
				hastup = true;

				/*
				 * Each non-removable tuple must be checked to see if it needs
				 * freezing.  Note we already have exclusive buffer lock.
				 */
				if (heap_freeze_tuple(tuple.t_data, FreezeLimit,
									  InvalidBuffer))
					frozen[nfrozen++] = offnum;
			}
		}						/* scan along page */

		/*
		 * If we froze any tuples, mark the buffer dirty, and write a WAL
		 * record recording the changes.  We must log the changes to be
		 * crash-safe against future truncation of CLOG.
		 */
		if (nfrozen > 0)
		{
			MarkBufferDirty(buf);
			/* no XLOG for temp tables, though */
			if (!onerel->rd_istemp)
			{
				XLogRecPtr	recptr;

				recptr = log_heap_freeze(onerel, buf, FreezeLimit,
										 frozen, nfrozen);
				PageSetLSN(page, recptr);
				PageSetTLI(page, ThisTimeLineID);
			}
		}

		/*
		 * If there are no indexes then we can vacuum the page right now
		 * instead of doing a second scan.
		 */
		if (nindexes == 0 &&
			vacrelstats->num_dead_tuples > 0)
		{
			/* Remove tuples from heap */
			lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats);
			/* Forget the now-vacuumed tuples, and press on */
			vacrelstats->num_dead_tuples = 0;
			vacuumed_pages++;
		}

		/*
		 * If we remembered any tuples for deletion, then the page will be
		 * visited again by lazy_vacuum_heap, which will compute and record
		 * its post-compaction free space.  If not, then we're done with this
		 * page, so remember its free space as-is.  (This path will always be
		 * taken if there are no indexes.)
		 */
		if (vacrelstats->num_dead_tuples == prev_dead_count)
			lazy_record_free_space(vacrelstats, blkno,
								   PageGetHeapFreeSpace(page));

		/* Remember the location of the last page with nonremovable tuples */
		if (hastup)
			vacrelstats->nonempty_pages = blkno + 1;

		UnlockReleaseBuffer(buf);
	}

	/* save stats for use later */
	vacrelstats->rel_tuples = num_tuples;
	vacrelstats->tuples_deleted = tups_vacuumed;

	/* If any tuples need to be deleted, perform final vacuum cycle */
	/* XXX put a threshold on min number of tuples here? */
	if (vacrelstats->num_dead_tuples > 0)
	{
		/* Remove index entries */
		for (i = 0; i < nindexes; i++)
			lazy_vacuum_index(Irel[i],
							  &indstats[i],
							  vacrelstats);
		/* Remove tuples from heap */
		lazy_vacuum_heap(onerel, vacrelstats);
		vacrelstats->num_index_scans++;
	}

	/* Do post-vacuum cleanup and statistics update for each index */
	for (i = 0; i < nindexes; i++)
		lazy_cleanup_index(Irel[i], indstats[i], vacrelstats);

	/* If no indexes, make log report that lazy_vacuum_heap would've made */
	if (vacuumed_pages)
		ereport(elevel,
				(errmsg("\"%s\": removed %.0f row versions in %u pages",
						RelationGetRelationName(onerel),
						tups_vacuumed, vacuumed_pages)));

	ereport(elevel,
			(errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
					RelationGetRelationName(onerel),
					tups_vacuumed, num_tuples, nblocks),
			 errdetail("%.0f dead row versions cannot be removed yet.\n"
					   "There were %.0f unused item pointers.\n"
					   "%u pages contain useful free space.\n"
					   "%u pages are entirely empty.\n"
					   "%s.",
					   nkeep,
					   nunused,
					   vacrelstats->tot_free_pages,
					   empty_pages,
					   pg_rusage_show(&ru0))));
}

/*
 *	lazy_vacuum_heap() -- second pass over the heap
 *
 *		This routine marks dead tuples as unused and compacts out free
 *		space on their pages.  Pages not having dead tuples recorded from
 *		lazy_scan_heap are not visited at all.
 *
 * Note: the reason for doing this as a second pass is we cannot remove
 * the tuples until we've removed their index entries, and we want to
 * process index entry removal in batches as large as possible.
 */
static void
lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
{
	int			tupindex;
	int			npages;
	PGRUsage	ru0;

	pg_rusage_init(&ru0);
	npages = 0;

	tupindex = 0;
	while (tupindex < vacrelstats->num_dead_tuples)
	{
		BlockNumber tblk;
		Buffer		buf;
		Page		page;

		vacuum_delay_point();

		tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
		buf = ReadBufferWithStrategy(onerel, tblk, vac_strategy);
		LockBufferForCleanup(buf);
		tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats);
		/* Now that we've compacted the page, record its available space */
		page = BufferGetPage(buf);
		lazy_record_free_space(vacrelstats, tblk,
							   PageGetHeapFreeSpace(page));
		UnlockReleaseBuffer(buf);
		npages++;
	}

	ereport(elevel,
			(errmsg("\"%s\": removed %d row versions in %d pages",
					RelationGetRelationName(onerel),
					tupindex, npages),
			 errdetail("%s.",
					   pg_rusage_show(&ru0))));
}

/*
 *	lazy_vacuum_page() -- free dead tuples on a page
 *						  and repair its fragmentation.
 *
 * Caller must hold pin and buffer cleanup lock on the buffer.
 *
 * tupindex is the index in vacrelstats->dead_tuples of the first dead
 * tuple for this page.  We assume the rest follow sequentially.
 * The return value is the first tupindex after the tuples of this page.
 */
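/*
 * For example, if dead_tuples[] held the TIDs (5,1), (5,3), (7,2) and we
 * were called with blkno = 5 and tupindex = 0, offsets 1 and 3 of block 5
 * would be marked unused and the return value would be 2, the index of the
 * first TID belonging to a later block.  (Illustrative values only.)
 */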
static int
lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
				 int tupindex, LVRelStats *vacrelstats)
{
	Page		page = BufferGetPage(buffer);
	OffsetNumber unused[MaxOffsetNumber];
	int			uncnt = 0;

	START_CRIT_SECTION();

	for (; tupindex < vacrelstats->num_dead_tuples; tupindex++)
	{
		BlockNumber tblk;
		OffsetNumber toff;
		ItemId		itemid;

		tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
		if (tblk != blkno)
			break;				/* past end of tuples for this block */
		toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]);
		itemid = PageGetItemId(page, toff);
		ItemIdSetUnused(itemid);
		unused[uncnt++] = toff;
	}

	PageRepairFragmentation(page);

	MarkBufferDirty(buffer);

	/* XLOG stuff */
	if (!onerel->rd_istemp)
	{
		XLogRecPtr	recptr;

		recptr = log_heap_clean(onerel, buffer,
								NULL, 0, NULL, 0,
								unused, uncnt,
								false);
		PageSetLSN(page, recptr);
		PageSetTLI(page, ThisTimeLineID);
	}

	END_CRIT_SECTION();

	return tupindex;
}

/*
 *	lazy_vacuum_index() -- vacuum one index relation.
 *
 *		Delete all the index entries pointing to tuples listed in
 *		vacrelstats->dead_tuples, and update running statistics.
 */
static void
lazy_vacuum_index(Relation indrel,
				  IndexBulkDeleteResult **stats,
				  LVRelStats *vacrelstats)
{
	IndexVacuumInfo ivinfo;
	PGRUsage	ru0;

	pg_rusage_init(&ru0);

	ivinfo.index = indrel;
	ivinfo.vacuum_full = false;
	ivinfo.message_level = elevel;
	/* We don't yet know rel_tuples, so pass -1 */
	ivinfo.num_heap_tuples = -1;
	ivinfo.strategy = vac_strategy;

	/* Do bulk deletion */
	*stats = index_bulk_delete(&ivinfo, *stats,
							   lazy_tid_reaped, (void *) vacrelstats);

	ereport(elevel,
			(errmsg("scanned index \"%s\" to remove %d row versions",
					RelationGetRelationName(indrel),
					vacrelstats->num_dead_tuples),
			 errdetail("%s.", pg_rusage_show(&ru0))));
}

/*
 *	lazy_cleanup_index() -- do post-vacuum cleanup for one index relation.
 */
static void
lazy_cleanup_index(Relation indrel,
				   IndexBulkDeleteResult *stats,
				   LVRelStats *vacrelstats)
{
	IndexVacuumInfo ivinfo;
	PGRUsage	ru0;

	pg_rusage_init(&ru0);

	ivinfo.index = indrel;
	ivinfo.vacuum_full = false;
	ivinfo.message_level = elevel;
	ivinfo.num_heap_tuples = vacrelstats->rel_tuples;
	ivinfo.strategy = vac_strategy;

	stats = index_vacuum_cleanup(&ivinfo, stats);

	if (!stats)
		return;

	/* now update statistics in pg_class */
	vac_update_relstats(RelationGetRelid(indrel),
						stats->num_pages,
						stats->num_index_tuples,
						false, InvalidTransactionId);

	ereport(elevel,
			(errmsg("index \"%s\" now contains %.0f row versions in %u pages",
					RelationGetRelationName(indrel),
					stats->num_index_tuples,
					stats->num_pages),
			 errdetail("%.0f index row versions were removed.\n"
					   "%u index pages have been deleted, %u are currently reusable.\n"
					   "%s.",
					   stats->tuples_removed,
					   stats->pages_deleted, stats->pages_free,
					   pg_rusage_show(&ru0))));
}

/*
 * lazy_truncate_heap - try to truncate off any empty pages at the end
 */
static void
lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
{
	BlockNumber old_rel_pages = vacrelstats->rel_pages;
	BlockNumber new_rel_pages;
	FSMPageData *pageSpaces;
	int			n;
	int			i,
				j;
	PGRUsage	ru0;

	pg_rusage_init(&ru0);

	/*
	 * We need full exclusive lock on the relation in order to do truncation.
	 * If we can't get it, give up rather than waiting --- we don't want to
	 * block other backends, and we don't want to deadlock (which is quite
	 * possible considering we already hold a lower-grade lock).
	 */
	if (!ConditionalLockRelation(onerel, AccessExclusiveLock))
		return;

	/*
	 * Now that we have exclusive lock, look to see if the rel has grown
	 * whilst we were vacuuming with non-exclusive lock.  If so, give up; the
	 * newly added pages presumably contain non-deletable tuples.
	 */
	new_rel_pages = RelationGetNumberOfBlocks(onerel);
	if (new_rel_pages != old_rel_pages)
	{
		/* might as well use the latest news when we update pg_class stats */
		vacrelstats->rel_pages = new_rel_pages;
		UnlockRelation(onerel, AccessExclusiveLock);
		return;
	}

	/*
	 * Scan backwards from the end to verify that the end pages actually
	 * contain no tuples.  This is *necessary*, not optional, because other
	 * backends could have added tuples to these pages whilst we were
	 * vacuuming.
	 */
	new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);

	if (new_rel_pages >= old_rel_pages)
	{
		/* can't do anything after all */
		UnlockRelation(onerel, AccessExclusiveLock);
		return;
	}

	RelationTruncate(onerel, new_rel_pages);

	/*
	 * Note: once we have truncated, we *must* keep the exclusive lock until
	 * commit.  The sinval message that will be sent at commit (as a result of
	 * vac_update_relstats()) must be received by other backends, to cause
	 * them to reset their rd_targblock values, before they can safely access
	 * the table again.
	 */

	/*
	 * Drop free-space info for removed blocks; these must not get entered
	 * into the FSM!
	 */
	pageSpaces = vacrelstats->free_pages;
	n = vacrelstats->num_free_pages;
	j = 0;
	for (i = 0; i < n; i++)
	{
		if (FSMPageGetPageNum(&pageSpaces[i]) < new_rel_pages)
		{
			pageSpaces[j] = pageSpaces[i];
			j++;
		}
	}
	vacrelstats->num_free_pages = j;

	/*
	 * If tot_free_pages was more than num_free_pages, we can't tell for sure
	 * what its correct value is now, because we don't know which of the
	 * forgotten pages are getting truncated.  Conservatively set it equal to
	 * num_free_pages.
	 */
	vacrelstats->tot_free_pages = j;

	/* We destroyed the heap ordering, so mark array unordered */
	vacrelstats->fs_is_heap = false;

	/* update statistics */
	vacrelstats->rel_pages = new_rel_pages;
	vacrelstats->pages_removed = old_rel_pages - new_rel_pages;

	ereport(elevel,
			(errmsg("\"%s\": truncated %u to %u pages",
					RelationGetRelationName(onerel),
					old_rel_pages, new_rel_pages),
			 errdetail("%s.",
					   pg_rusage_show(&ru0))));
}

/*
 * Rescan end pages to verify that they are (still) empty of tuples.
 *
 * Returns number of nondeletable pages (last nonempty page + 1).
 */
static BlockNumber
count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
{
	BlockNumber blkno;

	/* Strange coding of loop control is needed because blkno is unsigned */
	blkno = vacrelstats->rel_pages;
	while (blkno > vacrelstats->nonempty_pages)
	{
		Buffer		buf;
		Page		page;
		OffsetNumber offnum,
					maxoff;
		bool		hastup;

		/*
		 * We don't insert a vacuum delay point here, because we have an
		 * exclusive lock on the table which we want to hold for as short a
		 * time as possible.  We still need to check for interrupts however.
		 */
		CHECK_FOR_INTERRUPTS();

		blkno--;

		buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);

		/* In this phase we only need shared access to the buffer */
		LockBuffer(buf, BUFFER_LOCK_SHARE);

		page = BufferGetPage(buf);

		if (PageIsNew(page) || PageIsEmpty(page))
		{
			/* PageIsNew probably shouldn't happen... */
			UnlockReleaseBuffer(buf);
			continue;
		}

		hastup = false;
		maxoff = PageGetMaxOffsetNumber(page);
		for (offnum = FirstOffsetNumber;
			 offnum <= maxoff;
			 offnum = OffsetNumberNext(offnum))
		{
			ItemId		itemid;

			itemid = PageGetItemId(page, offnum);

			/*
			 * Note: any non-unused item should be taken as a reason to keep
			 * this page.  We formerly thought that DEAD tuples could be
			 * thrown away, but that's not so, because we'd not have cleaned
			 * out their index entries.
			 */
			if (ItemIdIsUsed(itemid))
			{
				hastup = true;
				break;			/* can stop scanning */
			}
		}						/* scan along page */

		UnlockReleaseBuffer(buf);

		/* Done scanning if we found a tuple here */
		if (hastup)
			return blkno + 1;
	}

	/*
	 * If we fall out of the loop, all the previously-thought-to-be-empty
	 * pages still are; we need not bother to look at the last known-nonempty
	 * page.
	 */
	return vacrelstats->nonempty_pages;
}

/*
 * lazy_space_alloc - space allocation decisions for lazy vacuum
 *
 * See the comments at the head of this file for rationale.
 */
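/*
 * For example, a 100-block table with an index is capped at
 * 100 * MaxHeapTuplesPerPage dead-TID slots (roughly 29,100 with the
 * default 8kB block size), no matter how large maintenance_work_mem is.
 * Figures are illustrative only.
 */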
static void
lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
{
	long		maxtuples;
	int			maxpages;

	if (vacrelstats->hasindex)
	{
		maxtuples = (maintenance_work_mem * 1024L) / sizeof(ItemPointerData);
		maxtuples = Min(maxtuples, INT_MAX);
		maxtuples = Min(maxtuples, MaxAllocSize / sizeof(ItemPointerData));

		/* curious coding here to ensure the multiplication can't overflow */
		if ((BlockNumber) (maxtuples / LAZY_ALLOC_TUPLES) > relblocks)
			maxtuples = relblocks * LAZY_ALLOC_TUPLES;

		/* stay sane if small maintenance_work_mem */
		maxtuples = Max(maxtuples, MaxHeapTuplesPerPage);
	}
	else
		maxtuples = MaxHeapTuplesPerPage;

	vacrelstats->num_dead_tuples = 0;
	vacrelstats->max_dead_tuples = (int) maxtuples;
	vacrelstats->dead_tuples = (ItemPointer)
		palloc(maxtuples * sizeof(ItemPointerData));

	maxpages = MaxFSMPages;
	maxpages = Min(maxpages, MaxAllocSize / sizeof(FSMPageData));
	/* No need to allocate more pages than the relation has blocks */
	if (relblocks < (BlockNumber) maxpages)
		maxpages = (int) relblocks;

	vacrelstats->fs_is_heap = false;
	vacrelstats->num_free_pages = 0;
	vacrelstats->max_free_pages = maxpages;
	vacrelstats->free_pages = (FSMPageData *)
		palloc(maxpages * sizeof(FSMPageData));
	vacrelstats->tot_free_pages = 0;
}

/*
 * lazy_record_dead_tuple - remember one deletable tuple
 */
static void
lazy_record_dead_tuple(LVRelStats *vacrelstats,
					   ItemPointer itemptr)
{
	/*
	 * The array shouldn't overflow under normal behavior, but perhaps it
	 * could if we are given a really small maintenance_work_mem.  In that
	 * case, just forget the last few tuples (we'll get 'em next time).
	 */
	if (vacrelstats->num_dead_tuples < vacrelstats->max_dead_tuples)
	{
		vacrelstats->dead_tuples[vacrelstats->num_dead_tuples] = *itemptr;
		vacrelstats->num_dead_tuples++;
	}
}

/*
 * lazy_record_free_space - remember free space on one page
 */
static void
lazy_record_free_space(LVRelStats *vacrelstats,
					   BlockNumber page, Size avail)
{
	FSMPageData *pageSpaces;
	int			n;

	/*
	 * A page with less than stats->threshold free space will be forgotten
	 * immediately, and never passed to the free space map.  Removing the
	 * uselessly small entries early saves cycles, and in particular reduces
	 * the amount of time we spend holding the FSM lock when we finally call
	 * RecordRelationFreeSpace.  Since the FSM will probably drop pages with
	 * little free space anyway, there's no point in making this really small.
	 *
	 * XXX Is it worth trying to measure average tuple size, and using that to
	 * adjust the threshold?  Would be worthwhile if FSM has no stats yet for
	 * this relation.  But changing the threshold as we scan the rel might
	 * lead to bizarre behavior, too.  Also, it's probably better if vacuum.c
	 * has the same thresholding behavior as we do here.
	 */
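	/*
	 * (The threshold was set in lazy_vacuum_rel from GetAvgFSMRequestSize(),
	 * i.e. roughly the average size of recent free-space requests for this
	 * relation, so "interesting" free space tracks actual request sizes.)
	 */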
	if (avail < vacrelstats->threshold)
		return;

	/* Count all pages over threshold, even if not enough space in array */
	vacrelstats->tot_free_pages++;

	/* Copy pointers to local variables for notational simplicity */
	pageSpaces = vacrelstats->free_pages;
	n = vacrelstats->max_free_pages;

	/* If we haven't filled the array yet, just keep adding entries */
	if (vacrelstats->num_free_pages < n)
	{
		FSMPageSetPageNum(&pageSpaces[vacrelstats->num_free_pages], page);
		FSMPageSetSpace(&pageSpaces[vacrelstats->num_free_pages], avail);
		vacrelstats->num_free_pages++;
		return;
	}

	/*
	 * The rest of this routine works with "heap" organization of the
	 * free space arrays, wherein we maintain the heap property
	 *			avail[(j-1) div 2] <= avail[j]  for 0 < j < n.
	 * In particular, the zero'th element always has the smallest available
	 * space and can be discarded to make room for a new page with more space.
	 * See Knuth's discussion of heap-based priority queues, sec 5.2.3;
	 * but note he uses 1-origin array subscripts, not 0-origin.
	 */
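	/*
	 * (Example: with n = 5 and avail[] = {10, 25, 12, 40, 30}, every parent
	 * avail[(j-1)/2] is <= its children avail[j], and avail[0] = 10 is the
	 * smallest entry, i.e. the first to be discarded when a better page
	 * arrives.  Values are illustrative only.)
	 */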

	/* If we haven't yet converted the array to heap organization, do it */
	if (!vacrelstats->fs_is_heap)
	{
		/*
		 * Scan backwards through the array, "sift-up" each value into its
		 * correct position.  We can start the scan at n/2-1 since each entry
		 * above that position has no children to worry about.
		 */
		int			l = n / 2;

		while (--l >= 0)
		{
			BlockNumber R = FSMPageGetPageNum(&pageSpaces[l]);
			Size		K = FSMPageGetSpace(&pageSpaces[l]);
			int			i;		/* i is where the "hole" is */

			i = l;
			for (;;)
			{
				int			j = 2 * i + 1;

				if (j >= n)
					break;
				if (j + 1 < n && FSMPageGetSpace(&pageSpaces[j]) > FSMPageGetSpace(&pageSpaces[j + 1]))
					j++;
				if (K <= FSMPageGetSpace(&pageSpaces[j]))
					break;
				pageSpaces[i] = pageSpaces[j];
				i = j;
			}
			FSMPageSetPageNum(&pageSpaces[i], R);
			FSMPageSetSpace(&pageSpaces[i], K);
		}

		vacrelstats->fs_is_heap = true;
	}

	/* If new page has more than zero'th entry, insert it into heap */
	if (avail > FSMPageGetSpace(&pageSpaces[0]))
	{
		/*
		 * Notionally, we replace the zero'th entry with the new data, and
		 * then sift-up to maintain the heap property.  Physically, the new
		 * data doesn't get stored into the arrays until we find the right
		 * location for it.
		 */
		int			i = 0;		/* i is where the "hole" is */

		for (;;)
		{
			int			j = 2 * i + 1;

			if (j >= n)
				break;
			if (j + 1 < n && FSMPageGetSpace(&pageSpaces[j]) > FSMPageGetSpace(&pageSpaces[j + 1]))
				j++;
			if (avail <= FSMPageGetSpace(&pageSpaces[j]))
				break;
			pageSpaces[i] = pageSpaces[j];
			i = j;
		}
		FSMPageSetPageNum(&pageSpaces[i], page);
		FSMPageSetSpace(&pageSpaces[i], avail);
	}
}

/*
 *	lazy_tid_reaped() -- is a particular tid deletable?
 *
 *		This has the right signature to be an IndexBulkDeleteCallback.
 *
 *		Assumes dead_tuples array is in sorted order.
 */
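/*
 * (The array is naturally in sorted order because lazy_scan_heap collects
 * TIDs in physical block/offset order; see the NB comment on dead_tuples
 * in LVRelStats above.)
 */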
static bool
lazy_tid_reaped(ItemPointer itemptr, void *state)
{
	LVRelStats *vacrelstats = (LVRelStats *) state;
	ItemPointer res;

	res = (ItemPointer) bsearch((void *) itemptr,
								(void *) vacrelstats->dead_tuples,
								vacrelstats->num_dead_tuples,
								sizeof(ItemPointerData),
								vac_cmp_itemptr);

	return (res != NULL);
}

/*
 * Update the shared Free Space Map with the info we now have about
 * free space in the relation, discarding any old info the map may have.
 */
static void
lazy_update_fsm(Relation onerel, LVRelStats *vacrelstats)
{
	FSMPageData *pageSpaces = vacrelstats->free_pages;
	int			nPages = vacrelstats->num_free_pages;

	/*
	 * Sort data into order, as required by RecordRelationFreeSpace.
	 */
	qsort(pageSpaces, nPages, sizeof(FSMPageData),
		  vac_cmp_page_spaces);

	RecordRelationFreeSpace(&onerel->rd_node, vacrelstats->tot_free_pages,
							nPages, pageSpaces);
}

/*
 * Comparator routines for use with qsort() and bsearch().
 */
static int
vac_cmp_itemptr(const void *left, const void *right)
{
	BlockNumber lblk,
				rblk;
	OffsetNumber loff,
				roff;

	lblk = ItemPointerGetBlockNumber((ItemPointer) left);
	rblk = ItemPointerGetBlockNumber((ItemPointer) right);

	if (lblk < rblk)
		return -1;
	if (lblk > rblk)
		return 1;

	loff = ItemPointerGetOffsetNumber((ItemPointer) left);
	roff = ItemPointerGetOffsetNumber((ItemPointer) right);

	if (loff < roff)
		return -1;
	if (loff > roff)
		return 1;

	return 0;
}

static int
vac_cmp_page_spaces(const void *left, const void *right)
{
	FSMPageData *linfo = (FSMPageData *) left;
	FSMPageData *rinfo = (FSMPageData *) right;
	BlockNumber lblkno = FSMPageGetPageNum(linfo);
	BlockNumber rblkno = FSMPageGetPageNum(rinfo);

	if (lblkno < rblkno)
		return -1;
	else if (lblkno > rblkno)
		return 1;
	else
		return 0;
}