/*-------------------------------------------------------------------------
 *
 * vacuumlazy.c
 *	  Concurrent ("lazy") vacuuming.
 *
 *
 * The major space usage for LAZY VACUUM is storage for the array of dead
 * tuple TIDs, with the next biggest need being storage for per-disk-page
 * free space info.  We want to ensure we can vacuum even the very largest
 * relations with finite memory space usage.  To do that, we set upper bounds
 * on the number of tuples and pages we will keep track of at once.
 *
 * We are willing to use at most maintenance_work_mem memory space to keep
 * track of dead tuples.  We initially allocate an array of TIDs of that size,
 * with an upper limit that depends on table size (this limit ensures we don't
 * allocate a huge area uselessly for vacuuming small tables).  If the array
 * threatens to overflow, we suspend the heap scan phase and perform a pass of
 * index cleanup and page compaction, then resume the heap scan with an empty
 * TID array.
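 *
 * (As a rough, illustrative figure: assuming the default maintenance_work_mem
 * of 16MB and a 6-byte ItemPointerData, the array can hold about
 * 16384 * 1024 / 6, i.e. roughly 2.8 million dead-tuple TIDs, before an
 * index-cleanup pass is forced.  The exact limit is computed in
 * lazy_space_alloc below.)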
 *
 * If we're processing a table with no indexes, we can just vacuum each page
 * as we go; there's no need to save up multiple tuples to minimize the number
 * of index scans performed.  So we don't use maintenance_work_mem memory for
 * the TID array, just enough to hold as many heap tuples as fit on one page.
 *
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL$
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <math.h>

#include "access/genam.h"
#include "access/heapam.h"
#include "access/transam.h"
#include "commands/dbcommands.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/lmgr.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/pg_rusage.h"
#include "utils/tqual.h"

/*
 * Space/time tradeoff parameters: do these need to be user-tunable?
 *
 * To consider truncating the relation, we want there to be at least
 * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
 * is less) potentially-freeable pages.
 */
#define REL_TRUNCATE_MINIMUM	1000
#define REL_TRUNCATE_FRACTION	16
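
/*
 * For instance, with the values above a table smaller than 16000 pages
 * (roughly 125MB assuming the default 8kB block size) is governed by the
 * relsize / 16 term, while for anything larger the flat 1000-page minimum
 * applies; lazy_vacuum_rel uses whichever threshold is smaller.
 */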

/*
 * Guesstimation of number of dead tuples per page.  This is used to
 * provide an upper limit to memory allocated when vacuuming small
 * tables.
 */
#define LAZY_ALLOC_TUPLES		MaxHeapTuplesPerPage

typedef struct LVRelStats
{
	/* hasindex = true means two-pass strategy; false means one-pass */
	bool		hasindex;
	/* Overall statistics about rel */
	BlockNumber rel_pages;
	double		rel_tuples;
	BlockNumber pages_removed;
	double		tuples_deleted;
	BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
	/* List of TIDs of tuples we intend to delete */
	/* NB: this list is ordered by TID address */
	int			num_dead_tuples;	/* current # of entries */
	int			max_dead_tuples;	/* # slots allocated in array */
	ItemPointer dead_tuples;	/* array of ItemPointerData */
	int			num_index_scans;
} LVRelStats;

/* A few variables that don't seem worth passing around as parameters */
static int	elevel = -1;

static TransactionId OldestXmin;
static TransactionId FreezeLimit;

static BufferAccessStrategy vac_strategy;

/* non-export function prototypes */
static void lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
			   Relation *Irel, int nindexes);
static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
static void lazy_vacuum_index(Relation indrel,
				  IndexBulkDeleteResult **stats,
				  LVRelStats *vacrelstats);
static void lazy_cleanup_index(Relation indrel,
				   IndexBulkDeleteResult *stats,
				   LVRelStats *vacrelstats);
static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
				 int tupindex, LVRelStats *vacrelstats);
static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
static BlockNumber count_nondeletable_pages(Relation onerel,
						 LVRelStats *vacrelstats);
static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks);
static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
					   ItemPointer itemptr);
static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
static int	vac_cmp_itemptr(const void *left, const void *right);

/*
 *	lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation
 *
 *		This routine vacuums a single heap, cleans out its indexes, and
 *		updates its relpages and reltuples statistics.
 *
 *		At entry, we have already established a transaction and opened
 *		and locked the relation.
 */
void
lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
				BufferAccessStrategy bstrategy)
{
	LVRelStats *vacrelstats;
	Relation   *Irel;
	int			nindexes;
	BlockNumber possibly_freeable;
	PGRUsage	ru0;
	TimestampTz starttime = 0;

	pg_rusage_init(&ru0);

	/* measure elapsed time iff autovacuum logging requires it */
	if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration > 0)
		starttime = GetCurrentTimestamp();

	if (vacstmt->verbose)
		elevel = INFO;
	else
		elevel = DEBUG2;

	vac_strategy = bstrategy;

	vacuum_set_xid_limits(vacstmt->freeze_min_age, onerel->rd_rel->relisshared,
						  &OldestXmin, &FreezeLimit);

	vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));

	vacrelstats->num_index_scans = 0;

	/* Open all indexes of the relation */
	vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
	vacrelstats->hasindex = (nindexes > 0);

	/* Do the vacuuming */
	lazy_scan_heap(onerel, vacrelstats, Irel, nindexes);

	/* Done with indexes */
	vac_close_indexes(nindexes, Irel, NoLock);

	/*
	 * Optionally truncate the relation.
	 *
	 * Don't even think about it unless we have a shot at releasing a goodly
	 * number of pages.  Otherwise, the time taken isn't worth it.
	 */
	possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
	if (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
		possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION)
		lazy_truncate_heap(onerel, vacrelstats);

	/* Vacuum the Free Space Map */
	FreeSpaceMapVacuum(onerel);

	/* Update statistics in pg_class */
	vac_update_relstats(RelationGetRelid(onerel),
						vacrelstats->rel_pages,
						vacrelstats->rel_tuples,
						vacrelstats->hasindex,
						FreezeLimit);

	/* report results to the stats collector, too */
	pgstat_report_vacuum(RelationGetRelid(onerel), onerel->rd_rel->relisshared,
						 vacstmt->analyze, vacrelstats->rel_tuples);

	/* and log the action if appropriate */
	if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
	{
		if (Log_autovacuum_min_duration == 0 ||
			TimestampDifferenceExceeds(starttime, GetCurrentTimestamp(),
									   Log_autovacuum_min_duration))
			ereport(LOG,
					(errmsg("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n"
							"pages: %d removed, %d remain\n"
							"tuples: %.0f removed, %.0f remain\n"
							"system usage: %s",
							get_database_name(MyDatabaseId),
							get_namespace_name(RelationGetNamespace(onerel)),
							RelationGetRelationName(onerel),
							vacrelstats->num_index_scans,
							vacrelstats->pages_removed, vacrelstats->rel_pages,
							vacrelstats->tuples_deleted, vacrelstats->rel_tuples,
							pg_rusage_show(&ru0))));
	}
}

/*
 *	lazy_scan_heap() -- scan an open heap relation
 *
 *		This routine sets commit status bits, builds lists of dead tuples
 *		and pages with free space, and calculates statistics on the number
 *		of live tuples in the heap.  When done, or when we run low on space
 *		for dead-tuple TIDs, invoke vacuuming of indexes and heap.
 *
 *		If there are no indexes then we just vacuum each dirty page as we
 *		process it, since there's no point in gathering many tuples.
 */
static void
lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
			   Relation *Irel, int nindexes)
{
	BlockNumber nblocks,
				blkno;
	HeapTupleData tuple;
	char	   *relname;
	BlockNumber empty_pages,
				vacuumed_pages;
	double		num_tuples,
				tups_vacuumed,
				nkeep,
				nunused;
	IndexBulkDeleteResult **indstats;
	int			i;
	PGRUsage	ru0;

	pg_rusage_init(&ru0);

	relname = RelationGetRelationName(onerel);
	ereport(elevel,
			(errmsg("vacuuming \"%s.%s\"",
					get_namespace_name(RelationGetNamespace(onerel)),
					relname)));

	empty_pages = vacuumed_pages = 0;
	num_tuples = tups_vacuumed = nkeep = nunused = 0;

	indstats = (IndexBulkDeleteResult **)
		palloc0(nindexes * sizeof(IndexBulkDeleteResult *));

	nblocks = RelationGetNumberOfBlocks(onerel);
	vacrelstats->rel_pages = nblocks;
	vacrelstats->nonempty_pages = 0;

	lazy_space_alloc(vacrelstats, nblocks);

	for (blkno = 0; blkno < nblocks; blkno++)
	{
		Buffer		buf;
		Page		page;
		OffsetNumber offnum,
					maxoff;
		bool		tupgone,
					hastup;
		int			prev_dead_count;
		OffsetNumber frozen[MaxOffsetNumber];
		int			nfrozen;
		Size		freespace;

		vacuum_delay_point();

		/*
		 * If we are close to overrunning the available space for dead-tuple
		 * TIDs, pause and do a cycle of vacuuming before we tackle this page.
		 */
		if ((vacrelstats->max_dead_tuples - vacrelstats->num_dead_tuples) < MaxHeapTuplesPerPage &&
			vacrelstats->num_dead_tuples > 0)
		{
			/* Remove index entries */
			for (i = 0; i < nindexes; i++)
				lazy_vacuum_index(Irel[i],
								  &indstats[i],
								  vacrelstats);
			/* Remove tuples from heap */
			lazy_vacuum_heap(onerel, vacrelstats);
			/* Forget the now-vacuumed tuples, and press on */
			vacrelstats->num_dead_tuples = 0;
			vacrelstats->num_index_scans++;
		}

		buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);

		/* We need buffer cleanup lock so that we can prune HOT chains. */
		LockBufferForCleanup(buf);

		page = BufferGetPage(buf);

		if (PageIsNew(page))
		{
			/*
			 * An all-zeroes page could be left over if a backend extends the
			 * relation but crashes before initializing the page. Reclaim such
			 * pages for use.
			 *
			 * We have to be careful here because we could be looking at a
			 * page that someone has just added to the relation and not yet
			 * been able to initialize (see RelationGetBufferForTuple). To
			 * protect against that, release the buffer lock, grab the
			 * relation extension lock momentarily, and re-lock the buffer. If
			 * the page is still uninitialized by then, it must be left over
			 * from a crashed backend, and we can initialize it.
			 *
			 * We don't really need the relation lock when this is a new or
			 * temp relation, but it's probably not worth the code space to
			 * check that, since this surely isn't a critical path.
			 *
			 * Note: the comparable code in vacuum.c need not worry because
			 * it's got exclusive lock on the whole relation.
			 */
			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
			LockRelationForExtension(onerel, ExclusiveLock);
			UnlockRelationForExtension(onerel, ExclusiveLock);
			LockBufferForCleanup(buf);
			if (PageIsNew(page))
			{
				ereport(WARNING,
						(errmsg("relation \"%s\" page %u is uninitialized --- fixing",
								relname, blkno)));
				PageInit(page, BufferGetPageSize(buf), 0);
				empty_pages++;
			}
			freespace = PageGetHeapFreeSpace(page);
			MarkBufferDirty(buf);
			UnlockReleaseBuffer(buf);

			RecordPageWithFreeSpace(onerel, blkno, freespace);
			continue;
		}

		if (PageIsEmpty(page))
		{
			empty_pages++;
			freespace = PageGetHeapFreeSpace(page);
			UnlockReleaseBuffer(buf);
			RecordPageWithFreeSpace(onerel, blkno, freespace);
			continue;
		}

		/*
		 * Prune all HOT-update chains in this page.
		 *
		 * We count tuples removed by the pruning step as removed by VACUUM.
		 */
		tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin,
										 false, false);

		/*
		 * Now scan the page to collect vacuumable items and check for tuples
		 * requiring freezing.
		 */
		nfrozen = 0;
		hastup = false;
		prev_dead_count = vacrelstats->num_dead_tuples;
		maxoff = PageGetMaxOffsetNumber(page);
		for (offnum = FirstOffsetNumber;
			 offnum <= maxoff;
			 offnum = OffsetNumberNext(offnum))
		{
			ItemId		itemid;

			itemid = PageGetItemId(page, offnum);

			/* Unused items require no processing, but we count 'em */
			if (!ItemIdIsUsed(itemid))
			{
				nunused += 1;
				continue;
			}

			/* Redirect items mustn't be touched */
			if (ItemIdIsRedirected(itemid))
			{
				hastup = true;	/* this page won't be truncatable */
				continue;
			}

			ItemPointerSet(&(tuple.t_self), blkno, offnum);

			/*
			 * DEAD item pointers are to be vacuumed normally; but we don't
			 * count them in tups_vacuumed, else we'd be double-counting (at
			 * least in the common case where heap_page_prune() just freed up
			 * a non-HOT tuple).
			 */
			if (ItemIdIsDead(itemid))
			{
				lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
				continue;
			}

			Assert(ItemIdIsNormal(itemid));

			tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
			tuple.t_len = ItemIdGetLength(itemid);

			tupgone = false;

			switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf))
			{
				case HEAPTUPLE_DEAD:

					/*
					 * Ordinarily, DEAD tuples would have been removed by
					 * heap_page_prune(), but it's possible that the tuple
					 * state changed since heap_page_prune() looked.  In
					 * particular an INSERT_IN_PROGRESS tuple could have
					 * changed to DEAD if the inserter aborted.  So this
					 * cannot be considered an error condition.
					 *
					 * If the tuple is HOT-updated then it must only be
					 * removed by a prune operation; so we keep it just as if
					 * it were RECENTLY_DEAD.  Also, if it's a heap-only
					 * tuple, we choose to keep it, because it'll be a lot
					 * cheaper to get rid of it in the next pruning pass than
					 * to treat it like an indexed tuple.
					 */
					if (HeapTupleIsHotUpdated(&tuple) ||
						HeapTupleIsHeapOnly(&tuple))
						nkeep += 1;
					else
						tupgone = true; /* we can delete the tuple */
					break;
				case HEAPTUPLE_LIVE:
					/* Tuple is good --- but let's do some validity checks */
					if (onerel->rd_rel->relhasoids &&
						!OidIsValid(HeapTupleGetOid(&tuple)))
						elog(WARNING, "relation \"%s\" TID %u/%u: OID is invalid",
							 relname, blkno, offnum);
					break;
				case HEAPTUPLE_RECENTLY_DEAD:

					/*
					 * If tuple is recently deleted then we must not remove it
					 * from relation.
					 */
					nkeep += 1;
					break;
				case HEAPTUPLE_INSERT_IN_PROGRESS:
					/* This is an expected case during concurrent vacuum */
					break;
				case HEAPTUPLE_DELETE_IN_PROGRESS:
					/* This is an expected case during concurrent vacuum */
					break;
				default:
					elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
					break;
			}

			if (tupgone)
			{
				lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
				tups_vacuumed += 1;
			}
			else
			{
				num_tuples += 1;
				hastup = true;

				/*
				 * Each non-removable tuple must be checked to see if it needs
				 * freezing.  Note we already have exclusive buffer lock.
				 */
				if (heap_freeze_tuple(tuple.t_data, FreezeLimit,
									  InvalidBuffer))
					frozen[nfrozen++] = offnum;
			}
		}						/* scan along page */

		/*
		 * If we froze any tuples, mark the buffer dirty, and write a WAL
		 * record recording the changes.  We must log the changes to be
		 * crash-safe against future truncation of CLOG.
		 */
		if (nfrozen > 0)
		{
			MarkBufferDirty(buf);
			/* no XLOG for temp tables, though */
			if (!onerel->rd_istemp)
			{
				XLogRecPtr	recptr;

				recptr = log_heap_freeze(onerel, buf, FreezeLimit,
										 frozen, nfrozen);
				PageSetLSN(page, recptr);
				PageSetTLI(page, ThisTimeLineID);
			}
		}

		/*
		 * If there are no indexes then we can vacuum the page right now
		 * instead of doing a second scan.
		 */
		if (nindexes == 0 &&
			vacrelstats->num_dead_tuples > 0)
		{
			/* Remove tuples from heap */
			lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats);
			/* Forget the now-vacuumed tuples, and press on */
			vacrelstats->num_dead_tuples = 0;
			vacuumed_pages++;
		}

		freespace = PageGetHeapFreeSpace(page);

		/* Remember the location of the last page with nonremovable tuples */
		if (hastup)
			vacrelstats->nonempty_pages = blkno + 1;

		UnlockReleaseBuffer(buf);

		/*
		 * If we remembered any tuples for deletion, then the page will be
		 * visited again by lazy_vacuum_heap, which will compute and record
		 * its post-compaction free space.  If not, then we're done with this
		 * page, so remember its free space as-is.  (This path will always be
		 * taken if there are no indexes.)
		 */
		if (vacrelstats->num_dead_tuples == prev_dead_count)
			RecordPageWithFreeSpace(onerel, blkno, freespace);
	}

	/* save stats for use later */
	vacrelstats->rel_tuples = num_tuples;
	vacrelstats->tuples_deleted = tups_vacuumed;

	/* If any tuples need to be deleted, perform final vacuum cycle */
	/* XXX put a threshold on min number of tuples here? */
	if (vacrelstats->num_dead_tuples > 0)
	{
		/* Remove index entries */
		for (i = 0; i < nindexes; i++)
			lazy_vacuum_index(Irel[i],
							  &indstats[i],
							  vacrelstats);
		/* Remove tuples from heap */
		lazy_vacuum_heap(onerel, vacrelstats);
		vacrelstats->num_index_scans++;
	}

	/* Do post-vacuum cleanup and statistics update for each index */
	for (i = 0; i < nindexes; i++)
		lazy_cleanup_index(Irel[i], indstats[i], vacrelstats);

	/* If no indexes, make log report that lazy_vacuum_heap would've made */
	if (vacuumed_pages)
		ereport(elevel,
				(errmsg("\"%s\": removed %.0f row versions in %u pages",
						RelationGetRelationName(onerel),
						tups_vacuumed, vacuumed_pages)));

	ereport(elevel,
			(errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
					RelationGetRelationName(onerel),
					tups_vacuumed, num_tuples, nblocks),
			 errdetail("%.0f dead row versions cannot be removed yet.\n"
					   "There were %.0f unused item pointers.\n"
					   "%u pages are entirely empty.\n"
					   "%s.",
					   nkeep,
					   nunused,
					   empty_pages,
					   pg_rusage_show(&ru0))));
}

/*
 *	lazy_vacuum_heap() -- second pass over the heap
 *
 *		This routine marks dead tuples as unused and compacts out free
 *		space on their pages.  Pages not having dead tuples recorded from
 *		lazy_scan_heap are not visited at all.
 *
 * Note: the reason for doing this as a second pass is we cannot remove
 * the tuples until we've removed their index entries, and we want to
 * process index entry removal in batches as large as possible.
 */
static void
lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
{
	int			tupindex;
	int			npages;
	PGRUsage	ru0;

	pg_rusage_init(&ru0);
	npages = 0;

	tupindex = 0;
	while (tupindex < vacrelstats->num_dead_tuples)
	{
		BlockNumber tblk;
		Buffer		buf;
		Page		page;
		Size		freespace;

		vacuum_delay_point();

		tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
		buf = ReadBufferWithStrategy(onerel, tblk, vac_strategy);
		LockBufferForCleanup(buf);
		tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats);

		/* Now that we've compacted the page, record its available space */
		page = BufferGetPage(buf);
		freespace = PageGetHeapFreeSpace(page);

		UnlockReleaseBuffer(buf);
		RecordPageWithFreeSpace(onerel, tblk, freespace);
		npages++;
	}

	ereport(elevel,
			(errmsg("\"%s\": removed %d row versions in %d pages",
					RelationGetRelationName(onerel),
					tupindex, npages),
			 errdetail("%s.",
					   pg_rusage_show(&ru0))));
}

/*
 *	lazy_vacuum_page() -- free dead tuples on a page
 *					 and repair its fragmentation.
 *
 * Caller must hold pin and buffer cleanup lock on the buffer.
 *
 * tupindex is the index in vacrelstats->dead_tuples of the first dead
 * tuple for this page.  We assume the rest follow sequentially.
 * The return value is the first tupindex after the tuples of this page.
 */
static int
lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
				 int tupindex, LVRelStats *vacrelstats)
{
	Page		page = BufferGetPage(buffer);
	OffsetNumber unused[MaxOffsetNumber];
	int			uncnt = 0;

	START_CRIT_SECTION();

	for (; tupindex < vacrelstats->num_dead_tuples; tupindex++)
	{
		BlockNumber tblk;
		OffsetNumber toff;
		ItemId		itemid;

		tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
		if (tblk != blkno)
			break;				/* past end of tuples for this block */
		toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]);
		itemid = PageGetItemId(page, toff);
		ItemIdSetUnused(itemid);
		unused[uncnt++] = toff;
	}

	PageRepairFragmentation(page);

	MarkBufferDirty(buffer);

	/* XLOG stuff */
	if (!onerel->rd_istemp)
	{
		XLogRecPtr	recptr;

		recptr = log_heap_clean(onerel, buffer,
								NULL, 0, NULL, 0,
								unused, uncnt,
								false);
		PageSetLSN(page, recptr);
		PageSetTLI(page, ThisTimeLineID);
	}

	END_CRIT_SECTION();

	return tupindex;
}

/*
 *	lazy_vacuum_index() -- vacuum one index relation.
 *
 *		Delete all the index entries pointing to tuples listed in
 *		vacrelstats->dead_tuples, and update running statistics.
 */
static void
lazy_vacuum_index(Relation indrel,
				  IndexBulkDeleteResult **stats,
				  LVRelStats *vacrelstats)
{
	IndexVacuumInfo ivinfo;
	PGRUsage	ru0;

	pg_rusage_init(&ru0);

	ivinfo.index = indrel;
	ivinfo.vacuum_full = false;
	ivinfo.message_level = elevel;
	/* We don't yet know rel_tuples, so pass -1 */
	ivinfo.num_heap_tuples = -1;
	ivinfo.strategy = vac_strategy;

	/* Do bulk deletion */
	*stats = index_bulk_delete(&ivinfo, *stats,
							   lazy_tid_reaped, (void *) vacrelstats);

	ereport(elevel,
			(errmsg("scanned index \"%s\" to remove %d row versions",
					RelationGetRelationName(indrel),
					vacrelstats->num_dead_tuples),
			 errdetail("%s.", pg_rusage_show(&ru0))));
}

/*
 *	lazy_cleanup_index() -- do post-vacuum cleanup for one index relation.
 */
static void
lazy_cleanup_index(Relation indrel,
				   IndexBulkDeleteResult *stats,
				   LVRelStats *vacrelstats)
{
	IndexVacuumInfo ivinfo;
	PGRUsage	ru0;

	pg_rusage_init(&ru0);

	ivinfo.index = indrel;
	ivinfo.vacuum_full = false;
	ivinfo.message_level = elevel;
	ivinfo.num_heap_tuples = vacrelstats->rel_tuples;
	ivinfo.strategy = vac_strategy;

	stats = index_vacuum_cleanup(&ivinfo, stats);

	if (!stats)
		return;

	/* now update statistics in pg_class */
	vac_update_relstats(RelationGetRelid(indrel),
						stats->num_pages,
						stats->num_index_tuples,
						false, InvalidTransactionId);

	ereport(elevel,
			(errmsg("index \"%s\" now contains %.0f row versions in %u pages",
					RelationGetRelationName(indrel),
					stats->num_index_tuples,
					stats->num_pages),
			 errdetail("%.0f index row versions were removed.\n"
					   "%u index pages have been deleted, %u are currently reusable.\n"
					   "%s.",
					   stats->tuples_removed,
					   stats->pages_deleted, stats->pages_free,
					   pg_rusage_show(&ru0))));

	pfree(stats);
}

/*
 * lazy_truncate_heap - try to truncate off any empty pages at the end
 */
static void
lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
{
	BlockNumber old_rel_pages = vacrelstats->rel_pages;
	BlockNumber new_rel_pages;
	PGRUsage	ru0;

	pg_rusage_init(&ru0);

	/*
	 * We need full exclusive lock on the relation in order to do truncation.
	 * If we can't get it, give up rather than waiting --- we don't want to
	 * block other backends, and we don't want to deadlock (which is quite
	 * possible considering we already hold a lower-grade lock).
	 */
	if (!ConditionalLockRelation(onerel, AccessExclusiveLock))
		return;

	/*
	 * Now that we have exclusive lock, look to see if the rel has grown
	 * whilst we were vacuuming with non-exclusive lock.  If so, give up; the
	 * newly added pages presumably contain non-deletable tuples.
	 */
	new_rel_pages = RelationGetNumberOfBlocks(onerel);
	if (new_rel_pages != old_rel_pages)
	{
		/* might as well use the latest news when we update pg_class stats */
		vacrelstats->rel_pages = new_rel_pages;
		UnlockRelation(onerel, AccessExclusiveLock);
		return;
	}

	/*
	 * Scan backwards from the end to verify that the end pages actually
	 * contain no tuples.  This is *necessary*, not optional, because other
	 * backends could have added tuples to these pages whilst we were
	 * vacuuming.
	 */
	new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);

	if (new_rel_pages >= old_rel_pages)
	{
		/* can't do anything after all */
		UnlockRelation(onerel, AccessExclusiveLock);
		return;
	}

	/*
	 * Okay to truncate.
	 */
	FreeSpaceMapTruncateRel(onerel, new_rel_pages);
	RelationTruncate(onerel, new_rel_pages);

	/*
	 * Note: once we have truncated, we *must* keep the exclusive lock until
	 * commit.  The sinval message that will be sent at commit (as a result of
	 * vac_update_relstats()) must be received by other backends, to cause
	 * them to reset their rd_targblock values, before they can safely access
	 * the table again.
	 */

	/* update statistics */
	vacrelstats->rel_pages = new_rel_pages;
	vacrelstats->pages_removed = old_rel_pages - new_rel_pages;

	ereport(elevel,
			(errmsg("\"%s\": truncated %u to %u pages",
					RelationGetRelationName(onerel),
					old_rel_pages, new_rel_pages),
			 errdetail("%s.",
					   pg_rusage_show(&ru0))));
}

/*
 * Rescan end pages to verify that they are (still) empty of tuples.
 *
 * Returns number of nondeletable pages (last nonempty page + 1).
 */
static BlockNumber
count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
{
	BlockNumber blkno;

	/* Strange coding of loop control is needed because blkno is unsigned */
	blkno = vacrelstats->rel_pages;
	while (blkno > vacrelstats->nonempty_pages)
	{
		Buffer		buf;
		Page		page;
		OffsetNumber offnum,
					maxoff;
		bool		hastup;

		/*
		 * We don't insert a vacuum delay point here, because we have an
		 * exclusive lock on the table which we want to hold for as short a
		 * time as possible.  We still need to check for interrupts however.
		 */
		CHECK_FOR_INTERRUPTS();

		blkno--;

		buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);

		/* In this phase we only need shared access to the buffer */
		LockBuffer(buf, BUFFER_LOCK_SHARE);

		page = BufferGetPage(buf);

		if (PageIsNew(page) || PageIsEmpty(page))
		{
			/* PageIsNew probably shouldn't happen... */
			UnlockReleaseBuffer(buf);
			continue;
		}

		hastup = false;
		maxoff = PageGetMaxOffsetNumber(page);
		for (offnum = FirstOffsetNumber;
			 offnum <= maxoff;
			 offnum = OffsetNumberNext(offnum))
		{
			ItemId		itemid;

			itemid = PageGetItemId(page, offnum);

			/*
			 * Note: any non-unused item should be taken as a reason to keep
			 * this page.  We formerly thought that DEAD tuples could be
			 * thrown away, but that's not so, because we'd not have cleaned
			 * out their index entries.
			 */
			if (ItemIdIsUsed(itemid))
			{
				hastup = true;
				break;			/* can stop scanning */
			}
		}						/* scan along page */

		UnlockReleaseBuffer(buf);

		/* Done scanning if we found a tuple here */
		if (hastup)
			return blkno + 1;
	}

	/*
	 * If we fall out of the loop, all the previously-thought-to-be-empty
	 * pages still are; we need not bother to look at the last known-nonempty
	 * page.
	 */
	return vacrelstats->nonempty_pages;
}

/*
 * lazy_space_alloc - space allocation decisions for lazy vacuum
 *
 * See the comments at the head of this file for rationale.
 */
static void
lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
{
	long		maxtuples;

	if (vacrelstats->hasindex)
	{
		maxtuples = (maintenance_work_mem * 1024L) / sizeof(ItemPointerData);
		maxtuples = Min(maxtuples, INT_MAX);
		maxtuples = Min(maxtuples, MaxAllocSize / sizeof(ItemPointerData));

		/* curious coding here to ensure the multiplication can't overflow */
		if ((BlockNumber) (maxtuples / LAZY_ALLOC_TUPLES) > relblocks)
			maxtuples = relblocks * LAZY_ALLOC_TUPLES;

		/* stay sane if small maintenance_work_mem */
		maxtuples = Max(maxtuples, MaxHeapTuplesPerPage);
	}
	else
	{
		maxtuples = MaxHeapTuplesPerPage;
	}

	vacrelstats->num_dead_tuples = 0;
	vacrelstats->max_dead_tuples = (int) maxtuples;
	vacrelstats->dead_tuples = (ItemPointer)
		palloc(maxtuples * sizeof(ItemPointerData));
}

/*
 * lazy_record_dead_tuple - remember one deletable tuple
 */
static void
lazy_record_dead_tuple(LVRelStats *vacrelstats,
					   ItemPointer itemptr)
{
	/*
	 * The array shouldn't overflow under normal behavior, but perhaps it
	 * could if we are given a really small maintenance_work_mem. In that
	 * case, just forget the last few tuples (we'll get 'em next time).
	 */
	if (vacrelstats->num_dead_tuples < vacrelstats->max_dead_tuples)
	{
		vacrelstats->dead_tuples[vacrelstats->num_dead_tuples] = *itemptr;
		vacrelstats->num_dead_tuples++;
	}
}

/*
 *	lazy_tid_reaped() -- is a particular tid deletable?
 *
 *		This has the right signature to be an IndexBulkDeleteCallback.
 *
 *		Assumes dead_tuples array is in sorted order.
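 *
 *		(That assumption holds because lazy_scan_heap records dead tuples in
 *		physical scan order: ascending block number, then ascending item
 *		offset within each page, so no explicit sort is needed before the
 *		bsearch below.)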
 */
static bool
lazy_tid_reaped(ItemPointer itemptr, void *state)
{
	LVRelStats *vacrelstats = (LVRelStats *) state;
	ItemPointer res;

	res = (ItemPointer) bsearch((void *) itemptr,
								(void *) vacrelstats->dead_tuples,
								vacrelstats->num_dead_tuples,
								sizeof(ItemPointerData),
								vac_cmp_itemptr);

	return (res != NULL);
}

/*
 * Comparator routines for use with qsort() and bsearch().
 */
static int
vac_cmp_itemptr(const void *left, const void *right)
{
	BlockNumber lblk,
				rblk;
	OffsetNumber loff,
				roff;

	lblk = ItemPointerGetBlockNumber((ItemPointer) left);
	rblk = ItemPointerGetBlockNumber((ItemPointer) right);

	if (lblk < rblk)
		return -1;
	if (lblk > rblk)
		return 1;

	loff = ItemPointerGetOffsetNumber((ItemPointer) left);
	roff = ItemPointerGetOffsetNumber((ItemPointer) right);

	if (loff < roff)
		return -1;
	if (loff > roff)
		return 1;

	return 0;
}