/*-------------------------------------------------------------------------
 *
 * vacuumlazy.c
 *    Concurrent ("lazy") vacuuming.
 *
 *
 * The major space usage for LAZY VACUUM is storage for the array of dead
 * tuple TIDs, with the next biggest need being storage for per-disk-page
 * free space info.  We want to ensure we can vacuum even the very largest
 * relations with finite memory space usage.  To do that, we set upper bounds
 * on the number of tuples and pages we will keep track of at once.
 *
 * We are willing to use at most maintenance_work_mem memory space to keep
 * track of dead tuples.  We initially allocate an array of TIDs of that size,
 * with an upper limit that depends on table size (this limit ensures we don't
 * allocate a huge area uselessly for vacuuming small tables).  If the array
 * threatens to overflow, we suspend the heap scan phase and perform a pass of
 * index cleanup and page compaction, then resume the heap scan with an empty
 * TID array.
 *
 * We can limit the storage for page free space to MaxFSMPages entries,
 * since that's the most the free space map will be willing to remember
 * anyway.  If the relation has fewer than that many pages with free space,
 * life is easy: just build an array of per-page info.  If it has more,
 * we store the free space info as a heap ordered by amount of free space,
 * so that we can discard the pages with least free space to ensure we never
 * have more than MaxFSMPages entries in all.  The surviving page entries
 * are passed to the free space map at conclusion of the scan.
 *
 * If we're processing a table with no indexes, we can just vacuum each page
 * as we go; there's no need to save up multiple tuples to minimize the number
 * of index scans performed.  So we don't use maintenance_work_mem memory for
 * the TID array, just enough to hold as many heap tuples as fit on one page.
 *
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    $PostgreSQL$
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include <math.h>

#include "access/genam.h"
#include "access/heapam.h"
#include "access/transam.h"
#include "commands/dbcommands.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/lmgr.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/pg_rusage.h"
#include "utils/tqual.h"

/*
 * Space/time tradeoff parameters: do these need to be user-tunable?
 *
 * To consider truncating the relation, we want there to be at least
 * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
 * is less) potentially-freeable pages.
 */
#define REL_TRUNCATE_MINIMUM    1000
#define REL_TRUNCATE_FRACTION   16

/*
 * Guesstimation of number of dead tuples per page.  This is used to
 * provide an upper limit to memory allocated when vacuuming small
 * tables.
 */
#define LAZY_ALLOC_TUPLES       MaxHeapTuplesPerPage

typedef struct LVRelStats
{
    /* hasindex = true means two-pass strategy; false means one-pass */
    bool        hasindex;
    /* Overall statistics about rel */
    BlockNumber rel_pages;
    double      rel_tuples;
    BlockNumber pages_removed;
    double      tuples_deleted;
    BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
    Size        threshold;      /* minimum interesting free space */
    /* List of TIDs of tuples we intend to delete */
    /* NB: this list is ordered by TID address */
    int         num_dead_tuples;    /* current # of entries */
    int         max_dead_tuples;    /* # slots allocated in array */
    ItemPointer dead_tuples;    /* array of ItemPointerData */
    /* Array or heap of per-page info about free space */
    /* We use a simple array until it fills up, then convert to heap */
    bool        fs_is_heap;     /* are we using heap organization? */
    int         num_free_pages; /* current # of entries */
    int         max_free_pages; /* # slots allocated in array */
    FSMPageData *free_pages;    /* array or heap of blkno/avail */
    BlockNumber tot_free_pages; /* total pages with >= threshold space */
    int         num_index_scans;
} LVRelStats;

/* A few variables that don't seem worth passing around as parameters */
static int  elevel = -1;

static TransactionId OldestXmin;
static TransactionId FreezeLimit;
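
/* Buffer access strategy supplied by the caller; used for every heap and index read below */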
static BufferAccessStrategy vac_strategy;

/* non-export function prototypes */
static void lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
               Relation *Irel, int nindexes);
static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
static void lazy_vacuum_index(Relation indrel,
               IndexBulkDeleteResult **stats,
               LVRelStats *vacrelstats);
static void lazy_cleanup_index(Relation indrel,
               IndexBulkDeleteResult *stats,
               LVRelStats *vacrelstats);
static int  lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
               int tupindex, LVRelStats *vacrelstats);
static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
static BlockNumber count_nondeletable_pages(Relation onerel,
               LVRelStats *vacrelstats);
static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks);
static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
               ItemPointer itemptr);
static void lazy_record_free_space(LVRelStats *vacrelstats,
               BlockNumber page, Size avail);
static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
static void lazy_update_fsm(Relation onerel, LVRelStats *vacrelstats);
static int  vac_cmp_itemptr(const void *left, const void *right);
static int  vac_cmp_page_spaces(const void *left, const void *right);

/*
 *  lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation
 *
 *      This routine vacuums a single heap, cleans out its indexes, and
 *      updates its relpages and reltuples statistics.
 *
 *      At entry, we have already established a transaction and opened
 *      and locked the relation.
 */
void
lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
                BufferAccessStrategy bstrategy)
{
    LVRelStats *vacrelstats;
    Relation   *Irel;
    int         nindexes;
    BlockNumber possibly_freeable;
    PGRUsage    ru0;
    TimestampTz starttime = 0;

    pg_rusage_init(&ru0);

    /* measure elapsed time iff autovacuum logging requires it */
    if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration > 0)
        starttime = GetCurrentTimestamp();

    if (vacstmt->verbose)
        elevel = INFO;
    else
        elevel = DEBUG2;

    vac_strategy = bstrategy;

    vacuum_set_xid_limits(vacstmt->freeze_min_age, onerel->rd_rel->relisshared,
                          &OldestXmin, &FreezeLimit);

    vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));

    /* Set threshold for interesting free space = average request size */
    /* XXX should we scale it up or down?  Adjust vacuum.c too, if so */
    vacrelstats->threshold = GetAvgFSMRequestSize(&onerel->rd_node);

    vacrelstats->num_index_scans = 0;

    /* Open all indexes of the relation */
    vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
    vacrelstats->hasindex = (nindexes > 0);

    /* Do the vacuuming */
    lazy_scan_heap(onerel, vacrelstats, Irel, nindexes);

    /* Done with indexes */
    vac_close_indexes(nindexes, Irel, NoLock);

    /*
     * Optionally truncate the relation.
     *
     * Don't even think about it unless we have a shot at releasing a goodly
     * number of pages.  Otherwise, the time taken isn't worth it.
     */
    possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
    if (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
        possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION)
        lazy_truncate_heap(onerel, vacrelstats);

    /* Update shared free space map with final free space info */
    lazy_update_fsm(onerel, vacrelstats);

    if (vacrelstats->tot_free_pages > MaxFSMPages)
        ereport(WARNING,
                (errmsg("relation \"%s.%s\" contains more than \"max_fsm_pages\" pages with useful free space",
                        get_namespace_name(RelationGetNamespace(onerel)),
                        RelationGetRelationName(onerel)),
                 /* Only suggest VACUUM FULL if > 20% free */
                 (vacrelstats->tot_free_pages > vacrelstats->rel_pages * 0.20) ?
                 errhint("Consider using VACUUM FULL on this relation or increasing the configuration parameter \"max_fsm_pages\".") :
                 errhint("Consider increasing the configuration parameter \"max_fsm_pages\".")));

    /* Update statistics in pg_class */
    vac_update_relstats(RelationGetRelid(onerel),
                        vacrelstats->rel_pages,
                        vacrelstats->rel_tuples,
                        vacrelstats->hasindex,
                        FreezeLimit);

    /* report results to the stats collector, too */
    pgstat_report_vacuum(RelationGetRelid(onerel), onerel->rd_rel->relisshared,
                         vacstmt->analyze, vacrelstats->rel_tuples);

    /* and log the action if appropriate */
    if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
    {
        if (Log_autovacuum_min_duration == 0 ||
            TimestampDifferenceExceeds(starttime, GetCurrentTimestamp(),
                                       Log_autovacuum_min_duration))
            ereport(LOG,
                    (errmsg("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n"
                            "pages: %d removed, %d remain\n"
                            "tuples: %.0f removed, %.0f remain\n"
                            "system usage: %s",
                            get_database_name(MyDatabaseId),
                            get_namespace_name(RelationGetNamespace(onerel)),
                            RelationGetRelationName(onerel),
                            vacrelstats->num_index_scans,
                            vacrelstats->pages_removed, vacrelstats->rel_pages,
                            vacrelstats->tuples_deleted, vacrelstats->rel_tuples,
                            pg_rusage_show(&ru0))));
    }
}

/*
 *  lazy_scan_heap() -- scan an open heap relation
 *
 *      This routine sets commit status bits, builds lists of dead tuples
 *      and pages with free space, and calculates statistics on the number
 *      of live tuples in the heap.  When done, or when we run low on space
 *      for dead-tuple TIDs, invoke vacuuming of indexes and heap.
 *
 *      If there are no indexes then we just vacuum each dirty page as we
 *      process it, since there's no point in gathering many tuples.
 */
static void
lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
               Relation *Irel, int nindexes)
{
    BlockNumber nblocks,
                blkno;
    HeapTupleData tuple;
    char       *relname;
    BlockNumber empty_pages,
                vacuumed_pages;
    double      num_tuples,
                tups_vacuumed,
                nkeep,
                nunused;
    IndexBulkDeleteResult **indstats;
    int         i;
    PGRUsage    ru0;

    pg_rusage_init(&ru0);

    relname = RelationGetRelationName(onerel);
    ereport(elevel,
            (errmsg("vacuuming \"%s.%s\"",
                    get_namespace_name(RelationGetNamespace(onerel)),
                    relname)));

    empty_pages = vacuumed_pages = 0;
    num_tuples = tups_vacuumed = nkeep = nunused = 0;

    indstats = (IndexBulkDeleteResult **)
        palloc0(nindexes * sizeof(IndexBulkDeleteResult *));

    nblocks = RelationGetNumberOfBlocks(onerel);
    vacrelstats->rel_pages = nblocks;
    vacrelstats->nonempty_pages = 0;

    lazy_space_alloc(vacrelstats, nblocks);

    for (blkno = 0; blkno < nblocks; blkno++)
    {
        Buffer      buf;
        Page        page;
        OffsetNumber offnum,
                    maxoff;
        bool        tupgone,
                    hastup;
        int         prev_dead_count;
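        /* frozen[] collects the offsets of tuples frozen on this page, so they can be WAL-logged in one record */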
        OffsetNumber frozen[MaxOffsetNumber];
        int         nfrozen;

        vacuum_delay_point();

        /*
         * If we are close to overrunning the available space for dead-tuple
         * TIDs, pause and do a cycle of vacuuming before we tackle this page.
         */
        if ((vacrelstats->max_dead_tuples - vacrelstats->num_dead_tuples) < MaxHeapTuplesPerPage &&
            vacrelstats->num_dead_tuples > 0)
        {
            /* Remove index entries */
            for (i = 0; i < nindexes; i++)
                lazy_vacuum_index(Irel[i],
                                  &indstats[i],
                                  vacrelstats);
            /* Remove tuples from heap */
            lazy_vacuum_heap(onerel, vacrelstats);
            /* Forget the now-vacuumed tuples, and press on */
            vacrelstats->num_dead_tuples = 0;
            vacrelstats->num_index_scans++;
        }

        buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);

        /* We need buffer cleanup lock so that we can prune HOT chains. */
        LockBufferForCleanup(buf);

        page = BufferGetPage(buf);

        if (PageIsNew(page))
        {
            /*
             * An all-zeroes page could be left over if a backend extends the
             * relation but crashes before initializing the page. Reclaim such
             * pages for use.
             *
             * We have to be careful here because we could be looking at a
             * page that someone has just added to the relation and not yet
             * been able to initialize (see RelationGetBufferForTuple). To
             * protect against that, release the buffer lock, grab the
             * relation extension lock momentarily, and re-lock the buffer. If
             * the page is still uninitialized by then, it must be left over
             * from a crashed backend, and we can initialize it.
             *
             * We don't really need the relation lock when this is a new or
             * temp relation, but it's probably not worth the code space to
             * check that, since this surely isn't a critical path.
             *
             * Note: the comparable code in vacuum.c need not worry because
             * it's got exclusive lock on the whole relation.
             */
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
            LockRelationForExtension(onerel, ExclusiveLock);
            UnlockRelationForExtension(onerel, ExclusiveLock);
            LockBufferForCleanup(buf);
            if (PageIsNew(page))
            {
                ereport(WARNING,
                        (errmsg("relation \"%s\" page %u is uninitialized --- fixing",
                                relname, blkno)));
                PageInit(page, BufferGetPageSize(buf), 0);
                empty_pages++;
                lazy_record_free_space(vacrelstats, blkno,
                                       PageGetHeapFreeSpace(page));
            }
            MarkBufferDirty(buf);
            UnlockReleaseBuffer(buf);
            continue;
        }

        if (PageIsEmpty(page))
        {
            empty_pages++;
            lazy_record_free_space(vacrelstats, blkno,
                                   PageGetHeapFreeSpace(page));
            UnlockReleaseBuffer(buf);
            continue;
        }

        /*
         * Prune all HOT-update chains in this page.
         *
         * We count tuples removed by the pruning step as removed by VACUUM.
         */
        tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin,
                                         false, false);

        /*
         * Now scan the page to collect vacuumable items and check for tuples
         * requiring freezing.
         */
        nfrozen = 0;
        hastup = false;
        prev_dead_count = vacrelstats->num_dead_tuples;
        maxoff = PageGetMaxOffsetNumber(page);
        for (offnum = FirstOffsetNumber;
             offnum <= maxoff;
             offnum = OffsetNumberNext(offnum))
        {
            ItemId      itemid;

            itemid = PageGetItemId(page, offnum);

            /* Unused items require no processing, but we count 'em */
            if (!ItemIdIsUsed(itemid))
            {
                nunused += 1;
                continue;
            }

            /* Redirect items mustn't be touched */
            if (ItemIdIsRedirected(itemid))
            {
                hastup = true;  /* this page won't be truncatable */
                continue;
            }

            ItemPointerSet(&(tuple.t_self), blkno, offnum);

            /*
             * DEAD item pointers are to be vacuumed normally; but we don't
             * count them in tups_vacuumed, else we'd be double-counting (at
             * least in the common case where heap_page_prune() just freed up
             * a non-HOT tuple).
             */
            if (ItemIdIsDead(itemid))
            {
                lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
                continue;
            }

            Assert(ItemIdIsNormal(itemid));

            tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
            tuple.t_len = ItemIdGetLength(itemid);

            tupgone = false;

            switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf))
            {
                case HEAPTUPLE_DEAD:

                    /*
                     * Ordinarily, DEAD tuples would have been removed by
                     * heap_page_prune(), but it's possible that the tuple
                     * state changed since heap_page_prune() looked.  In
                     * particular an INSERT_IN_PROGRESS tuple could have
                     * changed to DEAD if the inserter aborted.  So this
                     * cannot be considered an error condition.
                     *
                     * If the tuple is HOT-updated then it must only be
                     * removed by a prune operation; so we keep it just as if
                     * it were RECENTLY_DEAD.  Also, if it's a heap-only
                     * tuple, we choose to keep it, because it'll be a lot
                     * cheaper to get rid of it in the next pruning pass than
                     * to treat it like an indexed tuple.
                     */
                    if (HeapTupleIsHotUpdated(&tuple) ||
                        HeapTupleIsHeapOnly(&tuple))
                        nkeep += 1;
                    else
                        tupgone = true; /* we can delete the tuple */
                    break;
                case HEAPTUPLE_LIVE:
                    /* Tuple is good --- but let's do some validity checks */
                    if (onerel->rd_rel->relhasoids &&
                        !OidIsValid(HeapTupleGetOid(&tuple)))
                        elog(WARNING, "relation \"%s\" TID %u/%u: OID is invalid",
                             relname, blkno, offnum);
                    break;
                case HEAPTUPLE_RECENTLY_DEAD:

                    /*
                     * If tuple is recently deleted then we must not remove it
                     * from relation.
                     */
                    nkeep += 1;
                    break;
                case HEAPTUPLE_INSERT_IN_PROGRESS:
                    /* This is an expected case during concurrent vacuum */
                    break;
                case HEAPTUPLE_DELETE_IN_PROGRESS:
                    /* This is an expected case during concurrent vacuum */
                    break;
                default:
                    elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
                    break;
            }

            if (tupgone)
            {
                lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
                tups_vacuumed += 1;
            }
            else
            {
                num_tuples += 1;
                hastup = true;

                /*
                 * Each non-removable tuple must be checked to see if it needs
                 * freezing.  Note we already have exclusive buffer lock.
                 */
                if (heap_freeze_tuple(tuple.t_data, FreezeLimit,
                                      InvalidBuffer))
                    frozen[nfrozen++] = offnum;
            }
        }                       /* scan along page */

        /*
         * If we froze any tuples, mark the buffer dirty, and write a WAL
         * record recording the changes.  We must log the changes to be
         * crash-safe against future truncation of CLOG.
         */
        if (nfrozen > 0)
        {
            MarkBufferDirty(buf);
            /* no XLOG for temp tables, though */
            if (!onerel->rd_istemp)
            {
                XLogRecPtr  recptr;

                recptr = log_heap_freeze(onerel, buf, FreezeLimit,
                                         frozen, nfrozen);
                PageSetLSN(page, recptr);
                PageSetTLI(page, ThisTimeLineID);
            }
        }

        /*
         * If there are no indexes then we can vacuum the page right now
         * instead of doing a second scan.
         */
        if (nindexes == 0 &&
            vacrelstats->num_dead_tuples > 0)
        {
            /* Remove tuples from heap */
            lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats);
            /* Forget the now-vacuumed tuples, and press on */
            vacrelstats->num_dead_tuples = 0;
            vacuumed_pages++;
        }

        /*
         * If we remembered any tuples for deletion, then the page will be
         * visited again by lazy_vacuum_heap, which will compute and record
         * its post-compaction free space.  If not, then we're done with this
         * page, so remember its free space as-is.  (This path will always be
         * taken if there are no indexes.)
         */
        if (vacrelstats->num_dead_tuples == prev_dead_count)
        {
            lazy_record_free_space(vacrelstats, blkno,
                                   PageGetHeapFreeSpace(page));
        }

        /* Remember the location of the last page with nonremovable tuples */
        if (hastup)
            vacrelstats->nonempty_pages = blkno + 1;

        UnlockReleaseBuffer(buf);
    }

    /* save stats for use later */
    vacrelstats->rel_tuples = num_tuples;
    vacrelstats->tuples_deleted = tups_vacuumed;

    /* If any tuples need to be deleted, perform final vacuum cycle */
    /* XXX put a threshold on min number of tuples here? */
    if (vacrelstats->num_dead_tuples > 0)
    {
        /* Remove index entries */
        for (i = 0; i < nindexes; i++)
            lazy_vacuum_index(Irel[i],
                              &indstats[i],
                              vacrelstats);
        /* Remove tuples from heap */
        lazy_vacuum_heap(onerel, vacrelstats);
        vacrelstats->num_index_scans++;
    }

    /* Do post-vacuum cleanup and statistics update for each index */
    for (i = 0; i < nindexes; i++)
        lazy_cleanup_index(Irel[i], indstats[i], vacrelstats);

    /* If no indexes, make log report that lazy_vacuum_heap would've made */
    if (vacuumed_pages)
        ereport(elevel,
                (errmsg("\"%s\": removed %.0f row versions in %u pages",
                        RelationGetRelationName(onerel),
                        tups_vacuumed, vacuumed_pages)));

    ereport(elevel,
            (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
                    RelationGetRelationName(onerel),
                    tups_vacuumed, num_tuples, nblocks),
             errdetail("%.0f dead row versions cannot be removed yet.\n"
                       "There were %.0f unused item pointers.\n"
                       "%u pages contain useful free space.\n"
                       "%u pages are entirely empty.\n"
                       "%s.",
                       nkeep,
                       nunused,
                       vacrelstats->tot_free_pages,
                       empty_pages,
                       pg_rusage_show(&ru0))));
}

/*
 *  lazy_vacuum_heap() -- second pass over the heap
 *
 *      This routine marks dead tuples as unused and compacts out free
 *      space on their pages.  Pages not having dead tuples recorded from
 *      lazy_scan_heap are not visited at all.
 *
 * Note: the reason for doing this as a second pass is we cannot remove
 * the tuples until we've removed their index entries, and we want to
 * process index entry removal in batches as large as possible.
 */
static void
lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
{
    int         tupindex;
    int         npages;
    PGRUsage    ru0;

    pg_rusage_init(&ru0);
    npages = 0;

    tupindex = 0;
    while (tupindex < vacrelstats->num_dead_tuples)
    {
        BlockNumber tblk;
        Buffer      buf;
        Page        page;

        vacuum_delay_point();

        tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
        buf = ReadBufferWithStrategy(onerel, tblk, vac_strategy);
        LockBufferForCleanup(buf);
        tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats);
        /* Now that we've compacted the page, record its available space */
        page = BufferGetPage(buf);
        lazy_record_free_space(vacrelstats, tblk,
                               PageGetHeapFreeSpace(page));
        UnlockReleaseBuffer(buf);
        npages++;
    }

    ereport(elevel,
            (errmsg("\"%s\": removed %d row versions in %d pages",
                    RelationGetRelationName(onerel),
                    tupindex, npages),
             errdetail("%s.",
                       pg_rusage_show(&ru0))));
}

/*
 *  lazy_vacuum_page() -- free dead tuples on a page
 *                        and repair its fragmentation.
 *
 * Caller must hold pin and buffer cleanup lock on the buffer.
 *
 * tupindex is the index in vacrelstats->dead_tuples of the first dead
 * tuple for this page.  We assume the rest follow sequentially.
 * The return value is the first tupindex after the tuples of this page.
 */
static int
lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
                 int tupindex, LVRelStats *vacrelstats)
{
    Page        page = BufferGetPage(buffer);
    OffsetNumber unused[MaxOffsetNumber];
    int         uncnt = 0;
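
    /*
     * The page changes below and the WAL record describing them must appear
     * atomic to crash recovery, so wrap them in a critical section (an error
     * partway through would be promoted to PANIC rather than leave the page
     * inconsistent with the log).
     */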
    START_CRIT_SECTION();

    for (; tupindex < vacrelstats->num_dead_tuples; tupindex++)
    {
        BlockNumber tblk;
        OffsetNumber toff;
        ItemId      itemid;

        tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
        if (tblk != blkno)
            break;              /* past end of tuples for this block */
        toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]);
        itemid = PageGetItemId(page, toff);
        ItemIdSetUnused(itemid);
        unused[uncnt++] = toff;
    }

    PageRepairFragmentation(page);

    MarkBufferDirty(buffer);

    /* XLOG stuff */
    if (!onerel->rd_istemp)
    {
        XLogRecPtr  recptr;

        recptr = log_heap_clean(onerel, buffer,
                                NULL, 0, NULL, 0,
                                unused, uncnt,
                                false);
        PageSetLSN(page, recptr);
        PageSetTLI(page, ThisTimeLineID);
    }

    END_CRIT_SECTION();

    return tupindex;
}

/*
 *  lazy_vacuum_index() -- vacuum one index relation.
 *
 *      Delete all the index entries pointing to tuples listed in
 *      vacrelstats->dead_tuples, and update running statistics.
 */
static void
lazy_vacuum_index(Relation indrel,
                  IndexBulkDeleteResult **stats,
                  LVRelStats *vacrelstats)
{
    IndexVacuumInfo ivinfo;
    PGRUsage    ru0;

    pg_rusage_init(&ru0);

    ivinfo.index = indrel;
    ivinfo.vacuum_full = false;
    ivinfo.message_level = elevel;
    /* We don't yet know rel_tuples, so pass -1 */
    ivinfo.num_heap_tuples = -1;
    ivinfo.strategy = vac_strategy;

    /* Do bulk deletion */
    *stats = index_bulk_delete(&ivinfo, *stats,
                               lazy_tid_reaped, (void *) vacrelstats);

    ereport(elevel,
            (errmsg("scanned index \"%s\" to remove %d row versions",
                    RelationGetRelationName(indrel),
                    vacrelstats->num_dead_tuples),
             errdetail("%s.", pg_rusage_show(&ru0))));
}

/*
 *  lazy_cleanup_index() -- do post-vacuum cleanup for one index relation.
 */
static void
lazy_cleanup_index(Relation indrel,
                   IndexBulkDeleteResult *stats,
                   LVRelStats *vacrelstats)
{
    IndexVacuumInfo ivinfo;
    PGRUsage    ru0;

    pg_rusage_init(&ru0);

    ivinfo.index = indrel;
    ivinfo.vacuum_full = false;
    ivinfo.message_level = elevel;
    ivinfo.num_heap_tuples = vacrelstats->rel_tuples;
    ivinfo.strategy = vac_strategy;

    stats = index_vacuum_cleanup(&ivinfo, stats);
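
    /* The index AM may have nothing to report, in which case it returns NULL */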
    if (!stats)
        return;

    /* now update statistics in pg_class */
    vac_update_relstats(RelationGetRelid(indrel),
                        stats->num_pages,
                        stats->num_index_tuples,
                        false, InvalidTransactionId);

    ereport(elevel,
            (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
                    RelationGetRelationName(indrel),
                    stats->num_index_tuples,
                    stats->num_pages),
             errdetail("%.0f index row versions were removed.\n"
                       "%u index pages have been deleted, %u are currently reusable.\n"
                       "%s.",
                       stats->tuples_removed,
                       stats->pages_deleted, stats->pages_free,
                       pg_rusage_show(&ru0))));

    pfree(stats);
}

/*
 * lazy_truncate_heap - try to truncate off any empty pages at the end
 */
static void
lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
{
    BlockNumber old_rel_pages = vacrelstats->rel_pages;
    BlockNumber new_rel_pages;
    FSMPageData *pageSpaces;
    int         n;
    int         i,
                j;
    PGRUsage    ru0;

    pg_rusage_init(&ru0);

    /*
     * We need full exclusive lock on the relation in order to do truncation.
     * If we can't get it, give up rather than waiting --- we don't want to
     * block other backends, and we don't want to deadlock (which is quite
     * possible considering we already hold a lower-grade lock).
     */
    if (!ConditionalLockRelation(onerel, AccessExclusiveLock))
        return;

    /*
     * Now that we have exclusive lock, look to see if the rel has grown
     * whilst we were vacuuming with non-exclusive lock.  If so, give up; the
     * newly added pages presumably contain non-deletable tuples.
     */
    new_rel_pages = RelationGetNumberOfBlocks(onerel);
    if (new_rel_pages != old_rel_pages)
    {
        /* might as well use the latest news when we update pg_class stats */
        vacrelstats->rel_pages = new_rel_pages;
        UnlockRelation(onerel, AccessExclusiveLock);
        return;
    }

    /*
     * Scan backwards from the end to verify that the end pages actually
     * contain no tuples.  This is *necessary*, not optional, because other
     * backends could have added tuples to these pages whilst we were
     * vacuuming.
     */
    new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);

    if (new_rel_pages >= old_rel_pages)
    {
        /* can't do anything after all */
        UnlockRelation(onerel, AccessExclusiveLock);
        return;
    }

    /*
     * Okay to truncate.
     */
    RelationTruncate(onerel, new_rel_pages);

    /*
     * Note: once we have truncated, we *must* keep the exclusive lock until
     * commit.  The sinval message that will be sent at commit (as a result of
     * vac_update_relstats()) must be received by other backends, to cause
     * them to reset their rd_targblock values, before they can safely access
     * the table again.
     */

    /*
     * Drop free-space info for removed blocks; these must not get entered
     * into the FSM!
     */
    pageSpaces = vacrelstats->free_pages;
    n = vacrelstats->num_free_pages;
    j = 0;
    for (i = 0; i < n; i++)
    {
        if (FSMPageGetPageNum(&pageSpaces[i]) < new_rel_pages)
        {
            pageSpaces[j] = pageSpaces[i];
            j++;
        }
    }
    vacrelstats->num_free_pages = j;

    /*
     * If tot_free_pages was more than num_free_pages, we can't tell for sure
     * what its correct value is now, because we don't know which of the
     * forgotten pages are getting truncated.  Conservatively set it equal to
     * num_free_pages.
     */
    vacrelstats->tot_free_pages = j;

    /* We destroyed the heap ordering, so mark array unordered */
    vacrelstats->fs_is_heap = false;

    /* update statistics */
    vacrelstats->rel_pages = new_rel_pages;
    vacrelstats->pages_removed = old_rel_pages - new_rel_pages;

    ereport(elevel,
            (errmsg("\"%s\": truncated %u to %u pages",
                    RelationGetRelationName(onerel),
                    old_rel_pages, new_rel_pages),
             errdetail("%s.",
                       pg_rusage_show(&ru0))));
}

/*
 * Rescan end pages to verify that they are (still) empty of tuples.
 *
 * Returns number of nondeletable pages (last nonempty page + 1).
 */
static BlockNumber
count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
{
    BlockNumber blkno;

    /* Strange coding of loop control is needed because blkno is unsigned */
    blkno = vacrelstats->rel_pages;
    while (blkno > vacrelstats->nonempty_pages)
    {
        Buffer      buf;
        Page        page;
        OffsetNumber offnum,
                    maxoff;
        bool        hastup;

        /*
         * We don't insert a vacuum delay point here, because we have an
         * exclusive lock on the table which we want to hold for as short a
         * time as possible.  We still need to check for interrupts however.
         */
        CHECK_FOR_INTERRUPTS();

        blkno--;

        buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);

        /* In this phase we only need shared access to the buffer */
        LockBuffer(buf, BUFFER_LOCK_SHARE);

        page = BufferGetPage(buf);

        if (PageIsNew(page) || PageIsEmpty(page))
        {
            /* PageIsNew probably shouldn't happen... */
            UnlockReleaseBuffer(buf);
            continue;
        }

        hastup = false;
        maxoff = PageGetMaxOffsetNumber(page);
        for (offnum = FirstOffsetNumber;
             offnum <= maxoff;
             offnum = OffsetNumberNext(offnum))
        {
            ItemId      itemid;

            itemid = PageGetItemId(page, offnum);

            /*
             * Note: any non-unused item should be taken as a reason to keep
             * this page.  We formerly thought that DEAD tuples could be
             * thrown away, but that's not so, because we'd not have cleaned
             * out their index entries.
             */
            if (ItemIdIsUsed(itemid))
            {
                hastup = true;
                break;          /* can stop scanning */
            }
        }                       /* scan along page */

        UnlockReleaseBuffer(buf);

        /* Done scanning if we found a tuple here */
        if (hastup)
            return blkno + 1;
    }

    /*
     * If we fall out of the loop, all the previously-thought-to-be-empty
     * pages still are; we need not bother to look at the last known-nonempty
     * page.
     */
    return vacrelstats->nonempty_pages;
}

/*
 * lazy_space_alloc - space allocation decisions for lazy vacuum
 *
 * See the comments at the head of this file for rationale.
 */
static void
lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
{
    long        maxtuples;
    int         maxpages;

    if (vacrelstats->hasindex)
    {
        maxtuples = (maintenance_work_mem * 1024L) / sizeof(ItemPointerData);
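        /* clamp: max_dead_tuples is an int, and palloc() cannot allocate more than MaxAllocSize bytes */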
        maxtuples = Min(maxtuples, INT_MAX);
        maxtuples = Min(maxtuples, MaxAllocSize / sizeof(ItemPointerData));

        /* curious coding here to ensure the multiplication can't overflow */
        if ((BlockNumber) (maxtuples / LAZY_ALLOC_TUPLES) > relblocks)
            maxtuples = relblocks * LAZY_ALLOC_TUPLES;

        /* stay sane if small maintenance_work_mem */
        maxtuples = Max(maxtuples, MaxHeapTuplesPerPage);
    }
    else
    {
        maxtuples = MaxHeapTuplesPerPage;
    }

    vacrelstats->num_dead_tuples = 0;
    vacrelstats->max_dead_tuples = (int) maxtuples;
    vacrelstats->dead_tuples = (ItemPointer)
        palloc(maxtuples * sizeof(ItemPointerData));

    maxpages = MaxFSMPages;
    maxpages = Min(maxpages, MaxAllocSize / sizeof(FSMPageData));
    /* No need to allocate more pages than the relation has blocks */
    if (relblocks < (BlockNumber) maxpages)
        maxpages = (int) relblocks;

    vacrelstats->fs_is_heap = false;
    vacrelstats->num_free_pages = 0;
    vacrelstats->max_free_pages = maxpages;
    vacrelstats->free_pages = (FSMPageData *)
        palloc(maxpages * sizeof(FSMPageData));
    vacrelstats->tot_free_pages = 0;
}

/*
 * lazy_record_dead_tuple - remember one deletable tuple
 */
static void
lazy_record_dead_tuple(LVRelStats *vacrelstats,
                       ItemPointer itemptr)
{
    /*
     * The array shouldn't overflow under normal behavior, but perhaps it
     * could if we are given a really small maintenance_work_mem. In that
     * case, just forget the last few tuples (we'll get 'em next time).
     */
    if (vacrelstats->num_dead_tuples < vacrelstats->max_dead_tuples)
    {
        vacrelstats->dead_tuples[vacrelstats->num_dead_tuples] = *itemptr;
        vacrelstats->num_dead_tuples++;
    }
}

/*
 * lazy_record_free_space - remember free space on one page
 */
static void
lazy_record_free_space(LVRelStats *vacrelstats,
                       BlockNumber page,
                       Size avail)
{
    FSMPageData *pageSpaces;
    int         n;

    /*
     * A page with less than stats->threshold free space will be forgotten
     * immediately, and never passed to the free space map.  Removing the
     * uselessly small entries early saves cycles, and in particular reduces
     * the amount of time we spend holding the FSM lock when we finally call
     * RecordRelationFreeSpace.  Since the FSM will probably drop pages with
     * little free space anyway, there's no point in making this really small.
     *
     * XXX Is it worth trying to measure average tuple size, and using that to
     * adjust the threshold?  Would be worthwhile if FSM has no stats yet for
     * this relation.  But changing the threshold as we scan the rel might
     * lead to bizarre behavior, too.  Also, it's probably better if vacuum.c
     * has the same thresholding behavior as we do here.
     */
    if (avail < vacrelstats->threshold)
        return;

    /* Count all pages over threshold, even if not enough space in array */
    vacrelstats->tot_free_pages++;

    /* Copy pointers to local variables for notational simplicity */
    pageSpaces = vacrelstats->free_pages;
    n = vacrelstats->max_free_pages;

    /* If we haven't filled the array yet, just keep adding entries */
    if (vacrelstats->num_free_pages < n)
    {
        FSMPageSetPageNum(&pageSpaces[vacrelstats->num_free_pages], page);
        FSMPageSetSpace(&pageSpaces[vacrelstats->num_free_pages], avail);
        vacrelstats->num_free_pages++;
        return;
    }

    /*----------
     * The rest of this routine works with "heap" organization of the
     * free space arrays, wherein we maintain the heap property
     *          avail[(j-1) div 2] <= avail[j]  for 0 < j < n.
     * In particular, the zero'th element always has the smallest available
     * space and can be discarded to make room for a new page with more space.
     * See Knuth's discussion of heap-based priority queues, sec 5.2.3;
     * but note he uses 1-origin array subscripts, not 0-origin.
     *----------
     */

    /* If we haven't yet converted the array to heap organization, do it */
    if (!vacrelstats->fs_is_heap)
    {
        /*
         * Scan backwards through the array, "sift-up" each value into its
         * correct position.  We can start the scan at n/2-1 since each entry
         * above that position has no children to worry about.
         */
        int         l = n / 2;

        while (--l >= 0)
        {
            BlockNumber R = FSMPageGetPageNum(&pageSpaces[l]);
            Size        K = FSMPageGetSpace(&pageSpaces[l]);
            int         i;      /* i is where the "hole" is */

            i = l;
            for (;;)
            {
                int         j = 2 * i + 1;

                if (j >= n)
                    break;
                if (j + 1 < n && FSMPageGetSpace(&pageSpaces[j]) > FSMPageGetSpace(&pageSpaces[j + 1]))
                    j++;
                if (K <= FSMPageGetSpace(&pageSpaces[j]))
                    break;
                pageSpaces[i] = pageSpaces[j];
                i = j;
            }
            FSMPageSetPageNum(&pageSpaces[i], R);
            FSMPageSetSpace(&pageSpaces[i], K);
        }

        vacrelstats->fs_is_heap = true;
    }

    /* If new page has more than zero'th entry, insert it into heap */
    if (avail > FSMPageGetSpace(&pageSpaces[0]))
    {
        /*
         * Notionally, we replace the zero'th entry with the new data, and
         * then sift-up to maintain the heap property.  Physically, the new
         * data doesn't get stored into the arrays until we find the right
         * location for it.
         */
        int         i = 0;      /* i is where the "hole" is */

        for (;;)
        {
            int         j = 2 * i + 1;

            if (j >= n)
                break;
            if (j + 1 < n && FSMPageGetSpace(&pageSpaces[j]) > FSMPageGetSpace(&pageSpaces[j + 1]))
                j++;
            if (avail <= FSMPageGetSpace(&pageSpaces[j]))
                break;
            pageSpaces[i] = pageSpaces[j];
            i = j;
        }
        FSMPageSetPageNum(&pageSpaces[i], page);
        FSMPageSetSpace(&pageSpaces[i], avail);
    }
}

/*
 *  lazy_tid_reaped() -- is a particular tid deletable?
 *
 *      This has the right signature to be an IndexBulkDeleteCallback.
 *
 *      Assumes dead_tuples array is in sorted order.
 */
static bool
lazy_tid_reaped(ItemPointer itemptr, void *state)
{
    LVRelStats *vacrelstats = (LVRelStats *) state;
    ItemPointer res;

    res = (ItemPointer) bsearch((void *) itemptr,
                                (void *) vacrelstats->dead_tuples,
                                vacrelstats->num_dead_tuples,
                                sizeof(ItemPointerData),
                                vac_cmp_itemptr);

    return (res != NULL);
}

/*
 * Update the shared Free Space Map with the info we now have about
 * free space in the relation, discarding any old info the map may have.
 */
static void
lazy_update_fsm(Relation onerel, LVRelStats *vacrelstats)
{
    FSMPageData *pageSpaces = vacrelstats->free_pages;
    int         nPages = vacrelstats->num_free_pages;

    /*
     * Sort data into order, as required by RecordRelationFreeSpace.
     */
    if (nPages > 1)
        qsort(pageSpaces, nPages, sizeof(FSMPageData),
              vac_cmp_page_spaces);

    RecordRelationFreeSpace(&onerel->rd_node, vacrelstats->tot_free_pages,
                            nPages, pageSpaces);
}

/*
 * Comparator routines for use with qsort() and bsearch().
 */
static int
vac_cmp_itemptr(const void *left, const void *right)
{
    BlockNumber lblk,
                rblk;
    OffsetNumber loff,
                roff;

    lblk = ItemPointerGetBlockNumber((ItemPointer) left);
    rblk = ItemPointerGetBlockNumber((ItemPointer) right);

    if (lblk < rblk)
        return -1;
    if (lblk > rblk)
        return 1;

    loff = ItemPointerGetOffsetNumber((ItemPointer) left);
    roff = ItemPointerGetOffsetNumber((ItemPointer) right);

    if (loff < roff)
        return -1;
    if (loff > roff)
        return 1;

    return 0;
}

static int
vac_cmp_page_spaces(const void *left, const void *right)
{
    FSMPageData *linfo = (FSMPageData *) left;
    FSMPageData *rinfo = (FSMPageData *) right;
    BlockNumber lblkno = FSMPageGetPageNum(linfo);
    BlockNumber rblkno = FSMPageGetPageNum(rinfo);

    if (lblkno < rblkno)
        return -1;
    else if (lblkno > rblkno)
        return 1;
    return 0;
}