1 /*-------------------------------------------------------------------------
5 * PostgreSQL Integrated Autovacuum Daemon
7 * The autovacuum system is structured in two different kinds of processes: the
8 * autovacuum launcher and the autovacuum worker. The launcher is an
9 * always-running process, started by the postmaster when the autovacuum GUC
10 * parameter is set. The launcher schedules autovacuum workers to be started
11 * when appropriate. The workers are the processes which execute the actual
12 * vacuuming; they connect to a database as determined in the launcher, and
13 * once connected they examine the catalogs to select the tables to vacuum.
15 * The autovacuum launcher cannot start the worker processes by itself,
16 * because doing so would cause robustness issues (namely, failure to shut
17 * them down on exceptional conditions, and also, since the launcher is
18 * connected to shared memory and is thus subject to corruption there, it is
19 * not as robust as the postmaster). So it leaves that task to the postmaster.
21 * There is an autovacuum shared memory area, where the launcher stores
22 * information about the database it wants vacuumed. When it wants a new
23 * worker to start, it sets a flag in shared memory and sends a signal to the
24 * postmaster. The postmaster knows only that it must start a worker;
25 * so it forks a new child, which turns into a worker. This new process
26 * connects to shared memory, and there it can inspect the information that the
27 * launcher has set up.
29 * If the fork() call fails in the postmaster, it sets a flag in the shared
30 * memory area, and sends a signal to the launcher. The launcher, upon
31 * noticing the flag, can try starting the worker again by resending the
32 * signal. Note that the failure can only be transient (fork failure due to
33 * high load, memory pressure, too many processes, etc); more permanent
34 * problems, like failure to connect to a database, are detected later in the
35 * worker and dealt with just by having the worker exit normally. The launcher
36 * will launch a new worker again later, per schedule.
38 * When the worker is done vacuuming it sends SIGUSR1 to the launcher. The
39 * launcher then wakes up and is able to launch another worker, if the schedule
40 * is so tight that a new worker is needed immediately. At this time the
41 * launcher can also balance the settings for the various remaining workers'
42 * cost-based vacuum delay feature.
44 * Note that there can be more than one worker in a database concurrently.
45 * They will store the table they are currently vacuuming in shared memory, so
46 * that other workers avoid being blocked waiting for the vacuum lock for that
47 * table. They will also reload the pgstats data just before vacuuming each
48 * table, to avoid vacuuming a table that was just finished being vacuumed by
49 * another worker and thus is no longer noted in shared memory. However,
50 * there is a window (caused by pgstat delay) during which a worker may choose a
51 * table that was already vacuumed; this is a bug in the current design.
53 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
54 * Portions Copyright (c) 1994, Regents of the University of California
60 *-------------------------------------------------------------------------
65 #include <sys/types.h>
70 #include "access/genam.h"
71 #include "access/heapam.h"
72 #include "access/transam.h"
73 #include "access/xact.h"
74 #include "catalog/dependency.h"
75 #include "catalog/indexing.h"
76 #include "catalog/namespace.h"
77 #include "catalog/pg_autovacuum.h"
78 #include "catalog/pg_database.h"
79 #include "commands/dbcommands.h"
80 #include "commands/vacuum.h"
81 #include "libpq/hba.h"
82 #include "libpq/pqsignal.h"
83 #include "miscadmin.h"
85 #include "postmaster/autovacuum.h"
86 #include "postmaster/fork_process.h"
87 #include "postmaster/postmaster.h"
88 #include "storage/bufmgr.h"
89 #include "storage/fd.h"
90 #include "storage/ipc.h"
91 #include "storage/pmsignal.h"
92 #include "storage/proc.h"
93 #include "storage/procarray.h"
94 #include "storage/sinvaladt.h"
95 #include "tcop/tcopprot.h"
96 #include "utils/dynahash.h"
97 #include "utils/flatfiles.h"
98 #include "utils/fmgroids.h"
99 #include "utils/lsyscache.h"
100 #include "utils/memutils.h"
101 #include "utils/ps_status.h"
102 #include "utils/syscache.h"
103 #include "utils/tqual.h"
109 bool autovacuum_start_daemon
= false;
110 int autovacuum_max_workers
;
111 int autovacuum_naptime
;
112 int autovacuum_vac_thresh
;
113 double autovacuum_vac_scale
;
114 int autovacuum_anl_thresh
;
115 double autovacuum_anl_scale
;
116 int autovacuum_freeze_max_age
;
118 int autovacuum_vac_cost_delay
;
119 int autovacuum_vac_cost_limit
;
121 int Log_autovacuum_min_duration
= -1;
123 /* how long to keep pgstat data in the launcher, in milliseconds */
124 #define STATS_READ_DELAY 1000
127 /* Flags to tell if we are in an autovacuum process */
128 static bool am_autovacuum_launcher
= false;
129 static bool am_autovacuum_worker
= false;
131 /* Flags set by signal handlers */
132 static volatile sig_atomic_t got_SIGHUP
= false;
133 static volatile sig_atomic_t got_SIGUSR1
= false;
134 static volatile sig_atomic_t got_SIGTERM
= false;
136 /* Comparison point for determining whether freeze_max_age is exceeded */
137 static TransactionId recentXid
;
139 /* Default freeze_min_age to use for autovacuum (varies by database) */
140 static int default_freeze_min_age
;
142 /* Memory context for long-lived data */
143 static MemoryContext AutovacMemCxt
;
145 /* struct to keep track of databases in launcher */
146 typedef struct avl_dbase
148 Oid adl_datid
; /* hash key -- must be first */
149 TimestampTz adl_next_worker
;
153 /* struct to keep track of databases in worker */
154 typedef struct avw_dbase
158 TransactionId adw_frozenxid
;
159 PgStat_StatDBEntry
*adw_entry
;
162 /* struct to keep track of tables to vacuum and/or analyze, in 1st pass */
163 typedef struct av_relation
165 Oid ar_toastrelid
; /* hash key - must be first */
169 /* struct to keep track of tables to vacuum and/or analyze, after rechecking */
170 typedef struct autovac_table
176 int at_freeze_min_age
;
177 int at_vacuum_cost_delay
;
178 int at_vacuum_cost_limit
;
186 * This struct holds information about a single worker's whereabouts. We keep
187 * an array of these in shared memory, sized according to
188 * autovacuum_max_workers.
190 * wi_links entry into free list or running list
191 * wi_dboid OID of the database this worker is supposed to work on
192 * wi_tableoid OID of the table currently being vacuumed
193 * wi_proc pointer to PGPROC of the running worker, NULL if not started
194 * wi_launchtime Time at which this worker was launched
195 * wi_cost_* Vacuum cost-based delay parameters current in this worker
197 * All fields are protected by AutovacuumLock, except for wi_tableoid which is
198 * protected by AutovacuumScheduleLock (which is read-only for everyone except
199 * that worker itself).
202 typedef struct WorkerInfoData
208 TimestampTz wi_launchtime
;
211 int wi_cost_limit_base
;
214 typedef struct WorkerInfoData
*WorkerInfo
;
217 * Possible signals received by the launcher from remote processes. These are
218 * stored atomically in shared memory so that other processes can set them
typedef enum
{
	AutoVacForkFailed,			/* failed trying to start a worker */
	AutoVacRebalance,			/* rebalance the cost limits */

	/*
	 * Must be last, and must NOT be given an explicit value: it is used as
	 * the dimension of the av_signal[] array in the autovacuum shmem struct,
	 * so it has to equal the number of real signals listed above.  Writing
	 * "= AutoVacRebalance" here would size the array at one element while
	 * av_signal[AutoVacRebalance] addresses index 1, i.e. past the end of the
	 * array.
	 */
	AutoVacNumSignals
} AutoVacuumSignal;
229 * The main autovacuum shmem struct. On shared memory we store this main
230 * struct and the array of WorkerInfo structs. This struct keeps:
232 * av_signal set by other processes to indicate various conditions
233 * av_launcherpid the PID of the autovacuum launcher
234 * av_freeWorkers the WorkerInfo freelist
235 * av_runningWorkers the WorkerInfo non-free queue
236 * av_startingWorker pointer to WorkerInfo currently being started (cleared by
237 * the worker itself as soon as it's up and running)
239 * This struct is protected by AutovacuumLock, except for av_signal and parts
240 * of the worker list (see above).
245 sig_atomic_t av_signal
[AutoVacNumSignals
];
246 pid_t av_launcherpid
;
247 SHMEM_OFFSET av_freeWorkers
;
248 SHM_QUEUE av_runningWorkers
;
249 SHMEM_OFFSET av_startingWorker
;
250 } AutoVacuumShmemStruct
;
252 static AutoVacuumShmemStruct
*AutoVacuumShmem
;
254 /* the database list in the launcher, and the context that contains it */
255 static Dllist
*DatabaseList
= NULL
;
256 static MemoryContext DatabaseListCxt
= NULL
;
258 /* Pointer to my own WorkerInfo, valid on each worker */
259 static WorkerInfo MyWorkerInfo
= NULL
;
261 /* PID of launcher, valid only in worker while shutting down */
262 int AutovacuumLauncherPid
= 0;
265 static pid_t
avlauncher_forkexec(void);
266 static pid_t
avworker_forkexec(void);
268 NON_EXEC_STATIC
void AutoVacWorkerMain(int argc
, char *argv
[]);
269 NON_EXEC_STATIC
void AutoVacLauncherMain(int argc
, char *argv
[]);
271 static Oid
do_start_worker(void);
272 static void launcher_determine_sleep(bool canlaunch
, bool recursing
,
273 struct timeval
* nap
);
274 static void launch_worker(TimestampTz now
);
275 static List
*get_database_list(void);
276 static void rebuild_database_list(Oid newdb
);
277 static int db_comparator(const void *a
, const void *b
);
278 static void autovac_balance_cost(void);
280 static void do_autovacuum(void);
281 static void FreeWorkerInfo(int code
, Datum arg
);
283 static autovac_table
*table_recheck_autovac(Oid relid
, HTAB
*table_toast_map
);
284 static void relation_needs_vacanalyze(Oid relid
, Form_pg_autovacuum avForm
,
285 Form_pg_class classForm
,
286 PgStat_StatTabEntry
*tabentry
, bool *dovacuum
,
287 bool *doanalyze
, bool *wraparound
);
289 static void autovacuum_do_vac_analyze(autovac_table
*tab
,
290 BufferAccessStrategy bstrategy
);
291 static HeapTuple
get_pg_autovacuum_tuple_relid(Relation avRel
, Oid relid
,
292 HTAB
*table_toast_map
);
293 static PgStat_StatTabEntry
*get_pgstat_tabentry_relid(Oid relid
, bool isshared
,
294 PgStat_StatDBEntry
*shared
,
295 PgStat_StatDBEntry
*dbentry
);
296 static void autovac_report_activity(autovac_table
*tab
);
297 static void avl_sighup_handler(SIGNAL_ARGS
);
298 static void avl_sigusr1_handler(SIGNAL_ARGS
);
299 static void avl_sigterm_handler(SIGNAL_ARGS
);
300 static void avl_quickdie(SIGNAL_ARGS
);
301 static void autovac_refresh_stats(void);
305 /********************************************************************
306 * AUTOVACUUM LAUNCHER CODE
307 ********************************************************************/
311 * forkexec routine for the autovacuum launcher process.
313 * Format up the arglist, then fork and exec.
316 avlauncher_forkexec(void)
321 av
[ac
++] = "postgres";
322 av
[ac
++] = "--forkavlauncher";
323 av
[ac
++] = NULL
; /* filled in by postmaster_forkexec */
326 Assert(ac
< lengthof(av
));
328 return postmaster_forkexec(ac
, av
);
332 * We need this set from the outside, before InitProcess is called
335 AutovacuumLauncherIAm(void)
337 am_autovacuum_launcher
= true;
342 * Main entry point for autovacuum launcher process, to be called from the
346 StartAutoVacLauncher(void)
351 switch ((AutoVacPID
= avlauncher_forkexec()))
353 switch ((AutoVacPID
= fork_process()))
358 (errmsg("could not fork autovacuum launcher process: %m")));
363 /* in postmaster child ... */
364 /* Close the postmaster's sockets */
365 ClosePostmasterPorts(false);
367 /* Lose the postmaster's on-exit routines */
370 AutoVacLauncherMain(0, NULL
);
374 return (int) AutoVacPID
;
377 /* shouldn't get here */
382 * Main loop for the autovacuum launcher process.
385 AutoVacLauncherMain(int argc
, char *argv
[])
387 sigjmp_buf local_sigjmp_buf
;
389 /* we are a postmaster subprocess now */
390 IsUnderPostmaster
= true;
391 am_autovacuum_launcher
= true;
393 /* reset MyProcPid */
394 MyProcPid
= getpid();
396 /* record Start Time for logging */
397 MyStartTime
= time(NULL
);
399 /* Identify myself via ps */
400 init_ps_display("autovacuum launcher process", "", "", "");
403 pg_usleep(PostAuthDelay
* 1000000L);
405 SetProcessingMode(InitProcessing
);
408 * If possible, make this process a group leader, so that the postmaster
409 * can signal any child processes too. (autovacuum probably never has any
410 * child processes, but for consistency we make all postmaster child
411 * processes do this.)
415 elog(FATAL
, "setsid() failed: %m");
419 * Set up signal handlers. Since this is an auxiliary process, it has
420 * particular signal requirements -- no deadlock checker or sinval
421 * catchup, for example.
423 pqsignal(SIGHUP
, avl_sighup_handler
);
425 pqsignal(SIGINT
, SIG_IGN
);
426 pqsignal(SIGTERM
, avl_sigterm_handler
);
427 pqsignal(SIGQUIT
, avl_quickdie
);
428 pqsignal(SIGALRM
, SIG_IGN
);
430 pqsignal(SIGPIPE
, SIG_IGN
);
431 pqsignal(SIGUSR1
, avl_sigusr1_handler
);
432 /* We don't listen for async notifies */
433 pqsignal(SIGUSR2
, SIG_IGN
);
434 pqsignal(SIGFPE
, FloatExceptionHandler
);
435 pqsignal(SIGCHLD
, SIG_DFL
);
437 /* Early initialization */
441 * Create a per-backend PGPROC struct in shared memory, except in the
442 * EXEC_BACKEND case where this was done in SubPostmasterMain. We must do
443 * this before we can use LWLocks (and in the EXEC_BACKEND case we already
444 * had to do some stuff with LWLocks).
447 InitAuxiliaryProcess();
451 * Create a memory context that we will do all our work in. We do this so
452 * that we can reset the context during error recovery and thereby avoid
453 * possible memory leaks.
455 AutovacMemCxt
= AllocSetContextCreate(TopMemoryContext
,
456 "Autovacuum Launcher",
457 ALLOCSET_DEFAULT_MINSIZE
,
458 ALLOCSET_DEFAULT_INITSIZE
,
459 ALLOCSET_DEFAULT_MAXSIZE
);
460 MemoryContextSwitchTo(AutovacMemCxt
);
464 * If an exception is encountered, processing resumes here.
466 * This code is heavily based on bgwriter.c, q.v.
468 if (sigsetjmp(local_sigjmp_buf
, 1) != 0)
470 /* since not using PG_TRY, must reset error stack by hand */
471 error_context_stack
= NULL
;
473 /* Prevents interrupts while cleaning up */
476 /* Report the error to the server log */
480 * These operations are really just a minimal subset of
481 * AbortTransaction(). We don't have very many resources to worry
482 * about, but we do have LWLocks.
486 AtEOXact_HashTables(false);
489 * Now return to normal top-level context and clear ErrorContext for
492 MemoryContextSwitchTo(AutovacMemCxt
);
495 /* Flush any leaked data in the top-level context */
496 MemoryContextResetAndDeleteChildren(AutovacMemCxt
);
498 /* don't leave dangling pointers to freed memory */
499 DatabaseListCxt
= NULL
;
503 * Make sure pgstat also considers our stat data as gone. Note: we
504 * mustn't use autovac_refresh_stats here.
506 pgstat_clear_snapshot();
508 /* Now we can allow interrupts again */
512 * Sleep at least 1 second after any error. We don't want to be
513 * filling the error logs as fast as we can.
518 /* We can now handle ereport(ERROR) */
519 PG_exception_stack
= &local_sigjmp_buf
;
522 (errmsg("autovacuum launcher started")));
524 /* must unblock signals before calling rebuild_database_list */
525 PG_SETMASK(&UnBlockSig
);
527 /* in emergency mode, just start a worker and go away */
528 if (!AutoVacuumingActive())
531 proc_exit(0); /* done */
534 AutoVacuumShmem
->av_launcherpid
= MyProcPid
;
537 * Create the initial database list. The invariant we want this list to
538 * keep is that it's ordered by decreasing next_worker. As soon as an entry
539 * is updated to a higher time, it will be moved to the front (which is
540 * correct because the only operation is to add autovacuum_naptime to the
541 * entry, and time always increases).
543 rebuild_database_list(InvalidOid
);
548 TimestampTz current_time
= 0;
553 * Emergency bailout if postmaster has died. This is to avoid the
554 * necessity for manual cleanup of all postmaster children.
556 if (!PostmasterIsAlive(true))
559 launcher_determine_sleep(AutoVacuumShmem
->av_freeWorkers
!=
560 INVALID_OFFSET
, false, &nap
);
563 * Sleep for a while according to schedule.
565 * On some platforms, signals won't interrupt the sleep. To ensure we
566 * respond reasonably promptly when someone signals us, break down the
567 * sleep into 1-second increments, and check for interrupts after each
570 while (nap
.tv_sec
> 0 || nap
.tv_usec
> 0)
581 sleeptime
= nap
.tv_usec
;
584 pg_usleep(sleeptime
);
587 * Emergency bailout if postmaster has died. This is to avoid the
588 * necessity for manual cleanup of all postmaster children.
590 if (!PostmasterIsAlive(true))
593 if (got_SIGTERM
|| got_SIGHUP
|| got_SIGUSR1
)
597 /* the normal shutdown case */
604 ProcessConfigFile(PGC_SIGHUP
);
606 /* shutdown requested in config file */
607 if (!AutoVacuumingActive())
610 /* rebalance in case the default cost parameters changed */
611 LWLockAcquire(AutovacuumLock
, LW_EXCLUSIVE
);
612 autovac_balance_cost();
613 LWLockRelease(AutovacuumLock
);
615 /* rebuild the list in case the naptime changed */
616 rebuild_database_list(InvalidOid
);
620 * a worker finished, or postmaster signalled failure to start a
627 /* rebalance cost limits, if needed */
628 if (AutoVacuumShmem
->av_signal
[AutoVacRebalance
])
630 LWLockAcquire(AutovacuumLock
, LW_EXCLUSIVE
);
631 AutoVacuumShmem
->av_signal
[AutoVacRebalance
] = false;
632 autovac_balance_cost();
633 LWLockRelease(AutovacuumLock
);
636 if (AutoVacuumShmem
->av_signal
[AutoVacForkFailed
])
639 * If the postmaster failed to start a new worker, we sleep
640 * for a little while and resend the signal. The new worker's
641 * state is still in memory, so this is sufficient. After
642 * that, we restart the main loop.
644 * XXX should we put a limit to the number of times we retry?
645 * I don't think it makes much sense, because a future start
646 * of a worker will continue to fail in the same way.
648 AutoVacuumShmem
->av_signal
[AutoVacForkFailed
] = false;
649 pg_usleep(100000L); /* 100ms */
650 SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER
);
656 * There are some conditions that we need to check before trying to
657 * start a worker. First, we need to make sure that there is a
658 * worker slot available. Second, we need to make sure that no
659 * other worker failed while starting up.
662 current_time
= GetCurrentTimestamp();
663 LWLockAcquire(AutovacuumLock
, LW_SHARED
);
665 can_launch
= (AutoVacuumShmem
->av_freeWorkers
!= INVALID_OFFSET
);
667 if (AutoVacuumShmem
->av_startingWorker
!= INVALID_OFFSET
)
671 WorkerInfo worker
= (WorkerInfo
) MAKE_PTR(AutoVacuumShmem
->av_startingWorker
);
674 * We can't launch another worker when another one is still
675 * starting up (or failed while doing so), so just sleep for a bit
676 * more; that worker will wake us up again as soon as it's ready.
677 * We will only wait autovacuum_naptime seconds (up to a maximum
678 * of 60 seconds) for this to happen however. Note that failure
679 * to connect to a particular database is not a problem here,
680 * because the worker removes itself from the startingWorker
681 * pointer before trying to connect. Problems detected by the
682 * postmaster (like fork() failure) are also reported and handled
683 * differently. The only problems that may cause this code to
684 * fire are errors in the earlier sections of AutoVacWorkerMain,
685 * before the worker removes the WorkerInfo from the
686 * startingWorker pointer.
688 waittime
= Min(autovacuum_naptime
, 60) * 1000;
689 if (TimestampDifferenceExceeds(worker
->wi_launchtime
, current_time
,
692 LWLockRelease(AutovacuumLock
);
693 LWLockAcquire(AutovacuumLock
, LW_EXCLUSIVE
);
696 * No other process can put a worker in starting mode, so if
697 * startingWorker is still set (i.e. not INVALID_OFFSET) after exchanging
698 * our lock, we assume it's the same one we saw above (so we don't
699 * recheck the launch time).
701 if (AutoVacuumShmem
->av_startingWorker
!= INVALID_OFFSET
)
703 worker
= (WorkerInfo
) MAKE_PTR(AutoVacuumShmem
->av_startingWorker
);
704 worker
->wi_dboid
= InvalidOid
;
705 worker
->wi_tableoid
= InvalidOid
;
706 worker
->wi_proc
= NULL
;
707 worker
->wi_launchtime
= 0;
708 worker
->wi_links
.next
= AutoVacuumShmem
->av_freeWorkers
;
709 AutoVacuumShmem
->av_freeWorkers
= MAKE_OFFSET(worker
);
710 AutoVacuumShmem
->av_startingWorker
= INVALID_OFFSET
;
711 elog(WARNING
, "worker took too long to start; cancelled");
717 LWLockRelease(AutovacuumLock
); /* either shared or exclusive */
719 /* if we can't do anything, just go back to sleep */
723 /* We're OK to start a new worker */
725 elem
= DLGetTail(DatabaseList
);
728 avl_dbase
*avdb
= DLE_VAL(elem
);
731 * launch a worker if next_worker is right now or it is in the
734 if (TimestampDifferenceExceeds(avdb
->adl_next_worker
,
736 launch_worker(current_time
);
741 * Special case when the list is empty: start a worker right away.
742 * This covers the initial case, when no database is in pgstats
743 * (thus the list is empty). Note that the constraints in
744 * launcher_determine_sleep keep us from starting workers too
745 * quickly (at most once every autovacuum_naptime when the list is
748 launch_worker(current_time
);
752 /* Normal exit from the autovac launcher is here */
754 (errmsg("autovacuum launcher shutting down")));
755 AutoVacuumShmem
->av_launcherpid
= 0;
757 proc_exit(0); /* done */
761 * Determine the time to sleep, based on the database list.
763 * The "canlaunch" parameter indicates whether we can start a worker right now;
764 * it is false, for example, when all the workers are busy. If it is false, we
765 * will cause a long sleep, which will be interrupted when a worker exits.
768 launcher_determine_sleep(bool canlaunch
, bool recursing
, struct timeval
* nap
)
773 * We sleep until the next scheduled vacuum. We trust that when the
774 * database list was built, care was taken so that no entries have times
775 * in the past; if the first entry has too close a next_worker value, or a
776 * time in the past, we will sleep a small nominal time.
780 nap
->tv_sec
= autovacuum_naptime
;
783 else if ((elem
= DLGetTail(DatabaseList
)) != NULL
)
785 avl_dbase
*avdb
= DLE_VAL(elem
);
786 TimestampTz current_time
= GetCurrentTimestamp();
787 TimestampTz next_wakeup
;
791 next_wakeup
= avdb
->adl_next_worker
;
792 TimestampDifference(current_time
, next_wakeup
, &secs
, &usecs
);
795 nap
->tv_usec
= usecs
;
799 /* list is empty, sleep for whole autovacuum_naptime seconds */
800 nap
->tv_sec
= autovacuum_naptime
;
805 * If the result is exactly zero, it means a database had an entry with
806 * time in the past. Rebuild the list so that the databases are evenly
807 * distributed again, and recalculate the time to sleep. This can happen
808 * if there are more tables needing vacuum than workers, and they all take
809 * longer to vacuum than autovacuum_naptime.
811 * We only recurse once. rebuild_database_list should always return times
812 * in the future, but it seems best not to rely too much on that.
814 if (nap
->tv_sec
== 0 && nap
->tv_usec
== 0 && !recursing
)
816 rebuild_database_list(InvalidOid
);
817 launcher_determine_sleep(canlaunch
, true, nap
);
821 /* 100ms is the smallest time we'll allow the launcher to sleep */
822 if (nap
->tv_sec
<= 0 && nap
->tv_usec
<= 100000)
825 nap
->tv_usec
= 100000; /* 100 ms */
830 * Build an updated DatabaseList. It must only contain databases that appear
831 * in pgstats, and must be sorted by next_worker from highest to lowest,
832 * distributed regularly across the next autovacuum_naptime interval.
834 * Receives the Oid of the database that made this list be generated (we call
835 * this the "new" database, because when the database was already present on
836 * the list, we expect that this function is not called at all). The
837 * preexisting list, if any, will be used to preserve the order of the
838 * databases in the autovacuum_naptime period. The new database is put at the
839 * end of the interval. The actual values are not saved, which should not be
843 rebuild_database_list(Oid newdb
)
847 MemoryContext newcxt
;
848 MemoryContext oldcxt
;
849 MemoryContext tmpcxt
;
855 /* use fresh stats */
856 autovac_refresh_stats();
858 newcxt
= AllocSetContextCreate(AutovacMemCxt
,
860 ALLOCSET_DEFAULT_MINSIZE
,
861 ALLOCSET_DEFAULT_INITSIZE
,
862 ALLOCSET_DEFAULT_MAXSIZE
);
863 tmpcxt
= AllocSetContextCreate(newcxt
,
865 ALLOCSET_DEFAULT_MINSIZE
,
866 ALLOCSET_DEFAULT_INITSIZE
,
867 ALLOCSET_DEFAULT_MAXSIZE
);
868 oldcxt
= MemoryContextSwitchTo(tmpcxt
);
871 * Implementing this is not as simple as it sounds, because we need to put
872 * the new database at the end of the list; next the databases that were
873 * already on the list, and finally (at the tail of the list) all the
874 * other databases that are not on the existing list.
876 * To do this, we build an empty hash table of scored databases. We will
877 * start with the lowest score (zero) for the new database, then
878 * increasing scores for the databases in the existing list, in order, and
879 * lastly increasing scores for all databases gotten via
880 * get_database_list() that are not already on the hash.
882 * Then we will put all the hash elements into an array, sort the array by
883 * score, and finally put the array elements into the new doubly linked
886 hctl
.keysize
= sizeof(Oid
);
887 hctl
.entrysize
= sizeof(avl_dbase
);
888 hctl
.hash
= oid_hash
;
890 dbhash
= hash_create("db hash", 20, &hctl
, /* magic number here FIXME */
891 HASH_ELEM
| HASH_FUNCTION
| HASH_CONTEXT
);
893 /* start by inserting the new database */
895 if (OidIsValid(newdb
))
898 PgStat_StatDBEntry
*entry
;
900 /* only consider this database if it has a pgstat entry */
901 entry
= pgstat_fetch_stat_dbentry(newdb
);
904 /* we assume it isn't found because the hash was just created */
905 db
= hash_search(dbhash
, &newdb
, HASH_ENTER
, NULL
);
907 /* hash_search already filled in the key */
908 db
->adl_score
= score
++;
909 /* next_worker is filled in later */
913 /* Now insert the databases from the existing list */
914 if (DatabaseList
!= NULL
)
918 elem
= DLGetHead(DatabaseList
);
921 avl_dbase
*avdb
= DLE_VAL(elem
);
924 PgStat_StatDBEntry
*entry
;
926 elem
= DLGetSucc(elem
);
929 * skip databases with no stat entries -- in particular, this gets
930 * rid of dropped databases
932 entry
= pgstat_fetch_stat_dbentry(avdb
->adl_datid
);
936 db
= hash_search(dbhash
, &(avdb
->adl_datid
), HASH_ENTER
, &found
);
940 /* hash_search already filled in the key */
941 db
->adl_score
= score
++;
942 /* next_worker is filled in later */
947 /* finally, insert all qualifying databases not previously inserted */
948 dblist
= get_database_list();
949 foreach(cell
, dblist
)
951 avw_dbase
*avdb
= lfirst(cell
);
954 PgStat_StatDBEntry
*entry
;
956 /* only consider databases with a pgstat entry */
957 entry
= pgstat_fetch_stat_dbentry(avdb
->adw_datid
);
961 db
= hash_search(dbhash
, &(avdb
->adw_datid
), HASH_ENTER
, &found
);
962 /* only update the score if the database was not already on the hash */
965 /* hash_search already filled in the key */
966 db
->adl_score
= score
++;
967 /* next_worker is filled in later */
972 /* from here on, the allocated memory belongs to the new list */
973 MemoryContextSwitchTo(newcxt
);
974 DatabaseList
= DLNewList();
978 TimestampTz current_time
;
979 int millis_increment
;
985 /* put all the hash elements into an array */
986 dbary
= palloc(nelems
* sizeof(avl_dbase
));
989 hash_seq_init(&seq
, dbhash
);
990 while ((db
= hash_seq_search(&seq
)) != NULL
)
991 memcpy(&(dbary
[i
++]), db
, sizeof(avl_dbase
));
994 qsort(dbary
, nelems
, sizeof(avl_dbase
), db_comparator
);
996 /* this is the time interval between databases in the schedule */
997 millis_increment
= 1000.0 * autovacuum_naptime
/ nelems
;
998 current_time
= GetCurrentTimestamp();
1001 * move the elements from the array into the dllist, setting the
1002 * next_worker while walking the array
1004 for (i
= 0; i
< nelems
; i
++)
1006 avl_dbase
*db
= &(dbary
[i
]);
1009 current_time
= TimestampTzPlusMilliseconds(current_time
,
1011 db
->adl_next_worker
= current_time
;
1013 elem
= DLNewElem(db
);
1014 /* later elements should go closer to the head of the list */
1015 DLAddHead(DatabaseList
, elem
);
1019 /* all done, clean up memory */
1020 if (DatabaseListCxt
!= NULL
)
1021 MemoryContextDelete(DatabaseListCxt
);
1022 MemoryContextDelete(tmpcxt
);
1023 DatabaseListCxt
= newcxt
;
1024 MemoryContextSwitchTo(oldcxt
);
1027 /* qsort comparator for avl_dbase, using adl_score */
1029 db_comparator(const void *a
, const void *b
)
1031 if (((avl_dbase
*) a
)->adl_score
== ((avl_dbase
*) b
)->adl_score
)
1034 return (((avl_dbase
*) a
)->adl_score
< ((avl_dbase
*) b
)->adl_score
) ? 1 : -1;
1040 * Bare-bones procedure for starting an autovacuum worker from the launcher.
1041 * It determines what database to work on, sets up shared memory stuff and
1042 * signals postmaster to start the worker. It fails gracefully if invoked when
1043 * autovacuum_max_workers workers are already active.
1045 * Return value is the OID of the database that the worker is going to process,
1046 * or InvalidOid if no worker was actually started.
1049 do_start_worker(void)
1053 TransactionId xidForceLimit
;
1056 TimestampTz current_time
;
1057 bool skipit
= false;
1058 Oid retval
= InvalidOid
;
1059 MemoryContext tmpcxt
,
1062 /* return quickly when there are no free workers */
1063 LWLockAcquire(AutovacuumLock
, LW_SHARED
);
1064 if (AutoVacuumShmem
->av_freeWorkers
== INVALID_OFFSET
)
1066 LWLockRelease(AutovacuumLock
);
1069 LWLockRelease(AutovacuumLock
);
1072 * Create and switch to a temporary context to avoid leaking the memory
1073 * allocated for the database list.
1075 tmpcxt
= AllocSetContextCreate(CurrentMemoryContext
,
1076 "Start worker tmp cxt",
1077 ALLOCSET_DEFAULT_MINSIZE
,
1078 ALLOCSET_DEFAULT_INITSIZE
,
1079 ALLOCSET_DEFAULT_MAXSIZE
);
1080 oldcxt
= MemoryContextSwitchTo(tmpcxt
);
1082 /* use fresh stats */
1083 autovac_refresh_stats();
1085 /* Get a list of databases */
1086 dblist
= get_database_list();
1089 * Determine the oldest datfrozenxid/relfrozenxid that we will allow to
1090 * pass without forcing a vacuum. (This limit can be tightened for
1091 * particular tables, but not loosened.)
1093 recentXid
= ReadNewTransactionId();
1094 xidForceLimit
= recentXid
- autovacuum_freeze_max_age
;
1095 /* ensure it's a "normal" XID, else TransactionIdPrecedes misbehaves */
1096 if (xidForceLimit
< FirstNormalTransactionId
)
1097 xidForceLimit
-= FirstNormalTransactionId
;
1100 * Choose a database to connect to. We pick the database that was least
1101 * recently auto-vacuumed, or one that needs vacuuming to prevent Xid
1102 * wraparound-related data loss. If any db at risk of wraparound is
1103 * found, we pick the one with oldest datfrozenxid, independently of
1106 * Note that a database with no stats entry is not considered, except for
1107 * Xid wraparound purposes. The theory is that if no one has ever
1108 * connected to it since the stats were last initialized, it doesn't need
1111 * XXX This could be improved if we had more info about whether it needs
1112 * vacuuming before connecting to it. Perhaps look through the pgstats
1113 * data for the database's tables? One idea is to keep track of the
1114 * number of new and dead tuples per database in pgstats. However it
1115 * isn't clear how to construct a metric that measures that and not cause
1116 * starvation for less busy databases.
1119 for_xid_wrap
= false;
1120 current_time
= GetCurrentTimestamp();
1121 foreach(cell
, dblist
)
1123 avw_dbase
*tmp
= lfirst(cell
);
1126 /* Check to see if this one is at risk of wraparound */
1127 if (TransactionIdPrecedes(tmp
->adw_frozenxid
, xidForceLimit
))
1130 TransactionIdPrecedes(tmp
->adw_frozenxid
, avdb
->adw_frozenxid
))
1132 for_xid_wrap
= true;
1135 else if (for_xid_wrap
)
1136 continue; /* ignore not-at-risk DBs */
1138 /* Find pgstat entry if any */
1139 tmp
->adw_entry
= pgstat_fetch_stat_dbentry(tmp
->adw_datid
);
1142 * Skip a database with no pgstat entry; it means it hasn't seen any
1145 if (!tmp
->adw_entry
)
1149 * Also, skip a database that appears on the database list as having
1150 * been processed recently (less than autovacuum_naptime seconds ago).
1151 * We do this so that we don't select a database which we just
1152 * selected, but that pgstat hasn't gotten around to updating the last
1153 * autovacuum time yet.
1156 elem
= DatabaseList
? DLGetTail(DatabaseList
) : NULL
;
1158 while (elem
!= NULL
)
1160 avl_dbase
*dbp
= DLE_VAL(elem
);
1162 if (dbp
->adl_datid
== tmp
->adw_datid
)
1165 * Skip this database if its next_worker value falls between
1166 * the current time and the current time plus naptime.
1168 if (!TimestampDifferenceExceeds(dbp
->adl_next_worker
,
1170 !TimestampDifferenceExceeds(current_time
,
1171 dbp
->adl_next_worker
,
1172 autovacuum_naptime
* 1000))
1177 elem
= DLGetPred(elem
);
1183 * Remember the db with oldest autovac time. (If we are here, both
1184 * tmp->adw_entry and avdb->adw_entry must be non-null.)
1187 tmp
->adw_entry
->last_autovac_time
< avdb
->adw_entry
->last_autovac_time
)
1191 /* Found a database -- process it */
1195 SHMEM_OFFSET sworker
;
1197 LWLockAcquire(AutovacuumLock
, LW_EXCLUSIVE
);
1200 * Get a worker entry from the freelist. We checked above, so there
1201 * really should be a free slot -- complain very loudly if there
1204 sworker
= AutoVacuumShmem
->av_freeWorkers
;
1205 if (sworker
== INVALID_OFFSET
)
1206 elog(FATAL
, "no free worker found");
1208 worker
= (WorkerInfo
) MAKE_PTR(sworker
);
1209 AutoVacuumShmem
->av_freeWorkers
= worker
->wi_links
.next
;
1211 worker
->wi_dboid
= avdb
->adw_datid
;
1212 worker
->wi_proc
= NULL
;
1213 worker
->wi_launchtime
= GetCurrentTimestamp();
1215 AutoVacuumShmem
->av_startingWorker
= sworker
;
1217 LWLockRelease(AutovacuumLock
);
1219 SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER
);
1221 retval
= avdb
->adw_datid
;
1226 * If we skipped all databases on the list, rebuild it, because it
1227 * probably contains a dropped database.
1229 rebuild_database_list(InvalidOid
);
1232 MemoryContextSwitchTo(oldcxt
);
1233 MemoryContextDelete(tmpcxt
);
1241 * Wrapper for starting a worker from the launcher. Besides actually starting
1242 * it, update the database list to reflect the next time that another one will
1243 * need to be started on the selected database. The actual database choice is
1244 * left to do_start_worker.
1246 * This routine is also expected to insert an entry into the database list if
1247 * the selected database was previously absent from the list.
1250 launch_worker(TimestampTz now
)
1255 dbid
= do_start_worker();
1256 if (OidIsValid(dbid
))
1259 * Walk the database list and update the corresponding entry. If the
1260 * database is not on the list, we'll recreate the list.
1262 elem
= (DatabaseList
== NULL
) ? NULL
: DLGetHead(DatabaseList
);
1263 while (elem
!= NULL
)
1265 avl_dbase
*avdb
= DLE_VAL(elem
);
1267 if (avdb
->adl_datid
== dbid
)
1270 * add autovacuum_naptime seconds to the current time, and use
1271 * that as the new "next_worker" field for this database.
1273 avdb
->adl_next_worker
=
1274 TimestampTzPlusMilliseconds(now
, autovacuum_naptime
* 1000);
1276 DLMoveToFront(elem
);
1279 elem
= DLGetSucc(elem
);
1283 * If the database was not present in the database list, we rebuild
1284 * the list. It's possible that the database does not get into the
1285 * list anyway, for example if it's a database that doesn't have a
1286 * pgstat entry, but this is not a problem because we don't want to
1287 * schedule workers regularly into those in any case.
1290 rebuild_database_list(dbid
);
1295 * Called from postmaster to signal a failure to fork a process to become
1296 * worker. The postmaster should kill(SIGUSR1) the launcher shortly
1297 * after calling this function.
1300 AutoVacWorkerFailed(void)
1302 AutoVacuumShmem
->av_signal
[AutoVacForkFailed
] = true;
1305 /* SIGHUP: set flag to re-read config file at next convenient time */
1307 avl_sighup_handler(SIGNAL_ARGS
)
1312 /* SIGUSR1: a worker is up and running, or just finished */
1314 avl_sigusr1_handler(SIGNAL_ARGS
)
1319 /* SIGTERM: time to die */
1321 avl_sigterm_handler(SIGNAL_ARGS
)
1327 * avl_quickdie occurs when signalled SIGQUIT from postmaster.
1329 * Some backend has bought the farm, so we need to stop what we're doing
1333 avl_quickdie(SIGNAL_ARGS
)
1335 PG_SETMASK(&BlockSig
);
1338 * DO NOT proc_exit() -- we're here because shared memory may be
1339 * corrupted, so we don't want to try to clean up our transaction. Just
1340 * nail the windows shut and get out of town.
1342 * Note we do exit(2) not exit(0). This is to force the postmaster into a
1343 * system reset cycle if some idiot DBA sends a manual SIGQUIT to a random
1344 * backend. This is necessary precisely because we don't clean up our
1345 * shared memory state.
1351 /********************************************************************
1352 * AUTOVACUUM WORKER CODE
1353 ********************************************************************/
1357 * forkexec routines for the autovacuum worker.
1359 * Format up the arglist, then fork and exec.
1362 avworker_forkexec(void)
1367 av
[ac
++] = "postgres";
1368 av
[ac
++] = "--forkavworker";
1369 av
[ac
++] = NULL
; /* filled in by postmaster_forkexec */
1372 Assert(ac
< lengthof(av
));
1374 return postmaster_forkexec(ac
, av
);
1378 * We need this set from the outside, before InitProcess is called
1381 AutovacuumWorkerIAm(void)
1383 am_autovacuum_worker
= true;
1388 * Main entry point for autovacuum worker process.
1390 * This code is heavily based on pgarch.c, q.v.
1393 StartAutoVacWorker(void)
1398 switch ((worker_pid
= avworker_forkexec()))
1400 switch ((worker_pid
= fork_process()))
1405 (errmsg("could not fork autovacuum worker process: %m")));
1408 #ifndef EXEC_BACKEND
1410 /* in postmaster child ... */
1411 /* Close the postmaster's sockets */
1412 ClosePostmasterPorts(false);
1414 /* Lose the postmaster's on-exit routines */
1417 AutoVacWorkerMain(0, NULL
);
1421 return (int) worker_pid
;
1424 /* shouldn't get here */
1431 NON_EXEC_STATIC
void
1432 AutoVacWorkerMain(int argc
, char *argv
[])
1434 sigjmp_buf local_sigjmp_buf
;
1437 /* we are a postmaster subprocess now */
1438 IsUnderPostmaster
= true;
1439 am_autovacuum_worker
= true;
1441 /* reset MyProcPid */
1442 MyProcPid
= getpid();
1444 /* record Start Time for logging */
1445 MyStartTime
= time(NULL
);
1447 /* Identify myself via ps */
1448 init_ps_display("autovacuum worker process", "", "", "");
1450 SetProcessingMode(InitProcessing
);
1453 * If possible, make this process a group leader, so that the postmaster
1454 * can signal any child processes too. (autovacuum probably never has any
1455 * child processes, but for consistency we make all postmaster child
1456 * processes do this.)
1460 elog(FATAL
, "setsid() failed: %m");
1464 * Set up signal handlers. We operate on databases much like a regular
1465 * backend, so we use the same signal handling. See equivalent code in
1468 * Currently, we don't pay attention to postgresql.conf changes that
1469 * happen during a single daemon iteration, so we can ignore SIGHUP.
1471 pqsignal(SIGHUP
, SIG_IGN
);
1474 * SIGINT is used to signal cancelling the current table's vacuum; SIGTERM
1475 * means abort and exit cleanly, and SIGQUIT means abandon ship.
1477 pqsignal(SIGINT
, StatementCancelHandler
);
1478 pqsignal(SIGTERM
, die
);
1479 pqsignal(SIGQUIT
, quickdie
);
1480 pqsignal(SIGALRM
, handle_sig_alarm
);
1482 pqsignal(SIGPIPE
, SIG_IGN
);
1483 pqsignal(SIGUSR1
, CatchupInterruptHandler
);
1484 /* We don't listen for async notifies */
1485 pqsignal(SIGUSR2
, SIG_IGN
);
1486 pqsignal(SIGFPE
, FloatExceptionHandler
);
1487 pqsignal(SIGCHLD
, SIG_DFL
);
1489 /* Early initialization */
1493 * Create a per-backend PGPROC struct in shared memory, except in the
1494 * EXEC_BACKEND case where this was done in SubPostmasterMain. We must do
1495 * this before we can use LWLocks (and in the EXEC_BACKEND case we already
1496 * had to do some stuff with LWLocks).
1498 #ifndef EXEC_BACKEND
1503 * If an exception is encountered, processing resumes here.
1505 * See notes in postgres.c about the design of this coding.
1507 if (sigsetjmp(local_sigjmp_buf
, 1) != 0)
1509 /* Prevents interrupts while cleaning up */
1512 /* Report the error to the server log */
1516 * We can now go away. Note that because we called InitProcess, a
1517 * callback was registered to do ProcKill, which will clean up
1523 /* We can now handle ereport(ERROR) */
1524 PG_exception_stack
= &local_sigjmp_buf
;
1526 PG_SETMASK(&UnBlockSig
);
1529 * Force zero_damaged_pages OFF in the autovac process, even if it is set
1530 * in postgresql.conf. We don't really want such a dangerous option being
1531 * applied non-interactively.
1533 SetConfigOption("zero_damaged_pages", "false", PGC_SUSET
, PGC_S_OVERRIDE
);
1536 * Force statement_timeout to zero to avoid a timeout setting from
1537 * preventing regular maintenance from being executed.
1539 SetConfigOption("statement_timeout", "0", PGC_SUSET
, PGC_S_OVERRIDE
);
1542 * Get the info about the database we're going to work on.
1544 LWLockAcquire(AutovacuumLock
, LW_EXCLUSIVE
);
1547 * beware of startingWorker being INVALID; this should normally not
1548 * happen, but if a worker fails after forking and before this, the
1549 * launcher might have decided to remove it from the queue and start
1552 if (AutoVacuumShmem
->av_startingWorker
!= INVALID_OFFSET
)
1554 MyWorkerInfo
= (WorkerInfo
) MAKE_PTR(AutoVacuumShmem
->av_startingWorker
);
1555 dbid
= MyWorkerInfo
->wi_dboid
;
1556 MyWorkerInfo
->wi_proc
= MyProc
;
1558 /* insert into the running list */
1559 SHMQueueInsertBefore(&AutoVacuumShmem
->av_runningWorkers
,
1560 &MyWorkerInfo
->wi_links
);
1563 * remove from the "starting" pointer, so that the launcher can start
1564 * a new worker if required
1566 AutoVacuumShmem
->av_startingWorker
= INVALID_OFFSET
;
1567 LWLockRelease(AutovacuumLock
);
1569 on_shmem_exit(FreeWorkerInfo
, 0);
1571 /* wake up the launcher */
1572 if (AutoVacuumShmem
->av_launcherpid
!= 0)
1573 kill(AutoVacuumShmem
->av_launcherpid
, SIGUSR1
);
1577 /* no worker entry for me, go away */
1578 elog(WARNING
, "autovacuum worker started without a worker entry");
1580 LWLockRelease(AutovacuumLock
);
1583 if (OidIsValid(dbid
))
1588 * Report autovac startup to the stats collector. We deliberately do
1589 * this before InitPostgres, so that the last_autovac_time will get
1590 * updated even if the connection attempt fails. This is to prevent
1591 * autovac from getting "stuck" repeatedly selecting an unopenable
1592 * database, rather than making any progress on stuff it can connect
1595 pgstat_report_autovac(dbid
);
1598 * Connect to the selected database
1600 * Note: if we have selected a just-deleted database (due to using
1601 * stale stats info), we'll fail and exit here.
1603 InitPostgres(NULL
, dbid
, NULL
, &dbname
);
1604 SetProcessingMode(NormalProcessing
);
1605 set_ps_display(dbname
, false);
1607 (errmsg("autovacuum: processing database \"%s\"", dbname
)));
1610 pg_usleep(PostAuthDelay
* 1000000L);
1612 /* And do an appropriate amount of work */
1613 recentXid
= ReadNewTransactionId();
1618 * The launcher will be notified of my death in ProcKill, *if* we managed
1619 * to get a worker slot at all
1622 /* All done, go away */
1627 * Return a WorkerInfo to the free list
1630 FreeWorkerInfo(int code
, Datum arg
)
1632 if (MyWorkerInfo
!= NULL
)
1634 LWLockAcquire(AutovacuumLock
, LW_EXCLUSIVE
);
1637 * Wake the launcher up so that it can launch a new worker immediately
1638 * if required. We only save the launcher's PID in local memory here;
1639 * the actual signal will be sent when the PGPROC is recycled. Note
1640 * that we always do this, so that the launcher can rebalance the cost
1641 * limit setting of the remaining workers.
1643 * We somewhat ignore the risk that the launcher changes its PID
1644 * between our reading it and the actual kill; we expect ProcKill to be
1645 * called shortly after us, and we assume that PIDs are not reused too
1646 * quickly after a process exits.
1648 AutovacuumLauncherPid
= AutoVacuumShmem
->av_launcherpid
;
1650 SHMQueueDelete(&MyWorkerInfo
->wi_links
);
1651 MyWorkerInfo
->wi_links
.next
= AutoVacuumShmem
->av_freeWorkers
;
1652 MyWorkerInfo
->wi_dboid
= InvalidOid
;
1653 MyWorkerInfo
->wi_tableoid
= InvalidOid
;
1654 MyWorkerInfo
->wi_proc
= NULL
;
1655 MyWorkerInfo
->wi_launchtime
= 0;
1656 MyWorkerInfo
->wi_cost_delay
= 0;
1657 MyWorkerInfo
->wi_cost_limit
= 0;
1658 MyWorkerInfo
->wi_cost_limit_base
= 0;
1659 AutoVacuumShmem
->av_freeWorkers
= MAKE_OFFSET(MyWorkerInfo
);
1660 /* not mine anymore */
1661 MyWorkerInfo
= NULL
;
1664 * now that we're inactive, cause a rebalancing of the surviving
1667 AutoVacuumShmem
->av_signal
[AutoVacRebalance
] = true;
1668 LWLockRelease(AutovacuumLock
);
1673 * Update the cost-based delay parameters, so that multiple workers consume
1674 * each a fraction of the total available I/O.
1677 AutoVacuumUpdateDelay(void)
1681 VacuumCostDelay
= MyWorkerInfo
->wi_cost_delay
;
1682 VacuumCostLimit
= MyWorkerInfo
->wi_cost_limit
;
1687 * autovac_balance_cost
1688 * Recalculate the cost limit setting for each active worker.
1690 * Caller must hold the AutovacuumLock in exclusive mode.
1693 autovac_balance_cost(void)
1698 * note: in cost_limit, zero also means use value from elsewhere, because
1699 * zero is not a valid value.
1701 int vac_cost_limit
= (autovacuum_vac_cost_limit
> 0 ?
1702 autovacuum_vac_cost_limit
: VacuumCostLimit
);
1703 int vac_cost_delay
= (autovacuum_vac_cost_delay
>= 0 ?
1704 autovacuum_vac_cost_delay
: VacuumCostDelay
);
1708 /* not set? nothing to do */
1709 if (vac_cost_limit
<= 0 || vac_cost_delay
<= 0)
1712 /* calculate the total base cost limit of active workers */
1714 worker
= (WorkerInfo
) SHMQueueNext(&AutoVacuumShmem
->av_runningWorkers
,
1715 &AutoVacuumShmem
->av_runningWorkers
,
1716 offsetof(WorkerInfoData
, wi_links
));
1719 if (worker
->wi_proc
!= NULL
&&
1720 worker
->wi_cost_limit_base
> 0 && worker
->wi_cost_delay
> 0)
1722 (double) worker
->wi_cost_limit_base
/ worker
->wi_cost_delay
;
1724 worker
= (WorkerInfo
) SHMQueueNext(&AutoVacuumShmem
->av_runningWorkers
,
1726 offsetof(WorkerInfoData
, wi_links
));
1728 /* there are no cost limits -- nothing to do */
1729 if (cost_total
<= 0)
1733 * Adjust each cost limit of active workers to balance the total of cost
1734 * limit to autovacuum_vacuum_cost_limit.
1736 cost_avail
= (double) vac_cost_limit
/ vac_cost_delay
;
1737 worker
= (WorkerInfo
) SHMQueueNext(&AutoVacuumShmem
->av_runningWorkers
,
1738 &AutoVacuumShmem
->av_runningWorkers
,
1739 offsetof(WorkerInfoData
, wi_links
));
1742 if (worker
->wi_proc
!= NULL
&&
1743 worker
->wi_cost_limit_base
> 0 && worker
->wi_cost_delay
> 0)
1746 (cost_avail
* worker
->wi_cost_limit_base
/ cost_total
);
1749 * We put a lower bound of 1 to the cost_limit, to avoid division-
1750 * by-zero in the vacuum code.
1752 worker
->wi_cost_limit
= Max(Min(limit
, worker
->wi_cost_limit_base
), 1);
1754 elog(DEBUG2
, "autovac_balance_cost(pid=%u db=%u, rel=%u, cost_limit=%d, cost_delay=%d)",
1755 worker
->wi_proc
->pid
, worker
->wi_dboid
,
1756 worker
->wi_tableoid
, worker
->wi_cost_limit
, worker
->wi_cost_delay
);
1759 worker
= (WorkerInfo
) SHMQueueNext(&AutoVacuumShmem
->av_runningWorkers
,
1761 offsetof(WorkerInfoData
, wi_links
));
1768 * Return a list of all databases. Note we cannot use pg_database,
1769 * because we aren't connected; we use the flat database file.
1772 get_database_list(void)
1776 char thisname
[NAMEDATALEN
];
1780 TransactionId db_frozenxid
;
1782 filename
= database_getflatfilename();
1783 db_file
= AllocateFile(filename
, "r");
1784 if (db_file
== NULL
)
1786 (errcode_for_file_access(),
1787 errmsg("could not open file \"%s\": %m", filename
)));
1789 while (read_pg_database_line(db_file
, thisname
, &db_id
,
1790 &db_tablespace
, &db_frozenxid
))
1794 avdb
= (avw_dbase
*) palloc(sizeof(avw_dbase
));
1796 avdb
->adw_datid
= db_id
;
1797 avdb
->adw_name
= pstrdup(thisname
);
1798 avdb
->adw_frozenxid
= db_frozenxid
;
1799 /* this gets set later: */
1800 avdb
->adw_entry
= NULL
;
1802 dblist
= lappend(dblist
, avdb
);
1812 * Process a database table-by-table
1814 * Note that CHECK_FOR_INTERRUPTS is supposed to be used in certain spots in
1815 * order not to ignore shutdown commands for too long.
1823 HeapScanDesc relScan
;
1824 Form_pg_database dbForm
;
1825 List
*table_oids
= NIL
;
1827 HTAB
*table_toast_map
;
1828 ListCell
*volatile cell
;
1829 PgStat_StatDBEntry
*shared
;
1830 PgStat_StatDBEntry
*dbentry
;
1831 BufferAccessStrategy bstrategy
;
1835 * StartTransactionCommand and CommitTransactionCommand will automatically
1836 * switch to other contexts. We need this one to keep the list of
1837 * relations to vacuum/analyze across transactions.
1839 AutovacMemCxt
= AllocSetContextCreate(TopMemoryContext
,
1841 ALLOCSET_DEFAULT_MINSIZE
,
1842 ALLOCSET_DEFAULT_INITSIZE
,
1843 ALLOCSET_DEFAULT_MAXSIZE
);
1844 MemoryContextSwitchTo(AutovacMemCxt
);
1847 * may be NULL if we couldn't find an entry (only happens if we are
1848 * forcing a vacuum for anti-wrap purposes).
1850 dbentry
= pgstat_fetch_stat_dbentry(MyDatabaseId
);
1852 /* Start a transaction so our commands have one to play into. */
1853 StartTransactionCommand();
1856 * Clean up any dead statistics collector entries for this DB. We always
1857 * want to do this exactly once per DB-processing cycle, even if we find
1858 * nothing worth vacuuming in the database.
1860 pgstat_vacuum_stat();
1863 * Find the pg_database entry and select the default freeze_min_age. We
1864 * use zero in template and nonconnectable databases, else the system-wide
1867 tuple
= SearchSysCache(DATABASEOID
,
1868 ObjectIdGetDatum(MyDatabaseId
),
1870 if (!HeapTupleIsValid(tuple
))
1871 elog(ERROR
, "cache lookup failed for database %u", MyDatabaseId
);
1872 dbForm
= (Form_pg_database
) GETSTRUCT(tuple
);
1874 if (dbForm
->datistemplate
|| !dbForm
->datallowconn
)
1875 default_freeze_min_age
= 0;
1877 default_freeze_min_age
= vacuum_freeze_min_age
;
1879 ReleaseSysCache(tuple
);
1881 /* StartTransactionCommand switched memory contexts; switch back to ours */
1882 MemoryContextSwitchTo(AutovacMemCxt
);
1884 /* The database hash where pgstat keeps shared relations */
1885 shared
= pgstat_fetch_stat_dbentry(InvalidOid
);
1887 classRel
= heap_open(RelationRelationId
, AccessShareLock
);
1888 avRel
= heap_open(AutovacuumRelationId
, AccessShareLock
);
1890 /* create hash table for toast <-> main relid mapping */
1891 MemSet(&ctl
, 0, sizeof(ctl
));
1892 ctl
.keysize
= sizeof(Oid
);
1893 ctl
.entrysize
= sizeof(Oid
) * 2;
1894 ctl
.hash
= oid_hash
;
1896 table_toast_map
= hash_create("TOAST to main relid map",
1899 HASH_ELEM
| HASH_FUNCTION
);
1902 * Scan pg_class to determine which tables to vacuum.
1904 * We do this in two passes: on the first one we collect the list of
1905 * plain relations, and on the second one we collect TOAST tables.
1906 * The reason for doing the second pass is that during it we want to use
1907 * the main relation's pg_autovacuum entry if the TOAST table does not have
1908 * any, and we cannot obtain it unless we know beforehand what's the main
1911 * We need to check TOAST tables separately because in cases with short,
1912 * wide tables there might be proportionally much more activity in the
1913 * TOAST table than in its parent.
1916 Anum_pg_class_relkind
,
1917 BTEqualStrategyNumber
, F_CHAREQ
,
1918 CharGetDatum(RELKIND_RELATION
));
1920 relScan
= heap_beginscan(classRel
, SnapshotNow
, 1, &key
);
1923 * On the first pass, we collect main tables to vacuum, and also the
1924 * main table relid to TOAST relid mapping.
1926 while ((tuple
= heap_getnext(relScan
, ForwardScanDirection
)) != NULL
)
1928 Form_pg_class classForm
= (Form_pg_class
) GETSTRUCT(tuple
);
1929 Form_pg_autovacuum avForm
= NULL
;
1930 PgStat_StatTabEntry
*tabentry
;
1938 relid
= HeapTupleGetOid(tuple
);
1940 /* Fetch the pg_autovacuum tuple for the relation, if any */
1941 avTup
= get_pg_autovacuum_tuple_relid(avRel
, relid
, NULL
);
1942 if (HeapTupleIsValid(avTup
))
1943 avForm
= (Form_pg_autovacuum
) GETSTRUCT(avTup
);
1945 /* Fetch the pgstat entry for this table */
1946 tabentry
= get_pgstat_tabentry_relid(relid
, classForm
->relisshared
,
1949 /* Check if it needs vacuum or analyze */
1950 relation_needs_vacanalyze(relid
, avForm
, classForm
, tabentry
,
1951 &dovacuum
, &doanalyze
, &wraparound
);
1954 * Check if it is a temp table (presumably, of some other backend's).
1955 * We cannot safely process other backends' temp tables.
1957 backendID
= GetTempNamespaceBackendId(classForm
->relnamespace
);
1961 /* We just ignore it if the owning backend is still active */
1962 if (backendID
== MyBackendId
|| !BackendIdIsActive(backendID
))
1965 * We found an orphan temp table (which was probably left
1966 * behind by a crashed backend). If it's so old as to need
1967 * vacuum for wraparound, forcibly drop it. Otherwise just
1972 ObjectAddress object
;
1975 (errmsg("autovacuum: dropping orphan temp table \"%s\".\"%s\" in database \"%s\"",
1976 get_namespace_name(classForm
->relnamespace
),
1977 NameStr(classForm
->relname
),
1978 get_database_name(MyDatabaseId
))));
1979 object
.classId
= RelationRelationId
;
1980 object
.objectId
= relid
;
1981 object
.objectSubId
= 0;
1982 performDeletion(&object
, DROP_CASCADE
);
1987 (errmsg("autovacuum: found orphan temp table \"%s\".\"%s\" in database \"%s\"",
1988 get_namespace_name(classForm
->relnamespace
),
1989 NameStr(classForm
->relname
),
1990 get_database_name(MyDatabaseId
))));
1996 /* Plain relations that need work are added to table_oids */
1997 if (dovacuum
|| doanalyze
)
1998 table_oids
= lappend_oid(table_oids
, relid
);
2001 * Remember the association for the second pass. Note: we must do
2002 * this even if the table is going to be vacuumed, because we
2003 * don't automatically vacuum toast tables along the parent table.
2005 if (OidIsValid(classForm
->reltoastrelid
))
2007 av_relation
*hentry
;
2010 hentry
= hash_search(table_toast_map
,
2011 &classForm
->reltoastrelid
,
2012 HASH_ENTER
, &found
);
2016 /* hash_search already filled in the key */
2017 hentry
->ar_relid
= relid
;
2022 if (HeapTupleIsValid(avTup
))
2023 heap_freetuple(avTup
);
2026 heap_endscan(relScan
);
2028 /* second pass: check TOAST tables */
2030 Anum_pg_class_relkind
,
2031 BTEqualStrategyNumber
, F_CHAREQ
,
2032 CharGetDatum(RELKIND_TOASTVALUE
));
2034 relScan
= heap_beginscan(classRel
, SnapshotNow
, 1, &key
);
2035 while ((tuple
= heap_getnext(relScan
, ForwardScanDirection
)) != NULL
)
2037 Form_pg_class classForm
= (Form_pg_class
) GETSTRUCT(tuple
);
2038 Form_pg_autovacuum avForm
= NULL
;
2039 PgStat_StatTabEntry
*tabentry
;
2047 * Skip temp tables (i.e. those in temp namespaces). We cannot safely
2048 * process other backends' temp tables.
2050 if (isAnyTempNamespace(classForm
->relnamespace
))
2053 relid
= HeapTupleGetOid(tuple
);
2055 /* Fetch the pg_autovacuum tuple for this rel */
2056 avTup
= get_pg_autovacuum_tuple_relid(avRel
, relid
, table_toast_map
);
2058 if (HeapTupleIsValid(avTup
))
2059 avForm
= (Form_pg_autovacuum
) GETSTRUCT(avTup
);
2061 /* Fetch the pgstat entry for this table */
2062 tabentry
= get_pgstat_tabentry_relid(relid
, classForm
->relisshared
,
2065 relation_needs_vacanalyze(relid
, avForm
, classForm
, tabentry
,
2066 &dovacuum
, &doanalyze
, &wraparound
);
2068 /* ignore analyze for toast tables */
2070 table_oids
= lappend_oid(table_oids
, relid
);
2073 heap_endscan(relScan
);
2074 heap_close(avRel
, AccessShareLock
);
2075 heap_close(classRel
, AccessShareLock
);
2078 * Create a buffer access strategy object for VACUUM to use. We want to
2079 * use the same one across all the vacuum operations we perform, since the
2080 * point is for VACUUM not to blow out the shared cache.
2082 bstrategy
= GetAccessStrategy(BAS_VACUUM
);
2085 * create a memory context to act as fake PortalContext, so that the
2086 * contexts created in the vacuum code are cleaned up for each table.
2088 PortalContext
= AllocSetContextCreate(AutovacMemCxt
,
2089 "Autovacuum Portal",
2090 ALLOCSET_DEFAULT_INITSIZE
,
2091 ALLOCSET_DEFAULT_MINSIZE
,
2092 ALLOCSET_DEFAULT_MAXSIZE
);
2095 * Perform operations on collected tables.
2097 foreach(cell
, table_oids
)
2099 Oid relid
= lfirst_oid(cell
);
2104 CHECK_FOR_INTERRUPTS();
2107 * hold schedule lock from here until we're sure that this table still
2108 * needs vacuuming. We also need the AutovacuumLock to walk the
2109 * worker array, but we'll let go of that one quickly.
2111 LWLockAcquire(AutovacuumScheduleLock
, LW_EXCLUSIVE
);
2112 LWLockAcquire(AutovacuumLock
, LW_SHARED
);
2115 * Check whether the table is being vacuumed concurrently by another
2119 worker
= (WorkerInfo
) SHMQueueNext(&AutoVacuumShmem
->av_runningWorkers
,
2120 &AutoVacuumShmem
->av_runningWorkers
,
2121 offsetof(WorkerInfoData
, wi_links
));
2125 if (worker
== MyWorkerInfo
)
2128 /* ignore workers in other databases */
2129 if (worker
->wi_dboid
!= MyDatabaseId
)
2132 if (worker
->wi_tableoid
== relid
)
2139 worker
= (WorkerInfo
) SHMQueueNext(&AutoVacuumShmem
->av_runningWorkers
,
2141 offsetof(WorkerInfoData
, wi_links
));
2143 LWLockRelease(AutovacuumLock
);
2146 LWLockRelease(AutovacuumScheduleLock
);
2151 * Check whether pgstat data still says we need to vacuum this table.
2152 * It could have changed if something else processed the table while
2153 * we weren't looking.
2155 * FIXME we ignore the possibility that the table was finished being
2156 * vacuumed in the last 500ms (PGSTAT_STAT_INTERVAL). This is a bug.
2158 MemoryContextSwitchTo(AutovacMemCxt
);
2159 tab
= table_recheck_autovac(relid
, table_toast_map
);
2162 /* someone else vacuumed the table */
2163 LWLockRelease(AutovacuumScheduleLock
);
2168 * Ok, good to go. Store the table in shared memory before releasing
2169 * the lock so that other workers don't vacuum it concurrently.
2171 MyWorkerInfo
->wi_tableoid
= relid
;
2172 LWLockRelease(AutovacuumScheduleLock
);
2174 /* Set the initial vacuum cost parameters for this table */
2175 VacuumCostDelay
= tab
->at_vacuum_cost_delay
;
2176 VacuumCostLimit
= tab
->at_vacuum_cost_limit
;
2178 /* Last fixups before actually starting to work */
2179 LWLockAcquire(AutovacuumLock
, LW_EXCLUSIVE
);
2181 /* advertise my cost delay parameters for the balancing algorithm */
2182 MyWorkerInfo
->wi_cost_delay
= tab
->at_vacuum_cost_delay
;
2183 MyWorkerInfo
->wi_cost_limit
= tab
->at_vacuum_cost_limit
;
2184 MyWorkerInfo
->wi_cost_limit_base
= tab
->at_vacuum_cost_limit
;
2187 autovac_balance_cost();
2190 LWLockRelease(AutovacuumLock
);
2192 /* clean up memory before each iteration */
2193 MemoryContextResetAndDeleteChildren(PortalContext
);
2196 * Save the relation name for a possible error message, to avoid a
2197 * catalog lookup in case of an error. If any of these return NULL,
2198 * then the relation has been dropped since last we checked; skip it.
2199 * Note: they must live in a long-lived memory context because we call
2200 * vacuum and analyze in different transactions.
2203 tab
->at_relname
= get_rel_name(tab
->at_relid
);
2204 tab
->at_nspname
= get_namespace_name(get_rel_namespace(tab
->at_relid
));
2205 tab
->at_datname
= get_database_name(MyDatabaseId
);
2206 if (!tab
->at_relname
|| !tab
->at_nspname
|| !tab
->at_datname
)
2210 * We will abort vacuuming the current table if something errors out,
2211 * and continue with the next one in schedule; in particular, this
2212 * happens if we are interrupted with SIGINT.
2217 MemoryContextSwitchTo(TopTransactionContext
);
2218 autovacuum_do_vac_analyze(tab
, bstrategy
);
2221 * Clear a possible query-cancel signal, to avoid a late reaction
2222 * to an automatically-sent signal because of vacuuming the
2223 * current table (we're done with it, so it would make no sense to
2224 * cancel at this point.)
2226 QueryCancelPending
= false;
2231 * Abort the transaction, start a new one, and proceed with the
2232 * next table in our list.
2235 if (tab
->at_dovacuum
)
2236 errcontext("automatic vacuum of table \"%s.%s.%s\"",
2237 tab
->at_datname
, tab
->at_nspname
, tab
->at_relname
);
2239 errcontext("automatic analyze of table \"%s.%s.%s\"",
2240 tab
->at_datname
, tab
->at_nspname
, tab
->at_relname
);
2243 /* this resets the PGPROC flags too */
2244 AbortOutOfAnyTransaction();
2246 MemoryContextResetAndDeleteChildren(PortalContext
);
2248 /* restart our transaction for the following operations */
2249 StartTransactionCommand();
2250 RESUME_INTERRUPTS();
2254 /* the PGPROC flags are reset at the next end of transaction */
2258 if (tab
->at_datname
!= NULL
)
2259 pfree(tab
->at_datname
);
2260 if (tab
->at_nspname
!= NULL
)
2261 pfree(tab
->at_nspname
);
2262 if (tab
->at_relname
!= NULL
)
2263 pfree(tab
->at_relname
);
2266 /* remove my info from shared memory */
2267 LWLockAcquire(AutovacuumLock
, LW_EXCLUSIVE
);
2268 MyWorkerInfo
->wi_tableoid
= InvalidOid
;
2269 LWLockRelease(AutovacuumLock
);
2273 * We leak table_toast_map here (among other things), but since we're going
2274 * away soon, it's not a problem.
2278 * Update pg_database.datfrozenxid, and truncate pg_clog if possible. We
2279 * only need to do this once, not after each table.
2281 vac_update_datfrozenxid();
2283 /* Finally close out the last transaction. */
2284 CommitTransactionCommand();
2288 * Returns a copy of the pg_autovacuum tuple for the given relid, or NULL if
2289 * there isn't any. avRel is pg_autovacuum, already open and suitably locked.
2291 * If table_toast_map is not null, use it to find an alternative OID with which
2292 * to search a pg_autovacuum entry, if the passed relid does not yield one
2296 get_pg_autovacuum_tuple_relid(Relation avRel
, Oid relid
,
2297 HTAB
*table_toast_map
)
2299 ScanKeyData entry
[1];
2303 ScanKeyInit(&entry
[0],
2304 Anum_pg_autovacuum_vacrelid
,
2305 BTEqualStrategyNumber
, F_OIDEQ
,
2306 ObjectIdGetDatum(relid
));
2308 avScan
= systable_beginscan(avRel
, AutovacuumRelidIndexId
, true,
2309 SnapshotNow
, 1, entry
);
2311 avTup
= systable_getnext(avScan
);
2313 if (HeapTupleIsValid(avTup
))
2314 avTup
= heap_copytuple(avTup
);
2316 systable_endscan(avScan
);
2318 if (!HeapTupleIsValid(avTup
) && table_toast_map
!= NULL
)
2320 av_relation
*hentry
;
2323 hentry
= hash_search(table_toast_map
, &relid
, HASH_FIND
, &found
);
2325 /* avoid second recursion */
2326 avTup
= get_pg_autovacuum_tuple_relid(avRel
, hentry
->ar_relid
,
2334 * get_pgstat_tabentry_relid
2336 * Fetch the pgstat entry of a table, either local to a database or shared.
2338 static PgStat_StatTabEntry
*
2339 get_pgstat_tabentry_relid(Oid relid
, bool isshared
, PgStat_StatDBEntry
*shared
,
2340 PgStat_StatDBEntry
*dbentry
)
2342 PgStat_StatTabEntry
*tabentry
= NULL
;
2346 if (PointerIsValid(shared
))
2347 tabentry
= hash_search(shared
->tables
, &relid
,
2350 else if (PointerIsValid(dbentry
))
2351 tabentry
= hash_search(dbentry
->tables
, &relid
,
2358 * table_recheck_autovac
2360 * Recheck whether a table still needs vacuum or analyze. Return value is a
2361 * valid autovac_table pointer if it does, NULL otherwise.
2363 * Note that the returned autovac_table does not have the name fields set.
2365 static autovac_table
*
2366 table_recheck_autovac(Oid relid
, HTAB
*table_toast_map
)
2368 Form_pg_autovacuum avForm
= NULL
;
2369 Form_pg_class classForm
;
2375 autovac_table
*tab
= NULL
;
2376 PgStat_StatTabEntry
*tabentry
;
2377 PgStat_StatDBEntry
*shared
;
2378 PgStat_StatDBEntry
*dbentry
;
2381 /* use fresh stats */
2382 autovac_refresh_stats();
2384 shared
= pgstat_fetch_stat_dbentry(InvalidOid
);
2385 dbentry
= pgstat_fetch_stat_dbentry(MyDatabaseId
);
2387 /* fetch a copy of the relation's pg_class tuple from the syscache */
2388 classTup
= SearchSysCacheCopy(RELOID
,
2389 ObjectIdGetDatum(relid
),
2391 if (!HeapTupleIsValid(classTup
))
2393 classForm
= (Form_pg_class
) GETSTRUCT(classTup
);
2396 * Fetch the pg_autovacuum entry, if any. For a toast table, also try the
2397 * main rel's pg_autovacuum entry if there isn't one for the TOAST table
2400 avRel
= heap_open(AutovacuumRelationId
, AccessShareLock
);
2401 avTup
= get_pg_autovacuum_tuple_relid(avRel
, relid
,
2402 classForm
->relkind
== RELKIND_TOASTVALUE
? table_toast_map
: NULL
);
2404 if (HeapTupleIsValid(avTup
))
2405 avForm
= (Form_pg_autovacuum
) GETSTRUCT(avTup
);
2407 /* fetch the pgstat table entry */
2408 tabentry
= get_pgstat_tabentry_relid(relid
, classForm
->relisshared
,
2411 relation_needs_vacanalyze(relid
, avForm
, classForm
, tabentry
,
2412 &dovacuum
, &doanalyze
, &wraparound
);
2414 /* ignore ANALYZE for toast tables */
2415 if (classForm
->relkind
== RELKIND_TOASTVALUE
)
2418 /* OK, it needs something done */
2419 if (doanalyze
|| dovacuum
)
2426 * Calculate the vacuum cost parameters and the minimum freeze age. If
2427 * there is a tuple in pg_autovacuum, use it; else, use the GUC
2428 * defaults. Note that the fields may contain "-1" (or indeed any
2429 * negative value), which means use the GUC defaults for each setting.
2430 * In cost_limit, the value 0 also means to use the value from
2435 vac_cost_limit
= (avForm
->vac_cost_limit
> 0) ?
2436 avForm
->vac_cost_limit
:
2437 ((autovacuum_vac_cost_limit
> 0) ?
2438 autovacuum_vac_cost_limit
: VacuumCostLimit
);
2440 vac_cost_delay
= (avForm
->vac_cost_delay
>= 0) ?
2441 avForm
->vac_cost_delay
:
2442 ((autovacuum_vac_cost_delay
>= 0) ?
2443 autovacuum_vac_cost_delay
: VacuumCostDelay
);
2445 freeze_min_age
= (avForm
->freeze_min_age
>= 0) ?
2446 avForm
->freeze_min_age
: default_freeze_min_age
;
2450 vac_cost_limit
= (autovacuum_vac_cost_limit
> 0) ?
2451 autovacuum_vac_cost_limit
: VacuumCostLimit
;
2453 vac_cost_delay
= (autovacuum_vac_cost_delay
>= 0) ?
2454 autovacuum_vac_cost_delay
: VacuumCostDelay
;
2456 freeze_min_age
= default_freeze_min_age
;
2459 tab
= palloc(sizeof(autovac_table
));
2460 tab
->at_relid
= relid
;
2461 tab
->at_dovacuum
= dovacuum
;
2462 tab
->at_doanalyze
= doanalyze
;
2463 tab
->at_freeze_min_age
= freeze_min_age
;
2464 tab
->at_vacuum_cost_limit
= vac_cost_limit
;
2465 tab
->at_vacuum_cost_delay
= vac_cost_delay
;
2466 tab
->at_wraparound
= wraparound
;
2467 tab
->at_relname
= NULL
;
2468 tab
->at_nspname
= NULL
;
2469 tab
->at_datname
= NULL
;
2472 heap_close(avRel
, AccessShareLock
);
2473 if (HeapTupleIsValid(avTup
))
2474 heap_freetuple(avTup
);
2475 heap_freetuple(classTup
);
2481 * relation_needs_vacanalyze
2483 * Check whether a relation needs to be vacuumed or analyzed; return each into
2484 * "dovacuum" and "doanalyze", respectively. Also return whether the vacuum is
2485 * being forced because of Xid wraparound. avForm and tabentry can be NULL,
2486 * classForm shouldn't.
2488 * A table needs to be vacuumed if the number of dead tuples exceeds a
2489 * threshold. This threshold is calculated as
2491 * threshold = vac_base_thresh + vac_scale_factor * reltuples
2493 * For analyze, the analysis done is that the number of tuples inserted,
2494 * deleted and updated since the last analyze exceeds a threshold calculated
2495 * in the same fashion as above. Note that the collector actually stores
2496 * the number of tuples (both live and dead) that there were as of the last
2497 * analyze. This is asymmetric to the VACUUM case.
2499 * We also force vacuum if the table's relfrozenxid is more than freeze_max_age
2500 * transactions back.
2502 * A table whose pg_autovacuum.enabled value is false, is automatically
2503 * skipped (unless we have to vacuum it due to freeze_max_age). Thus
2504 * autovacuum can be disabled for specific tables. Also, when the stats
2505 * collector does not have data about a table, it will be skipped.
2507 * A table whose vac_base_thresh value is <0 takes the base value from the
2508 * autovacuum_vacuum_threshold GUC variable. Similarly, a vac_scale_factor
2509 * value <0 is substituted with the value of
2510 * autovacuum_vacuum_scale_factor GUC variable. Ditto for analyze.
2513 relation_needs_vacanalyze(Oid relid
,
2514 Form_pg_autovacuum avForm
,
2515 Form_pg_class classForm
,
2516 PgStat_StatTabEntry
*tabentry
,
2517 /* output params below */
2523 float4 reltuples
; /* pg_class.reltuples */
2525 /* constants from pg_autovacuum or GUC variables */
2526 int vac_base_thresh
,
2528 float4 vac_scale_factor
,
2531 /* thresholds calculated from above constants */
2535 /* number of vacuum (resp. analyze) tuples at this time */
2539 /* freeze parameters */
2541 TransactionId xidForceLimit
;
2543 AssertArg(classForm
!= NULL
);
2544 AssertArg(OidIsValid(relid
));
2547 * Determine vacuum/analyze equation parameters. If there is a tuple in
2548 * pg_autovacuum, use it; else, use the GUC defaults. Note that the
2549 * fields may contain "-1" (or indeed any negative value), which means use
2550 * the GUC defaults for each setting.
2554 vac_scale_factor
= (avForm
->vac_scale_factor
>= 0) ?
2555 avForm
->vac_scale_factor
: autovacuum_vac_scale
;
2556 vac_base_thresh
= (avForm
->vac_base_thresh
>= 0) ?
2557 avForm
->vac_base_thresh
: autovacuum_vac_thresh
;
2559 anl_scale_factor
= (avForm
->anl_scale_factor
>= 0) ?
2560 avForm
->anl_scale_factor
: autovacuum_anl_scale
;
2561 anl_base_thresh
= (avForm
->anl_base_thresh
>= 0) ?
2562 avForm
->anl_base_thresh
: autovacuum_anl_thresh
;
2564 freeze_max_age
= (avForm
->freeze_max_age
>= 0) ?
2565 Min(avForm
->freeze_max_age
, autovacuum_freeze_max_age
) :
2566 autovacuum_freeze_max_age
;
2570 vac_scale_factor
= autovacuum_vac_scale
;
2571 vac_base_thresh
= autovacuum_vac_thresh
;
2573 anl_scale_factor
= autovacuum_anl_scale
;
2574 anl_base_thresh
= autovacuum_anl_thresh
;
2576 freeze_max_age
= autovacuum_freeze_max_age
;
2579 /* Force vacuum if table is at risk of wraparound */
2580 xidForceLimit
= recentXid
- freeze_max_age
;
2581 if (xidForceLimit
< FirstNormalTransactionId
)
2582 xidForceLimit
-= FirstNormalTransactionId
;
2583 force_vacuum
= (TransactionIdIsNormal(classForm
->relfrozenxid
) &&
2584 TransactionIdPrecedes(classForm
->relfrozenxid
,
2586 *wraparound
= force_vacuum
;
2588 /* User disabled it in pg_autovacuum? (But ignore if at risk) */
2589 if (avForm
&& !avForm
->enabled
&& !force_vacuum
)
2596 if (PointerIsValid(tabentry
))
2598 reltuples
= classForm
->reltuples
;
2599 vactuples
= tabentry
->n_dead_tuples
;
2600 anltuples
= tabentry
->n_live_tuples
+ tabentry
->n_dead_tuples
-
2601 tabentry
->last_anl_tuples
;
2603 vacthresh
= (float4
) vac_base_thresh
+ vac_scale_factor
* reltuples
;
2604 anlthresh
= (float4
) anl_base_thresh
+ anl_scale_factor
* reltuples
;
2607 * Note that we don't need to take special consideration for stat
2608 * reset, because if that happens, the last vacuum and analyze counts
2609 * will be reset too.
2611 elog(DEBUG3
, "%s: vac: %.0f (threshold %.0f), anl: %.0f (threshold %.0f)",
2612 NameStr(classForm
->relname
),
2613 vactuples
, vacthresh
, anltuples
, anlthresh
);
2615 /* Determine if this table needs vacuum or analyze. */
2616 *dovacuum
= force_vacuum
|| (vactuples
> vacthresh
);
2617 *doanalyze
= (anltuples
> anlthresh
);
2622 * Skip a table not found in stat hash, unless we have to force vacuum
2623 * for anti-wrap purposes. If it's not acted upon, there's no need to
2626 *dovacuum
= force_vacuum
;
2630 /* ANALYZE refuses to work with pg_statistics */
2631 if (relid
== StatisticRelationId
)
2636 * autovacuum_do_vac_analyze
2637 * Vacuum and/or analyze the specified table
2640 autovacuum_do_vac_analyze(autovac_table
*tab
,
2641 BufferAccessStrategy bstrategy
)
2645 /* Set up command parameters --- use a local variable instead of palloc */
2646 MemSet(&vacstmt
, 0, sizeof(vacstmt
));
2648 vacstmt
.type
= T_VacuumStmt
;
2649 vacstmt
.vacuum
= tab
->at_dovacuum
;
2650 vacstmt
.full
= false;
2651 vacstmt
.analyze
= tab
->at_doanalyze
;
2652 vacstmt
.freeze_min_age
= tab
->at_freeze_min_age
;
2653 vacstmt
.verbose
= false;
2654 vacstmt
.relation
= NULL
; /* not used since we pass a relid */
2655 vacstmt
.va_cols
= NIL
;
2657 /* Let pgstat know what we're doing */
2658 autovac_report_activity(tab
);
2660 vacuum(&vacstmt
, tab
->at_relid
, false, bstrategy
, tab
->at_wraparound
, true);
2664 * autovac_report_activity
2665 * Report to pgstat what autovacuum is doing
2667 * We send a SQL string corresponding to what the user would see if the
2668 * equivalent command was to be issued manually.
2670 * Note we assume that we are going to report the next command as soon as we're
2671 * done with the current one, and exit right after the last one, so we don't
2672 * bother to report "<IDLE>" or some such.
2675 autovac_report_activity(autovac_table
*tab
)
2677 #define MAX_AUTOVAC_ACTIV_LEN (NAMEDATALEN * 2 + 56)
2678 char activity
[MAX_AUTOVAC_ACTIV_LEN
];
2681 /* Report the command and possible options */
2682 if (tab
->at_dovacuum
)
2683 snprintf(activity
, MAX_AUTOVAC_ACTIV_LEN
,
2684 "autovacuum: VACUUM%s",
2685 tab
->at_doanalyze
? " ANALYZE" : "");
2687 snprintf(activity
, MAX_AUTOVAC_ACTIV_LEN
,
2688 "autovacuum: ANALYZE");
2691 * Report the qualified name of the relation.
2693 len
= strlen(activity
);
2695 snprintf(activity
+ len
, MAX_AUTOVAC_ACTIV_LEN
- len
,
2696 " %s.%s%s", tab
->at_nspname
, tab
->at_relname
,
2697 tab
->at_wraparound
? " (to prevent wraparound)" : "");
2699 /* Set statement_timestamp() to current time for pg_stat_activity */
2700 SetCurrentStatementStartTimestamp();
2702 pgstat_report_activity(activity
);
2706 * AutoVacuumingActive
2707 * Check GUC vars and report whether the autovacuum process should be
2711 AutoVacuumingActive(void)
2713 if (!autovacuum_start_daemon
|| !pgstat_track_counts
)
2720 * This is called at postmaster initialization.
2722 * All we do here is annoy the user if he got it wrong.
2727 if (autovacuum_start_daemon
&& !pgstat_track_counts
)
2729 (errmsg("autovacuum not started because of misconfiguration"),
2730 errhint("Enable the \"track_counts\" option.")));
2734 * IsAutoVacuum functions
2735 * Return whether this is either a launcher autovacuum process or a worker
2739 IsAutoVacuumLauncherProcess(void)
2741 return am_autovacuum_launcher
;
2745 IsAutoVacuumWorkerProcess(void)
2747 return am_autovacuum_worker
;
2752 * AutoVacuumShmemSize
2753 * Compute space needed for autovacuum-related shared memory
2756 AutoVacuumShmemSize(void)
2761 * Need the fixed struct and the array of WorkerInfoData.
2763 size
= sizeof(AutoVacuumShmemStruct
);
2764 size
= MAXALIGN(size
);
2765 size
= add_size(size
, mul_size(autovacuum_max_workers
,
2766 sizeof(WorkerInfoData
)));
2771 * AutoVacuumShmemInit
2772 * Allocate and initialize autovacuum-related shared memory
2775 AutoVacuumShmemInit(void)
2779 AutoVacuumShmem
= (AutoVacuumShmemStruct
*)
2780 ShmemInitStruct("AutoVacuum Data",
2781 AutoVacuumShmemSize(),
2783 if (AutoVacuumShmem
== NULL
)
2785 (errcode(ERRCODE_OUT_OF_MEMORY
),
2786 errmsg("not enough shared memory for autovacuum")));
2788 if (!IsUnderPostmaster
)
2795 AutoVacuumShmem
->av_launcherpid
= 0;
2796 AutoVacuumShmem
->av_freeWorkers
= INVALID_OFFSET
;
2797 SHMQueueInit(&AutoVacuumShmem
->av_runningWorkers
);
2798 AutoVacuumShmem
->av_startingWorker
= INVALID_OFFSET
;
2800 worker
= (WorkerInfo
) ((char *) AutoVacuumShmem
+
2801 MAXALIGN(sizeof(AutoVacuumShmemStruct
)));
2803 /* initialize the WorkerInfo free list */
2804 for (i
= 0; i
< autovacuum_max_workers
; i
++)
2806 worker
[i
].wi_links
.next
= AutoVacuumShmem
->av_freeWorkers
;
2807 AutoVacuumShmem
->av_freeWorkers
= MAKE_OFFSET(&worker
[i
]);
2815 * autovac_refresh_stats
2816 * Refresh pgstats data for an autovacuum process
2818 * Cause the next pgstats read operation to obtain fresh data, but throttle
2819 * such refreshing in the autovacuum launcher. This is mostly to avoid
2820 * rereading the pgstats files too many times in quick succession when there
2821 * are many databases.
2823 * Note: we avoid throttling in the autovac worker, as it would be
2824 * counterproductive in the recheck logic.
2827 autovac_refresh_stats(void)
2829 if (IsAutoVacuumLauncherProcess())
2831 static TimestampTz last_read
= 0;
2832 TimestampTz current_time
;
2834 current_time
= GetCurrentTimestamp();
2836 if (!TimestampDifferenceExceeds(last_read
, current_time
,
2840 last_read
= current_time
;
2843 pgstat_clear_snapshot();