1 /*-------------------------------------------------------------------------
5 * PostgreSQL WAL archiver
7 * All functions relating to archiver are included here
9 * - All functions executed by archiver process
11 * - archiver is forked from postmaster, and the two
12 * processes then communicate using signals. All functions
13 * executed by postmaster are included in this file.
15 * Initial author: Simon Riggs simon@2ndquadrant.com
17 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
18 * Portions Copyright (c) 1994, Regents of the University of California
24 *-------------------------------------------------------------------------
35 #include "access/xlog_internal.h"
36 #include "libpq/pqsignal.h"
37 #include "miscadmin.h"
38 #include "postmaster/fork_process.h"
39 #include "postmaster/pgarch.h"
40 #include "postmaster/postmaster.h"
41 #include "storage/fd.h"
42 #include "storage/ipc.h"
43 #include "storage/pg_shmem.h"
44 #include "storage/pmsignal.h"
45 #include "utils/guc.h"
46 #include "utils/ps_status.h"
53 #define PGARCH_AUTOWAKE_INTERVAL 60 /* How often to force a poll of the
54 * archive status directory; in seconds. */
56 #define PGARCH_RESTART_INTERVAL 10 /* How often to attempt to restart a
57 * failed archiver; in seconds. */
60 * Archiver control info.
62 * We expect that archivable files within pg_xlog will have names between
63 * MIN_XFN_CHARS and MAX_XFN_CHARS in length, consisting only of characters
64 * appearing in VALID_XFN_CHARS. The status files in archive_status have
65 * corresponding names with ".ready" or ".done" appended.
68 #define MIN_XFN_CHARS 16
69 #define MAX_XFN_CHARS 40
70 #define VALID_XFN_CHARS "0123456789ABCDEF.history.backup"
72 #define NUM_ARCHIVE_RETRIES 3
79 static time_t last_pgarch_start_time
;
80 static time_t last_sigterm_time
= 0;
83 * Flags set by interrupt handlers for later service in the main loop.
85 static volatile sig_atomic_t got_SIGHUP
= false;
86 static volatile sig_atomic_t got_SIGTERM
= false;
87 static volatile sig_atomic_t wakened
= false;
88 static volatile sig_atomic_t ready_to_stop
= false;
91 * Local function forward declarations
95 static pid_t
pgarch_forkexec(void);
98 NON_EXEC_STATIC
void PgArchiverMain(int argc
, char *argv
[]);
99 static void pgarch_exit(SIGNAL_ARGS
);
100 static void ArchSigHupHandler(SIGNAL_ARGS
);
101 static void ArchSigTermHandler(SIGNAL_ARGS
);
102 static void pgarch_waken(SIGNAL_ARGS
);
103 static void pgarch_waken_stop(SIGNAL_ARGS
);
104 static void pgarch_MainLoop(void);
105 static void pgarch_ArchiverCopyLoop(void);
106 static bool pgarch_archiveXlog(char *xlog
);
107 static bool pgarch_readyXlog(char *xlog
);
108 static void pgarch_archiveDone(char *xlog
);
111 /* ------------------------------------------------------------
112 * Public functions called from postmaster follow
113 * ------------------------------------------------------------
119 * Called from postmaster at startup or after an existing archiver
120 * died. Attempt to fire up a fresh archiver process.
122 * Returns PID of child process, or 0 if fail.
124 * Note: if fail, we will be called again from the postmaster main loop.
133 * Do nothing if no archiver needed
135 if (!XLogArchivingActive())
139 * Do nothing if too soon since last archiver start. This is a safety
140 * valve to protect against continuous respawn attempts if the archiver is
141 * dying immediately at launch. Note that since we will be re-called from
142 * the postmaster main loop, we will get another chance later.
144 curtime
= time(NULL
);
145 if ((unsigned int) (curtime
- last_pgarch_start_time
) <
146 (unsigned int) PGARCH_RESTART_INTERVAL
)
148 last_pgarch_start_time
= curtime
;
151 switch ((pgArchPid
= pgarch_forkexec()))
153 switch ((pgArchPid
= fork_process()))
158 (errmsg("could not fork archiver: %m")));
163 /* in postmaster child ... */
164 /* Close the postmaster's sockets */
165 ClosePostmasterPorts(false);
167 /* Lose the postmaster's on-exit routines */
170 /* Drop our connection to postmaster's shared memory, as well */
171 PGSharedMemoryDetach();
173 PgArchiverMain(0, NULL
);
178 return (int) pgArchPid
;
181 /* shouldn't get here */
185 /* ------------------------------------------------------------
186 * Local functions called by archiver follow
187 * ------------------------------------------------------------
194 * pgarch_forkexec() -
196 * Format up the arglist for, then fork and exec, archive process
199 pgarch_forkexec(void)
204 av
[ac
++] = "postgres";
206 av
[ac
++] = "--forkarch";
208 av
[ac
++] = NULL
; /* filled in by postmaster_forkexec */
211 Assert(ac
< lengthof(av
));
213 return postmaster_forkexec(ac
, av
);
215 #endif /* EXEC_BACKEND */
221 * The argc/argv parameters are valid only in EXEC_BACKEND case. However,
222 * since we don't use 'em, it hardly matters...
225 PgArchiverMain(int argc
, char *argv
[])
227 IsUnderPostmaster
= true; /* we are a postmaster subprocess now */
229 MyProcPid
= getpid(); /* reset MyProcPid */
231 MyStartTime
= time(NULL
); /* record Start Time for logging */
234 * If possible, make this process a group leader, so that the postmaster
235 * can signal any child processes too.
239 elog(FATAL
, "setsid() failed: %m");
243 * Ignore all signals usually bound to some action in the postmaster,
244 * except for SIGHUP, SIGTERM, SIGUSR1, SIGUSR2, and SIGQUIT.
246 pqsignal(SIGHUP
, ArchSigHupHandler
);
247 pqsignal(SIGINT
, SIG_IGN
);
248 pqsignal(SIGTERM
, ArchSigTermHandler
);
249 pqsignal(SIGQUIT
, pgarch_exit
);
250 pqsignal(SIGALRM
, SIG_IGN
);
251 pqsignal(SIGPIPE
, SIG_IGN
);
252 pqsignal(SIGUSR1
, pgarch_waken
);
253 pqsignal(SIGUSR2
, pgarch_waken_stop
);
254 pqsignal(SIGCHLD
, SIG_DFL
);
255 pqsignal(SIGTTIN
, SIG_DFL
);
256 pqsignal(SIGTTOU
, SIG_DFL
);
257 pqsignal(SIGCONT
, SIG_DFL
);
258 pqsignal(SIGWINCH
, SIG_DFL
);
259 PG_SETMASK(&UnBlockSig
);
262 * Identify myself via ps
264 init_ps_display("archiver process", "", "", "");
271 /* SIGQUIT signal handler for archiver process */
273 pgarch_exit(SIGNAL_ARGS
)
275 /* SIGQUIT means curl up and die ... */
279 /* SIGHUP signal handler for archiver process */
281 ArchSigHupHandler(SIGNAL_ARGS
)
283 /* set flag to re-read config file at next convenient time */
287 /* SIGTERM signal handler for archiver process */
289 ArchSigTermHandler(SIGNAL_ARGS
)
292 * The postmaster never sends us SIGTERM, so we assume that this means
293 * that init is trying to shut down the whole system. If we hang around
294 * too long we'll get SIGKILL'd. Set flag to prevent starting any more archive commands.
300 /* SIGUSR1 signal handler for archiver process */
302 pgarch_waken(SIGNAL_ARGS
)
304 /* set flag that there is work to be done */
308 /* SIGUSR2 signal handler for archiver process */
310 pgarch_waken_stop(SIGNAL_ARGS
)
312 /* set flag to do a final cycle and shut down afterwards */
313 ready_to_stop
= true;
319 * Main loop for archiver
322 pgarch_MainLoop(void)
324 time_t last_copy_time
= 0;
328 * We run the copy loop immediately upon entry, in case there are
329 * unarchived files left over from a previous database run (or maybe the
330 * archiver died unexpectedly). After that we wait for a signal or
331 * timeout before doing more.
337 /* When we get SIGUSR2, we do one more archive cycle, then exit */
338 time_to_stop
= ready_to_stop
;
340 /* Check for config update */
344 ProcessConfigFile(PGC_SIGHUP
);
348 * If we've gotten SIGTERM, we normally just sit and do nothing until
349 * SIGUSR2 arrives. However, that means a random SIGTERM would
350 * disable archiving indefinitely, which doesn't seem like a good
351 * idea. If more than 60 seconds pass since SIGTERM, exit anyway, so
352 * that the postmaster can start a new archiver if needed.
356 time_t curtime
= time(NULL
);
358 if (last_sigterm_time
== 0)
359 last_sigterm_time
= curtime
;
360 else if ((unsigned int) (curtime
- last_sigterm_time
) >=
365 /* Do what we're here for */
366 if (wakened
|| time_to_stop
)
369 pgarch_ArchiverCopyLoop();
370 last_copy_time
= time(NULL
);
374 * There shouldn't be anything for the archiver to do except to wait
375 * for a signal ... however, the archiver exists to protect our data,
376 * so she wakes up occasionally to allow herself to be proactive.
378 * On some platforms, signals won't interrupt the sleep. To ensure we
379 * respond reasonably promptly when someone signals us, break down the
380 * sleep into 1-second increments, and check for interrupts after each nap.
383 while (!(wakened
|| ready_to_stop
|| got_SIGHUP
||
384 !PostmasterIsAlive(true)))
389 curtime
= time(NULL
);
390 if ((unsigned int) (curtime
- last_copy_time
) >=
391 (unsigned int) PGARCH_AUTOWAKE_INTERVAL
)
396 * The archiver quits either when the postmaster dies (not expected)
397 * or after completing one more archiving cycle after receiving SIGUSR2.
400 } while (PostmasterIsAlive(true) && !time_to_stop
);
404 * pgarch_ArchiverCopyLoop
406 * Archives all outstanding xlogs then returns
409 pgarch_ArchiverCopyLoop(void)
411 char xlog
[MAX_XFN_CHARS
+ 1];
413 if (!XLogArchiveCommandSet())
416 (errmsg("archive_mode enabled, yet archive_command is not set")));
417 /* can't do anything if no command ... */
422 * loop through all xlogs with archive_status of .ready and archive
423 * them...mostly we expect this to be a single file, though it is possible
424 * some backend will add files onto the list of those that need archiving
425 * while we are still copying earlier archives
427 while (pgarch_readyXlog(xlog
))
434 * Do not initiate any more archive commands after receiving
435 * SIGTERM, nor after the postmaster has died unexpectedly. The
436 * first condition is to try to keep from having init SIGKILL the
437 * command, and the second is to avoid conflicts with another
438 * archiver spawned by a newer postmaster.
440 if (got_SIGTERM
|| !PostmasterIsAlive(true))
443 if (pgarch_archiveXlog(xlog
))
446 pgarch_archiveDone(xlog
);
447 break; /* out of inner retry loop */
451 if (++failures
>= NUM_ARCHIVE_RETRIES
)
454 (errmsg("transaction log file \"%s\" could not be archived: too many failures",
456 return; /* give up archiving for now */
458 pg_usleep(1000000L); /* wait a bit before retrying */
467 * Invokes system(3) to copy one archive file to wherever it should go
469 * Returns true if successful
472 pgarch_archiveXlog(char *xlog
)
474 char xlogarchcmd
[MAXPGPATH
];
475 char pathname
[MAXPGPATH
];
476 char activitymsg
[MAXFNAMELEN
+ 16];
482 snprintf(pathname
, MAXPGPATH
, XLOGDIR
"/%s", xlog
);
485 * construct the command to be executed
488 endp
= xlogarchcmd
+ MAXPGPATH
- 1;
491 for (sp
= XLogArchiveCommand
; *sp
; sp
++)
498 /* %p: relative path of source file */
500 strlcpy(dp
, pathname
, endp
- dp
);
501 make_native_path(dp
);
505 /* %f: filename of source file */
507 strlcpy(dp
, xlog
, endp
- dp
);
511 /* convert %% to a single % */
517 /* otherwise treat the % as not special */
532 (errmsg_internal("executing archive command \"%s\"",
535 /* Report archive activity in PS display */
536 snprintf(activitymsg
, sizeof(activitymsg
), "archiving %s", xlog
);
537 set_ps_display(activitymsg
, false);
539 rc
= system(xlogarchcmd
);
543 * If either the shell itself, or a called command, died on a signal,
544 * abort the archiver. We do this because system() ignores SIGINT and
545 * SIGQUIT while waiting; so a signal is very likely something that
546 * should have interrupted us too. If we overreact it's no big deal,
547 * the postmaster will just start the archiver again.
549 * Per the Single Unix Spec, shells report exit status > 128 when a
550 * called command died on a signal.
552 int lev
= (WIFSIGNALED(rc
) || WEXITSTATUS(rc
) > 128) ? FATAL
: LOG
;
557 (errmsg("archive command failed with exit code %d",
559 errdetail("The failed archive command was: %s",
562 else if (WIFSIGNALED(rc
))
566 (errmsg("archive command was terminated by exception 0x%X",
568 errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
569 errdetail("The failed archive command was: %s",
571 #elif defined(HAVE_DECL_SYS_SIGLIST) && HAVE_DECL_SYS_SIGLIST
573 (errmsg("archive command was terminated by signal %d: %s",
575 WTERMSIG(rc
) < NSIG
? sys_siglist
[WTERMSIG(rc
)] : "(unknown)"),
576 errdetail("The failed archive command was: %s",
580 (errmsg("archive command was terminated by signal %d",
582 errdetail("The failed archive command was: %s",
589 (errmsg("archive command exited with unrecognized status %d",
591 errdetail("The failed archive command was: %s",
595 snprintf(activitymsg
, sizeof(activitymsg
), "failed on %s", xlog
);
596 set_ps_display(activitymsg
, false);
601 (errmsg("archived transaction log file \"%s\"", xlog
)));
603 snprintf(activitymsg
, sizeof(activitymsg
), "last was %s", xlog
);
604 set_ps_display(activitymsg
, false);
612 * Return name of the oldest xlog file that has not yet been archived.
613 * No notification is set that file archiving is now in progress, so
614 * this would need to be extended if multiple concurrent archival
615 * tasks were created. If a failure occurs, we will completely
616 * re-copy the file at the next available opportunity.
618 * It is important that we return the oldest, so that we archive xlogs
619 * in order that they were written, for two reasons:
620 * 1) to maintain the sequential chain of xlogs required for recovery
621 * 2) because the oldest ones will sooner become candidates for
622 * recycling at time of checkpoint
624 * NOTE: the "oldest" comparison will presently consider all segments of
625 * a timeline with a smaller ID to be older than all segments of a timeline
626 * with a larger ID; the net result being that past timelines are given
627 * higher priority for archiving. This seems okay, or at least not
628 * obviously worth changing.
631 pgarch_readyXlog(char *xlog
)
634 * open xlog status directory and read through list of xlogs that have the
635 * .ready suffix, looking for earliest file. It is possible to optimise
636 * this code, though only a single file is expected on the vast majority of calls.
639 char XLogArchiveStatusDir
[MAXPGPATH
];
640 char newxlog
[MAX_XFN_CHARS
+ 6 + 1];
645 snprintf(XLogArchiveStatusDir
, MAXPGPATH
, XLOGDIR
"/archive_status");
646 rldir
= AllocateDir(XLogArchiveStatusDir
);
649 (errcode_for_file_access(),
650 errmsg("could not open archive status directory \"%s\": %m",
651 XLogArchiveStatusDir
)));
653 while ((rlde
= ReadDir(rldir
, XLogArchiveStatusDir
)) != NULL
)
655 int basenamelen
= (int) strlen(rlde
->d_name
) - 6;
657 if (basenamelen
>= MIN_XFN_CHARS
&&
658 basenamelen
<= MAX_XFN_CHARS
&&
659 strspn(rlde
->d_name
, VALID_XFN_CHARS
) >= basenamelen
&&
660 strcmp(rlde
->d_name
+ basenamelen
, ".ready") == 0)
664 strcpy(newxlog
, rlde
->d_name
);
669 if (strcmp(rlde
->d_name
, newxlog
) < 0)
670 strcpy(newxlog
, rlde
->d_name
);
678 /* truncate off the .ready */
679 newxlog
[strlen(newxlog
) - 6] = '\0';
680 strcpy(xlog
, newxlog
);
688 * Emit notification that an xlog file has been successfully archived.
689 * We do this by renaming the status file from NNN.ready to NNN.done.
690 * Eventually, a checkpoint process will notice this and delete both the
691 * NNN.done file and the xlog file itself.
694 pgarch_archiveDone(char *xlog
)
696 char rlogready
[MAXPGPATH
];
697 char rlogdone
[MAXPGPATH
];
699 StatusFilePath(rlogready
, xlog
, ".ready");
700 StatusFilePath(rlogdone
, xlog
, ".done");
701 if (rename(rlogready
, rlogdone
) < 0)
703 (errcode_for_file_access(),
704 errmsg("could not rename file \"%s\" to \"%s\": %m",
705 rlogready
, rlogdone
)));