Query in SQL function still not schema-safe; add a couple
[PostgreSQL.git] / src / backend / postmaster / pgarch.c
blob56bc7f15cc4199d31bc7d2a014be07bd69836f8e
/*-------------------------------------------------------------------------
 *
 * pgarch.c
 *
 *	PostgreSQL WAL archiver
 *
 *	All functions relating to archiver are included here
 *
 *	- All functions executed by archiver process
 *
 *	- archiver is forked from postmaster, and the two
 *	processes then communicate using signals. All functions
 *	executed by postmaster are included in this file.
 *
 *	Initial author: Simon Riggs		simon@2ndquadrant.com
 *
 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL$
 *
 *-------------------------------------------------------------------------
 */
26 #include "postgres.h"
28 #include <fcntl.h>
29 #include <signal.h>
30 #include <time.h>
31 #include <sys/time.h>
32 #include <sys/wait.h>
33 #include <unistd.h>
35 #include "access/xlog_internal.h"
36 #include "libpq/pqsignal.h"
37 #include "miscadmin.h"
38 #include "postmaster/fork_process.h"
39 #include "postmaster/pgarch.h"
40 #include "postmaster/postmaster.h"
41 #include "storage/fd.h"
42 #include "storage/ipc.h"
43 #include "storage/pg_shmem.h"
44 #include "storage/pmsignal.h"
45 #include "utils/guc.h"
46 #include "utils/ps_status.h"
/* ----------
 * Timer definitions (all values are in seconds).
 * ----------
 */

/* How often to force a poll of the archive status directory. */
#define PGARCH_AUTOWAKE_INTERVAL	60

/* How often to attempt to restart a failed archiver. */
#define PGARCH_RESTART_INTERVAL		10

/* ----------
 * Archiver control info.
 *
 * We expect that archivable files within pg_xlog will have names between
 * MIN_XFN_CHARS and MAX_XFN_CHARS in length, consisting only of characters
 * appearing in VALID_XFN_CHARS.  The status files in archive_status have
 * corresponding names with ".ready" or ".done" appended.
 * ----------
 */
#define MIN_XFN_CHARS	16
#define MAX_XFN_CHARS	40
#define VALID_XFN_CHARS "0123456789ABCDEF.history.backup"

/*
 * Number of failed attempts on a single file after which the copy loop
 * gives up for the current cycle.
 */
#define NUM_ARCHIVE_RETRIES 3
/* ----------
 * Local data
 * ----------
 */

/* Time of the most recent launch attempt made by pgarch_start(). */
static time_t last_pgarch_start_time;

/* Time at which the main loop first noticed SIGTERM; 0 means "not yet". */
static time_t last_sigterm_time = 0;

/*
 * Flags set by interrupt handlers for later service in the main loop.
 */
static volatile sig_atomic_t got_SIGHUP = false;
static volatile sig_atomic_t got_SIGTERM = false;
static volatile sig_atomic_t wakened = false;
static volatile sig_atomic_t ready_to_stop = false;
90 /* ----------
91 * Local function forward declarations
92 * ----------
94 #ifdef EXEC_BACKEND
95 static pid_t pgarch_forkexec(void);
96 #endif
98 NON_EXEC_STATIC void PgArchiverMain(int argc, char *argv[]);
99 static void pgarch_exit(SIGNAL_ARGS);
100 static void ArchSigHupHandler(SIGNAL_ARGS);
101 static void ArchSigTermHandler(SIGNAL_ARGS);
102 static void pgarch_waken(SIGNAL_ARGS);
103 static void pgarch_waken_stop(SIGNAL_ARGS);
104 static void pgarch_MainLoop(void);
105 static void pgarch_ArchiverCopyLoop(void);
106 static bool pgarch_archiveXlog(char *xlog);
107 static bool pgarch_readyXlog(char *xlog);
108 static void pgarch_archiveDone(char *xlog);
111 /* ------------------------------------------------------------
112 * Public functions called from postmaster follow
113 * ------------------------------------------------------------
117 * pgarch_start
119 * Called from postmaster at startup or after an existing archiver
120 * died. Attempt to fire up a fresh archiver process.
122 * Returns PID of child process, or 0 if fail.
124 * Note: if fail, we will be called again from the postmaster main loop.
127 pgarch_start(void)
129 time_t curtime;
130 pid_t pgArchPid;
133 * Do nothing if no archiver needed
135 if (!XLogArchivingActive())
136 return 0;
139 * Do nothing if too soon since last archiver start. This is a safety
140 * valve to protect against continuous respawn attempts if the archiver is
141 * dying immediately at launch. Note that since we will be re-called from
142 * the postmaster main loop, we will get another chance later.
144 curtime = time(NULL);
145 if ((unsigned int) (curtime - last_pgarch_start_time) <
146 (unsigned int) PGARCH_RESTART_INTERVAL)
147 return 0;
148 last_pgarch_start_time = curtime;
150 #ifdef EXEC_BACKEND
151 switch ((pgArchPid = pgarch_forkexec()))
152 #else
153 switch ((pgArchPid = fork_process()))
154 #endif
156 case -1:
157 ereport(LOG,
158 (errmsg("could not fork archiver: %m")));
159 return 0;
161 #ifndef EXEC_BACKEND
162 case 0:
163 /* in postmaster child ... */
164 /* Close the postmaster's sockets */
165 ClosePostmasterPorts(false);
167 /* Lose the postmaster's on-exit routines */
168 on_exit_reset();
170 /* Drop our connection to postmaster's shared memory, as well */
171 PGSharedMemoryDetach();
173 PgArchiverMain(0, NULL);
174 break;
175 #endif
177 default:
178 return (int) pgArchPid;
181 /* shouldn't get here */
182 return 0;
/* ------------------------------------------------------------
 * Local functions called by archiver follow
 * ------------------------------------------------------------
 */

#ifdef EXEC_BACKEND

/*
 * pgarch_forkexec() -
 *
 * Build the argument list for the archiver process, then hand it to
 * postmaster_forkexec() and return that function's result.
 */
static pid_t
pgarch_forkexec(void)
{
	/*
	 * Slot 2 is a placeholder that is filled in by postmaster_forkexec;
	 * the remaining slots, including the terminator in slot 3, are NULL.
	 */
	char	   *av[10] = {"postgres", "--forkarch", NULL};
	int			ac = 3;

	Assert(ac < lengthof(av));

	return postmaster_forkexec(ac, av);
}

#endif   /* EXEC_BACKEND */
219 * PgArchiverMain
221 * The argc/argv parameters are valid only in EXEC_BACKEND case. However,
222 * since we don't use 'em, it hardly matters...
224 NON_EXEC_STATIC void
225 PgArchiverMain(int argc, char *argv[])
227 IsUnderPostmaster = true; /* we are a postmaster subprocess now */
229 MyProcPid = getpid(); /* reset MyProcPid */
231 MyStartTime = time(NULL); /* record Start Time for logging */
234 * If possible, make this process a group leader, so that the postmaster
235 * can signal any child processes too.
237 #ifdef HAVE_SETSID
238 if (setsid() < 0)
239 elog(FATAL, "setsid() failed: %m");
240 #endif
243 * Ignore all signals usually bound to some action in the postmaster,
244 * except for SIGHUP, SIGTERM, SIGUSR1, SIGUSR2, and SIGQUIT.
246 pqsignal(SIGHUP, ArchSigHupHandler);
247 pqsignal(SIGINT, SIG_IGN);
248 pqsignal(SIGTERM, ArchSigTermHandler);
249 pqsignal(SIGQUIT, pgarch_exit);
250 pqsignal(SIGALRM, SIG_IGN);
251 pqsignal(SIGPIPE, SIG_IGN);
252 pqsignal(SIGUSR1, pgarch_waken);
253 pqsignal(SIGUSR2, pgarch_waken_stop);
254 pqsignal(SIGCHLD, SIG_DFL);
255 pqsignal(SIGTTIN, SIG_DFL);
256 pqsignal(SIGTTOU, SIG_DFL);
257 pqsignal(SIGCONT, SIG_DFL);
258 pqsignal(SIGWINCH, SIG_DFL);
259 PG_SETMASK(&UnBlockSig);
262 * Identify myself via ps
264 init_ps_display("archiver process", "", "", "");
266 pgarch_MainLoop();
268 exit(0);
/*
 * SIGQUIT signal handler for archiver process.
 *
 * SIGQUIT means curl up and die.  We leave via _exit() rather than exit():
 * exit() runs atexit callbacks and flushes stdio buffers, and it is not on
 * the POSIX list of async-signal-safe functions, so calling it from a
 * handler that may have interrupted arbitrary code is unsafe.  The exit
 * status (1) is unchanged.
 */
static void
pgarch_exit(SIGNAL_ARGS)
{
	_exit(1);
}
279 /* SIGHUP signal handler for archiver process */
280 static void
281 ArchSigHupHandler(SIGNAL_ARGS)
283 /* set flag to re-read config file at next convenient time */
284 got_SIGHUP = true;
287 /* SIGTERM signal handler for archiver process */
288 static void
289 ArchSigTermHandler(SIGNAL_ARGS)
292 * The postmaster never sends us SIGTERM, so we assume that this means
293 * that init is trying to shut down the whole system. If we hang around
294 * too long we'll get SIGKILL'd. Set flag to prevent starting any more
295 * archive commands.
297 got_SIGTERM = true;
300 /* SIGUSR1 signal handler for archiver process */
301 static void
302 pgarch_waken(SIGNAL_ARGS)
304 /* set flag that there is work to be done */
305 wakened = true;
308 /* SIGUSR2 signal handler for archiver process */
309 static void
310 pgarch_waken_stop(SIGNAL_ARGS)
312 /* set flag to do a final cycle and shut down afterwards */
313 ready_to_stop = true;
317 * pgarch_MainLoop
319 * Main loop for archiver
321 static void
322 pgarch_MainLoop(void)
324 time_t last_copy_time = 0;
325 bool time_to_stop;
328 * We run the copy loop immediately upon entry, in case there are
329 * unarchived files left over from a previous database run (or maybe the
330 * archiver died unexpectedly). After that we wait for a signal or
331 * timeout before doing more.
333 wakened = true;
337 /* When we get SIGUSR2, we do one more archive cycle, then exit */
338 time_to_stop = ready_to_stop;
340 /* Check for config update */
341 if (got_SIGHUP)
343 got_SIGHUP = false;
344 ProcessConfigFile(PGC_SIGHUP);
348 * If we've gotten SIGTERM, we normally just sit and do nothing until
349 * SIGUSR2 arrives. However, that means a random SIGTERM would
350 * disable archiving indefinitely, which doesn't seem like a good
351 * idea. If more than 60 seconds pass since SIGTERM, exit anyway, so
352 * that the postmaster can start a new archiver if needed.
354 if (got_SIGTERM)
356 time_t curtime = time(NULL);
358 if (last_sigterm_time == 0)
359 last_sigterm_time = curtime;
360 else if ((unsigned int) (curtime - last_sigterm_time) >=
361 (unsigned int) 60)
362 break;
365 /* Do what we're here for */
366 if (wakened || time_to_stop)
368 wakened = false;
369 pgarch_ArchiverCopyLoop();
370 last_copy_time = time(NULL);
374 * There shouldn't be anything for the archiver to do except to wait
375 * for a signal ... however, the archiver exists to protect our data,
376 * so she wakes up occasionally to allow herself to be proactive.
378 * On some platforms, signals won't interrupt the sleep. To ensure we
379 * respond reasonably promptly when someone signals us, break down the
380 * sleep into 1-second increments, and check for interrupts after each
381 * nap.
383 while (!(wakened || ready_to_stop || got_SIGHUP ||
384 !PostmasterIsAlive(true)))
386 time_t curtime;
388 pg_usleep(1000000L);
389 curtime = time(NULL);
390 if ((unsigned int) (curtime - last_copy_time) >=
391 (unsigned int) PGARCH_AUTOWAKE_INTERVAL)
392 wakened = true;
396 * The archiver quits either when the postmaster dies (not expected)
397 * or after completing one more archiving cycle after receiving
398 * SIGUSR2.
400 } while (PostmasterIsAlive(true) && !time_to_stop);
404 * pgarch_ArchiverCopyLoop
406 * Archives all outstanding xlogs then returns
408 static void
409 pgarch_ArchiverCopyLoop(void)
411 char xlog[MAX_XFN_CHARS + 1];
413 if (!XLogArchiveCommandSet())
415 ereport(WARNING,
416 (errmsg("archive_mode enabled, yet archive_command is not set")));
417 /* can't do anything if no command ... */
418 return;
422 * loop through all xlogs with archive_status of .ready and archive
423 * them...mostly we expect this to be a single file, though it is possible
424 * some backend will add files onto the list of those that need archiving
425 * while we are still copying earlier archives
427 while (pgarch_readyXlog(xlog))
429 int failures = 0;
431 for (;;)
434 * Do not initiate any more archive commands after receiving
435 * SIGTERM, nor after the postmaster has died unexpectedly. The
436 * first condition is to try to keep from having init SIGKILL the
437 * command, and the second is to avoid conflicts with another
438 * archiver spawned by a newer postmaster.
440 if (got_SIGTERM || !PostmasterIsAlive(true))
441 return;
443 if (pgarch_archiveXlog(xlog))
445 /* successful */
446 pgarch_archiveDone(xlog);
447 break; /* out of inner retry loop */
449 else
451 if (++failures >= NUM_ARCHIVE_RETRIES)
453 ereport(WARNING,
454 (errmsg("transaction log file \"%s\" could not be archived: too many failures",
455 xlog)));
456 return; /* give up archiving for now */
458 pg_usleep(1000000L); /* wait a bit before retrying */
465 * pgarch_archiveXlog
467 * Invokes system(3) to copy one archive file to wherever it should go
469 * Returns true if successful
471 static bool
472 pgarch_archiveXlog(char *xlog)
474 char xlogarchcmd[MAXPGPATH];
475 char pathname[MAXPGPATH];
476 char activitymsg[MAXFNAMELEN + 16];
477 char *dp;
478 char *endp;
479 const char *sp;
480 int rc;
482 snprintf(pathname, MAXPGPATH, XLOGDIR "/%s", xlog);
485 * construct the command to be executed
487 dp = xlogarchcmd;
488 endp = xlogarchcmd + MAXPGPATH - 1;
489 *endp = '\0';
491 for (sp = XLogArchiveCommand; *sp; sp++)
493 if (*sp == '%')
495 switch (sp[1])
497 case 'p':
498 /* %p: relative path of source file */
499 sp++;
500 strlcpy(dp, pathname, endp - dp);
501 make_native_path(dp);
502 dp += strlen(dp);
503 break;
504 case 'f':
505 /* %f: filename of source file */
506 sp++;
507 strlcpy(dp, xlog, endp - dp);
508 dp += strlen(dp);
509 break;
510 case '%':
511 /* convert %% to a single % */
512 sp++;
513 if (dp < endp)
514 *dp++ = *sp;
515 break;
516 default:
517 /* otherwise treat the % as not special */
518 if (dp < endp)
519 *dp++ = *sp;
520 break;
523 else
525 if (dp < endp)
526 *dp++ = *sp;
529 *dp = '\0';
531 ereport(DEBUG3,
532 (errmsg_internal("executing archive command \"%s\"",
533 xlogarchcmd)));
535 /* Report archive activity in PS display */
536 snprintf(activitymsg, sizeof(activitymsg), "archiving %s", xlog);
537 set_ps_display(activitymsg, false);
539 rc = system(xlogarchcmd);
540 if (rc != 0)
543 * If either the shell itself, or a called command, died on a signal,
544 * abort the archiver. We do this because system() ignores SIGINT and
545 * SIGQUIT while waiting; so a signal is very likely something that
546 * should have interrupted us too. If we overreact it's no big deal,
547 * the postmaster will just start the archiver again.
549 * Per the Single Unix Spec, shells report exit status > 128 when a
550 * called command died on a signal.
552 int lev = (WIFSIGNALED(rc) || WEXITSTATUS(rc) > 128) ? FATAL : LOG;
554 if (WIFEXITED(rc))
556 ereport(lev,
557 (errmsg("archive command failed with exit code %d",
558 WEXITSTATUS(rc)),
559 errdetail("The failed archive command was: %s",
560 xlogarchcmd)));
562 else if (WIFSIGNALED(rc))
564 #if defined(WIN32)
565 ereport(lev,
566 (errmsg("archive command was terminated by exception 0x%X",
567 WTERMSIG(rc)),
568 errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
569 errdetail("The failed archive command was: %s",
570 xlogarchcmd)));
571 #elif defined(HAVE_DECL_SYS_SIGLIST) && HAVE_DECL_SYS_SIGLIST
572 ereport(lev,
573 (errmsg("archive command was terminated by signal %d: %s",
574 WTERMSIG(rc),
575 WTERMSIG(rc) < NSIG ? sys_siglist[WTERMSIG(rc)] : "(unknown)"),
576 errdetail("The failed archive command was: %s",
577 xlogarchcmd)));
578 #else
579 ereport(lev,
580 (errmsg("archive command was terminated by signal %d",
581 WTERMSIG(rc)),
582 errdetail("The failed archive command was: %s",
583 xlogarchcmd)));
584 #endif
586 else
588 ereport(lev,
589 (errmsg("archive command exited with unrecognized status %d",
590 rc),
591 errdetail("The failed archive command was: %s",
592 xlogarchcmd)));
595 snprintf(activitymsg, sizeof(activitymsg), "failed on %s", xlog);
596 set_ps_display(activitymsg, false);
598 return false;
600 ereport(DEBUG1,
601 (errmsg("archived transaction log file \"%s\"", xlog)));
603 snprintf(activitymsg, sizeof(activitymsg), "last was %s", xlog);
604 set_ps_display(activitymsg, false);
606 return true;
610 * pgarch_readyXlog
612 * Return name of the oldest xlog file that has not yet been archived.
613 * No notification is set that file archiving is now in progress, so
614 * this would need to be extended if multiple concurrent archival
615 * tasks were created. If a failure occurs, we will completely
616 * re-copy the file at the next available opportunity.
618 * It is important that we return the oldest, so that we archive xlogs
619 * in order that they were written, for two reasons:
620 * 1) to maintain the sequential chain of xlogs required for recovery
621 * 2) because the oldest ones will sooner become candidates for
622 * recycling at time of checkpoint
624 * NOTE: the "oldest" comparison will presently consider all segments of
625 * a timeline with a smaller ID to be older than all segments of a timeline
626 * with a larger ID; the net result being that past timelines are given
627 * higher priority for archiving. This seems okay, or at least not
628 * obviously worth changing.
630 static bool
631 pgarch_readyXlog(char *xlog)
634 * open xlog status directory and read through list of xlogs that have the
635 * .ready suffix, looking for earliest file. It is possible to optimise
636 * this code, though only a single file is expected on the vast majority
637 * of calls, so....
639 char XLogArchiveStatusDir[MAXPGPATH];
640 char newxlog[MAX_XFN_CHARS + 6 + 1];
641 DIR *rldir;
642 struct dirent *rlde;
643 bool found = false;
645 snprintf(XLogArchiveStatusDir, MAXPGPATH, XLOGDIR "/archive_status");
646 rldir = AllocateDir(XLogArchiveStatusDir);
647 if (rldir == NULL)
648 ereport(ERROR,
649 (errcode_for_file_access(),
650 errmsg("could not open archive status directory \"%s\": %m",
651 XLogArchiveStatusDir)));
653 while ((rlde = ReadDir(rldir, XLogArchiveStatusDir)) != NULL)
655 int basenamelen = (int) strlen(rlde->d_name) - 6;
657 if (basenamelen >= MIN_XFN_CHARS &&
658 basenamelen <= MAX_XFN_CHARS &&
659 strspn(rlde->d_name, VALID_XFN_CHARS) >= basenamelen &&
660 strcmp(rlde->d_name + basenamelen, ".ready") == 0)
662 if (!found)
664 strcpy(newxlog, rlde->d_name);
665 found = true;
667 else
669 if (strcmp(rlde->d_name, newxlog) < 0)
670 strcpy(newxlog, rlde->d_name);
674 FreeDir(rldir);
676 if (found)
678 /* truncate off the .ready */
679 newxlog[strlen(newxlog) - 6] = '\0';
680 strcpy(xlog, newxlog);
682 return found;
686 * pgarch_archiveDone
688 * Emit notification that an xlog file has been successfully archived.
689 * We do this by renaming the status file from NNN.ready to NNN.done.
690 * Eventually, a checkpoint process will notice this and delete both the
691 * NNN.done file and the xlog file itself.
693 static void
694 pgarch_archiveDone(char *xlog)
696 char rlogready[MAXPGPATH];
697 char rlogdone[MAXPGPATH];
699 StatusFilePath(rlogready, xlog, ".ready");
700 StatusFilePath(rlogdone, xlog, ".done");
701 if (rename(rlogready, rlogdone) < 0)
702 ereport(WARNING,
703 (errcode_for_file_access(),
704 errmsg("could not rename file \"%s\" to \"%s\": %m",
705 rlogready, rlogdone)));