Fix a few errors in comments. Patch by Fujii Masao, plus the one in
[PostgreSQL.git] / contrib / pg_standby / pg_standby.c
blob4460255376ea057eb1a9704931cb9e424efe3c8e
1 /*
2 * $PostgreSQL$
5 * pg_standby.c
7 * Production-ready example of how to create a Warm Standby
8 * database server using continuous archiving as a
9 * replication mechanism
11 * We separate the parameters for archive and nextWALfile
12 * so that we can check the archive exists, even if the
13 * WAL file doesn't (yet).
15 * This program will be executed once in full for each file
16 * requested by the warm standby server.
 * It is designed to cater to a variety of needs, as well as
 * providing a customizable section.
21 * Original author: Simon Riggs simon@2ndquadrant.com
22 * Current maintainer: Simon Riggs
24 #include "postgres_fe.h"
26 #include <ctype.h>
27 #include <dirent.h>
28 #include <sys/stat.h>
29 #include <fcntl.h>
30 #include <signal.h>
32 #ifdef WIN32
33 int getopt(int argc, char *const argv[], const char *optstring);
34 #else
35 #include <sys/time.h>
36 #include <unistd.h>
38 #ifdef HAVE_GETOPT_H
39 #include <getopt.h>
40 #endif
41 #endif /* ! WIN32 */
43 extern char *optarg;
44 extern int optind;
46 const char *progname;
48 /* Options and defaults */
49 int sleeptime = 5; /* amount of time to sleep between file checks */
50 int waittime = -1; /* how long we have been waiting, -1 no wait
51 * yet */
52 int maxwaittime = 0; /* how long are we prepared to wait for? */
53 int keepfiles = 0; /* number of WAL files to keep, 0 keep all */
54 int maxretries = 3; /* number of retries on restore command */
55 bool debug = false; /* are we debugging? */
56 bool need_cleanup = false; /* do we need to remove files from
57 * archive? */
59 static volatile sig_atomic_t signaled = false;
61 char *archiveLocation; /* where to find the archive? */
62 char *triggerPath; /* where to find the trigger file? */
63 char *xlogFilePath; /* where we are going to restore to */
64 char *nextWALFileName; /* the file we need to get from archive */
65 char *restartWALFileName; /* the file from which we can restart restore */
66 char *priorWALFileName; /* the file we need to get from archive */
67 char WALFilePath[MAXPGPATH]; /* the file path including archive */
68 char restoreCommand[MAXPGPATH]; /* run this to restore */
69 char exclusiveCleanupFileName[MAXPGPATH]; /* the file we need to
70 * get from archive */
73 * Two types of failover are supported (smart and fast failover).
75 * The content of the trigger file determines the type of failover. If the
76 * trigger file contains the word "smart" (or the file is empty), smart
77 * failover is chosen: pg_standby acts as cp or ln command itself, on
78 * successful completion all the available WAL records will be applied
79 * resulting in zero data loss. But, it might take a long time to finish
80 * recovery if there's a lot of unapplied WAL.
82 * On the other hand, if the trigger file contains the word "fast", the
83 * recovery is finished immediately even if unapplied WAL files remain. Any
84 * transactions in the unapplied WAL files are lost.
 * An empty trigger file performs smart failover. SIGUSR1 or SIGINT triggers
 * fast failover. A timeout causes fast failover (smart failover would have
 * the same effect, since if the timeout is reached there is no unapplied WAL).
/* Failover states held in 'Failover' */
#define NoFailover		0
#define SmartFailover	1
#define FastFailover	2

static int	Failover = NoFailover;

/* How the file is brought out of the archive: copy (-c) or link (-l) */
#define RESTORE_COMMAND_COPY	0
#define RESTORE_COMMAND_LINK	1
int			restoreCommandType;

/* Kind of file currently being requested */
#define XLOG_DATA			0
#define XLOG_HISTORY		1
#define XLOG_BACKUP_LABEL	2
int			nextWALFileType;

/*
 * Build restoreCommand as:  cmd "arg1" "arg2"
 *
 * cmd must be a string literal, because it is pasted onto the format string
 * by literal concatenation.
 */
#define SET_RESTORE_COMMAND(cmd, arg1, arg2) \
	snprintf(restoreCommand, MAXPGPATH, cmd " \"%s\" \"%s\"", (arg1), (arg2))

/* Result buffer shared by the stat() calls in this file */
struct stat stat_buf;
110 /* =====================================================================
112 * Customizable section
114 * =====================================================================
116 * Currently, this section assumes that the Archive is a locally
117 * accessible directory. If you want to make other assumptions,
118 * such as using a vendor-specific archive and access API, these
 * routines are the ones you'll need to change. You're
 * encouraged to submit any changes to pgsql-hackers@postgresql.org
 * or personally to the current maintainer. Those changes may be
 * folded into later versions of this program.
/* Length of a WAL segment file name: three 8-digit hexadecimal fields */
#define XLOG_DATA_FNAME_LEN		24

/*
 * Write the WAL segment file name for (tli, log, seg) into fname, which must
 * have room for XLOG_DATA_FNAME_LEN + 1 bytes.
 *
 * Reworked from access/xlog_internal.h
 */
#define XLogFileName(fname, tli, log, seg)	\
	snprintf((fname), XLOG_DATA_FNAME_LEN + 1, "%08X%08X%08X", (tli), (log), (seg))
131 * Initialize allows customized commands into the warm standby program.
133 * As an example, and probably the common case, we use either
134 * cp/ln commands on *nix, or copy/move command on Windows.
136 static void
137 CustomizableInitialize(void)
139 #ifdef WIN32
140 snprintf(WALFilePath, MAXPGPATH, "%s\\%s", archiveLocation, nextWALFileName);
141 switch (restoreCommandType)
143 case RESTORE_COMMAND_LINK:
144 SET_RESTORE_COMMAND("mklink", WALFilePath, xlogFilePath);
145 break;
146 case RESTORE_COMMAND_COPY:
147 default:
148 SET_RESTORE_COMMAND("copy", WALFilePath, xlogFilePath);
149 break;
151 #else
152 snprintf(WALFilePath, MAXPGPATH, "%s/%s", archiveLocation, nextWALFileName);
153 switch (restoreCommandType)
155 case RESTORE_COMMAND_LINK:
156 #if HAVE_WORKING_LINK
157 SET_RESTORE_COMMAND("ln -s -f", WALFilePath, xlogFilePath);
158 break;
159 #endif
160 case RESTORE_COMMAND_COPY:
161 default:
162 SET_RESTORE_COMMAND("cp", WALFilePath, xlogFilePath);
163 break;
165 #endif
168 * This code assumes that archiveLocation is a directory You may wish to
169 * add code to check for tape libraries, etc.. So, since it is a
170 * directory, we use stat to test if its accessible
172 if (stat(archiveLocation, &stat_buf) != 0)
174 fprintf(stderr, "%s: archiveLocation \"%s\" does not exist\n", progname, archiveLocation);
175 fflush(stderr);
176 exit(2);
181 * CustomizableNextWALFileReady()
183 * Is the requested file ready yet?
185 static bool
186 CustomizableNextWALFileReady()
188 if (stat(WALFilePath, &stat_buf) == 0)
191 * If its a backup file, return immediately If its a regular file
192 * return only if its the right size already
194 if (strlen(nextWALFileName) > 24 &&
195 strspn(nextWALFileName, "0123456789ABCDEF") == 24 &&
196 strcmp(nextWALFileName + strlen(nextWALFileName) - strlen(".backup"),
197 ".backup") == 0)
199 nextWALFileType = XLOG_BACKUP_LABEL;
200 return true;
202 else if (stat_buf.st_size == XLOG_SEG_SIZE)
204 #ifdef WIN32
207 * Windows 'cp' sets the final file size before the copy is
208 * complete, and not yet ready to be opened by pg_standby. So we
209 * wait for sleeptime secs before attempting to restore. If that
210 * is not enough, we will rely on the retry/holdoff mechanism.
211 * GNUWin32's cp does not have this problem.
213 pg_usleep(sleeptime * 1000000L);
214 #endif
215 nextWALFileType = XLOG_DATA;
216 return true;
220 * If still too small, wait until it is the correct size
222 if (stat_buf.st_size > XLOG_SEG_SIZE)
224 if (debug)
226 fprintf(stderr, "file size greater than expected\n");
227 fflush(stderr);
229 exit(3);
233 return false;
236 #define MaxSegmentsPerLogFile ( 0xFFFFFFFF / XLOG_SEG_SIZE )
238 static void
239 CustomizableCleanupPriorWALFiles(void)
242 * Work out name of prior file from current filename
244 if (nextWALFileType == XLOG_DATA)
246 int rc;
247 DIR *xldir;
248 struct dirent *xlde;
251 * Assume its OK to keep failing. The failure situation may change
252 * over time, so we'd rather keep going on the main processing than
253 * fail because we couldnt clean up yet.
255 if ((xldir = opendir(archiveLocation)) != NULL)
257 while ((xlde = readdir(xldir)) != NULL)
260 * We ignore the timeline part of the XLOG segment identifiers
261 * in deciding whether a segment is still needed. This
262 * ensures that we won't prematurely remove a segment from a
263 * parent timeline. We could probably be a little more
264 * proactive about removing segments of non-parent timelines,
265 * but that would be a whole lot more complicated.
267 * We use the alphanumeric sorting property of the filenames
268 * to decide which ones are earlier than the
269 * exclusiveCleanupFileName file. Note that this means files
270 * are not removed in the order they were originally written,
271 * in case this worries you.
273 if (strlen(xlde->d_name) == XLOG_DATA_FNAME_LEN &&
274 strspn(xlde->d_name, "0123456789ABCDEF") == XLOG_DATA_FNAME_LEN &&
275 strcmp(xlde->d_name + 8, exclusiveCleanupFileName + 8) < 0)
277 #ifdef WIN32
278 snprintf(WALFilePath, MAXPGPATH, "%s\\%s", archiveLocation, xlde->d_name);
279 #else
280 snprintf(WALFilePath, MAXPGPATH, "%s/%s", archiveLocation, xlde->d_name);
281 #endif
283 if (debug)
284 fprintf(stderr, "\nremoving \"%s\"", WALFilePath);
286 rc = unlink(WALFilePath);
287 if (rc != 0)
289 fprintf(stderr, "\n%s: ERROR failed to remove \"%s\": %s",
290 progname, WALFilePath, strerror(errno));
291 break;
295 if (debug)
296 fprintf(stderr, "\n");
298 else
299 fprintf(stderr, "%s: archiveLocation \"%s\" open error\n", progname, archiveLocation);
301 closedir(xldir);
302 fflush(stderr);
306 /* =====================================================================
307 * End of Customizable section
308 * =====================================================================
312 * SetWALFileNameForCleanup()
314 * Set the earliest WAL filename that we want to keep on the archive
315 * and decide whether we need_cleanup
317 static bool
318 SetWALFileNameForCleanup(void)
320 uint32 tli = 1,
321 log = 0,
322 seg = 0;
323 uint32 log_diff = 0,
324 seg_diff = 0;
325 bool cleanup = false;
327 if (restartWALFileName)
330 * Don't do cleanup if the restartWALFileName provided is later than
331 * the xlog file requested. This is an error and we must not remove
332 * these files from archive. This shouldn't happen, but better safe
333 * than sorry.
335 if (strcmp(restartWALFileName, nextWALFileName) > 0)
336 return false;
338 strcpy(exclusiveCleanupFileName, restartWALFileName);
339 return true;
342 if (keepfiles > 0)
344 sscanf(nextWALFileName, "%08X%08X%08X", &tli, &log, &seg);
345 if (tli > 0 && log >= 0 && seg > 0)
347 log_diff = keepfiles / MaxSegmentsPerLogFile;
348 seg_diff = keepfiles % MaxSegmentsPerLogFile;
349 if (seg_diff > seg)
351 log_diff++;
352 seg = MaxSegmentsPerLogFile - (seg_diff - seg);
354 else
355 seg -= seg_diff;
357 if (log >= log_diff)
359 log -= log_diff;
360 cleanup = true;
362 else
364 log = 0;
365 seg = 0;
370 XLogFileName(exclusiveCleanupFileName, tli, log, seg);
372 return cleanup;
376 * CheckForExternalTrigger()
378 * Is there a trigger file? Sets global 'Failover' variable to indicate
379 * what kind of a trigger file it was. A "fast" trigger file is turned
380 * into a "smart" file as a side-effect.
382 static void
383 CheckForExternalTrigger(void)
385 char buf[32];
386 int fd;
387 int len;
390 * Look for a trigger file, if that option has been selected
392 * We use stat() here because triggerPath is always a file rather than
393 * potentially being in an archive
395 if (!triggerPath || stat(triggerPath, &stat_buf) != 0)
396 return;
399 * An empty trigger file performs smart failover. There's a little race
400 * condition here: if the writer of the trigger file has just created the
401 * file, but not yet written anything to it, we'll treat that as smart
402 * shutdown even if the other process was just about to write "fast" to
403 * it. But that's fine: we'll restore one more WAL file, and when we're
404 * invoked next time, we'll see the word "fast" and fail over immediately.
406 if (stat_buf.st_size == 0)
408 Failover = SmartFailover;
409 fprintf(stderr, "trigger file found: smart failover\n");
410 fflush(stderr);
411 return;
414 if ((fd = open(triggerPath, O_RDWR, 0)) < 0)
416 fprintf(stderr, "WARNING: could not open \"%s\": %s\n",
417 triggerPath, strerror(errno));
418 fflush(stderr);
419 return;
422 if ((len = read(fd, buf, sizeof(buf))) < 0)
424 fprintf(stderr, "WARNING: could not read \"%s\": %s\n",
425 triggerPath, strerror(errno));
426 fflush(stderr);
427 close(fd);
428 return;
430 buf[len] = '\0';
432 if (strncmp(buf, "smart", 5) == 0)
434 Failover = SmartFailover;
435 fprintf(stderr, "trigger file found: smart failover\n");
436 fflush(stderr);
437 close(fd);
438 return;
441 if (strncmp(buf, "fast", 4) == 0)
443 Failover = FastFailover;
445 fprintf(stderr, "trigger file found: fast failover\n");
446 fflush(stderr);
449 * Turn it into a "smart" trigger by truncating the file. Otherwise if
450 * the server asks us again to restore a segment that was restored
451 * already, we would return "not found" and upset the server.
453 if (ftruncate(fd, 0) < 0)
455 fprintf(stderr, "WARNING: could not read \"%s\": %s\n",
456 triggerPath, strerror(errno));
457 fflush(stderr);
459 close(fd);
461 return;
463 close(fd);
465 fprintf(stderr, "WARNING: invalid content in \"%s\"\n", triggerPath);
466 fflush(stderr);
467 return;
471 * RestoreWALFileForRecovery()
473 * Perform the action required to restore the file from archive
475 static bool
476 RestoreWALFileForRecovery(void)
478 int rc = 0;
479 int numretries = 0;
481 if (debug)
483 fprintf(stderr, "running restore :");
484 fflush(stderr);
487 while (numretries <= maxretries)
489 rc = system(restoreCommand);
490 if (rc == 0)
492 if (debug)
494 fprintf(stderr, " OK\n");
495 fflush(stderr);
497 return true;
499 pg_usleep(numretries++ * sleeptime * 1000000L);
503 * Allow caller to add additional info
505 if (debug)
506 fprintf(stderr, "not restored\n");
507 return false;
510 static void
511 usage(void)
513 printf("%s allows PostgreSQL warm standby servers to be configured.\n\n", progname);
514 printf("Usage:\n");
515 printf(" %s [OPTION]... ARCHIVELOCATION NEXTWALFILE XLOGFILEPATH [RESTARTWALFILE]\n", progname);
516 printf("\n"
517 "with main intended use as a restore_command in the recovery.conf:\n"
518 " restore_command = 'pg_standby [OPTION]... ARCHIVELOCATION %%f %%p %%r'\n"
519 "e.g.\n"
520 " restore_command = 'pg_standby -l /mnt/server/archiverdir %%f %%p %%r'\n");
521 printf("\nOptions:\n");
522 printf(" -c copies file from archive (default)\n");
523 printf(" -d generate lots of debugging output (testing only)\n");
524 printf(" -k NUMFILESTOKEEP if RESTARTWALFILE not used, removes files prior to limit\n"
525 " (0 keeps all)\n");
526 printf(" -l links into archive (leaves file in archive)\n");
527 printf(" -r MAXRETRIES max number of times to retry, with progressive wait\n"
528 " (default=3)\n");
529 printf(" -s SLEEPTIME seconds to wait between file checks (min=1, max=60,\n"
530 " default=5)\n");
531 printf(" -t TRIGGERFILE defines a trigger file to initiate failover (no default)\n");
532 printf(" -w MAXWAITTIME max seconds to wait for a file (0=no limit) (default=0)\n");
533 printf(" --help show this help, then exit\n");
534 printf(" --version output version information, then exit\n");
535 printf("\nReport bugs to <pgsql-bugs@postgresql.org>.\n");
538 static void
539 sighandler(int sig)
541 signaled = true;
#ifndef WIN32
/*
 * sigquit_handler()
 *
 *	  We don't want SIGQUIT to core dump, so instead of taking its default
 *	  action we put SIGINT back to its default disposition and send SIGINT
 *	  to ourselves.
 */
static void
sigquit_handler(int sig)
{
	(void) sig;

	signal(SIGINT, SIG_DFL);
	kill(getpid(), SIGINT);
}
#endif
554 /*------------ MAIN ----------------------------------------*/
556 main(int argc, char **argv)
558 int c;
560 progname = get_progname(argv[0]);
562 if (argc > 1)
564 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
566 usage();
567 exit(0);
569 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
571 puts("pg_standby (PostgreSQL) " PG_VERSION);
572 exit(0);
577 * You can send SIGUSR1 to trigger failover.
579 * Postmaster uses SIGQUIT to request immediate shutdown. The default
580 * action is to core dump, but we don't want that, so trap it and commit
581 * suicide without core dump.
583 * We used to use SIGINT and SIGQUIT to trigger failover, but that turned
584 * out to be a bad idea because postmaster uses SIGQUIT to request
585 * immediate shutdown. We still trap SIGINT, but that may change in a
586 * future release.
588 (void) signal(SIGUSR1, sighandler);
589 (void) signal(SIGINT, sighandler); /* deprecated, use SIGUSR1 */
590 #ifndef WIN32
591 (void) signal(SIGQUIT, sigquit_handler);
592 #endif
594 while ((c = getopt(argc, argv, "cdk:lr:s:t:w:")) != -1)
596 switch (c)
598 case 'c': /* Use copy */
599 restoreCommandType = RESTORE_COMMAND_COPY;
600 break;
601 case 'd': /* Debug mode */
602 debug = true;
603 break;
604 case 'k': /* keepfiles */
605 keepfiles = atoi(optarg);
606 if (keepfiles < 0)
608 fprintf(stderr, "%s: -k keepfiles must be >= 0\n", progname);
609 exit(2);
611 break;
612 case 'l': /* Use link */
613 restoreCommandType = RESTORE_COMMAND_LINK;
614 break;
615 case 'r': /* Retries */
616 maxretries = atoi(optarg);
617 if (maxretries < 0)
619 fprintf(stderr, "%s: -r maxretries must be >= 0\n", progname);
620 exit(2);
622 break;
623 case 's': /* Sleep time */
624 sleeptime = atoi(optarg);
625 if (sleeptime <= 0 || sleeptime > 60)
627 fprintf(stderr, "%s: -s sleeptime incorrectly set\n", progname);
628 exit(2);
630 break;
631 case 't': /* Trigger file */
632 triggerPath = optarg;
633 break;
634 case 'w': /* Max wait time */
635 maxwaittime = atoi(optarg);
636 if (maxwaittime < 0)
638 fprintf(stderr, "%s: -w maxwaittime incorrectly set\n", progname);
639 exit(2);
641 break;
642 default:
643 fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
644 exit(2);
645 break;
650 * Parameter checking - after checking to see if trigger file present
652 if (argc == 1)
654 fprintf(stderr, "%s: not enough command-line arguments\n", progname);
655 exit(2);
659 * We will go to the archiveLocation to get nextWALFileName.
660 * nextWALFileName may not exist yet, which would not be an error, so we
661 * separate the archiveLocation and nextWALFileName so we can check
662 * separately whether archiveLocation exists, if not that is an error
664 if (optind < argc)
666 archiveLocation = argv[optind];
667 optind++;
669 else
671 fprintf(stderr, "%s: must specify archive location\n", progname);
672 fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
673 exit(2);
676 if (optind < argc)
678 nextWALFileName = argv[optind];
679 optind++;
681 else
683 fprintf(stderr, "%s: use %%f to specify nextWALFileName\n", progname);
684 fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
685 exit(2);
688 if (optind < argc)
690 xlogFilePath = argv[optind];
691 optind++;
693 else
695 fprintf(stderr, "%s: use %%p to specify xlogFilePath\n", progname);
696 fprintf(stderr, "Try \"%s --help\" for more information.\n", progname);
697 exit(2);
700 if (optind < argc)
702 restartWALFileName = argv[optind];
703 optind++;
706 CustomizableInitialize();
708 need_cleanup = SetWALFileNameForCleanup();
710 if (debug)
712 fprintf(stderr, "Trigger file : %s\n", triggerPath ? triggerPath : "<not set>");
713 fprintf(stderr, "Waiting for WAL file : %s\n", nextWALFileName);
714 fprintf(stderr, "WAL file path : %s\n", WALFilePath);
715 fprintf(stderr, "Restoring to : %s\n", xlogFilePath);
716 fprintf(stderr, "Sleep interval : %d second%s\n",
717 sleeptime, (sleeptime > 1 ? "s" : " "));
718 fprintf(stderr, "Max wait interval : %d %s\n",
719 maxwaittime, (maxwaittime > 0 ? "seconds" : "forever"));
720 fprintf(stderr, "Command for restore : %s\n", restoreCommand);
721 fprintf(stderr, "Keep archive history : ");
722 if (need_cleanup)
723 fprintf(stderr, "%s and later\n", exclusiveCleanupFileName);
724 else
725 fprintf(stderr, "No cleanup required\n");
726 fflush(stderr);
730 * Check for initial history file: always the first file to be requested
731 * It's OK if the file isn't there - all other files need to wait
733 if (strlen(nextWALFileName) > 8 &&
734 strspn(nextWALFileName, "0123456789ABCDEF") == 8 &&
735 strcmp(nextWALFileName + strlen(nextWALFileName) - strlen(".history"),
736 ".history") == 0)
738 nextWALFileType = XLOG_HISTORY;
739 if (RestoreWALFileForRecovery())
740 exit(0);
741 else
743 if (debug)
745 fprintf(stderr, "history file not found\n");
746 fflush(stderr);
748 exit(1);
753 * Main wait loop
755 for (;;)
757 /* Check for trigger file or signal first */
758 CheckForExternalTrigger();
759 if (signaled)
761 Failover = FastFailover;
762 if (debug)
764 fprintf(stderr, "signaled to exit: fast failover\n");
765 fflush(stderr);
770 * Check for fast failover immediately, before checking if the
771 * requested WAL file is available
773 if (Failover == FastFailover)
774 exit(1);
776 if (CustomizableNextWALFileReady())
779 * Once we have restored this file successfully we can remove some
780 * prior WAL files. If this restore fails we musn't remove any
781 * file because some of them will be requested again immediately
782 * after the failed restore, or when we restart recovery.
784 if (RestoreWALFileForRecovery())
786 if (need_cleanup)
787 CustomizableCleanupPriorWALFiles();
789 exit(0);
791 else
793 /* Something went wrong in copying the file */
794 exit(1);
798 /* Check for smart failover if the next WAL file was not available */
799 if (Failover == SmartFailover)
800 exit(1);
802 if (sleeptime <= 60)
803 pg_usleep(sleeptime * 1000000L);
805 waittime += sleeptime;
806 if (waittime >= maxwaittime && maxwaittime > 0)
808 Failover = FastFailover;
809 if (debug)
811 fprintf(stderr, "Timed out after %d seconds: fast failover\n",
812 waittime);
813 fflush(stderr);
816 if (debug)
818 fprintf(stderr, "WAL file not present yet.");
819 if (triggerPath)
820 fprintf(stderr, " Checking for trigger file...");
821 fprintf(stderr, "\n");
822 fflush(stderr);