doc: Update links which returned 404
[pgsql.git] / src / backend / access / transam / xlogarchive.c
blob1ef1713c91a4978e851335abaa16346d2276a278
1 /*-------------------------------------------------------------------------
3 * xlogarchive.c
4 * Functions for archiving WAL files and restoring from the archive.
7 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
10 * src/backend/access/transam/xlogarchive.c
12 *-------------------------------------------------------------------------
15 #include "postgres.h"
17 #include <sys/stat.h>
18 #include <sys/wait.h>
19 #include <signal.h>
20 #include <unistd.h>
22 #include "access/xlog.h"
23 #include "access/xlog_internal.h"
24 #include "access/xlogarchive.h"
25 #include "common/archive.h"
26 #include "common/percentrepl.h"
27 #include "miscadmin.h"
28 #include "pgstat.h"
29 #include "postmaster/pgarch.h"
30 #include "postmaster/startup.h"
31 #include "replication/walsender.h"
32 #include "storage/fd.h"
33 #include "storage/ipc.h"
36 * Attempt to retrieve the specified file from off-line archival storage.
37 * If successful, fill "path" with its complete path (note that this will be
38 * a temp file name that doesn't follow the normal naming convention), and
39 * return true.
41 * If not successful, fill "path" with the name of the normal on-line file
42 * (which may or may not actually exist, but we'll try to use it), and return
43 * false.
45 * For fixed-size files, the caller may pass the expected size as an
46 * additional crosscheck on successful recovery. If the file size is not
47 * known, set expectedSize = 0.
49 * When 'cleanupEnabled' is false, refrain from deleting any old WAL segments
50 * in the archive. This is used when fetching the initial checkpoint record,
51 * when we are not yet sure how far back we need the WAL.
53 bool
54 RestoreArchivedFile(char *path, const char *xlogfname,
55 const char *recovername, off_t expectedSize,
56 bool cleanupEnabled)
58 char xlogpath[MAXPGPATH];
59 char *xlogRestoreCmd;
60 char lastRestartPointFname[MAXPGPATH];
61 int rc;
62 struct stat stat_buf;
63 XLogSegNo restartSegNo;
64 XLogRecPtr restartRedoPtr;
65 TimeLineID restartTli;
68 * Ignore restore_command when not in archive recovery (meaning we are in
69 * crash recovery).
71 if (!ArchiveRecoveryRequested)
72 goto not_available;
74 /* In standby mode, restore_command might not be supplied */
75 if (recoveryRestoreCommand == NULL || strcmp(recoveryRestoreCommand, "") == 0)
76 goto not_available;
79 * When doing archive recovery, we always prefer an archived log file even
80 * if a file of the same name exists in XLOGDIR. The reason is that the
81 * file in XLOGDIR could be an old, un-filled or partly-filled version
82 * that was copied and restored as part of backing up $PGDATA.
84 * We could try to optimize this slightly by checking the local copy
85 * lastchange timestamp against the archived copy, but we have no API to
86 * do this, nor can we guarantee that the lastchange timestamp was
87 * preserved correctly when we copied to archive. Our aim is robustness,
88 * so we elect not to do this.
90 * If we cannot obtain the log file from the archive, however, we will try
91 * to use the XLOGDIR file if it exists. This is so that we can make use
92 * of log segments that weren't yet transferred to the archive.
94 * Notice that we don't actually overwrite any files when we copy back
95 * from archive because the restore_command may inadvertently restore
96 * inappropriate xlogs, or they may be corrupt, so we may wish to fallback
97 * to the segments remaining in current XLOGDIR later. The
98 * copy-from-archive filename is always the same, ensuring that we don't
99 * run out of disk space on long recoveries.
101 snprintf(xlogpath, MAXPGPATH, XLOGDIR "/%s", recovername);
104 * Make sure there is no existing file named recovername.
106 if (stat(xlogpath, &stat_buf) != 0)
108 if (errno != ENOENT)
109 ereport(FATAL,
110 (errcode_for_file_access(),
111 errmsg("could not stat file \"%s\": %m",
112 xlogpath)));
114 else
116 if (unlink(xlogpath) != 0)
117 ereport(FATAL,
118 (errcode_for_file_access(),
119 errmsg("could not remove file \"%s\": %m",
120 xlogpath)));
124 * Calculate the archive file cutoff point for use during log shipping
125 * replication. All files earlier than this point can be deleted from the
126 * archive, though there is no requirement to do so.
128 * If cleanup is not enabled, initialise this with the filename of
129 * InvalidXLogRecPtr, which will prevent the deletion of any WAL files
130 * from the archive because of the alphabetic sorting property of WAL
131 * filenames.
133 * Once we have successfully located the redo pointer of the checkpoint
134 * from which we start recovery we never request a file prior to the redo
135 * pointer of the last restartpoint. When redo begins we know that we have
136 * successfully located it, so there is no need for additional status
137 * flags to signify the point when we can begin deleting WAL files from
138 * the archive.
140 if (cleanupEnabled)
142 GetOldestRestartPoint(&restartRedoPtr, &restartTli);
143 XLByteToSeg(restartRedoPtr, restartSegNo, wal_segment_size);
144 XLogFileName(lastRestartPointFname, restartTli, restartSegNo,
145 wal_segment_size);
146 /* we shouldn't need anything earlier than last restart point */
147 Assert(strcmp(lastRestartPointFname, xlogfname) <= 0);
149 else
150 XLogFileName(lastRestartPointFname, 0, 0, wal_segment_size);
152 /* Build the restore command to execute */
153 xlogRestoreCmd = BuildRestoreCommand(recoveryRestoreCommand,
154 xlogpath, xlogfname,
155 lastRestartPointFname);
157 ereport(DEBUG3,
158 (errmsg_internal("executing restore command \"%s\"",
159 xlogRestoreCmd)));
161 fflush(NULL);
162 pgstat_report_wait_start(WAIT_EVENT_RESTORE_COMMAND);
165 * PreRestoreCommand() informs the SIGTERM handler for the startup process
166 * that it should proc_exit() right away. This is done for the duration
167 * of the system() call because there isn't a good way to break out while
168 * it is executing. Since we might call proc_exit() in a signal handler,
169 * it is best to put any additional logic before or after the
170 * PreRestoreCommand()/PostRestoreCommand() section.
172 PreRestoreCommand();
175 * Copy xlog from archival storage to XLOGDIR
177 rc = system(xlogRestoreCmd);
179 PostRestoreCommand();
181 pgstat_report_wait_end();
182 pfree(xlogRestoreCmd);
184 if (rc == 0)
187 * command apparently succeeded, but let's make sure the file is
188 * really there now and has the correct size.
190 if (stat(xlogpath, &stat_buf) == 0)
192 if (expectedSize > 0 && stat_buf.st_size != expectedSize)
194 int elevel;
197 * If we find a partial file in standby mode, we assume it's
198 * because it's just being copied to the archive, and keep
199 * trying.
201 * Otherwise treat a wrong-sized file as FATAL to ensure the
202 * DBA would notice it, but is that too strong? We could try
203 * to plow ahead with a local copy of the file ... but the
204 * problem is that there probably isn't one, and we'd
205 * incorrectly conclude we've reached the end of WAL and we're
206 * done recovering ...
208 if (StandbyMode && stat_buf.st_size < expectedSize)
209 elevel = DEBUG1;
210 else
211 elevel = FATAL;
212 ereport(elevel,
213 (errmsg("archive file \"%s\" has wrong size: %lld instead of %lld",
214 xlogfname,
215 (long long int) stat_buf.st_size,
216 (long long int) expectedSize)));
217 return false;
219 else
221 ereport(LOG,
222 (errmsg("restored log file \"%s\" from archive",
223 xlogfname)));
224 strcpy(path, xlogpath);
225 return true;
228 else
230 /* stat failed */
231 int elevel = (errno == ENOENT) ? LOG : FATAL;
233 ereport(elevel,
234 (errcode_for_file_access(),
235 errmsg("could not stat file \"%s\": %m", xlogpath),
236 errdetail("\"restore_command\" returned a zero exit status, but stat() failed.")));
241 * Remember, we rollforward UNTIL the restore fails so failure here is
242 * just part of the process... that makes it difficult to determine
243 * whether the restore failed because there isn't an archive to restore,
244 * or because the administrator has specified the restore program
245 * incorrectly. We have to assume the former.
247 * However, if the failure was due to any sort of signal, it's best to
248 * punt and abort recovery. (If we "return false" here, upper levels will
249 * assume that recovery is complete and start up the database!) It's
250 * essential to abort on child SIGINT and SIGQUIT, because per spec
251 * system() ignores SIGINT and SIGQUIT while waiting; if we see one of
252 * those it's a good bet we should have gotten it too.
254 * On SIGTERM, assume we have received a fast shutdown request, and exit
255 * cleanly. It's pure chance whether we receive the SIGTERM first, or the
256 * child process. If we receive it first, the signal handler will call
257 * proc_exit, otherwise we do it here. If we or the child process received
258 * SIGTERM for any other reason than a fast shutdown request, postmaster
259 * will perform an immediate shutdown when it sees us exiting
260 * unexpectedly.
262 * We treat hard shell errors such as "command not found" as fatal, too.
264 if (wait_result_is_signal(rc, SIGTERM))
265 proc_exit(1);
267 ereport(wait_result_is_any_signal(rc, true) ? FATAL : DEBUG2,
268 (errmsg("could not restore file \"%s\" from archive: %s",
269 xlogfname, wait_result_to_str(rc))));
271 not_available:
274 * if an archived file is not available, there might still be a version of
275 * this file in XLOGDIR, so return that as the filename to open.
277 * In many recovery scenarios we expect this to fail also, but if so that
278 * just means we've reached the end of WAL.
280 snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname);
281 return false;
285 * Attempt to execute an external shell command during recovery.
287 * 'command' is the shell command to be executed, 'commandName' is a
288 * human-readable name describing the command emitted in the logs. If
289 * 'failOnSignal' is true and the command is killed by a signal, a FATAL
290 * error is thrown. Otherwise a WARNING is emitted.
292 * This is currently used for recovery_end_command and archive_cleanup_command.
294 void
295 ExecuteRecoveryCommand(const char *command, const char *commandName,
296 bool failOnSignal, uint32 wait_event_info)
298 char *xlogRecoveryCmd;
299 char lastRestartPointFname[MAXPGPATH];
300 int rc;
301 XLogSegNo restartSegNo;
302 XLogRecPtr restartRedoPtr;
303 TimeLineID restartTli;
305 Assert(command && commandName);
308 * Calculate the archive file cutoff point for use during log shipping
309 * replication. All files earlier than this point can be deleted from the
310 * archive, though there is no requirement to do so.
312 GetOldestRestartPoint(&restartRedoPtr, &restartTli);
313 XLByteToSeg(restartRedoPtr, restartSegNo, wal_segment_size);
314 XLogFileName(lastRestartPointFname, restartTli, restartSegNo,
315 wal_segment_size);
318 * construct the command to be executed
320 xlogRecoveryCmd = replace_percent_placeholders(command, commandName, "r", lastRestartPointFname);
322 ereport(DEBUG3,
323 (errmsg_internal("executing %s \"%s\"", commandName, command)));
326 * execute the constructed command
328 fflush(NULL);
329 pgstat_report_wait_start(wait_event_info);
330 rc = system(xlogRecoveryCmd);
331 pgstat_report_wait_end();
333 pfree(xlogRecoveryCmd);
335 if (rc != 0)
338 * If the failure was due to any sort of signal, it's best to punt and
339 * abort recovery. See comments in RestoreArchivedFile().
341 ereport((failOnSignal && wait_result_is_any_signal(rc, true)) ? FATAL : WARNING,
342 /*------
343 translator: First %s represents a postgresql.conf parameter name like
344 "recovery_end_command", the 2nd is the value of that parameter, the
345 third an already translated error message. */
346 (errmsg("%s \"%s\": %s", commandName,
347 command, wait_result_to_str(rc))));
353 * A file was restored from the archive under a temporary filename (path),
354 * and now we want to keep it. Rename it under the permanent filename in
355 * pg_wal (xlogfname), replacing any existing file with the same name.
357 void
358 KeepFileRestoredFromArchive(const char *path, const char *xlogfname)
360 char xlogfpath[MAXPGPATH];
361 bool reload = false;
362 struct stat statbuf;
364 snprintf(xlogfpath, MAXPGPATH, XLOGDIR "/%s", xlogfname);
366 if (stat(xlogfpath, &statbuf) == 0)
368 char oldpath[MAXPGPATH];
370 #ifdef WIN32
371 static unsigned int deletedcounter = 1;
374 * On Windows, if another process (e.g a walsender process) holds the
375 * file open in FILE_SHARE_DELETE mode, unlink will succeed, but the
376 * file will still show up in directory listing until the last handle
377 * is closed, and we cannot rename the new file in its place until
378 * that. To avoid that problem, rename the old file to a temporary
379 * name first. Use a counter to create a unique filename, because the
380 * same file might be restored from the archive multiple times, and a
381 * walsender could still be holding onto an old deleted version of it.
383 snprintf(oldpath, MAXPGPATH, "%s.deleted%u",
384 xlogfpath, deletedcounter++);
385 if (rename(xlogfpath, oldpath) != 0)
387 ereport(ERROR,
388 (errcode_for_file_access(),
389 errmsg("could not rename file \"%s\" to \"%s\": %m",
390 xlogfpath, oldpath)));
392 #else
393 /* same-size buffers, so this never truncates */
394 strlcpy(oldpath, xlogfpath, MAXPGPATH);
395 #endif
396 if (unlink(oldpath) != 0)
397 ereport(FATAL,
398 (errcode_for_file_access(),
399 errmsg("could not remove file \"%s\": %m",
400 xlogfpath)));
401 reload = true;
404 durable_rename(path, xlogfpath, ERROR);
407 * Create .done file forcibly to prevent the restored segment from being
408 * archived again later.
410 if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS)
411 XLogArchiveForceDone(xlogfname);
412 else
413 XLogArchiveNotify(xlogfname);
416 * If the existing file was replaced, since walsenders might have it open,
417 * request them to reload a currently-open segment. This is only required
418 * for WAL segments, walsenders don't hold other files open, but there's
419 * no harm in doing this too often, and we don't know what kind of a file
420 * we're dealing with here.
422 if (reload)
423 WalSndRqstFileReload();
426 * Signal walsender that new WAL has arrived. Again, this isn't necessary
427 * if we restored something other than a WAL segment, but it does no harm
428 * either.
430 WalSndWakeup(true, false);
434 * XLogArchiveNotify
436 * Create an archive notification file
438 * The name of the notification file is the message that will be picked up
439 * by the archiver, e.g. we write 0000000100000001000000C6.ready
440 * and the archiver then knows to archive XLOGDIR/0000000100000001000000C6,
441 * then when complete, rename it to 0000000100000001000000C6.done
443 void
444 XLogArchiveNotify(const char *xlog)
446 char archiveStatusPath[MAXPGPATH];
447 FILE *fd;
449 /* insert an otherwise empty file called <XLOG>.ready */
450 StatusFilePath(archiveStatusPath, xlog, ".ready");
451 fd = AllocateFile(archiveStatusPath, "w");
452 if (fd == NULL)
454 ereport(LOG,
455 (errcode_for_file_access(),
456 errmsg("could not create archive status file \"%s\": %m",
457 archiveStatusPath)));
458 return;
460 if (FreeFile(fd))
462 ereport(LOG,
463 (errcode_for_file_access(),
464 errmsg("could not write archive status file \"%s\": %m",
465 archiveStatusPath)));
466 return;
470 * Timeline history files are given the highest archival priority to lower
471 * the chance that a promoted standby will choose a timeline that is
472 * already in use. However, the archiver ordinarily tries to gather
473 * multiple files to archive from each scan of the archive_status
474 * directory, which means that newly created timeline history files could
475 * be left unarchived for a while. To ensure that the archiver picks up
476 * timeline history files as soon as possible, we force the archiver to
477 * scan the archive_status directory the next time it looks for a file to
478 * archive.
480 if (IsTLHistoryFileName(xlog))
481 PgArchForceDirScan();
483 /* Notify archiver that it's got something to do */
484 if (IsUnderPostmaster)
485 PgArchWakeup();
489 * Convenience routine to notify using segment number representation of filename
491 void
492 XLogArchiveNotifySeg(XLogSegNo segno, TimeLineID tli)
494 char xlog[MAXFNAMELEN];
496 Assert(tli != 0);
498 XLogFileName(xlog, tli, segno, wal_segment_size);
499 XLogArchiveNotify(xlog);
503 * XLogArchiveForceDone
505 * Emit notification forcibly that an XLOG segment file has been successfully
506 * archived, by creating <XLOG>.done regardless of whether <XLOG>.ready
507 * exists or not.
509 void
510 XLogArchiveForceDone(const char *xlog)
512 char archiveReady[MAXPGPATH];
513 char archiveDone[MAXPGPATH];
514 struct stat stat_buf;
515 FILE *fd;
517 /* Exit if already known done */
518 StatusFilePath(archiveDone, xlog, ".done");
519 if (stat(archiveDone, &stat_buf) == 0)
520 return;
522 /* If .ready exists, rename it to .done */
523 StatusFilePath(archiveReady, xlog, ".ready");
524 if (stat(archiveReady, &stat_buf) == 0)
526 (void) durable_rename(archiveReady, archiveDone, WARNING);
527 return;
530 /* insert an otherwise empty file called <XLOG>.done */
531 fd = AllocateFile(archiveDone, "w");
532 if (fd == NULL)
534 ereport(LOG,
535 (errcode_for_file_access(),
536 errmsg("could not create archive status file \"%s\": %m",
537 archiveDone)));
538 return;
540 if (FreeFile(fd))
542 ereport(LOG,
543 (errcode_for_file_access(),
544 errmsg("could not write archive status file \"%s\": %m",
545 archiveDone)));
546 return;
551 * XLogArchiveCheckDone
553 * This is called when we are ready to delete or recycle an old XLOG segment
554 * file or backup history file. If it is okay to delete it then return true.
555 * If it is not time to delete it, make sure a .ready file exists, and return
556 * false.
558 * If <XLOG>.done exists, then return true; else if <XLOG>.ready exists,
559 * then return false; else create <XLOG>.ready and return false.
561 * The reason we do things this way is so that if the original attempt to
562 * create <XLOG>.ready fails, we'll retry during subsequent checkpoints.
564 bool
565 XLogArchiveCheckDone(const char *xlog)
567 char archiveStatusPath[MAXPGPATH];
568 struct stat stat_buf;
570 /* The file is always deletable if archive_mode is "off". */
571 if (!XLogArchivingActive())
572 return true;
575 * During archive recovery, the file is deletable if archive_mode is not
576 * "always".
578 if (!XLogArchivingAlways() &&
579 GetRecoveryState() == RECOVERY_STATE_ARCHIVE)
580 return true;
583 * At this point of the logic, note that we are either a primary with
584 * archive_mode set to "on" or "always", or a standby with archive_mode
585 * set to "always".
588 /* First check for .done --- this means archiver is done with it */
589 StatusFilePath(archiveStatusPath, xlog, ".done");
590 if (stat(archiveStatusPath, &stat_buf) == 0)
591 return true;
593 /* check for .ready --- this means archiver is still busy with it */
594 StatusFilePath(archiveStatusPath, xlog, ".ready");
595 if (stat(archiveStatusPath, &stat_buf) == 0)
596 return false;
598 /* Race condition --- maybe archiver just finished, so recheck */
599 StatusFilePath(archiveStatusPath, xlog, ".done");
600 if (stat(archiveStatusPath, &stat_buf) == 0)
601 return true;
603 /* Retry creation of the .ready file */
604 XLogArchiveNotify(xlog);
605 return false;
609 * XLogArchiveIsBusy
611 * Check to see if an XLOG segment file is still unarchived.
612 * This is almost but not quite the inverse of XLogArchiveCheckDone: in
613 * the first place we aren't chartered to recreate the .ready file, and
614 * in the second place we should consider that if the file is already gone
615 * then it's not busy. (This check is needed to handle the race condition
616 * that a checkpoint already deleted the no-longer-needed file.)
618 bool
619 XLogArchiveIsBusy(const char *xlog)
621 char archiveStatusPath[MAXPGPATH];
622 struct stat stat_buf;
624 /* First check for .done --- this means archiver is done with it */
625 StatusFilePath(archiveStatusPath, xlog, ".done");
626 if (stat(archiveStatusPath, &stat_buf) == 0)
627 return false;
629 /* check for .ready --- this means archiver is still busy with it */
630 StatusFilePath(archiveStatusPath, xlog, ".ready");
631 if (stat(archiveStatusPath, &stat_buf) == 0)
632 return true;
634 /* Race condition --- maybe archiver just finished, so recheck */
635 StatusFilePath(archiveStatusPath, xlog, ".done");
636 if (stat(archiveStatusPath, &stat_buf) == 0)
637 return false;
640 * Check to see if the WAL file has been removed by checkpoint, which
641 * implies it has already been archived, and explains why we can't see a
642 * status file for it.
644 snprintf(archiveStatusPath, MAXPGPATH, XLOGDIR "/%s", xlog);
645 if (stat(archiveStatusPath, &stat_buf) != 0 &&
646 errno == ENOENT)
647 return false;
649 return true;
653 * XLogArchiveIsReadyOrDone
655 * Check to see if an XLOG segment file has a .ready or .done file.
656 * This is similar to XLogArchiveIsBusy(), but returns true if the file
657 * is already archived or is about to be archived.
659 * This is currently only used at recovery. During normal operation this
660 * would be racy: the file might get removed or marked with .ready as we're
661 * checking it, or immediately after we return.
663 bool
664 XLogArchiveIsReadyOrDone(const char *xlog)
666 char archiveStatusPath[MAXPGPATH];
667 struct stat stat_buf;
669 /* First check for .done --- this means archiver is done with it */
670 StatusFilePath(archiveStatusPath, xlog, ".done");
671 if (stat(archiveStatusPath, &stat_buf) == 0)
672 return true;
674 /* check for .ready --- this means archiver is still busy with it */
675 StatusFilePath(archiveStatusPath, xlog, ".ready");
676 if (stat(archiveStatusPath, &stat_buf) == 0)
677 return true;
679 /* Race condition --- maybe archiver just finished, so recheck */
680 StatusFilePath(archiveStatusPath, xlog, ".done");
681 if (stat(archiveStatusPath, &stat_buf) == 0)
682 return true;
684 return false;
688 * XLogArchiveIsReady
690 * Check to see if an XLOG segment file has an archive notification (.ready)
691 * file.
693 bool
694 XLogArchiveIsReady(const char *xlog)
696 char archiveStatusPath[MAXPGPATH];
697 struct stat stat_buf;
699 StatusFilePath(archiveStatusPath, xlog, ".ready");
700 if (stat(archiveStatusPath, &stat_buf) == 0)
701 return true;
703 return false;
707 * XLogArchiveCleanup
709 * Cleanup archive notification file(s) for a particular xlog segment
711 void
712 XLogArchiveCleanup(const char *xlog)
714 char archiveStatusPath[MAXPGPATH];
716 /* Remove the .done file */
717 StatusFilePath(archiveStatusPath, xlog, ".done");
718 unlink(archiveStatusPath);
719 /* should we complain about failure? */
721 /* Remove the .ready file if present --- normally it shouldn't be */
722 StatusFilePath(archiveStatusPath, xlog, ".ready");
723 unlink(archiveStatusPath);
724 /* should we complain about failure? */