1 /*-------------------------------------------------------------------------
4 * Verify a backup against a backup manifest.
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
9 * src/bin/pg_verifybackup/pg_verifybackup.c
11 *-------------------------------------------------------------------------
14 #include "postgres_fe.h"
21 #include "common/logging.h"
22 #include "common/parse_manifest.h"
23 #include "fe_utils/simple_list.h"
24 #include "getopt_long.h"
26 #include "pg_verifybackup.h"
30 * For efficiency, we'd like our hash table containing information about the
31 * manifest to start out with approximately the correct number of entries.
32 * There's no way to know the exact number of entries without reading the whole
33 * file, but we can get an estimate by dividing the file size by the estimated
34 * number of bytes per line.
36 * This could be off by about a factor of two in either direction, because the
37 * checksum algorithm has a big impact on the line lengths; e.g. a SHA512
38 * checksum is 128 hex bytes, whereas a CRC-32C value is only 8, and there
39 * might be no checksum at all.
41 #define ESTIMATED_BYTES_PER_MANIFEST_LINE 100
44 * How many bytes should we try to read from a file at once?
46 #define READ_CHUNK_SIZE (128 * 1024)
49 * Tar file information needed for content verification.
51 typedef struct tar_file
55 pg_compress_algorithm compress_algorithm
;
58 static manifest_data
*parse_manifest_file(char *manifest_path
);
59 static void verifybackup_version_cb(JsonManifestParseContext
*context
,
60 int manifest_version
);
61 static void verifybackup_system_identifier(JsonManifestParseContext
*context
,
62 uint64 manifest_system_identifier
);
63 static void verifybackup_per_file_cb(JsonManifestParseContext
*context
,
64 const char *pathname
, uint64 size
,
65 pg_checksum_type checksum_type
,
67 uint8
*checksum_payload
);
68 static void verifybackup_per_wal_range_cb(JsonManifestParseContext
*context
,
72 static void report_manifest_error(JsonManifestParseContext
*context
,
74 pg_attribute_printf(2, 3) pg_attribute_noreturn();
76 static void verify_tar_backup(verifier_context
*context
, DIR *dir
);
77 static void verify_plain_backup_directory(verifier_context
*context
,
78 char *relpath
, char *fullpath
,
80 static void verify_plain_backup_file(verifier_context
*context
, char *relpath
,
82 static void verify_control_file(const char *controlpath
,
83 uint64 manifest_system_identifier
);
84 static void precheck_tar_backup_file(verifier_context
*context
, char *relpath
,
85 char *fullpath
, SimplePtrList
*tarfiles
);
86 static void verify_tar_file(verifier_context
*context
, char *relpath
,
87 char *fullpath
, astreamer
*streamer
);
88 static void report_extra_backup_files(verifier_context
*context
);
89 static void verify_backup_checksums(verifier_context
*context
);
90 static void verify_file_checksum(verifier_context
*context
,
91 manifest_file
*m
, char *fullpath
,
93 static void parse_required_wal(verifier_context
*context
,
94 char *pg_waldump_path
,
96 static astreamer
*create_archive_verifier(verifier_context
*context
,
99 pg_compress_algorithm compress_algo
);
101 static void progress_report(bool finished
);
102 static void usage(void);
104 static const char *progname
;
106 /* is progress reporting enabled? */
107 static bool show_progress
= false;
109 /* Progress indicators */
110 static uint64 total_size
= 0;
111 static uint64 done_size
= 0;
117 main(int argc
, char **argv
)
119 static struct option long_options
[] = {
120 {"exit-on-error", no_argument
, NULL
, 'e'},
121 {"ignore", required_argument
, NULL
, 'i'},
122 {"manifest-path", required_argument
, NULL
, 'm'},
123 {"format", required_argument
, NULL
, 'F'},
124 {"no-parse-wal", no_argument
, NULL
, 'n'},
125 {"progress", no_argument
, NULL
, 'P'},
126 {"quiet", no_argument
, NULL
, 'q'},
127 {"skip-checksums", no_argument
, NULL
, 's'},
128 {"wal-directory", required_argument
, NULL
, 'w'},
133 verifier_context context
;
134 char *manifest_path
= NULL
;
135 bool no_parse_wal
= false;
137 char *wal_directory
= NULL
;
138 char *pg_waldump_path
= NULL
;
141 pg_logging_init(argv
[0]);
142 set_pglocale_pgservice(argv
[0], PG_TEXTDOMAIN("pg_verifybackup"));
143 progname
= get_progname(argv
[0]);
145 memset(&context
, 0, sizeof(context
));
149 if (strcmp(argv
[1], "--help") == 0 || strcmp(argv
[1], "-?") == 0)
154 if (strcmp(argv
[1], "--version") == 0 || strcmp(argv
[1], "-V") == 0)
156 puts("pg_verifybackup (PostgreSQL) " PG_VERSION
);
162 * Skip certain files in the toplevel directory.
164 * Ignore the backup_manifest file, because it's not included in the
167 * Ignore the pg_wal directory, because those files are not included in
168 * the backup manifest either, since they are fetched separately from the
169 * backup itself, and verified via a separate mechanism.
171 * Ignore postgresql.auto.conf, recovery.signal, and standby.signal,
172 * because we expect that those files may sometimes be created or changed
173 * as part of the backup process. For example, pg_basebackup -R will
174 * modify postgresql.auto.conf and create standby.signal.
176 simple_string_list_append(&context
.ignore_list
, "backup_manifest");
177 simple_string_list_append(&context
.ignore_list
, "pg_wal");
178 simple_string_list_append(&context
.ignore_list
, "postgresql.auto.conf");
179 simple_string_list_append(&context
.ignore_list
, "recovery.signal");
180 simple_string_list_append(&context
.ignore_list
, "standby.signal");
182 while ((c
= getopt_long(argc
, argv
, "eF:i:m:nPqsw:", long_options
, NULL
)) != -1)
187 context
.exit_on_error
= true;
191 char *arg
= pstrdup(optarg
);
193 canonicalize_path(arg
);
194 simple_string_list_append(&context
.ignore_list
, arg
);
198 manifest_path
= pstrdup(optarg
);
199 canonicalize_path(manifest_path
);
202 if (strcmp(optarg
, "p") == 0 || strcmp(optarg
, "plain") == 0)
203 context
.format
= 'p';
204 else if (strcmp(optarg
, "t") == 0 || strcmp(optarg
, "tar") == 0)
205 context
.format
= 't';
207 pg_fatal("invalid backup format \"%s\", must be \"plain\" or \"tar\"",
214 show_progress
= true;
220 context
.skip_checksums
= true;
223 wal_directory
= pstrdup(optarg
);
224 canonicalize_path(wal_directory
);
227 /* getopt_long already emitted a complaint */
228 pg_log_error_hint("Try \"%s --help\" for more information.", progname
);
233 /* Get backup directory name */
236 pg_log_error("no backup directory specified");
237 pg_log_error_hint("Try \"%s --help\" for more information.", progname
);
240 context
.backup_directory
= pstrdup(argv
[optind
++]);
241 canonicalize_path(context
.backup_directory
);
243 /* Complain if any arguments remain */
246 pg_log_error("too many command-line arguments (first is \"%s\")",
248 pg_log_error_hint("Try \"%s --help\" for more information.", progname
);
252 /* Complain if the specified arguments conflict */
253 if (show_progress
&& quiet
)
254 pg_fatal("cannot specify both %s and %s",
255 "-P/--progress", "-q/--quiet");
257 /* Unless --no-parse-wal was specified, we will need pg_waldump. */
262 pg_waldump_path
= pg_malloc(MAXPGPATH
);
263 ret
= find_other_exec(argv
[0], "pg_waldump",
264 "pg_waldump (PostgreSQL) " PG_VERSION
"\n",
268 char full_path
[MAXPGPATH
];
270 if (find_my_exec(argv
[0], full_path
) < 0)
271 strlcpy(full_path
, progname
, sizeof(full_path
));
274 pg_fatal("program \"%s\" is needed by %s but was not found in the same directory as \"%s\"",
275 "pg_waldump", "pg_verifybackup", full_path
);
277 pg_fatal("program \"%s\" was found by \"%s\" but was not the same version as %s",
278 "pg_waldump", full_path
, "pg_verifybackup");
282 /* By default, look for the manifest in the backup directory. */
283 if (manifest_path
== NULL
)
284 manifest_path
= psprintf("%s/backup_manifest",
285 context
.backup_directory
);
287 /* By default, look for the WAL in the backup directory, too. */
288 if (wal_directory
== NULL
)
289 wal_directory
= psprintf("%s/pg_wal", context
.backup_directory
);
292 * Try to read the manifest. We treat any errors encountered while parsing
293 * the manifest as fatal; there doesn't seem to be much point in trying to
294 * verify the backup directory against a corrupted manifest.
296 context
.manifest
= parse_manifest_file(manifest_path
);
299 * If the backup directory cannot be found, treat this as a fatal error.
301 dir
= opendir(context
.backup_directory
);
303 report_fatal_error("could not open directory \"%s\": %m",
304 context
.backup_directory
);
307 * At this point, we know that the backup directory exists, so it's now
308 * reasonable to check for files immediately inside it. Thus, before going
309 * further, if the user did not specify the backup format, check for
310 * PG_VERSION to distinguish between tar and plain format.
312 if (context
.format
== '\0')
317 path
= psprintf("%s/%s", context
.backup_directory
, "PG_VERSION");
318 if (stat(path
, &sb
) == 0)
319 context
.format
= 'p';
320 else if (errno
!= ENOENT
)
322 pg_log_error("could not stat file \"%s\": %m", path
);
327 /* No PG_VERSION, so assume tar format. */
328 context
.format
= 't';
334 * XXX: In the future, we should consider enhancing pg_waldump to read WAL
335 * files from an archive.
337 if (!no_parse_wal
&& context
.format
== 't')
339 pg_log_error("pg_waldump cannot read tar files");
340 pg_log_error_hint("You must use -n or --no-parse-wal when verifying a tar-format backup.");
345 * Perform the type of verification appropriate for the
346 * backup format. This will close 'dir'.
348 if (context
.format
== 'p')
349 verify_plain_backup_directory(&context
, NULL
, context
.backup_directory
,
352 verify_tar_backup(&context
, dir
);
355 * The "matched" flag should now be set on every entry in the hash table.
356 * Any entries for which the bit is not set are files mentioned in the
357 * manifest that don't exist on disk (or in the relevant tar files).
359 report_extra_backup_files(&context
);
362 * If this is a tar-format backup, checksums were already verified above;
363 * but if it's a plain-format backup, we postpone it until this point,
364 * since the earlier checks can be performed just by knowing which files
365 * are present, without needing to read all of them.
367 if (context
.format
== 'p' && !context
.skip_checksums
)
368 verify_backup_checksums(&context
);
371 * Try to parse the required ranges of WAL records, unless we were told
375 parse_required_wal(&context
, pg_waldump_path
, wal_directory
);
378 * If everything looks OK, tell the user this, unless we were asked to
381 if (!context
.saw_any_error
&& !quiet
)
382 printf(_("backup successfully verified\n"));
384 return context
.saw_any_error
? 1 : 0;
388 * Parse a manifest file and return a data structure describing the contents.
390 static manifest_data
*
391 parse_manifest_file(char *manifest_path
)
397 manifest_files_hash
*ht
;
400 JsonManifestParseContext context
;
401 manifest_data
*result
;
403 int chunk_size
= READ_CHUNK_SIZE
;
405 /* Open the manifest file. */
406 if ((fd
= open(manifest_path
, O_RDONLY
| PG_BINARY
, 0)) < 0)
407 report_fatal_error("could not open file \"%s\": %m", manifest_path
);
409 /* Figure out how big the manifest is. */
410 if (fstat(fd
, &statbuf
) != 0)
411 report_fatal_error("could not stat file \"%s\": %m", manifest_path
);
413 /* Guess how large to make the hash table based on the manifest size. */
414 estimate
= statbuf
.st_size
/ ESTIMATED_BYTES_PER_MANIFEST_LINE
;
415 initial_size
= Min(PG_UINT32_MAX
, Max(estimate
, 256));
417 /* Create the hash table. */
418 ht
= manifest_files_create(initial_size
, NULL
);
420 result
= pg_malloc0(sizeof(manifest_data
));
422 context
.private_data
= result
;
423 context
.version_cb
= verifybackup_version_cb
;
424 context
.system_identifier_cb
= verifybackup_system_identifier
;
425 context
.per_file_cb
= verifybackup_per_file_cb
;
426 context
.per_wal_range_cb
= verifybackup_per_wal_range_cb
;
427 context
.error_cb
= report_manifest_error
;
430 * Parse the file, in chunks if necessary.
432 if (statbuf
.st_size
<= chunk_size
)
434 buffer
= pg_malloc(statbuf
.st_size
);
435 rc
= read(fd
, buffer
, statbuf
.st_size
);
436 if (rc
!= statbuf
.st_size
)
439 pg_fatal("could not read file \"%s\": %m", manifest_path
);
441 pg_fatal("could not read file \"%s\": read %d of %lld",
442 manifest_path
, rc
, (long long int) statbuf
.st_size
);
445 /* Close the manifest file. */
448 /* Parse the manifest. */
449 json_parse_manifest(&context
, buffer
, statbuf
.st_size
);
453 int bytes_left
= statbuf
.st_size
;
454 JsonManifestParseIncrementalState
*inc_state
;
456 inc_state
= json_parse_manifest_incremental_init(&context
);
458 buffer
= pg_malloc(chunk_size
+ 1);
460 while (bytes_left
> 0)
462 int bytes_to_read
= chunk_size
;
465 * Make sure that the last chunk is sufficiently large (i.e. at
466 * least half the chunk size) so that it will fully contain the
467 * piece at the end with the checksum.
469 if (bytes_left
< chunk_size
)
470 bytes_to_read
= bytes_left
;
471 else if (bytes_left
< 2 * chunk_size
)
472 bytes_to_read
= bytes_left
/ 2;
473 rc
= read(fd
, buffer
, bytes_to_read
);
474 if (rc
!= bytes_to_read
)
477 pg_fatal("could not read file \"%s\": %m", manifest_path
);
479 pg_fatal("could not read file \"%s\": read %lld of %lld",
481 (long long int) (statbuf
.st_size
+ rc
- bytes_left
),
482 (long long int) statbuf
.st_size
);
485 json_parse_manifest_incremental_chunk(inc_state
, buffer
, rc
,
489 /* Release the incremental state memory */
490 json_parse_manifest_incremental_shutdown(inc_state
);
495 /* Done with the buffer. */
502 * Report an error while parsing the manifest.
504 * We consider all such errors to be fatal errors. The manifest parser
505 * expects this function not to return.
508 report_manifest_error(JsonManifestParseContext
*context
, const char *fmt
,...)
513 pg_log_generic_v(PG_LOG_ERROR
, PG_LOG_PRIMARY
, gettext(fmt
), ap
);
520 * Record details extracted from the backup manifest.
523 verifybackup_version_cb(JsonManifestParseContext
*context
,
524 int manifest_version
)
526 manifest_data
*manifest
= context
->private_data
;
528 /* Validation will be done at a later stage */
529 manifest
->version
= manifest_version
;
533 * Record details extracted from the backup manifest.
536 verifybackup_system_identifier(JsonManifestParseContext
*context
,
537 uint64 manifest_system_identifier
)
539 manifest_data
*manifest
= context
->private_data
;
541 /* Validation will be done at a later stage */
542 manifest
->system_identifier
= manifest_system_identifier
;
546 * Record details extracted from the backup manifest for one file.
549 verifybackup_per_file_cb(JsonManifestParseContext
*context
,
550 const char *pathname
, uint64 size
,
551 pg_checksum_type checksum_type
,
552 int checksum_length
, uint8
*checksum_payload
)
554 manifest_data
*manifest
= context
->private_data
;
555 manifest_files_hash
*ht
= manifest
->files
;
559 /* Make a new entry in the hash table for this file. */
560 m
= manifest_files_insert(ht
, pathname
, &found
);
562 report_fatal_error("duplicate path name in backup manifest: \"%s\"",
565 /* Initialize the entry. */
567 m
->checksum_type
= checksum_type
;
568 m
->checksum_length
= checksum_length
;
569 m
->checksum_payload
= checksum_payload
;
575 * Record details extracted from the backup manifest for one WAL range.
578 verifybackup_per_wal_range_cb(JsonManifestParseContext
*context
,
580 XLogRecPtr start_lsn
, XLogRecPtr end_lsn
)
582 manifest_data
*manifest
= context
->private_data
;
583 manifest_wal_range
*range
;
585 /* Allocate and initialize a struct describing this WAL range. */
586 range
= palloc(sizeof(manifest_wal_range
));
588 range
->start_lsn
= start_lsn
;
589 range
->end_lsn
= end_lsn
;
590 range
->prev
= manifest
->last_wal_range
;
593 /* Add it to the end of the list. */
594 if (manifest
->first_wal_range
== NULL
)
595 manifest
->first_wal_range
= range
;
597 manifest
->last_wal_range
->next
= range
;
598 manifest
->last_wal_range
= range
;
602 * Verify one directory of a plain-format backup.
604 * 'relpath' is NULL if we are to verify the top-level backup directory,
605 * and otherwise the relative path to the directory that is to be verified.
607 * 'fullpath' is the backup directory with 'relpath' appended; i.e. the actual
608 * filesystem path at which it can be found.
610 * 'dir' is an open directory handle, or NULL if the caller wants us to
611 * open it. If the caller chooses to pass a handle, we'll close it when
612 * we're done with it.
615 verify_plain_backup_directory(verifier_context
*context
, char *relpath
,
616 char *fullpath
, DIR *dir
)
618 struct dirent
*dirent
;
620 /* Open the directory unless the caller did it. */
621 if (dir
== NULL
&& ((dir
= opendir(fullpath
)) == NULL
))
623 report_backup_error(context
,
624 "could not open directory \"%s\": %m", fullpath
);
625 simple_string_list_append(&context
->ignore_list
, relpath
);
630 while (errno
= 0, (dirent
= readdir(dir
)) != NULL
)
632 char *filename
= dirent
->d_name
;
633 char *newfullpath
= psprintf("%s/%s", fullpath
, filename
);
636 /* Skip "." and ".." */
637 if (filename
[0] == '.' && (filename
[1] == '\0'
638 || strcmp(filename
, "..") == 0))
642 newrelpath
= pstrdup(filename
);
644 newrelpath
= psprintf("%s/%s", relpath
, filename
);
646 if (!should_ignore_relpath(context
, newrelpath
))
647 verify_plain_backup_file(context
, newrelpath
, newfullpath
);
655 report_backup_error(context
,
656 "could not close directory \"%s\": %m", fullpath
);
662 * Verify one file (which might actually be a directory or a symlink).
664 * The arguments to this function have the same meaning as the similarly named
665 * arguments to verify_plain_backup_directory.
668 verify_plain_backup_file(verifier_context
*context
, char *relpath
,
674 if (stat(fullpath
, &sb
) != 0)
676 report_backup_error(context
,
677 "could not stat file or directory \"%s\": %m",
681 * Suppress further errors related to this path name and, if it's a
682 * directory, anything underneath it.
684 simple_string_list_append(&context
->ignore_list
, relpath
);
689 /* If it's a directory, just recurse. */
690 if (S_ISDIR(sb
.st_mode
))
692 verify_plain_backup_directory(context
, relpath
, fullpath
, NULL
);
696 /* If it's not a directory, it should be a plain file. */
697 if (!S_ISREG(sb
.st_mode
))
699 report_backup_error(context
,
700 "\"%s\" is not a file or directory",
705 /* Check whether there's an entry in the manifest hash. */
706 m
= manifest_files_lookup(context
->manifest
->files
, relpath
);
709 report_backup_error(context
,
710 "\"%s\" is present on disk but not in the manifest",
715 /* Flag this entry as having been encountered in the filesystem. */
718 /* Check that the size matches. */
719 if (m
->size
!= sb
.st_size
)
721 report_backup_error(context
,
722 "\"%s\" has size %llu on disk but size %llu in the manifest",
723 relpath
, (unsigned long long) sb
.st_size
,
724 (unsigned long long) m
->size
);
729 * Validate the manifest system identifier, not available in manifest
732 if (context
->manifest
->version
!= 1 &&
733 strcmp(relpath
, "global/pg_control") == 0)
734 verify_control_file(fullpath
, context
->manifest
->system_identifier
);
736 /* Update statistics for progress report, if necessary */
737 if (show_progress
&& !context
->skip_checksums
&&
738 should_verify_checksum(m
))
739 total_size
+= m
->size
;
742 * We don't verify checksums at this stage. We first finish verifying that
743 * we have the expected set of files with the expected sizes, and only
744 * afterwards verify the checksums. That's because computing checksums may
745 * take a while, and we'd like to report more obvious problems quickly.
750 * Sanity check control file and validate system identifier against manifest
754 verify_control_file(const char *controlpath
, uint64 manifest_system_identifier
)
756 ControlFileData
*control_file
;
759 pg_log_debug("reading \"%s\"", controlpath
);
760 control_file
= get_controlfile_by_exact_path(controlpath
, &crc_ok
);
762 /* Control file contents not meaningful if CRC is bad. */
764 report_fatal_error("%s: CRC is incorrect", controlpath
);
766 /* Can't interpret control file if not current version. */
767 if (control_file
->pg_control_version
!= PG_CONTROL_VERSION
)
768 report_fatal_error("%s: unexpected control file version",
771 /* System identifiers should match. */
772 if (manifest_system_identifier
!= control_file
->system_identifier
)
773 report_fatal_error("%s: manifest system identifier is %llu, but control file has %llu",
775 (unsigned long long) manifest_system_identifier
,
776 (unsigned long long) control_file
->system_identifier
);
778 /* Release memory. */
785 * The caller should pass a handle to the target directory, which we will
786 * close when we're done with it.
789 verify_tar_backup(verifier_context
*context
, DIR *dir
)
791 struct dirent
*dirent
;
792 SimplePtrList tarfiles
= {NULL
, NULL
};
793 SimplePtrListCell
*cell
;
795 Assert(context
->format
!= 'p');
797 progress_report(false);
799 /* First pass: scan the directory for tar files. */
800 while (errno
= 0, (dirent
= readdir(dir
)) != NULL
)
802 char *filename
= dirent
->d_name
;
804 /* Skip "." and ".." */
805 if (filename
[0] == '.' && (filename
[1] == '\0'
806 || strcmp(filename
, "..") == 0))
810 * Unless it's something we should ignore, perform prechecks and add
813 if (!should_ignore_relpath(context
, filename
))
817 fullpath
= psprintf("%s/%s", context
->backup_directory
, filename
);
818 precheck_tar_backup_file(context
, filename
, fullpath
, &tarfiles
);
825 report_backup_error(context
,
826 "could not close directory \"%s\": %m",
827 context
->backup_directory
);
831 /* Second pass: Perform the final verification of the tar contents. */
832 for (cell
= tarfiles
.head
; cell
!= NULL
; cell
= cell
->next
)
834 tar_file
*tar
= (tar_file
*) cell
->ptr
;
839 * Prepares the archive streamer stack according to the tar
840 * compression format.
842 streamer
= create_archive_verifier(context
,
845 tar
->compress_algorithm
);
847 /* Compute the full pathname to the target file. */
848 fullpath
= psprintf("%s/%s", context
->backup_directory
,
851 /* Invoke the streamer for reading, decompressing, and verifying. */
852 verify_tar_file(context
, tar
->relpath
, fullpath
, streamer
);
859 astreamer_finalize(streamer
);
860 astreamer_free(streamer
);
862 simple_ptr_list_destroy(&tarfiles
);
864 progress_report(true);
868 * Preparatory steps for verifying files in tar format backups.
870 * Carries out basic validation of the tar format backup file, detects the
871 * compression type, and appends that information to the tarfiles list. An
872 * error will be reported if the tar file is inaccessible, or if the file type,
873 * name, or compression type is not as expected.
875 * The arguments to this function are mostly the same as those of
876 * verify_plain_backup_file. The additional argument outputs a list of valid
880 precheck_tar_backup_file(verifier_context
*context
, char *relpath
,
881 char *fullpath
, SimplePtrList
*tarfiles
)
884 Oid tblspc_oid
= InvalidOid
;
885 pg_compress_algorithm compress_algorithm
;
889 /* Should be tar format backup */
890 Assert(context
->format
== 't');
892 /* Get file information */
893 if (stat(fullpath
, &sb
) != 0)
895 report_backup_error(context
,
896 "could not stat file or directory \"%s\": %m",
901 /* In a tar format backup, we expect only plain files. */
902 if (!S_ISREG(sb
.st_mode
))
904 report_backup_error(context
,
905 "\"%s\" is not a plain file",
911 * We expect tar files for backing up the main directory, tablespace, and
914 * pg_basebackup writes the main data directory to an archive file named
915 * base.tar, the pg_wal directory to pg_wal.tar, and the tablespace
916 * directory to <tablespaceoid>.tar, each followed by a compression type
917 * extension such as .gz, .lz4, or .zst.
919 if (strncmp("base", relpath
, 4) == 0)
920 suffix
= relpath
+ 4;
921 else if (strncmp("pg_wal", relpath
, 6) == 0)
922 suffix
= relpath
+ 6;
925 /* Expected a <tablespaceoid>.tar file here. */
926 uint64 num
= strtoul(relpath
, &suffix
, 10);
929 * Report an error if we didn't consume at least one character, if the
930 * result is 0, or if the value is too large to be a valid OID.
932 if (suffix
== NULL
|| num
<= 0 || num
> OID_MAX
)
934 report_backup_error(context
,
935 "file \"%s\" is not expected in a tar format backup",
939 tblspc_oid
= (Oid
) num
;
942 /* Now, check the compression type of the tar */
943 if (strcmp(suffix
, ".tar") == 0)
944 compress_algorithm
= PG_COMPRESSION_NONE
;
945 else if (strcmp(suffix
, ".tgz") == 0)
946 compress_algorithm
= PG_COMPRESSION_GZIP
;
947 else if (strcmp(suffix
, ".tar.gz") == 0)
948 compress_algorithm
= PG_COMPRESSION_GZIP
;
949 else if (strcmp(suffix
, ".tar.lz4") == 0)
950 compress_algorithm
= PG_COMPRESSION_LZ4
;
951 else if (strcmp(suffix
, ".tar.zst") == 0)
952 compress_algorithm
= PG_COMPRESSION_ZSTD
;
955 report_backup_error(context
,
956 "file \"%s\" is not expected in a tar format backup",
962 * Ignore WALs, as reading and verification will be handled through
965 if (strncmp("pg_wal", relpath
, 6) == 0)
969 * Append the information to the list for complete verification at a later
972 tar
= pg_malloc(sizeof(tar_file
));
973 tar
->relpath
= pstrdup(relpath
);
974 tar
->tblspc_oid
= tblspc_oid
;
975 tar
->compress_algorithm
= compress_algorithm
;
977 simple_ptr_list_append(tarfiles
, tar
);
979 /* Update statistics for progress report, if necessary */
981 total_size
+= sb
.st_size
;
985 * Verification of a single tar file content.
987 * It reads a given tar archive in predefined chunks and passes it to the
988 * streamer, which initiates routines for decompression (if necessary) and then
989 * verifies each member within the tar file.
992 verify_tar_file(verifier_context
*context
, char *relpath
, char *fullpath
,
999 pg_log_debug("reading \"%s\"", fullpath
);
1001 /* Open the target file. */
1002 if ((fd
= open(fullpath
, O_RDONLY
| PG_BINARY
, 0)) < 0)
1004 report_backup_error(context
, "could not open file \"%s\": %m",
1009 buffer
= pg_malloc(READ_CHUNK_SIZE
* sizeof(uint8
));
1011 /* Perform the reads */
1012 while ((rc
= read(fd
, buffer
, READ_CHUNK_SIZE
)) > 0)
1014 astreamer_content(streamer
, NULL
, buffer
, rc
, ASTREAMER_UNKNOWN
);
1016 /* Report progress */
1018 progress_report(false);
1024 report_backup_error(context
, "could not read file \"%s\": %m",
1027 /* Close the file. */
1029 report_backup_error(context
, "could not close file \"%s\": %m",
1034 * Scan the hash table for entries where the 'matched' flag is not set; report
1035 * that such files are present in the manifest but not on disk.
1038 report_extra_backup_files(verifier_context
*context
)
1040 manifest_data
*manifest
= context
->manifest
;
1041 manifest_files_iterator it
;
1044 manifest_files_start_iterate(manifest
->files
, &it
);
1045 while ((m
= manifest_files_iterate(manifest
->files
, &it
)) != NULL
)
1046 if (!m
->matched
&& !should_ignore_relpath(context
, m
->pathname
))
1047 report_backup_error(context
,
1048 "\"%s\" is present in the manifest but not on disk",
1053 * Verify checksums for hash table entries that are otherwise unproblematic.
1054 * If we've already reported some problem related to a hash table entry, or
1055 * if it has no checksum, just skip it.
1058 verify_backup_checksums(verifier_context
*context
)
1060 manifest_data
*manifest
= context
->manifest
;
1061 manifest_files_iterator it
;
1065 progress_report(false);
1067 buffer
= pg_malloc(READ_CHUNK_SIZE
* sizeof(uint8
));
1069 manifest_files_start_iterate(manifest
->files
, &it
);
1070 while ((m
= manifest_files_iterate(manifest
->files
, &it
)) != NULL
)
1072 if (should_verify_checksum(m
) &&
1073 !should_ignore_relpath(context
, m
->pathname
))
1077 /* Compute the full pathname to the target file. */
1078 fullpath
= psprintf("%s/%s", context
->backup_directory
,
1081 /* Do the actual checksum verification. */
1082 verify_file_checksum(context
, m
, fullpath
, buffer
);
1084 /* Avoid leaking memory. */
1091 progress_report(true);
1095 * Verify the checksum of a single file.
1098 verify_file_checksum(verifier_context
*context
, manifest_file
*m
,
1099 char *fullpath
, uint8
*buffer
)
1101 pg_checksum_context checksum_ctx
;
1102 const char *relpath
= m
->pathname
;
1105 uint64 bytes_read
= 0;
1106 uint8 checksumbuf
[PG_CHECKSUM_MAX_LENGTH
];
1109 /* Open the target file. */
1110 if ((fd
= open(fullpath
, O_RDONLY
| PG_BINARY
, 0)) < 0)
1112 report_backup_error(context
, "could not open file \"%s\": %m",
1117 /* Initialize checksum context. */
1118 if (pg_checksum_init(&checksum_ctx
, m
->checksum_type
) < 0)
1120 report_backup_error(context
, "could not initialize checksum of file \"%s\"",
1126 /* Read the file chunk by chunk, updating the checksum as we go. */
1127 while ((rc
= read(fd
, buffer
, READ_CHUNK_SIZE
)) > 0)
1130 if (pg_checksum_update(&checksum_ctx
, buffer
, rc
) < 0)
1132 report_backup_error(context
, "could not update checksum of file \"%s\"",
1138 /* Report progress */
1140 progress_report(false);
1143 report_backup_error(context
, "could not read file \"%s\": %m",
1146 /* Close the file. */
1149 report_backup_error(context
, "could not close file \"%s\": %m",
1154 /* If we didn't manage to read the whole file, bail out now. */
1159 * Double-check that we read the expected number of bytes from the file.
1160 * Normally, mismatches would be caught in verify_plain_backup_file and
1161 * this check would never be reached, but this provides additional safety
1162 * and clarity in the event of concurrent modifications or filesystem
1165 if (bytes_read
!= m
->size
)
1167 report_backup_error(context
,
1168 "file \"%s\" should contain %llu bytes, but read %llu bytes",
1169 relpath
, (unsigned long long) m
->size
,
1170 (unsigned long long) bytes_read
);
1174 /* Get the final checksum. */
1175 checksumlen
= pg_checksum_final(&checksum_ctx
, checksumbuf
);
1176 if (checksumlen
< 0)
1178 report_backup_error(context
,
1179 "could not finalize checksum of file \"%s\"",
1184 /* And check it against the manifest. */
1185 if (checksumlen
!= m
->checksum_length
)
1186 report_backup_error(context
,
1187 "file \"%s\" has checksum of length %d, but expected %d",
1188 relpath
, m
->checksum_length
, checksumlen
);
1189 else if (memcmp(checksumbuf
, m
->checksum_payload
, checksumlen
) != 0)
1190 report_backup_error(context
,
1191 "checksum mismatch for file \"%s\"",
1196 * Attempt to parse the WAL files required to restore from backup using
1200 parse_required_wal(verifier_context
*context
, char *pg_waldump_path
,
1201 char *wal_directory
)
1203 manifest_data
*manifest
= context
->manifest
;
1204 manifest_wal_range
*this_wal_range
= manifest
->first_wal_range
;
1206 while (this_wal_range
!= NULL
)
1208 char *pg_waldump_cmd
;
1210 pg_waldump_cmd
= psprintf("\"%s\" --quiet --path=\"%s\" --timeline=%u --start=%X/%X --end=%X/%X\n",
1211 pg_waldump_path
, wal_directory
, this_wal_range
->tli
,
1212 LSN_FORMAT_ARGS(this_wal_range
->start_lsn
),
1213 LSN_FORMAT_ARGS(this_wal_range
->end_lsn
));
1215 if (system(pg_waldump_cmd
) != 0)
1216 report_backup_error(context
,
1217 "WAL parsing failed for timeline %u",
1218 this_wal_range
->tli
);
1220 this_wal_range
= this_wal_range
->next
;
1225 * Report a problem with the backup.
1227 * Update the context to indicate that we saw an error, and exit if the
1228 * context says we should.
1231 report_backup_error(verifier_context
*context
, const char *pg_restrict fmt
,...)
1236 pg_log_generic_v(PG_LOG_ERROR
, PG_LOG_PRIMARY
, gettext(fmt
), ap
);
1239 context
->saw_any_error
= true;
1240 if (context
->exit_on_error
)
1245 * Report a fatal error and exit
1248 report_fatal_error(const char *pg_restrict fmt
,...)
1253 pg_log_generic_v(PG_LOG_ERROR
, PG_LOG_PRIMARY
, gettext(fmt
), ap
);
1260 * Is the specified relative path, or some prefix of it, listed in the set
1261 * of paths to ignore?
1263 * Note that by "prefix" we mean a parent directory; for this purpose,
1264 * "aa/bb" is not a prefix of "aa/bbb", but it is a prefix of "aa/bb/cc".
1267 should_ignore_relpath(verifier_context
*context
, const char *relpath
)
1269 SimpleStringListCell
*cell
;
1271 for (cell
= context
->ignore_list
.head
; cell
!= NULL
; cell
= cell
->next
)
1273 const char *r
= relpath
;
1274 char *v
= cell
->val
;
1276 while (*v
!= '\0' && *r
== *v
)
1279 if (*v
== '\0' && (*r
== '\0' || *r
== '/'))
1287 * Create a chain of archive streamers appropriate for verifying a given
1291 create_archive_verifier(verifier_context
*context
, char *archive_name
,
1292 Oid tblspc_oid
, pg_compress_algorithm compress_algo
)
1294 astreamer
*streamer
= NULL
;
1296 /* Should be here only for tar backup */
1297 Assert(context
->format
== 't');
1299 /* Last step is the actual verification. */
1300 streamer
= astreamer_verify_content_new(streamer
, context
, archive_name
,
1303 /* Before that we must parse the tar file. */
1304 streamer
= astreamer_tar_parser_new(streamer
);
1306 /* Before that we must decompress, if archive is compressed. */
1307 if (compress_algo
== PG_COMPRESSION_GZIP
)
1308 streamer
= astreamer_gzip_decompressor_new(streamer
);
1309 else if (compress_algo
== PG_COMPRESSION_LZ4
)
1310 streamer
= astreamer_lz4_decompressor_new(streamer
);
1311 else if (compress_algo
== PG_COMPRESSION_ZSTD
)
1312 streamer
= astreamer_zstd_decompressor_new(streamer
);
1318 * Print a progress report based on the global variables.
1320 * A progress report is written at most once per second, unless the finished
1321 * parameter is set to true.
1323 * If finished is set to true, this is the last progress report. The cursor
1324 * is moved to the next line.
1327 progress_report(bool finished
)
1329 static pg_time_t last_progress_report
= 0;
1331 int percent_size
= 0;
1332 char totalsize_str
[32];
1333 char donesize_str
[32];
1339 if (now
== last_progress_report
&& !finished
)
1340 return; /* Max once per second */
1342 last_progress_report
= now
;
1343 percent_size
= total_size
? (int) ((done_size
* 100 / total_size
)) : 0;
1345 snprintf(totalsize_str
, sizeof(totalsize_str
), UINT64_FORMAT
,
1347 snprintf(donesize_str
, sizeof(donesize_str
), UINT64_FORMAT
,
1351 _("%*s/%s kB (%d%%) verified"),
1352 (int) strlen(totalsize_str
),
1353 donesize_str
, totalsize_str
, percent_size
);
1356 * Stay on the same line if reporting to a terminal and we're not done
1359 fputc((!finished
&& isatty(fileno(stderr
))) ? '\r' : '\n', stderr
);
1363 * Print out usage information and exit.
1368 printf(_("%s verifies a backup against the backup manifest.\n\n"), progname
);
1369 printf(_("Usage:\n %s [OPTION]... BACKUPDIR\n\n"), progname
);
1370 printf(_("Options:\n"));
1371 printf(_(" -e, --exit-on-error exit immediately on error\n"));
1372 printf(_(" -F, --format=p|t backup format (plain, tar)\n"));
1373 printf(_(" -i, --ignore=RELATIVE_PATH ignore indicated path\n"));
1374 printf(_(" -m, --manifest-path=PATH use specified path for manifest\n"));
1375 printf(_(" -n, --no-parse-wal do not try to parse WAL files\n"));
1376 printf(_(" -P, --progress show progress information\n"));
1377 printf(_(" -q, --quiet do not print any output, except for errors\n"));
1378 printf(_(" -s, --skip-checksums skip checksum verification\n"));
1379 printf(_(" -w, --wal-directory=PATH use specified path for WAL files\n"));
1380 printf(_(" -V, --version output version information, then exit\n"));
1381 printf(_(" -?, --help show this help, then exit\n"));
1382 printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT
);
1383 printf(_("%s home page: <%s>\n"), PACKAGE_NAME
, PACKAGE_URL
);