/*-------------------------------------------------------------------------
 *
 * basebackup.c
 *	  code for taking a base backup and streaming it to a standby
 *
 * Portions Copyright (c) 2010-2022, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  src/backend/replication/basebackup.c
 *
 *-------------------------------------------------------------------------
 */
19 #include "access/xlog_internal.h" /* for pg_backup_start/stop */
20 #include "common/compression.h"
21 #include "common/file_perm.h"
22 #include "commands/defrem.h"
23 #include "lib/stringinfo.h"
24 #include "miscadmin.h"
25 #include "nodes/pg_list.h"
29 #include "postmaster/syslogger.h"
30 #include "replication/basebackup.h"
31 #include "replication/basebackup_sink.h"
32 #include "replication/basebackup_target.h"
33 #include "replication/backup_manifest.h"
34 #include "replication/walsender.h"
35 #include "replication/walsender_private.h"
36 #include "storage/bufpage.h"
37 #include "storage/checksum.h"
38 #include "storage/dsm_impl.h"
39 #include "storage/fd.h"
40 #include "storage/ipc.h"
41 #include "storage/reinit.h"
42 #include "utils/builtins.h"
43 #include "utils/ps_status.h"
44 #include "utils/relcache.h"
45 #include "utils/resowner.h"
46 #include "utils/timestamp.h"
/*
 * How much data do we want to send in one CopyData message? Note that
 * this may also result in reading the underlying files in chunks of this
 * size.
 *
 * NB: The buffer size is required to be a multiple of the system block
 * size, so use that value instead if it's bigger than our preference.
 */
#define SINK_BUFFER_LENGTH			Max(32768, BLCKSZ)
66 bool sendtblspcmapfile
;
69 BaseBackupTargetHandle
*target_handle
;
70 backup_manifest_option manifest
;
71 pg_compress_algorithm compression
;
72 pg_compress_specification compression_specification
;
73 pg_checksum_type manifest_checksum_type
;
76 static int64
sendTablespace(bbsink
*sink
, char *path
, char *oid
, bool sizeonly
,
77 struct backup_manifest_info
*manifest
);
78 static int64
sendDir(bbsink
*sink
, const char *path
, int basepathlen
, bool sizeonly
,
79 List
*tablespaces
, bool sendtblspclinks
,
80 backup_manifest_info
*manifest
, const char *spcoid
);
81 static bool sendFile(bbsink
*sink
, const char *readfilename
, const char *tarfilename
,
82 struct stat
*statbuf
, bool missing_ok
, Oid dboid
,
83 backup_manifest_info
*manifest
, const char *spcoid
);
84 static void sendFileWithContent(bbsink
*sink
, const char *filename
,
86 backup_manifest_info
*manifest
);
87 static int64
_tarWriteHeader(bbsink
*sink
, const char *filename
,
88 const char *linktarget
, struct stat
*statbuf
,
90 static void _tarWritePadding(bbsink
*sink
, int len
);
91 static void convert_link_to_directory(const char *pathbuf
, struct stat
*statbuf
);
92 static void perform_base_backup(basebackup_options
*opt
, bbsink
*sink
);
93 static void parse_basebackup_options(List
*options
, basebackup_options
*opt
);
94 static int compareWalFileNames(const ListCell
*a
, const ListCell
*b
);
95 static bool is_checksummed_file(const char *fullpath
, const char *filename
);
96 static int basebackup_read_file(int fd
, char *buf
, size_t nbytes
, off_t offset
,
97 const char *filename
, bool partial_read_ok
);
/* Was the backup currently in-progress initiated in recovery mode? */
static bool backup_started_in_recovery = false;

/* Total number of checksum failures during base backup. */
static long long int total_checksum_failures;

/* Do not verify checksums. */
static bool noverify_checksums = false;
/*
 * Definition of one element part of an exclusion list, used for paths part
 * of checksum validation or base backups.  "name" is the name of the file
 * or path to check for exclusion.  If "match_prefix" is true, any items
 * matching the name as prefix are excluded.
 */
struct exclude_list_item
{
	const char *name;
	bool		match_prefix;
};
121 * The contents of these directories are removed or recreated during server
122 * start so they are not included in backups. The directories themselves are
123 * kept and included as empty to preserve access permissions.
125 * Note: this list should be kept in sync with the filter lists in pg_rewind's
128 static const char *const excludeDirContents
[] =
131 * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped
132 * because extensions like pg_stat_statements store data there.
137 * It is generally not useful to backup the contents of this directory
138 * even if the intention is to restore to another primary. See backup.sgml
139 * for a more detailed description.
143 /* Contents removed on startup, see dsm_cleanup_for_mmap(). */
146 /* Contents removed on startup, see AsyncShmemInit(). */
150 * Old contents are loaded for possible debugging but are not required for
151 * normal operation, see SerialInit().
155 /* Contents removed on startup, see DeleteAllExportedSnapshotFiles(). */
158 /* Contents zeroed on startup, see StartupSUBTRANS(). */
166 * List of files excluded from backups.
168 static const struct exclude_list_item excludeFiles
[] =
170 /* Skip auto conf temporary file. */
171 {PG_AUTOCONF_FILENAME
".tmp", false},
173 /* Skip current log file temporary file */
174 {LOG_METAINFO_DATAFILE_TMP
, false},
177 * Skip relation cache because it is rebuilt on startup. This includes
180 {RELCACHE_INIT_FILENAME
, true},
183 * backup_label and tablespace_map should not exist in a running cluster
184 * capable of doing an online backup, but exclude them just in case.
186 {BACKUP_LABEL_FILE
, false},
187 {TABLESPACE_MAP
, false},
190 * If there's a backup_manifest, it belongs to a backup that was used to
191 * start this server. It is *not* correct for this backup. Our
192 * backup_manifest is injected into the backup separately if users want
195 {"backup_manifest", false},
197 {"postmaster.pid", false},
198 {"postmaster.opts", false},
205 * List of files excluded from checksum validation.
207 * Note: this list should be kept in sync with what pg_checksums.c
210 static const struct exclude_list_item noChecksumFiles
[] = {
211 {"pg_control", false},
212 {"pg_filenode.map", false},
213 {"pg_internal.init", true},
214 {"PG_VERSION", false},
216 {"config_exec_params", true},
222 * Actually do a base backup for the specified tablespaces.
224 * This is split out mainly to avoid complaints about "variable might be
225 * clobbered by longjmp" from stupider versions of gcc.
228 perform_base_backup(basebackup_options
*opt
, bbsink
*sink
)
233 StringInfo labelfile
;
234 StringInfo tblspc_map_file
;
235 backup_manifest_info manifest
;
237 /* Initial backup state, insofar as we know it now. */
238 state
.tablespaces
= NIL
;
239 state
.tablespace_num
= 0;
240 state
.bytes_done
= 0;
241 state
.bytes_total
= 0;
242 state
.bytes_total_is_valid
= false;
244 /* we're going to use a BufFile, so we need a ResourceOwner */
245 Assert(CurrentResourceOwner
== NULL
);
246 CurrentResourceOwner
= ResourceOwnerCreate(NULL
, "base backup");
248 backup_started_in_recovery
= RecoveryInProgress();
250 labelfile
= makeStringInfo();
251 tblspc_map_file
= makeStringInfo();
252 InitializeBackupManifest(&manifest
, opt
->manifest
,
253 opt
->manifest_checksum_type
);
255 total_checksum_failures
= 0;
257 basebackup_progress_wait_checkpoint();
258 state
.startptr
= do_pg_backup_start(opt
->label
, opt
->fastcheckpoint
,
260 labelfile
, &state
.tablespaces
,
264 * Once do_pg_backup_start has been called, ensure that any failure causes
265 * us to abort the backup so we don't "leak" a backup counter. For this
266 * reason, *all* functionality between do_pg_backup_start() and the end of
267 * do_pg_backup_stop() should be inside the error cleanup block!
270 PG_ENSURE_ERROR_CLEANUP(do_pg_abort_backup
, BoolGetDatum(false));
275 /* Add a node for the base directory at the end */
276 ti
= palloc0(sizeof(tablespaceinfo
));
278 state
.tablespaces
= lappend(state
.tablespaces
, ti
);
281 * Calculate the total backup size by summing up the size of each
286 basebackup_progress_estimate_backup_size();
288 foreach(lc
, state
.tablespaces
)
290 tablespaceinfo
*tmp
= (tablespaceinfo
*) lfirst(lc
);
292 if (tmp
->path
== NULL
)
293 tmp
->size
= sendDir(sink
, ".", 1, true, state
.tablespaces
,
296 tmp
->size
= sendTablespace(sink
, tmp
->path
, tmp
->oid
, true,
298 state
.bytes_total
+= tmp
->size
;
300 state
.bytes_total_is_valid
= true;
303 /* notify basebackup sink about start of backup */
304 bbsink_begin_backup(sink
, &state
, SINK_BUFFER_LENGTH
);
306 /* Send off our tablespaces one by one */
307 foreach(lc
, state
.tablespaces
)
309 tablespaceinfo
*ti
= (tablespaceinfo
*) lfirst(lc
);
311 if (ti
->path
== NULL
)
314 bool sendtblspclinks
= true;
316 bbsink_begin_archive(sink
, "base.tar");
318 /* In the main tar, include the backup_label first... */
319 sendFileWithContent(sink
, BACKUP_LABEL_FILE
, labelfile
->data
,
322 /* Then the tablespace_map file, if required... */
323 if (opt
->sendtblspcmapfile
)
325 sendFileWithContent(sink
, TABLESPACE_MAP
, tblspc_map_file
->data
,
327 sendtblspclinks
= false;
330 /* Then the bulk of the files... */
331 sendDir(sink
, ".", 1, false, state
.tablespaces
,
332 sendtblspclinks
, &manifest
, NULL
);
334 /* ... and pg_control after everything else. */
335 if (lstat(XLOG_CONTROL_FILE
, &statbuf
) != 0)
337 (errcode_for_file_access(),
338 errmsg("could not stat file \"%s\": %m",
339 XLOG_CONTROL_FILE
)));
340 sendFile(sink
, XLOG_CONTROL_FILE
, XLOG_CONTROL_FILE
, &statbuf
,
341 false, InvalidOid
, &manifest
, NULL
);
345 char *archive_name
= psprintf("%s.tar", ti
->oid
);
347 bbsink_begin_archive(sink
, archive_name
);
349 sendTablespace(sink
, ti
->path
, ti
->oid
, false, &manifest
);
353 * If we're including WAL, and this is the main data directory we
354 * don't treat this as the end of the tablespace. Instead, we will
355 * include the xlog files below and stop afterwards. This is safe
356 * since the main data directory is always sent *last*.
358 if (opt
->includewal
&& ti
->path
== NULL
)
360 Assert(lnext(state
.tablespaces
, lc
) == NULL
);
364 /* Properly terminate the tarfile. */
365 StaticAssertStmt(2 * TAR_BLOCK_SIZE
<= BLCKSZ
,
366 "BLCKSZ too small for 2 tar blocks");
367 memset(sink
->bbs_buffer
, 0, 2 * TAR_BLOCK_SIZE
);
368 bbsink_archive_contents(sink
, 2 * TAR_BLOCK_SIZE
);
370 /* OK, that's the end of the archive. */
371 bbsink_end_archive(sink
);
375 basebackup_progress_wait_wal_archive(&state
);
376 endptr
= do_pg_backup_stop(labelfile
->data
, !opt
->nowait
, &endtli
);
378 PG_END_ENSURE_ERROR_CLEANUP(do_pg_abort_backup
, BoolGetDatum(false));
384 * We've left the last tar file "open", so we can now append the
385 * required WAL files to it.
387 char pathbuf
[MAXPGPATH
];
389 XLogSegNo startsegno
;
392 List
*historyFileList
= NIL
;
393 List
*walFileList
= NIL
;
394 char firstoff
[MAXFNAMELEN
];
395 char lastoff
[MAXFNAMELEN
];
401 basebackup_progress_transfer_wal();
404 * I'd rather not worry about timelines here, so scan pg_wal and
405 * include all WAL files in the range between 'startptr' and 'endptr',
406 * regardless of the timeline the file is stamped with. If there are
407 * some spurious WAL files belonging to timelines that don't belong in
408 * this server's history, they will be included too. Normally there
409 * shouldn't be such files, but if there are, there's little harm in
412 XLByteToSeg(state
.startptr
, startsegno
, wal_segment_size
);
413 XLogFileName(firstoff
, state
.starttli
, startsegno
, wal_segment_size
);
414 XLByteToPrevSeg(endptr
, endsegno
, wal_segment_size
);
415 XLogFileName(lastoff
, endtli
, endsegno
, wal_segment_size
);
417 dir
= AllocateDir("pg_wal");
418 while ((de
= ReadDir(dir
, "pg_wal")) != NULL
)
420 /* Does it look like a WAL segment, and is it in the range? */
421 if (IsXLogFileName(de
->d_name
) &&
422 strcmp(de
->d_name
+ 8, firstoff
+ 8) >= 0 &&
423 strcmp(de
->d_name
+ 8, lastoff
+ 8) <= 0)
425 walFileList
= lappend(walFileList
, pstrdup(de
->d_name
));
427 /* Does it look like a timeline history file? */
428 else if (IsTLHistoryFileName(de
->d_name
))
430 historyFileList
= lappend(historyFileList
, pstrdup(de
->d_name
));
436 * Before we go any further, check that none of the WAL segments we
439 CheckXLogRemoved(startsegno
, state
.starttli
);
442 * Sort the WAL filenames. We want to send the files in order from
443 * oldest to newest, to reduce the chance that a file is recycled
444 * before we get a chance to send it over.
446 list_sort(walFileList
, compareWalFileNames
);
449 * There must be at least one xlog file in the pg_wal directory, since
450 * we are doing backup-including-xlog.
452 if (walFileList
== NIL
)
454 (errmsg("could not find any WAL files")));
457 * Sanity check: the first and last segment should cover startptr and
458 * endptr, with no gaps in between.
460 XLogFromFileName((char *) linitial(walFileList
),
461 &tli
, &segno
, wal_segment_size
);
462 if (segno
!= startsegno
)
464 char startfname
[MAXFNAMELEN
];
466 XLogFileName(startfname
, state
.starttli
, startsegno
,
469 (errmsg("could not find WAL file \"%s\"", startfname
)));
471 foreach(lc
, walFileList
)
473 char *walFileName
= (char *) lfirst(lc
);
474 XLogSegNo currsegno
= segno
;
475 XLogSegNo nextsegno
= segno
+ 1;
477 XLogFromFileName(walFileName
, &tli
, &segno
, wal_segment_size
);
478 if (!(nextsegno
== segno
|| currsegno
== segno
))
480 char nextfname
[MAXFNAMELEN
];
482 XLogFileName(nextfname
, tli
, nextsegno
, wal_segment_size
);
484 (errmsg("could not find WAL file \"%s\"", nextfname
)));
487 if (segno
!= endsegno
)
489 char endfname
[MAXFNAMELEN
];
491 XLogFileName(endfname
, endtli
, endsegno
, wal_segment_size
);
493 (errmsg("could not find WAL file \"%s\"", endfname
)));
496 /* Ok, we have everything we need. Send the WAL files. */
497 foreach(lc
, walFileList
)
499 char *walFileName
= (char *) lfirst(lc
);
504 snprintf(pathbuf
, MAXPGPATH
, XLOGDIR
"/%s", walFileName
);
505 XLogFromFileName(walFileName
, &tli
, &segno
, wal_segment_size
);
507 fd
= OpenTransientFile(pathbuf
, O_RDONLY
| PG_BINARY
);
510 int save_errno
= errno
;
513 * Most likely reason for this is that the file was already
514 * removed by a checkpoint, so check for that to get a better
517 CheckXLogRemoved(segno
, tli
);
521 (errcode_for_file_access(),
522 errmsg("could not open file \"%s\": %m", pathbuf
)));
525 if (fstat(fd
, &statbuf
) != 0)
527 (errcode_for_file_access(),
528 errmsg("could not stat file \"%s\": %m",
530 if (statbuf
.st_size
!= wal_segment_size
)
532 CheckXLogRemoved(segno
, tli
);
534 (errcode_for_file_access(),
535 errmsg("unexpected WAL file size \"%s\"", walFileName
)));
538 /* send the WAL file itself */
539 _tarWriteHeader(sink
, pathbuf
, NULL
, &statbuf
, false);
541 while ((cnt
= basebackup_read_file(fd
, sink
->bbs_buffer
,
542 Min(sink
->bbs_buffer_length
,
543 wal_segment_size
- len
),
544 len
, pathbuf
, true)) > 0)
546 CheckXLogRemoved(segno
, tli
);
547 bbsink_archive_contents(sink
, cnt
);
551 if (len
== wal_segment_size
)
555 if (len
!= wal_segment_size
)
557 CheckXLogRemoved(segno
, tli
);
559 (errcode_for_file_access(),
560 errmsg("unexpected WAL file size \"%s\"", walFileName
)));
564 * wal_segment_size is a multiple of TAR_BLOCK_SIZE, so no need
567 Assert(wal_segment_size
% TAR_BLOCK_SIZE
== 0);
569 CloseTransientFile(fd
);
572 * Mark file as archived, otherwise files can get archived again
573 * after promotion of a new node. This is in line with
574 * walreceiver.c always doing an XLogArchiveForceDone() after a
577 StatusFilePath(pathbuf
, walFileName
, ".done");
578 sendFileWithContent(sink
, pathbuf
, "", &manifest
);
582 * Send timeline history files too. Only the latest timeline history
583 * file is required for recovery, and even that only if there happens
584 * to be a timeline switch in the first WAL segment that contains the
585 * checkpoint record, or if we're taking a base backup from a standby
586 * server and the target timeline changes while the backup is taken.
587 * But they are small and highly useful for debugging purposes, so
588 * better include them all, always.
590 foreach(lc
, historyFileList
)
592 char *fname
= lfirst(lc
);
594 snprintf(pathbuf
, MAXPGPATH
, XLOGDIR
"/%s", fname
);
596 if (lstat(pathbuf
, &statbuf
) != 0)
598 (errcode_for_file_access(),
599 errmsg("could not stat file \"%s\": %m", pathbuf
)));
601 sendFile(sink
, pathbuf
, pathbuf
, &statbuf
, false, InvalidOid
,
604 /* unconditionally mark file as archived */
605 StatusFilePath(pathbuf
, fname
, ".done");
606 sendFileWithContent(sink
, pathbuf
, "", &manifest
);
609 /* Properly terminate the tar file. */
610 StaticAssertStmt(2 * TAR_BLOCK_SIZE
<= BLCKSZ
,
611 "BLCKSZ too small for 2 tar blocks");
612 memset(sink
->bbs_buffer
, 0, 2 * TAR_BLOCK_SIZE
);
613 bbsink_archive_contents(sink
, 2 * TAR_BLOCK_SIZE
);
615 /* OK, that's the end of the archive. */
616 bbsink_end_archive(sink
);
619 AddWALInfoToBackupManifest(&manifest
, state
.startptr
, state
.starttli
,
622 SendBackupManifest(&manifest
, sink
);
624 bbsink_end_backup(sink
, endptr
, endtli
);
626 if (total_checksum_failures
)
628 if (total_checksum_failures
> 1)
630 (errmsg_plural("%lld total checksum verification failure",
631 "%lld total checksum verification failures",
632 total_checksum_failures
,
633 total_checksum_failures
)));
636 (errcode(ERRCODE_DATA_CORRUPTED
),
637 errmsg("checksum verification failure during base backup")));
641 * Make sure to free the manifest before the resource owners as manifests
642 * use cryptohash contexts that may depend on resource owners (like
645 FreeBackupManifest(&manifest
);
647 /* clean up the resource owner we created */
648 WalSndResourceCleanup(true);
650 basebackup_progress_done();
654 * list_sort comparison function, to compare log/seg portion of WAL segment
655 * filenames, ignoring the timeline portion.
658 compareWalFileNames(const ListCell
*a
, const ListCell
*b
)
660 char *fna
= (char *) lfirst(a
);
661 char *fnb
= (char *) lfirst(b
);
663 return strcmp(fna
+ 8, fnb
+ 8);
667 * Parse the base backup options passed down by the parser
670 parse_basebackup_options(List
*options
, basebackup_options
*opt
)
673 bool o_label
= false;
674 bool o_progress
= false;
675 bool o_checkpoint
= false;
676 bool o_nowait
= false;
678 bool o_maxrate
= false;
679 bool o_tablespace_map
= false;
680 bool o_noverify_checksums
= false;
681 bool o_manifest
= false;
682 bool o_manifest_checksums
= false;
683 bool o_target
= false;
684 bool o_target_detail
= false;
685 char *target_str
= NULL
;
686 char *target_detail_str
= NULL
;
687 bool o_compression
= false;
688 bool o_compression_detail
= false;
689 char *compression_detail_str
= NULL
;
691 MemSet(opt
, 0, sizeof(*opt
));
692 opt
->manifest
= MANIFEST_OPTION_NO
;
693 opt
->manifest_checksum_type
= CHECKSUM_TYPE_CRC32C
;
694 opt
->compression
= PG_COMPRESSION_NONE
;
695 opt
->compression_specification
.algorithm
= PG_COMPRESSION_NONE
;
697 foreach(lopt
, options
)
699 DefElem
*defel
= (DefElem
*) lfirst(lopt
);
701 if (strcmp(defel
->defname
, "label") == 0)
705 (errcode(ERRCODE_SYNTAX_ERROR
),
706 errmsg("duplicate option \"%s\"", defel
->defname
)));
707 opt
->label
= defGetString(defel
);
710 else if (strcmp(defel
->defname
, "progress") == 0)
714 (errcode(ERRCODE_SYNTAX_ERROR
),
715 errmsg("duplicate option \"%s\"", defel
->defname
)));
716 opt
->progress
= defGetBoolean(defel
);
719 else if (strcmp(defel
->defname
, "checkpoint") == 0)
721 char *optval
= defGetString(defel
);
725 (errcode(ERRCODE_SYNTAX_ERROR
),
726 errmsg("duplicate option \"%s\"", defel
->defname
)));
727 if (pg_strcasecmp(optval
, "fast") == 0)
728 opt
->fastcheckpoint
= true;
729 else if (pg_strcasecmp(optval
, "spread") == 0)
730 opt
->fastcheckpoint
= false;
733 (errcode(ERRCODE_SYNTAX_ERROR
),
734 errmsg("unrecognized checkpoint type: \"%s\"",
738 else if (strcmp(defel
->defname
, "wait") == 0)
742 (errcode(ERRCODE_SYNTAX_ERROR
),
743 errmsg("duplicate option \"%s\"", defel
->defname
)));
744 opt
->nowait
= !defGetBoolean(defel
);
747 else if (strcmp(defel
->defname
, "wal") == 0)
751 (errcode(ERRCODE_SYNTAX_ERROR
),
752 errmsg("duplicate option \"%s\"", defel
->defname
)));
753 opt
->includewal
= defGetBoolean(defel
);
756 else if (strcmp(defel
->defname
, "max_rate") == 0)
762 (errcode(ERRCODE_SYNTAX_ERROR
),
763 errmsg("duplicate option \"%s\"", defel
->defname
)));
765 maxrate
= defGetInt64(defel
);
766 if (maxrate
< MAX_RATE_LOWER
|| maxrate
> MAX_RATE_UPPER
)
768 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE
),
769 errmsg("%d is outside the valid range for parameter \"%s\" (%d .. %d)",
770 (int) maxrate
, "MAX_RATE", MAX_RATE_LOWER
, MAX_RATE_UPPER
)));
772 opt
->maxrate
= (uint32
) maxrate
;
775 else if (strcmp(defel
->defname
, "tablespace_map") == 0)
777 if (o_tablespace_map
)
779 (errcode(ERRCODE_SYNTAX_ERROR
),
780 errmsg("duplicate option \"%s\"", defel
->defname
)));
781 opt
->sendtblspcmapfile
= defGetBoolean(defel
);
782 o_tablespace_map
= true;
784 else if (strcmp(defel
->defname
, "verify_checksums") == 0)
786 if (o_noverify_checksums
)
788 (errcode(ERRCODE_SYNTAX_ERROR
),
789 errmsg("duplicate option \"%s\"", defel
->defname
)));
790 noverify_checksums
= !defGetBoolean(defel
);
791 o_noverify_checksums
= true;
793 else if (strcmp(defel
->defname
, "manifest") == 0)
795 char *optval
= defGetString(defel
);
800 (errcode(ERRCODE_SYNTAX_ERROR
),
801 errmsg("duplicate option \"%s\"", defel
->defname
)));
802 if (parse_bool(optval
, &manifest_bool
))
805 opt
->manifest
= MANIFEST_OPTION_YES
;
807 opt
->manifest
= MANIFEST_OPTION_NO
;
809 else if (pg_strcasecmp(optval
, "force-encode") == 0)
810 opt
->manifest
= MANIFEST_OPTION_FORCE_ENCODE
;
813 (errcode(ERRCODE_SYNTAX_ERROR
),
814 errmsg("unrecognized manifest option: \"%s\"",
818 else if (strcmp(defel
->defname
, "manifest_checksums") == 0)
820 char *optval
= defGetString(defel
);
822 if (o_manifest_checksums
)
824 (errcode(ERRCODE_SYNTAX_ERROR
),
825 errmsg("duplicate option \"%s\"", defel
->defname
)));
826 if (!pg_checksum_parse_type(optval
,
827 &opt
->manifest_checksum_type
))
829 (errcode(ERRCODE_SYNTAX_ERROR
),
830 errmsg("unrecognized checksum algorithm: \"%s\"",
832 o_manifest_checksums
= true;
834 else if (strcmp(defel
->defname
, "target") == 0)
838 (errcode(ERRCODE_SYNTAX_ERROR
),
839 errmsg("duplicate option \"%s\"", defel
->defname
)));
840 target_str
= defGetString(defel
);
843 else if (strcmp(defel
->defname
, "target_detail") == 0)
845 char *optval
= defGetString(defel
);
849 (errcode(ERRCODE_SYNTAX_ERROR
),
850 errmsg("duplicate option \"%s\"", defel
->defname
)));
851 target_detail_str
= optval
;
852 o_target_detail
= true;
854 else if (strcmp(defel
->defname
, "compression") == 0)
856 char *optval
= defGetString(defel
);
860 (errcode(ERRCODE_SYNTAX_ERROR
),
861 errmsg("duplicate option \"%s\"", defel
->defname
)));
862 if (!parse_compress_algorithm(optval
, &opt
->compression
))
864 (errcode(ERRCODE_SYNTAX_ERROR
),
865 errmsg("unrecognized compression algorithm \"%s\"",
867 o_compression
= true;
869 else if (strcmp(defel
->defname
, "compression_detail") == 0)
871 if (o_compression_detail
)
873 (errcode(ERRCODE_SYNTAX_ERROR
),
874 errmsg("duplicate option \"%s\"", defel
->defname
)));
875 compression_detail_str
= defGetString(defel
);
876 o_compression_detail
= true;
880 (errcode(ERRCODE_SYNTAX_ERROR
),
881 errmsg("unrecognized base backup option: \"%s\"",
885 if (opt
->label
== NULL
)
886 opt
->label
= "base backup";
887 if (opt
->manifest
== MANIFEST_OPTION_NO
)
889 if (o_manifest_checksums
)
891 (errcode(ERRCODE_SYNTAX_ERROR
),
892 errmsg("manifest checksums require a backup manifest")));
893 opt
->manifest_checksum_type
= CHECKSUM_TYPE_NONE
;
896 if (target_str
== NULL
)
898 if (target_detail_str
!= NULL
)
900 (errcode(ERRCODE_SYNTAX_ERROR
),
901 errmsg("target detail cannot be used without target")));
902 opt
->use_copytblspc
= true;
903 opt
->send_to_client
= true;
905 else if (strcmp(target_str
, "client") == 0)
907 if (target_detail_str
!= NULL
)
909 (errcode(ERRCODE_SYNTAX_ERROR
),
910 errmsg("target '%s' does not accept a target detail",
912 opt
->send_to_client
= true;
916 BaseBackupGetTargetHandle(target_str
, target_detail_str
);
918 if (o_compression_detail
&& !o_compression
)
920 (errcode(ERRCODE_SYNTAX_ERROR
),
921 errmsg("compression detail requires compression")));
927 parse_compress_specification(opt
->compression
, compression_detail_str
,
928 &opt
->compression_specification
);
930 validate_compress_specification(&opt
->compression_specification
);
931 if (error_detail
!= NULL
)
933 errcode(ERRCODE_SYNTAX_ERROR
),
934 errmsg("invalid compression specification: %s",
941 * SendBaseBackup() - send a complete base backup.
943 * The function will put the system into backup mode like pg_backup_start()
944 * does, so that the backup is consistent even though we read directly from
945 * the filesystem, bypassing the buffer cache.
948 SendBaseBackup(BaseBackupCmd
*cmd
)
950 basebackup_options opt
;
952 SessionBackupState status
= get_backup_status();
954 if (status
== SESSION_BACKUP_RUNNING
)
956 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE
),
957 errmsg("a backup is already in progress in this session")));
959 parse_basebackup_options(cmd
->options
, &opt
);
961 WalSndSetState(WALSNDSTATE_BACKUP
);
963 if (update_process_title
)
965 char activitymsg
[50];
967 snprintf(activitymsg
, sizeof(activitymsg
), "sending backup \"%s\"",
969 set_ps_display(activitymsg
);
973 * If the target is specifically 'client' then set up to stream the backup
974 * to the client; otherwise, it's being sent someplace else and should not
975 * be sent to the client. BaseBackupGetSink has the job of setting up a
976 * sink to send the backup data wherever it needs to go.
978 sink
= bbsink_copystream_new(opt
.send_to_client
);
979 if (opt
.target_handle
!= NULL
)
980 sink
= BaseBackupGetSink(opt
.target_handle
, sink
);
982 /* Set up network throttling, if client requested it */
984 sink
= bbsink_throttle_new(sink
, opt
.maxrate
);
986 /* Set up server-side compression, if client requested it */
987 if (opt
.compression
== PG_COMPRESSION_GZIP
)
988 sink
= bbsink_gzip_new(sink
, &opt
.compression_specification
);
989 else if (opt
.compression
== PG_COMPRESSION_LZ4
)
990 sink
= bbsink_lz4_new(sink
, &opt
.compression_specification
);
991 else if (opt
.compression
== PG_COMPRESSION_ZSTD
)
992 sink
= bbsink_zstd_new(sink
, &opt
.compression_specification
);
994 /* Set up progress reporting. */
995 sink
= bbsink_progress_new(sink
, opt
.progress
);
998 * Perform the base backup, but make sure we clean up the bbsink even if
1003 perform_base_backup(&opt
, sink
);
1007 bbsink_cleanup(sink
);
1013 * Inject a file with given name and content in the output tar stream.
1016 sendFileWithContent(bbsink
*sink
, const char *filename
, const char *content
,
1017 backup_manifest_info
*manifest
)
1019 struct stat statbuf
;
1022 pg_checksum_context checksum_ctx
;
1024 if (pg_checksum_init(&checksum_ctx
, manifest
->checksum_type
) < 0)
1025 elog(ERROR
, "could not initialize checksum of file \"%s\"",
1028 len
= strlen(content
);
1031 * Construct a stat struct for the backup_label file we're injecting in
1034 /* Windows doesn't have the concept of uid and gid */
1039 statbuf
.st_uid
= geteuid();
1040 statbuf
.st_gid
= getegid();
1042 statbuf
.st_mtime
= time(NULL
);
1043 statbuf
.st_mode
= pg_file_create_mode
;
1044 statbuf
.st_size
= len
;
1046 _tarWriteHeader(sink
, filename
, NULL
, &statbuf
, false);
1048 if (pg_checksum_update(&checksum_ctx
, (uint8
*) content
, len
) < 0)
1049 elog(ERROR
, "could not update checksum of file \"%s\"",
1052 while (bytes_done
< len
)
1054 size_t remaining
= len
- bytes_done
;
1055 size_t nbytes
= Min(sink
->bbs_buffer_length
, remaining
);
1057 memcpy(sink
->bbs_buffer
, content
, nbytes
);
1058 bbsink_archive_contents(sink
, nbytes
);
1059 bytes_done
+= nbytes
;
1062 _tarWritePadding(sink
, len
);
1064 AddFileToBackupManifest(manifest
, NULL
, filename
, len
,
1065 (pg_time_t
) statbuf
.st_mtime
, &checksum_ctx
);
1069 * Include the tablespace directory pointed to by 'path' in the output tar
1070 * stream. If 'sizeonly' is true, we just calculate a total length and return
1071 * it, without actually sending anything.
1073 * Only used to send auxiliary tablespaces, not PGDATA.
1076 sendTablespace(bbsink
*sink
, char *path
, char *spcoid
, bool sizeonly
,
1077 backup_manifest_info
*manifest
)
1080 char pathbuf
[MAXPGPATH
];
1081 struct stat statbuf
;
1084 * 'path' points to the tablespace location, but we only want to include
1085 * the version directory in it that belongs to us.
1087 snprintf(pathbuf
, sizeof(pathbuf
), "%s/%s", path
,
1088 TABLESPACE_VERSION_DIRECTORY
);
1091 * Store a directory entry in the tar file so we get the permissions
1094 if (lstat(pathbuf
, &statbuf
) != 0)
1096 if (errno
!= ENOENT
)
1098 (errcode_for_file_access(),
1099 errmsg("could not stat file or directory \"%s\": %m",
1102 /* If the tablespace went away while scanning, it's no error. */
1106 size
= _tarWriteHeader(sink
, TABLESPACE_VERSION_DIRECTORY
, NULL
, &statbuf
,
1109 /* Send all the files in the tablespace version directory */
1110 size
+= sendDir(sink
, pathbuf
, strlen(path
), sizeonly
, NIL
, true, manifest
,
1117 * Include all files from the given directory in the output tar stream. If
1118 * 'sizeonly' is true, we just calculate a total length and return it, without
1119 * actually sending anything.
1121 * Omit any directory in the tablespaces list, to avoid backing up
1122 * tablespaces twice when they were created inside PGDATA.
1124 * If sendtblspclinks is true, we need to include symlink
1125 * information in the tar file. If not, we can skip that
1126 * as it will be sent separately in the tablespace_map file.
1129 sendDir(bbsink
*sink
, const char *path
, int basepathlen
, bool sizeonly
,
1130 List
*tablespaces
, bool sendtblspclinks
, backup_manifest_info
*manifest
,
1135 char pathbuf
[MAXPGPATH
* 2];
1136 struct stat statbuf
;
1138 const char *lastDir
; /* Split last dir from parent path. */
1139 bool isDbDir
= false; /* Does this directory contain relations? */
1142 * Determine if the current path is a database directory that can contain
1145 * Start by finding the location of the delimiter between the parent path
1146 * and the current path.
1148 lastDir
= last_dir_separator(path
);
1150 /* Does this path look like a database path (i.e. all digits)? */
1151 if (lastDir
!= NULL
&&
1152 strspn(lastDir
+ 1, "0123456789") == strlen(lastDir
+ 1))
1154 /* Part of path that contains the parent directory. */
1155 int parentPathLen
= lastDir
- path
;
1158 * Mark path as a database directory if the parent path is either
1159 * $PGDATA/base or a tablespace version path.
1161 if (strncmp(path
, "./base", parentPathLen
) == 0 ||
1162 (parentPathLen
>= (sizeof(TABLESPACE_VERSION_DIRECTORY
) - 1) &&
1163 strncmp(lastDir
- (sizeof(TABLESPACE_VERSION_DIRECTORY
) - 1),
1164 TABLESPACE_VERSION_DIRECTORY
,
1165 sizeof(TABLESPACE_VERSION_DIRECTORY
) - 1) == 0))
1169 dir
= AllocateDir(path
);
1170 while ((de
= ReadDir(dir
, path
)) != NULL
)
1174 ForkNumber relForkNum
; /* Type of fork if file is a relation */
1175 int relOidChars
; /* Chars in filename that are the rel oid */
1177 /* Skip special stuff */
1178 if (strcmp(de
->d_name
, ".") == 0 || strcmp(de
->d_name
, "..") == 0)
1181 /* Skip temporary files */
1182 if (strncmp(de
->d_name
,
1183 PG_TEMP_FILE_PREFIX
,
1184 strlen(PG_TEMP_FILE_PREFIX
)) == 0)
1188 * Check if the postmaster has signaled us to exit, and abort with an
1189 * error in that case. The error handler further up will call
1190 * do_pg_abort_backup() for us. Also check that if the backup was
1191 * started while still in recovery, the server wasn't promoted.
1192 * do_pg_backup_stop() will check that too, but it's better to stop
1193 * the backup early than continue to the end and fail there.
1195 CHECK_FOR_INTERRUPTS();
1196 if (RecoveryInProgress() != backup_started_in_recovery
)
1198 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE
),
1199 errmsg("the standby was promoted during online backup"),
1200 errhint("This means that the backup being taken is corrupt "
1201 "and should not be used. "
1202 "Try taking another online backup.")));
1204 /* Scan for files that should be excluded */
1205 excludeFound
= false;
1206 for (excludeIdx
= 0; excludeFiles
[excludeIdx
].name
!= NULL
; excludeIdx
++)
1208 int cmplen
= strlen(excludeFiles
[excludeIdx
].name
);
1210 if (!excludeFiles
[excludeIdx
].match_prefix
)
1212 if (strncmp(de
->d_name
, excludeFiles
[excludeIdx
].name
, cmplen
) == 0)
1214 elog(DEBUG1
, "file \"%s\" excluded from backup", de
->d_name
);
1215 excludeFound
= true;
1223 /* Exclude all forks for unlogged tables except the init fork */
1225 parse_filename_for_nontemp_relation(de
->d_name
, &relOidChars
,
1228 /* Never exclude init forks */
1229 if (relForkNum
!= INIT_FORKNUM
)
1231 char initForkFile
[MAXPGPATH
];
1232 char relOid
[OIDCHARS
+ 1];
1235 * If any other type of fork, check if there is an init fork
1236 * with the same OID. If so, the file can be excluded.
1238 memcpy(relOid
, de
->d_name
, relOidChars
);
1239 relOid
[relOidChars
] = '\0';
1240 snprintf(initForkFile
, sizeof(initForkFile
), "%s/%s_init",
1243 if (lstat(initForkFile
, &statbuf
) == 0)
1246 "unlogged relation file \"%s\" excluded from backup",
1254 /* Exclude temporary relations */
1255 if (isDbDir
&& looks_like_temp_rel_name(de
->d_name
))
1258 "temporary relation file \"%s\" excluded from backup",
1264 snprintf(pathbuf
, sizeof(pathbuf
), "%s/%s", path
, de
->d_name
);
1266 /* Skip pg_control here to back up it last */
1267 if (strcmp(pathbuf
, "./global/pg_control") == 0)
1270 if (lstat(pathbuf
, &statbuf
) != 0)
1272 if (errno
!= ENOENT
)
1274 (errcode_for_file_access(),
1275 errmsg("could not stat file or directory \"%s\": %m",
1278 /* If the file went away while scanning, it's not an error. */
1282 /* Scan for directories whose contents should be excluded */
1283 excludeFound
= false;
1284 for (excludeIdx
= 0; excludeDirContents
[excludeIdx
] != NULL
; excludeIdx
++)
1286 if (strcmp(de
->d_name
, excludeDirContents
[excludeIdx
]) == 0)
1288 elog(DEBUG1
, "contents of directory \"%s\" excluded from backup", de
->d_name
);
1289 convert_link_to_directory(pathbuf
, &statbuf
);
1290 size
+= _tarWriteHeader(sink
, pathbuf
+ basepathlen
+ 1, NULL
,
1291 &statbuf
, sizeonly
);
1292 excludeFound
= true;
1301 * We can skip pg_wal, the WAL segments need to be fetched from the
1302 * WAL archive anyway. But include it as an empty directory anyway, so
1303 * we get permissions right.
1305 if (strcmp(pathbuf
, "./pg_wal") == 0)
1307 /* If pg_wal is a symlink, write it as a directory anyway */
1308 convert_link_to_directory(pathbuf
, &statbuf
);
1309 size
+= _tarWriteHeader(sink
, pathbuf
+ basepathlen
+ 1, NULL
,
1310 &statbuf
, sizeonly
);
1313 * Also send archive_status directory (by hackishly reusing
1314 * statbuf from above ...).
1316 size
+= _tarWriteHeader(sink
, "./pg_wal/archive_status", NULL
,
1317 &statbuf
, sizeonly
);
1319 continue; /* don't recurse into pg_wal */
1322 /* Allow symbolic links in pg_tblspc only */
1323 if (strcmp(path
, "./pg_tblspc") == 0 &&
1325 S_ISLNK(statbuf
.st_mode
)
1327 pgwin32_is_junction(pathbuf
)
1331 #if defined(HAVE_READLINK) || defined(WIN32)
1332 char linkpath
[MAXPGPATH
];
1335 rllen
= readlink(pathbuf
, linkpath
, sizeof(linkpath
));
1338 (errcode_for_file_access(),
1339 errmsg("could not read symbolic link \"%s\": %m",
1341 if (rllen
>= sizeof(linkpath
))
1343 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED
),
1344 errmsg("symbolic link \"%s\" target is too long",
1346 linkpath
[rllen
] = '\0';
1348 size
+= _tarWriteHeader(sink
, pathbuf
+ basepathlen
+ 1, linkpath
,
1349 &statbuf
, sizeonly
);
1353 * If the platform does not have symbolic links, it should not be
1354 * possible to have tablespaces - clearly somebody else created
1355 * them. Warn about it and ignore.
1358 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED
),
1359 errmsg("tablespaces are not supported on this platform")));
1361 #endif /* HAVE_READLINK */
1363 else if (S_ISDIR(statbuf
.st_mode
))
1365 bool skip_this_dir
= false;
1369 * Store a directory entry in the tar file so we can get the
1370 * permissions right.
1372 size
+= _tarWriteHeader(sink
, pathbuf
+ basepathlen
+ 1, NULL
, &statbuf
,
1376 * Call ourselves recursively for a directory, unless it happens
1377 * to be a separate tablespace located within PGDATA.
1379 foreach(lc
, tablespaces
)
1381 tablespaceinfo
*ti
= (tablespaceinfo
*) lfirst(lc
);
1384 * ti->rpath is the tablespace relative path within PGDATA, or
1385 * NULL if the tablespace has been properly located somewhere
1388 * Skip past the leading "./" in pathbuf when comparing.
1390 if (ti
->rpath
&& strcmp(ti
->rpath
, pathbuf
+ 2) == 0)
1392 skip_this_dir
= true;
1398 * skip sending directories inside pg_tblspc, if not required.
1400 if (strcmp(pathbuf
, "./pg_tblspc") == 0 && !sendtblspclinks
)
1401 skip_this_dir
= true;
1404 size
+= sendDir(sink
, pathbuf
, basepathlen
, sizeonly
, tablespaces
,
1405 sendtblspclinks
, manifest
, spcoid
);
1407 else if (S_ISREG(statbuf
.st_mode
))
1412 sent
= sendFile(sink
, pathbuf
, pathbuf
+ basepathlen
+ 1, &statbuf
,
1413 true, isDbDir
? atooid(lastDir
+ 1) : InvalidOid
,
1416 if (sent
|| sizeonly
)
1419 size
+= statbuf
.st_size
;
1421 /* Pad to a multiple of the tar block size. */
1422 size
+= tarPaddingBytesRequired(statbuf
.st_size
);
1424 /* Size of the header for the file. */
1425 size
+= TAR_BLOCK_SIZE
;
1430 (errmsg("skipping special file \"%s\"", pathbuf
)));
1437 * Check if a file should have its checksum validated.
1438 * We validate checksums on files in regular tablespaces
1439 * (including global and default) only, and in those there
1440 * are some files that are explicitly excluded.
1443 is_checksummed_file(const char *fullpath
, const char *filename
)
1445 /* Check that the file is in a tablespace */
1446 if (strncmp(fullpath
, "./global/", 9) == 0 ||
1447 strncmp(fullpath
, "./base/", 7) == 0 ||
1448 strncmp(fullpath
, "/", 1) == 0)
1452 /* Compare file against noChecksumFiles skip list */
1453 for (excludeIdx
= 0; noChecksumFiles
[excludeIdx
].name
!= NULL
; excludeIdx
++)
1455 int cmplen
= strlen(noChecksumFiles
[excludeIdx
].name
);
1457 if (!noChecksumFiles
[excludeIdx
].match_prefix
)
1459 if (strncmp(filename
, noChecksumFiles
[excludeIdx
].name
,
/*****
 * Functions for handling tar file format
 *
 * Copied from pg_dump, but modified to work with libpq for sending
 */
1478 * Given the member, write the TAR header & send the file.
1480 * If 'missing_ok' is true, will not throw an error if the file is not found.
1482 * If dboid is anything other than InvalidOid then any checksum failures
1483 * detected will get reported to the cumulative stats system.
1485 * Returns true if the file was successfully sent, false if 'missing_ok',
1486 * and the file did not exist.
1489 sendFile(bbsink
*sink
, const char *readfilename
, const char *tarfilename
,
1490 struct stat
*statbuf
, bool missing_ok
, Oid dboid
,
1491 backup_manifest_info
*manifest
, const char *spcoid
)
1494 BlockNumber blkno
= 0;
1495 bool block_retry
= false;
1497 int checksum_failures
= 0;
1505 bool verify_checksum
= false;
1506 pg_checksum_context checksum_ctx
;
1508 if (pg_checksum_init(&checksum_ctx
, manifest
->checksum_type
) < 0)
1509 elog(ERROR
, "could not initialize checksum of file \"%s\"",
1512 fd
= OpenTransientFile(readfilename
, O_RDONLY
| PG_BINARY
);
1515 if (errno
== ENOENT
&& missing_ok
)
1518 (errcode_for_file_access(),
1519 errmsg("could not open file \"%s\": %m", readfilename
)));
1522 _tarWriteHeader(sink
, tarfilename
, NULL
, statbuf
, false);
1524 if (!noverify_checksums
&& DataChecksumsEnabled())
1529 * Get the filename (excluding path). As last_dir_separator()
1530 * includes the last directory separator, we chop that off by
1531 * incrementing the pointer.
1533 filename
= last_dir_separator(readfilename
) + 1;
1535 if (is_checksummed_file(readfilename
, filename
))
1537 verify_checksum
= true;
1540 * Cut off at the segment boundary (".") to get the segment number
1541 * in order to mix it into the checksum.
1543 segmentpath
= strstr(filename
, ".");
1544 if (segmentpath
!= NULL
)
1546 segmentno
= atoi(segmentpath
+ 1);
1549 (errmsg("invalid segment number %d in file \"%s\"",
1550 segmentno
, filename
)));
1556 * Loop until we read the amount of data the caller told us to expect. The
1557 * file could be longer, if it was extended while we were sending it, but
1558 * for a base backup we can ignore such extended data. It will be restored
1561 while (len
< statbuf
->st_size
)
1563 size_t remaining
= statbuf
->st_size
- len
;
1565 /* Try to read some more data. */
1566 cnt
= basebackup_read_file(fd
, sink
->bbs_buffer
,
1567 Min(sink
->bbs_buffer_length
, remaining
),
1568 len
, readfilename
, true);
1571 * If we hit end-of-file, a concurrent truncation must have occurred.
1572 * That's not an error condition, because WAL replay will fix things
1579 * The checksums are verified at block level, so we iterate over the
1580 * buffer in chunks of BLCKSZ, after making sure that
1581 * TAR_SEND_SIZE/buf is divisible by BLCKSZ and we read a multiple of
1584 Assert((sink
->bbs_buffer_length
% BLCKSZ
) == 0);
1586 if (verify_checksum
&& (cnt
% BLCKSZ
!= 0))
1589 (errmsg("could not verify checksum in file \"%s\", block "
1590 "%u: read buffer size %d and page size %d "
1592 readfilename
, blkno
, (int) cnt
, BLCKSZ
)));
1593 verify_checksum
= false;
1596 if (verify_checksum
)
1598 for (i
= 0; i
< cnt
/ BLCKSZ
; i
++)
1600 page
= sink
->bbs_buffer
+ BLCKSZ
* i
;
1603 * Only check pages which have not been modified since the
1604 * start of the base backup. Otherwise, they might have been
1605 * written only halfway and the checksum would not be valid.
1606 * However, replaying WAL would reinstate the correct page in
1607 * this case. We also skip completely new pages, since they
1608 * don't have a checksum yet.
1610 if (!PageIsNew(page
) && PageGetLSN(page
) < sink
->bbs_state
->startptr
)
1612 checksum
= pg_checksum_page((char *) page
, blkno
+ segmentno
* RELSEG_SIZE
);
1613 phdr
= (PageHeader
) page
;
1614 if (phdr
->pd_checksum
!= checksum
)
1617 * Retry the block on the first failure. It's
1618 * possible that we read the first 4K page of the
1619 * block just before postgres updated the entire block
1620 * so it ends up looking torn to us. We only need to
1621 * retry once because the LSN should be updated to
1622 * something we can ignore on the next pass. If the
1623 * error happens again then it is a true validation
1626 if (block_retry
== false)
1630 /* Reread the failed block */
1632 basebackup_read_file(fd
,
1633 sink
->bbs_buffer
+ BLCKSZ
* i
,
1634 BLCKSZ
, len
+ BLCKSZ
* i
,
1637 if (reread_cnt
== 0)
1640 * If we hit end-of-file, a concurrent
1641 * truncation must have occurred, so break out
1642 * of this loop just as if the initial fread()
1643 * returned 0. We'll drop through to the same
1644 * code that handles that case. (We must fix
1645 * up cnt first, though.)
1651 /* Set flag so we know a retry was attempted */
1654 /* Reset loop to validate the block again */
1659 checksum_failures
++;
1661 if (checksum_failures
<= 5)
1663 (errmsg("checksum verification failed in "
1664 "file \"%s\", block %u: calculated "
1665 "%X but expected %X",
1666 readfilename
, blkno
, checksum
,
1667 phdr
->pd_checksum
)));
1668 if (checksum_failures
== 5)
1670 (errmsg("further checksum verification "
1671 "failures in file \"%s\" will not "
1672 "be reported", readfilename
)));
1675 block_retry
= false;
1680 bbsink_archive_contents(sink
, cnt
);
1682 /* Also feed it to the checksum machinery. */
1683 if (pg_checksum_update(&checksum_ctx
,
1684 (uint8
*) sink
->bbs_buffer
, cnt
) < 0)
1685 elog(ERROR
, "could not update checksum of base backup");
1690 /* If the file was truncated while we were sending it, pad it with zeros */
1691 while (len
< statbuf
->st_size
)
1693 size_t remaining
= statbuf
->st_size
- len
;
1694 size_t nbytes
= Min(sink
->bbs_buffer_length
, remaining
);
1696 MemSet(sink
->bbs_buffer
, 0, nbytes
);
1697 if (pg_checksum_update(&checksum_ctx
,
1698 (uint8
*) sink
->bbs_buffer
,
1700 elog(ERROR
, "could not update checksum of base backup");
1701 bbsink_archive_contents(sink
, nbytes
);
1706 * Pad to a block boundary, per tar format requirements. (This small piece
1707 * of data is probably not worth throttling, and is not checksummed
1708 * because it's not actually part of the file.)
1710 _tarWritePadding(sink
, len
);
1712 CloseTransientFile(fd
);
1714 if (checksum_failures
> 1)
1717 (errmsg_plural("file \"%s\" has a total of %d checksum verification failure",
1718 "file \"%s\" has a total of %d checksum verification failures",
1720 readfilename
, checksum_failures
)));
1722 pgstat_report_checksum_failures_in_db(dboid
, checksum_failures
);
1725 total_checksum_failures
+= checksum_failures
;
1727 AddFileToBackupManifest(manifest
, spcoid
, tarfilename
, statbuf
->st_size
,
1728 (pg_time_t
) statbuf
->st_mtime
, &checksum_ctx
);
1734 _tarWriteHeader(bbsink
*sink
, const char *filename
, const char *linktarget
,
1735 struct stat
*statbuf
, bool sizeonly
)
1742 * As of this writing, the smallest supported block size is 1kB, which
1743 * is twice TAR_BLOCK_SIZE. Since the buffer size is required to be a
1744 * multiple of BLCKSZ, it should be safe to assume that the buffer is
1745 * large enough to fit an entire tar block. We double-check by means
1746 * of these assertions.
1748 StaticAssertStmt(TAR_BLOCK_SIZE
<= BLCKSZ
,
1749 "BLCKSZ too small for tar block");
1750 Assert(sink
->bbs_buffer_length
>= TAR_BLOCK_SIZE
);
1752 rc
= tarCreateHeader(sink
->bbs_buffer
, filename
, linktarget
,
1753 statbuf
->st_size
, statbuf
->st_mode
,
1754 statbuf
->st_uid
, statbuf
->st_gid
,
1761 case TAR_NAME_TOO_LONG
:
1763 (errmsg("file name too long for tar format: \"%s\"",
1766 case TAR_SYMLINK_TOO_LONG
:
1768 (errmsg("symbolic link target too long for tar format: "
1769 "file name \"%s\", target \"%s\"",
1770 filename
, linktarget
)));
1773 elog(ERROR
, "unrecognized tar error: %d", rc
);
1776 bbsink_archive_contents(sink
, TAR_BLOCK_SIZE
);
1779 return TAR_BLOCK_SIZE
;
1783 * Pad with zero bytes out to a multiple of TAR_BLOCK_SIZE.
1786 _tarWritePadding(bbsink
*sink
, int len
)
1788 int pad
= tarPaddingBytesRequired(len
);
1791 * As in _tarWriteHeader, it should be safe to assume that the buffer is
1792 * large enough that we don't need to do this in multiple chunks.
1794 Assert(sink
->bbs_buffer_length
>= TAR_BLOCK_SIZE
);
1795 Assert(pad
<= TAR_BLOCK_SIZE
);
1799 MemSet(sink
->bbs_buffer
, 0, pad
);
1800 bbsink_archive_contents(sink
, pad
);
1805 * If the entry in statbuf is a link, then adjust statbuf to make it look like a
1806 * directory, so that it will be written that way.
1809 convert_link_to_directory(const char *pathbuf
, struct stat
*statbuf
)
1811 /* If symlink, write it as a directory anyway */
1813 if (S_ISLNK(statbuf
->st_mode
))
1815 if (pgwin32_is_junction(pathbuf
))
1817 statbuf
->st_mode
= S_IFDIR
| pg_dir_create_mode
;
1821 * Read some data from a file, setting a wait event and reporting any error
1824 * If partial_read_ok is false, also report an error if the number of bytes
1825 * read is not equal to the number of bytes requested.
1827 * Returns the number of bytes read.
1830 basebackup_read_file(int fd
, char *buf
, size_t nbytes
, off_t offset
,
1831 const char *filename
, bool partial_read_ok
)
1835 pgstat_report_wait_start(WAIT_EVENT_BASEBACKUP_READ
);
1836 rc
= pg_pread(fd
, buf
, nbytes
, offset
);
1837 pgstat_report_wait_end();
1841 (errcode_for_file_access(),
1842 errmsg("could not read file \"%s\": %m", filename
)));
1843 if (!partial_read_ok
&& rc
> 0 && rc
!= nbytes
)
1845 (errcode_for_file_access(),
1846 errmsg("could not read file \"%s\": read %d of %zu",
1847 filename
, rc
, nbytes
)));