/*-------------------------------------------------------------------------
 *
 * basebackup_incremental.c
 *	  code for incremental backup support
 *
 * This code isn't actually in charge of taking an incremental backup;
 * the actual construction of the incremental backup happens in
 * basebackup.c. Here, we're concerned with providing the necessary
 * supports for that operation. In particular, we need to parse the
 * backup manifest supplied by the user taking the incremental backup
 * and extract the required information from it.
 *
 * Portions Copyright (c) 2010-2024, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  src/backend/backup/basebackup_incremental.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/timeline.h"
#include "access/xlog.h"
#include "backup/basebackup_incremental.h"
#include "backup/walsummary.h"
#include "common/blkreftable.h"
#include "common/hashfn.h"
#include "common/int.h"
#include "common/parse_manifest.h"
#include "postmaster/walsummarizer.h"

#define BLOCKS_PER_READ			512

/*
 * We expect to find the last lines of the manifest, including the checksum,
 * in the last MIN_CHUNK bytes of the manifest. We trigger an incremental
 * parse step if we are about to overflow MAX_CHUNK bytes.
 */
#define MIN_CHUNK	1024
#define MAX_CHUNK	(128 * 1024)
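
/*
 * For illustration, given the values above: manifest data accumulates in
 * the buffer until appending the next chunk would exceed MAX_CHUNK (128kB);
 * each incremental parse step then consumes everything except the final
 * MIN_CHUNK (1kB), which is retained so that the manifest's closing lines
 * and checksum can be handed to the parser as a single final piece.
 */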

/*
 * Details extracted from the WAL ranges present in the supplied backup manifest.
 */
typedef struct
{
	TimeLineID	tli;
	XLogRecPtr	start_lsn;
	XLogRecPtr	end_lsn;
} backup_wal_range;

/*
 * Details extracted from the file list present in the supplied backup manifest.
 */
typedef struct
{
	uint32		status;			/* hash status */
	const char *path;
	uint64		size;
} backup_file_entry;

static uint32 hash_string_pointer(const char *s);
#define SH_PREFIX				backup_file
#define SH_ELEMENT_TYPE			backup_file_entry
#define SH_KEY_TYPE				const char *
#define SH_KEY					path
#define SH_HASH_KEY(tb, key)	hash_string_pointer(key)
#define SH_EQUAL(tb, a, b)		(strcmp(a, b) == 0)
#define SH_SCOPE				static inline
#define SH_DECLARE
#define SH_DEFINE
#include "lib/simplehash.h"
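
/*
 * The simplehash instantiation above provides the backup_file_create(),
 * backup_file_insert(), and backup_file_lookup() functions used below,
 * keyed by the file's path string.
 */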

struct IncrementalBackupInfo
{
	/* Memory context for this object and its subsidiary objects. */
	MemoryContext mcxt;

	/* Temporary buffer for storing the manifest while parsing it. */
	StringInfoData buf;

	/* WAL ranges extracted from the backup manifest. */
	List	   *manifest_wal_ranges;

	/*
	 * Files extracted from the backup manifest.
	 *
	 * We don't really need this information, because we use WAL summaries to
	 * figure out what's changed. It would be unsafe to just rely on the list
	 * of files that existed before, because it's possible for a file to be
	 * removed and a new one created with the same name and different
	 * contents. In such cases, the whole file must still be sent. We can tell
	 * from the WAL summaries whether that happened, but not from the file
	 * list.
	 *
	 * Nonetheless, this data is useful for sanity checking. If a file that we
	 * think we shouldn't need to send is not present in the manifest for the
	 * prior backup, something has gone terribly wrong. We retain the file
	 * names and sizes, but not the checksums or last modified times, for
	 * which we have no use.
	 *
	 * One significant downside of storing this data is that it consumes
	 * memory. If that turns out to be a problem, we might have to decide not
	 * to retain this information, or to make it optional.
	 */
	backup_file_hash *manifest_files;

	/*
	 * Block-reference table for the incremental backup.
	 *
	 * It's possible that storing the entire block-reference table in memory
	 * will be a problem for some users. The in-memory format that we're using
	 * here is pretty efficient, converging to little more than 1 bit per
	 * block for relation forks with large numbers of modified blocks. It's
	 * possible, however, that if you try to perform an incremental backup of
	 * a database with a sufficiently large number of relations on a
	 * sufficiently small machine, you could run out of memory here. If that
	 * turns out to be a problem in practice, we'll need to be more clever.
	 */
	BlockRefTable *brtab;

	/*
	 * State object for incremental JSON parsing
	 */
	JsonManifestParseIncrementalState *inc_state;
};

static void manifest_process_version(JsonManifestParseContext *context,
									 int manifest_version);
static void manifest_process_system_identifier(JsonManifestParseContext *context,
												uint64 manifest_system_identifier);
static void manifest_process_file(JsonManifestParseContext *context,
								  const char *pathname, uint64 size,
								  pg_checksum_type checksum_type,
								  int checksum_length,
								  uint8 *checksum_payload);
static void manifest_process_wal_range(JsonManifestParseContext *context,
									   TimeLineID tli, XLogRecPtr start_lsn,
									   XLogRecPtr end_lsn);
static void manifest_report_error(JsonManifestParseContext *context,
								  const char *fmt,...)
			pg_attribute_printf(2, 3) pg_attribute_noreturn();
static int	compare_block_numbers(const void *a, const void *b);

/*
 * Create a new object for storing information extracted from the manifest
 * supplied when creating an incremental backup.
 */
IncrementalBackupInfo *
CreateIncrementalBackupInfo(MemoryContext mcxt)
{
	IncrementalBackupInfo *ib;
	MemoryContext oldcontext;
	JsonManifestParseContext *context;

	oldcontext = MemoryContextSwitchTo(mcxt);

	ib = palloc0(sizeof(IncrementalBackupInfo));
	ib->mcxt = mcxt;

	initStringInfo(&ib->buf);

	/*
	 * It's hard to guess how many files a "typical" installation will have in
	 * the data directory, but a fresh initdb creates almost 1000 files as of
	 * this writing, so it seems to make sense for our estimate to be
	 * substantially higher.
	 */
	ib->manifest_files = backup_file_create(mcxt, 10000, NULL);

	/* Set up the callbacks used while parsing the manifest. */
	context = palloc0(sizeof(JsonManifestParseContext));
	context->private_data = ib;
	context->version_cb = manifest_process_version;
	context->system_identifier_cb = manifest_process_system_identifier;
	context->per_file_cb = manifest_process_file;
	context->per_wal_range_cb = manifest_process_wal_range;
	context->error_cb = manifest_report_error;

	ib->inc_state = json_parse_manifest_incremental_init(context);

	MemoryContextSwitchTo(oldcontext);

	return ib;
}

/*
 * Before taking an incremental backup, the caller must supply the backup
 * manifest from a prior backup. Each chunk of manifest data received
 * from the client should be passed to this function.
 */
void
AppendIncrementalManifestData(IncrementalBackupInfo *ib, const char *data,
							  int len)
{
	MemoryContext oldcontext;

	/* Switch to our memory context. */
	oldcontext = MemoryContextSwitchTo(ib->mcxt);

	if (ib->buf.len > MIN_CHUNK && ib->buf.len + len > MAX_CHUNK)
	{
		/*
		 * Time for an incremental parse. We'll do all but the last MIN_CHUNK
		 * so that we have enough left for the final piece.
		 */
		json_parse_manifest_incremental_chunk(ib->inc_state, ib->buf.data,
											  ib->buf.len - MIN_CHUNK, false);
		/* now remove what we just parsed */
		memmove(ib->buf.data, ib->buf.data + (ib->buf.len - MIN_CHUNK),
				MIN_CHUNK + 1);
		ib->buf.len = MIN_CHUNK;
	}

	appendBinaryStringInfo(&ib->buf, data, len);

	/* Switch back to previous memory context. */
	MemoryContextSwitchTo(oldcontext);
}

/*
 * Finalize an IncrementalBackupInfo object after all manifest data has
 * been supplied via calls to AppendIncrementalManifestData.
 */
void
FinalizeIncrementalManifest(IncrementalBackupInfo *ib)
{
	MemoryContext oldcontext;

	/* Switch to our memory context. */
	oldcontext = MemoryContextSwitchTo(ib->mcxt);

	/* Parse the last chunk of the manifest */
	json_parse_manifest_incremental_chunk(ib->inc_state, ib->buf.data,
										  ib->buf.len, true);

	/* Done with the buffer, so release memory. */
	pfree(ib->buf.data);
	ib->buf.data = NULL;

	/* Done with inc_state, so release that memory too */
	json_parse_manifest_incremental_shutdown(ib->inc_state);

	/* Switch back to previous memory context. */
	MemoryContextSwitchTo(oldcontext);
}

/*
 * Prepare to take an incremental backup.
 *
 * Before this function is called, AppendIncrementalManifestData and
 * FinalizeIncrementalManifest should have already been called to pass all
 * the manifest data to this object.
 *
 * This function performs sanity checks on the data extracted from the
 * manifest and figures out for which WAL ranges we need summaries, and
 * whether those summaries are available. Then, it reads and combines the
 * data from those summary files. It also updates the backup_state with the
 * reference TLI and LSN for the prior backup.
 */
void
PrepareForIncrementalBackup(IncrementalBackupInfo *ib,
							BackupState *backup_state)
{
	MemoryContext oldcontext;
	List	   *expectedTLEs;
	List	   *all_wslist,
			   *required_wslist = NIL;
	ListCell   *lc;
	TimeLineHistoryEntry **tlep;
	int			num_wal_ranges;
	int			i;
	bool		found_backup_start_tli = false;
	TimeLineID	earliest_wal_range_tli = 0;
	XLogRecPtr	earliest_wal_range_start_lsn = InvalidXLogRecPtr;
	TimeLineID	latest_wal_range_tli = 0;

	Assert(ib->buf.data == NULL);

	/* Switch to our memory context. */
	oldcontext = MemoryContextSwitchTo(ib->mcxt);

	/*
	 * A valid backup manifest must always contain at least one WAL range
	 * (usually exactly one, unless the backup spanned a timeline switch).
	 */
	num_wal_ranges = list_length(ib->manifest_wal_ranges);
	if (num_wal_ranges == 0)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("manifest contains no required WAL ranges")));

	/*
	 * Match up the TLIs that appear in the WAL ranges of the backup manifest
	 * with those that appear in this server's timeline history. We expect
	 * every backup_wal_range to match to a TimeLineHistoryEntry; if it does
	 * not, that's an error.
	 *
	 * This loop also decides which of the WAL ranges in the manifest is most
	 * ancient and which one is the newest, according to the timeline history
	 * of this server, and stores TLIs of those WAL ranges into
	 * earliest_wal_range_tli and latest_wal_range_tli. It also updates
	 * earliest_wal_range_start_lsn to the start LSN of the WAL range for
	 * earliest_wal_range_tli.
	 *
	 * Note that the return value of readTimeLineHistory puts the latest
	 * timeline at the beginning of the list, not the end. Hence, the earliest
	 * TLI is the one that occurs nearest the end of the list returned by
	 * readTimeLineHistory, and the latest TLI is the one that occurs closest
	 * to the beginning.
	 */
	expectedTLEs = readTimeLineHistory(backup_state->starttli);
	tlep = palloc0(num_wal_ranges * sizeof(TimeLineHistoryEntry *));
	for (i = 0; i < num_wal_ranges; ++i)
	{
		backup_wal_range *range = list_nth(ib->manifest_wal_ranges, i);
		bool		saw_earliest_wal_range_tli = false;
		bool		saw_latest_wal_range_tli = false;

		/* Search this server's history for this WAL range's TLI. */
		foreach(lc, expectedTLEs)
		{
			TimeLineHistoryEntry *tle = lfirst(lc);

			if (tle->tli == range->tli)
			{
				tlep[i] = tle;
				break;
			}

			if (tle->tli == earliest_wal_range_tli)
				saw_earliest_wal_range_tli = true;
			if (tle->tli == latest_wal_range_tli)
				saw_latest_wal_range_tli = true;
		}

		/*
		 * An incremental backup can only be taken relative to a backup that
		 * represents a previous state of this server. If the backup requires
		 * WAL from a timeline that's not in our history, that definitely
		 * isn't the case.
		 */
		if (tlep[i] == NULL)
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("timeline %u found in manifest, but not in this server's history",
							range->tli)));

		/*
		 * If we found this TLI in the server's history before encountering
		 * the latest TLI seen so far in the server's history, then this TLI
		 * is the latest one seen so far.
		 *
		 * If on the other hand we saw the earliest TLI seen so far before
		 * finding this TLI, this TLI is earlier than the earliest one seen so
		 * far. And if this is the first TLI for which we've searched, it's
		 * also the earliest one seen so far.
		 *
		 * On the first loop iteration, both things should necessarily be
		 * true.
		 */
		if (!saw_latest_wal_range_tli)
			latest_wal_range_tli = range->tli;
		if (earliest_wal_range_tli == 0 || saw_earliest_wal_range_tli)
		{
			earliest_wal_range_tli = range->tli;
			earliest_wal_range_start_lsn = range->start_lsn;
		}
	}

	/*
	 * Propagate information about the prior backup into the backup_label
	 * that will be generated for this backup.
	 */
	backup_state->istartpoint = earliest_wal_range_start_lsn;
	backup_state->istarttli = earliest_wal_range_tli;

	/*
	 * Sanity check start and end LSNs for the WAL ranges in the manifest.
	 *
	 * Commonly, there won't be any timeline switches during the prior backup
	 * at all, but if there are, they should happen at the same LSNs that this
	 * server switched timelines.
	 *
	 * Whether there are any timeline switches during the prior backup or not,
	 * the prior backup shouldn't require any WAL from a timeline prior to the
	 * start of that timeline. It also shouldn't require any WAL from later
	 * than the start of this backup.
	 *
	 * If any of these sanity checks fail, one possible explanation is that
	 * the user has generated WAL on the same timeline with the same LSNs more
	 * than once. For instance, if two standbys running on timeline 1 were
	 * both promoted and (due to a broken archiving setup) both selected new
	 * timeline ID 2, then it's possible that one of these checks might trip.
	 *
	 * Note that there are lots of ways for the user to do something very bad
	 * without tripping any of these checks, and they are not intended to be
	 * comprehensive. It's pretty hard to see how we could be certain of
	 * anything here. However, if there's a problem staring us right in the
	 * face, it's best to report it, so we do.
	 */
	for (i = 0; i < num_wal_ranges; ++i)
	{
		backup_wal_range *range = list_nth(ib->manifest_wal_ranges, i);

		if (range->tli == earliest_wal_range_tli)
		{
			if (range->start_lsn < tlep[i]->begin)
				ereport(ERROR,
						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						 errmsg("manifest requires WAL from initial timeline %u starting at %X/%X, but that timeline begins at %X/%X",
								range->tli,
								LSN_FORMAT_ARGS(range->start_lsn),
								LSN_FORMAT_ARGS(tlep[i]->begin))));
		}
		else
		{
			if (range->start_lsn != tlep[i]->begin)
				ereport(ERROR,
						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						 errmsg("manifest requires WAL from continuation timeline %u starting at %X/%X, but that timeline begins at %X/%X",
								range->tli,
								LSN_FORMAT_ARGS(range->start_lsn),
								LSN_FORMAT_ARGS(tlep[i]->begin))));
		}

		if (range->tli == latest_wal_range_tli)
		{
			if (range->end_lsn > backup_state->startpoint)
				ereport(ERROR,
						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						 errmsg("manifest requires WAL from final timeline %u ending at %X/%X, but this backup starts at %X/%X",
								range->tli,
								LSN_FORMAT_ARGS(range->end_lsn),
								LSN_FORMAT_ARGS(backup_state->startpoint)),
						 errhint("This can happen for incremental backups on a standby if there was little activity since the previous backup.")));
		}
		else
		{
			if (range->end_lsn != tlep[i]->end)
				ereport(ERROR,
						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						 errmsg("manifest requires WAL from non-final timeline %u ending at %X/%X, but this server switched timelines at %X/%X",
								range->tli,
								LSN_FORMAT_ARGS(range->end_lsn),
								LSN_FORMAT_ARGS(tlep[i]->end))));
		}
	}

	/*
	 * Wait for WAL summarization to catch up to the backup start LSN. This
	 * will throw an error if the WAL summarizer appears to be stuck. If WAL
	 * summarization gets disabled while we're waiting, this will return
	 * immediately, and we'll error out further down if the WAL summaries are
	 * incomplete.
	 */
	WaitForWalSummarization(backup_state->startpoint);

	/*
	 * Retrieve a list of all WAL summaries on any timeline that overlap with
	 * the LSN range of interest. We could instead call GetWalSummaries() once
	 * per timeline in the loop that follows, but that would involve reading
	 * the directory multiple times. It should be mildly faster - and perhaps
	 * a bit safer - to do it just once.
	 */
	all_wslist = GetWalSummaries(0, earliest_wal_range_start_lsn,
								 backup_state->startpoint);

	/*
	 * We need WAL summaries for everything that happened during the prior
	 * backup and everything that happened afterward up until the point where
	 * the current backup started.
	 */
	foreach(lc, expectedTLEs)
	{
		TimeLineHistoryEntry *tle = lfirst(lc);
		XLogRecPtr	tli_start_lsn = tle->begin;
		XLogRecPtr	tli_end_lsn = tle->end;
		XLogRecPtr	tli_missing_lsn = InvalidXLogRecPtr;
		List	   *tli_wslist;

		/*
		 * Working through the history of this server from the current
		 * timeline backwards, we skip everything until we find the timeline
		 * where this backup started. Most of the time, this means we won't
		 * skip anything at all, as it's unlikely that the timeline has
		 * changed since the beginning of the backup moments ago.
		 */
		if (tle->tli == backup_state->starttli)
		{
			found_backup_start_tli = true;
			tli_end_lsn = backup_state->startpoint;
		}
		else if (!found_backup_start_tli)
			continue;

		/*
		 * Find the summaries that overlap the LSN range of interest for this
		 * timeline. If this is the earliest timeline involved, the range of
		 * interest begins with the start LSN of the prior backup; otherwise,
		 * it begins at the LSN at which this timeline came into existence. If
		 * this is the latest TLI involved, the range of interest ends at the
		 * start LSN of the current backup; otherwise, it ends at the point
		 * where we switched from this timeline to the next one.
		 */
		if (tle->tli == earliest_wal_range_tli)
			tli_start_lsn = earliest_wal_range_start_lsn;
		tli_wslist = FilterWalSummaries(all_wslist, tle->tli,
										tli_start_lsn, tli_end_lsn);

		/*
		 * There is no guarantee that the WAL summaries we found cover the
		 * entire range of LSNs for which summaries are required, or indeed
		 * that we found any WAL summaries at all. Check whether we have a
		 * problem of that sort.
		 */
		if (!WalSummariesAreComplete(tli_wslist, tli_start_lsn, tli_end_lsn,
									 &tli_missing_lsn))
		{
			if (XLogRecPtrIsInvalid(tli_missing_lsn))
				ereport(ERROR,
						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						 errmsg("WAL summaries are required on timeline %u from %X/%X to %X/%X, but no summaries for that timeline and LSN range exist",
								tle->tli,
								LSN_FORMAT_ARGS(tli_start_lsn),
								LSN_FORMAT_ARGS(tli_end_lsn))));
			else
				ereport(ERROR,
						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						 errmsg("WAL summaries are required on timeline %u from %X/%X to %X/%X, but the summaries for that timeline and LSN range are incomplete",
								tle->tli,
								LSN_FORMAT_ARGS(tli_start_lsn),
								LSN_FORMAT_ARGS(tli_end_lsn)),
						 errdetail("The first unsummarized LSN in this range is %X/%X.",
								   LSN_FORMAT_ARGS(tli_missing_lsn))));
		}

		/*
		 * Remember that we need to read these summaries.
		 *
		 * Technically, it's possible that this could read more files than
		 * required, since tli_wslist in theory could contain redundant
		 * summaries. For instance, if we have a summary from 0/10000000 to
		 * 0/20000000 and also one from 0/00000000 to 0/30000000, then the
		 * latter subsumes the former and the former could be ignored.
		 *
		 * We ignore this possibility because the WAL summarizer only tries to
		 * generate summaries that do not overlap. If somehow they exist,
		 * we'll do a bit of extra work but the results should still be
		 * correct.
		 */
		required_wslist = list_concat(required_wslist, tli_wslist);

		/*
		 * Timelines earlier than the one in which the prior backup began are
		 * not relevant.
		 */
		if (tle->tli == earliest_wal_range_tli)
			break;
	}

	/*
	 * Read all of the required block reference table files and merge all of
	 * the data into a single in-memory block reference table.
	 *
	 * See the comments for struct IncrementalBackupInfo for some thoughts on
	 * memory usage.
	 */
	ib->brtab = CreateEmptyBlockRefTable();
	foreach(lc, required_wslist)
	{
		WalSummaryFile *ws = lfirst(lc);
		WalSummaryIO wsio;
		BlockRefTableReader *reader;
		RelFileLocator rlocator;
		ForkNumber	forknum;
		BlockNumber limit_block;
		BlockNumber blocks[BLOCKS_PER_READ];

		wsio.file = OpenWalSummaryFile(ws, false);
		wsio.filepos = 0;
		ereport(DEBUG1,
				(errmsg_internal("reading WAL summary file \"%s\"",
								 FilePathName(wsio.file))));
		reader = CreateBlockRefTableReader(ReadWalSummary, &wsio,
										   FilePathName(wsio.file),
										   ReportWalSummaryError, NULL);
		while (BlockRefTableReaderNextRelation(reader, &rlocator, &forknum,
											   &limit_block))
		{
			BlockRefTableSetLimitBlock(ib->brtab, &rlocator,
									   forknum, limit_block);

			while (1)
			{
				unsigned	nblocks;
				unsigned	i;

				nblocks = BlockRefTableReaderGetBlocks(reader, blocks,
													   BLOCKS_PER_READ);
				if (nblocks == 0)
					break;

				for (i = 0; i < nblocks; ++i)
					BlockRefTableMarkBlockModified(ib->brtab, &rlocator,
												   forknum, blocks[i]);
			}
		}
		DestroyBlockRefTableReader(reader);
		FileClose(wsio.file);
	}

	/* Switch back to previous memory context. */
	MemoryContextSwitchTo(oldcontext);
}

/*
 * Get the pathname that should be used when a file is sent incrementally.
 *
 * The result is a palloc'd string.
 */
char *
GetIncrementalFilePath(Oid dboid, Oid spcoid, RelFileNumber relfilenumber,
					   ForkNumber forknum, unsigned segno)
{
	char	   *path;
	char	   *lastslash;
	char	   *ipath;

	path = GetRelationPath(dboid, spcoid, relfilenumber, INVALID_PROC_NUMBER,
						   forknum);

	lastslash = strrchr(path, '/');
	Assert(lastslash != NULL);
	*lastslash = '\0';

	if (segno > 0)
		ipath = psprintf("%s/INCREMENTAL.%s.%u", path, lastslash + 1, segno);
	else
		ipath = psprintf("%s/INCREMENTAL.%s", path, lastslash + 1);

	pfree(path);

	return ipath;
}
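
/*
 * For example (illustrative values): a relation segment stored at
 * "base/5/16384" is sent as "base/5/INCREMENTAL.16384" for segment 0 and as
 * "base/5/INCREMENTAL.16384.3" for segment 3.
 */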

/*
 * How should we back up a particular file as part of an incremental backup?
 *
 * If the return value is BACK_UP_FILE_FULLY, caller should back up the whole
 * file just as if this were not an incremental backup. The contents of the
 * relative_block_numbers array are unspecified in this case.
 *
 * If the return value is BACK_UP_FILE_INCREMENTALLY, caller should include
 * an incremental file in the backup instead of the entire file. On return,
 * *num_blocks_required will be set to the number of blocks that need to be
 * sent, and the actual block numbers will have been stored in
 * relative_block_numbers, which should be an array of at least RELSEG_SIZE
 * entries. In addition, *truncation_block_length will be set to the value
 * that should be included in the incremental file.
 */
FileBackupMethod
GetFileBackupMethod(IncrementalBackupInfo *ib, const char *path,
					Oid dboid, Oid spcoid,
					RelFileNumber relfilenumber, ForkNumber forknum,
					unsigned segno, size_t size,
					unsigned *num_blocks_required,
					BlockNumber *relative_block_numbers,
					unsigned *truncation_block_length)
{
	BlockNumber limit_block;
	BlockNumber start_blkno;
	BlockNumber stop_blkno;
	RelFileLocator rlocator;
	BlockRefTableEntry *brtentry;
	unsigned	nblocks;
	unsigned	i;

	/* Should only be called after PrepareForIncrementalBackup. */
	Assert(ib->buf.data == NULL);

	/*
	 * dboid could be InvalidOid if shared rel, but spcoid and relfilenumber
	 * should have legal values.
	 */
	Assert(OidIsValid(spcoid));
	Assert(RelFileNumberIsValid(relfilenumber));

	/*
	 * If the file size is too large or not a multiple of BLCKSZ, then
	 * something weird is happening, so give up and send the whole file.
	 */
	if ((size % BLCKSZ) != 0 || size / BLCKSZ > RELSEG_SIZE)
		return BACK_UP_FILE_FULLY;

	/*
	 * The free-space map fork is not properly WAL-logged, so we need to
	 * back up the entire file every time.
	 */
	if (forknum == FSM_FORKNUM)
		return BACK_UP_FILE_FULLY;

	/*
	 * If this file was not part of the prior backup, back it up fully.
	 *
	 * If this file was created after the prior backup and before the start of
	 * the current backup, then the WAL summary information will tell us to
	 * back up the whole file. However, if this file was created after the
	 * start of the current backup, then the WAL summary won't know anything
	 * about it. Without this logic, we would erroneously conclude that it was
	 * OK to send it incrementally.
	 *
	 * Note that the file could have existed at the time of the prior backup,
	 * gotten deleted, and then a new file with the same name could have been
	 * created. In that case, this logic won't prevent the file from being
	 * backed up incrementally. But, if the deletion happened before the start
	 * of the current backup, the limit block will be 0, inducing a full
	 * backup. If the deletion happened after the start of the current backup,
	 * reconstruction will erroneously combine blocks from the current
	 * lifespan of the file with blocks from the previous lifespan -- but in
	 * this type of case, WAL replay to reach backup consistency should remove
	 * and recreate the file anyway, so the initial bogus contents should not
	 * matter.
	 */
	if (backup_file_lookup(ib->manifest_files, path) == NULL)
	{
		char	   *ipath;

		ipath = GetIncrementalFilePath(dboid, spcoid, relfilenumber,
									   forknum, segno);
		if (backup_file_lookup(ib->manifest_files, ipath) == NULL)
			return BACK_UP_FILE_FULLY;
	}

	/*
	 * Look up the special block reference table entry for the database as a
	 * whole.
	 */
	rlocator.spcOid = spcoid;
	rlocator.dbOid = dboid;
	rlocator.relNumber = 0;
	if (BlockRefTableGetEntry(ib->brtab, &rlocator, MAIN_FORKNUM,
							  &limit_block) != NULL)
	{
		/*
		 * According to the WAL summary, this database OID/tablespace OID
		 * pairing has been created since the previous backup. So, everything
		 * in it must be backed up fully.
		 */
		return BACK_UP_FILE_FULLY;
	}

	/* Look up the block reference table entry for this relfilenode. */
	rlocator.relNumber = relfilenumber;
	brtentry = BlockRefTableGetEntry(ib->brtab, &rlocator, forknum,
									 &limit_block);

	/*
	 * If there is no entry, then there have been no WAL-logged changes to the
	 * relation since the predecessor backup was taken, so we can back it up
	 * incrementally and need not include any modified blocks.
	 *
	 * However, if the file is zero-length, we should do a full backup,
	 * because an incremental file is always more than zero length, and it's
	 * silly to take an incremental backup when a full backup would be
	 * smaller.
	 */
	if (brtentry == NULL)
	{
		if (size == 0)
			return BACK_UP_FILE_FULLY;
		*num_blocks_required = 0;
		*truncation_block_length = size / BLCKSZ;
		return BACK_UP_FILE_INCREMENTALLY;
	}

	/*
	 * If the limit_block is less than or equal to the point where this
	 * segment starts, send the whole file.
	 */
	if (limit_block <= segno * RELSEG_SIZE)
		return BACK_UP_FILE_FULLY;

	/*
	 * Get relevant entries from the block reference table entry.
	 *
	 * We shouldn't overflow computing the start or stop block numbers, but if
	 * it manages to happen somehow, detect it and throw an error.
	 */
	start_blkno = segno * RELSEG_SIZE;
	stop_blkno = start_blkno + (size / BLCKSZ);
	if (start_blkno / RELSEG_SIZE != segno || stop_blkno < start_blkno)
		ereport(ERROR,
				errcode(ERRCODE_INTERNAL_ERROR),
				errmsg_internal("overflow computing block number bounds for segment %u with size %zu",
								segno, size));

	/*
	 * This will write *absolute* block numbers into the output array, but
	 * we'll transpose them below.
	 */
	nblocks = BlockRefTableEntryGetBlocks(brtentry, start_blkno, stop_blkno,
										  relative_block_numbers, RELSEG_SIZE);
	Assert(nblocks <= RELSEG_SIZE);

	/*
	 * If we're going to have to send nearly all of the blocks, then just send
	 * the whole file, because that won't require much extra storage or
	 * transfer and will speed up and simplify backup restoration. It's not
	 * clear what threshold is most appropriate here and perhaps it ought to
	 * be configurable, but for now we're just going to say that if we'd need
	 * to send 90% of the blocks anyway, give up and send the whole file.
	 *
	 * NB: If you change the threshold here, at least make sure to back up the
	 * file fully when every single block must be sent, because there's
	 * nothing good about sending an incremental file in that case.
	 */
	if (nblocks * BLCKSZ > size * 0.9)
		return BACK_UP_FILE_FULLY;

	/*
	 * Looks like we can send an incremental file, so sort the block numbers
	 * and then transpose them from absolute block numbers to relative block
	 * numbers if necessary.
	 *
	 * NB: If the block reference table was using the bitmap representation
	 * for a given chunk, the block numbers in that chunk will already be
	 * sorted, but when the array-of-offsets representation is used, we can
	 * receive block numbers here out of order.
	 */
	qsort(relative_block_numbers, nblocks, sizeof(BlockNumber),
		  compare_block_numbers);
	if (start_blkno != 0)
	{
		for (i = 0; i < nblocks; ++i)
			relative_block_numbers[i] -= start_blkno;
	}
	*num_blocks_required = nblocks;

	/*
	 * The truncation block length is the minimum length of the reconstructed
	 * file. Any block numbers below this threshold that are not present in
	 * the backup need to be fetched from the prior backup. At or above this
	 * threshold, blocks should only be included in the result if they are
	 * present in the backup. (This may require inserting zero blocks if the
	 * blocks included in the backup are non-consecutive.)
	 */
	*truncation_block_length = size / BLCKSZ;
	if (BlockNumberIsValid(limit_block))
	{
		unsigned	relative_limit = limit_block - segno * RELSEG_SIZE;

		if (*truncation_block_length < relative_limit)
			*truncation_block_length = relative_limit;
	}

	/* Send it incrementally. */
	return BACK_UP_FILE_INCREMENTALLY;
}

/*
 * Compute the size for a header of an incremental file containing a given
 * number of blocks. The header is rounded to a multiple of BLCKSZ, but
 * only if the file will store some block data.
 */
size_t
GetIncrementalHeaderSize(unsigned num_blocks_required)
{
	size_t		result;

	/* Make sure we're not going to overflow. */
	Assert(num_blocks_required <= RELSEG_SIZE);

	/*
	 * Three four byte quantities (magic number, truncation block length,
	 * block count) followed by block numbers.
	 */
	result = 3 * sizeof(uint32) + (sizeof(BlockNumber) * num_blocks_required);

	/*
	 * Round the header size to a multiple of BLCKSZ - when not a multiple of
	 * BLCKSZ, add the missing fraction of a block. But do this only if the
	 * file will store data for some blocks, otherwise keep it small.
	 */
	if ((num_blocks_required > 0) && (result % BLCKSZ != 0))
		result += BLCKSZ - (result % BLCKSZ);

	return result;
}
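
/*
 * Worked example, assuming the default BLCKSZ of 8192: with zero blocks
 * required the header is just 3 * 4 = 12 bytes and is left unpadded; with
 * one block required it is 12 + 4 = 16 bytes, rounded up to 8192.
 */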

/*
 * Compute the size for an incremental file containing a given number of blocks.
 */
size_t
GetIncrementalFileSize(unsigned num_blocks_required)
{
	size_t		result;

	/* Make sure we're not going to overflow. */
	Assert(num_blocks_required <= RELSEG_SIZE);

	/*
	 * Header with three four byte quantities (magic number, truncation block
	 * length, block count) followed by block numbers, rounded to a multiple
	 * of BLCKSZ (for files with block data), followed by block contents.
	 */
	result = GetIncrementalHeaderSize(num_blocks_required);
	result += BLCKSZ * num_blocks_required;

	return result;
}
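
/*
 * Worked example, assuming the default BLCKSZ of 8192: an incremental file
 * holding 10 block images occupies 8192 (padded header) + 10 * 8192 =
 * 90112 bytes.
 */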

/*
 * Helper function for filemap hash table.
 */
static uint32
hash_string_pointer(const char *s)
{
	unsigned char *ss = (unsigned char *) s;

	return hash_bytes(ss, strlen(s));
}

/*
 * This callback validates the manifest version for an incremental backup.
 */
static void
manifest_process_version(JsonManifestParseContext *context,
						 int manifest_version)
{
	/* Incremental backups don't work with manifest version 1 */
	if (manifest_version == 1)
		context->error_cb(context,
						  "backup manifest version 1 does not support incremental backup");
}

/*
 * This callback validates the manifest system identifier against that of
 * the current system.
 */
static void
manifest_process_system_identifier(JsonManifestParseContext *context,
								   uint64 manifest_system_identifier)
{
	uint64		system_identifier;

	/* Get system identifier of current system */
	system_identifier = GetSystemIdentifier();

	if (manifest_system_identifier != system_identifier)
		context->error_cb(context,
						  "system identifier in backup manifest is %llu, but database system identifier is %llu",
						  (unsigned long long) manifest_system_identifier,
						  (unsigned long long) system_identifier);
}

/*
 * This callback is invoked for each file mentioned in the backup manifest.
 *
 * We store the path to each file and the size of each file for
 * sanity-checking purposes. For further details, see comments for
 * IncrementalBackupInfo.
 */
static void
manifest_process_file(JsonManifestParseContext *context,
					  const char *pathname, uint64 size,
					  pg_checksum_type checksum_type,
					  int checksum_length,
					  uint8 *checksum_payload)
{
	IncrementalBackupInfo *ib = context->private_data;
	backup_file_entry *entry;
	bool		found;

	entry = backup_file_insert(ib->manifest_files, pathname, &found);
	if (!found)
	{
		entry->path = MemoryContextStrdup(ib->manifest_files->ctx,
										  pathname);
		entry->size = size;
	}
}

/*
 * This callback is invoked for each WAL range mentioned in the backup
 * manifest.
 *
 * We're just interested in learning the oldest LSN and the corresponding TLI
 * that appear in any WAL range.
 */
static void
manifest_process_wal_range(JsonManifestParseContext *context,
						   TimeLineID tli, XLogRecPtr start_lsn,
						   XLogRecPtr end_lsn)
{
	IncrementalBackupInfo *ib = context->private_data;
	backup_wal_range *range = palloc(sizeof(backup_wal_range));

	range->tli = tli;
	range->start_lsn = start_lsn;
	range->end_lsn = end_lsn;
	ib->manifest_wal_ranges = lappend(ib->manifest_wal_ranges, range);
}

/*
 * This callback is invoked if an error occurs while parsing the backup
 * manifest.
 */
static void
manifest_report_error(JsonManifestParseContext *context, const char *fmt,...)
{
	StringInfoData errbuf;

	initStringInfo(&errbuf);

	for (;;)
	{
		va_list		ap;
		int			needed;

		va_start(ap, fmt);
		needed = appendStringInfoVA(&errbuf, fmt, ap);
		va_end(ap);
		if (needed == 0)
			break;
		enlargeStringInfo(&errbuf, needed);
	}

	ereport(ERROR,
			errmsg_internal("%s", errbuf.data));
}

/*
 * Quicksort comparator for block numbers.
 */
static int
compare_block_numbers(const void *a, const void *b)
{
	BlockNumber aa = *(BlockNumber *) a;
	BlockNumber bb = *(BlockNumber *) b;

	return pg_cmp_u32(aa, bb);
}