/*-------------------------------------------------------------------------
 *
 * backup_manifest.c
 *	  code for generating and sending a backup manifest
 *
 * Portions Copyright (c) 2010-2024, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  src/backend/backup/backup_manifest.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/timeline.h"
#include "access/xlog.h"
#include "backup/backup_manifest.h"
#include "backup/basebackup_sink.h"
#include "common/relpath.h"
#include "mb/pg_wchar.h"
#include "utils/builtins.h"
#include "utils/json.h"
24 static void AppendStringToManifest(backup_manifest_info
*manifest
, const char *s
);
27 * Does the user want a backup manifest?
29 * It's simplest to always have a manifest_info object, so that we don't need
30 * checks for NULL pointers in too many places. However, if the user doesn't
31 * want a manifest, we set manifest->buffile to NULL.
34 IsManifestEnabled(backup_manifest_info
*manifest
)
36 return (manifest
->buffile
!= NULL
);
/*
 * Convenience macro for appending printf-style formatted data to the backup
 * manifest.
 *
 * The arguments after "manifest" are handed to psprintf(); the resulting
 * string is passed to AppendStringToManifest() and then released again.
 *
 * The body is wrapped in do { ... } while (0) so that an invocation followed
 * by a semicolon is exactly one statement and can safely be used as the
 * unbraced body of an if/else.
 */
#define AppendToManifest(manifest, ...) \
	do { \
		char *_manifest_s = psprintf(__VA_ARGS__); \
		AppendStringToManifest(manifest, _manifest_s); \
		pfree(_manifest_s); \
	} while (0)
50 * Initialize state so that we can construct a backup manifest.
52 * NB: Although the checksum type for the data files is configurable, the
53 * checksum for the manifest itself always uses SHA-256. See comments in
57 InitializeBackupManifest(backup_manifest_info
*manifest
,
58 backup_manifest_option want_manifest
,
59 pg_checksum_type manifest_checksum_type
)
61 memset(manifest
, 0, sizeof(backup_manifest_info
));
62 manifest
->checksum_type
= manifest_checksum_type
;
64 if (want_manifest
== MANIFEST_OPTION_NO
)
65 manifest
->buffile
= NULL
;
68 manifest
->buffile
= BufFileCreateTemp(false);
69 manifest
->manifest_ctx
= pg_cryptohash_create(PG_SHA256
);
70 if (pg_cryptohash_init(manifest
->manifest_ctx
) < 0)
71 elog(ERROR
, "failed to initialize checksum of backup manifest: %s",
72 pg_cryptohash_error(manifest
->manifest_ctx
));
75 manifest
->manifest_size
= UINT64CONST(0);
76 manifest
->force_encode
= (want_manifest
== MANIFEST_OPTION_FORCE_ENCODE
);
77 manifest
->first_file
= true;
78 manifest
->still_checksumming
= true;
80 if (want_manifest
!= MANIFEST_OPTION_NO
)
81 AppendToManifest(manifest
,
82 "{ \"PostgreSQL-Backup-Manifest-Version\": 2,\n"
83 "\"System-Identifier\": " UINT64_FORMAT
",\n"
85 GetSystemIdentifier());
89 * Free resources assigned to a backup manifest constructed.
92 FreeBackupManifest(backup_manifest_info
*manifest
)
94 pg_cryptohash_free(manifest
->manifest_ctx
);
95 manifest
->manifest_ctx
= NULL
;
99 * Add an entry to the backup manifest for a file.
102 AddFileToBackupManifest(backup_manifest_info
*manifest
, Oid spcoid
,
103 const char *pathname
, size_t size
, pg_time_t mtime
,
104 pg_checksum_context
*checksum_ctx
)
106 char pathbuf
[MAXPGPATH
];
110 if (!IsManifestEnabled(manifest
))
114 * If this file is part of a tablespace, the pathname passed to this
115 * function will be relative to the tar file that contains it. We want the
116 * pathname relative to the data directory (ignoring the intermediate
117 * symlink traversal).
119 if (OidIsValid(spcoid
))
121 snprintf(pathbuf
, sizeof(pathbuf
), "%s/%u/%s", PG_TBLSPC_DIR
, spcoid
,
127 * Each file's entry needs to be separated from any entry that follows by
128 * a comma, but there's no comma before the first one or after the last
129 * one. To make that work, adding a file to the manifest starts by
130 * terminating the most recently added line, with a comma if appropriate,
131 * but does not terminate the line inserted for this file.
133 initStringInfo(&buf
);
134 if (manifest
->first_file
)
136 appendStringInfoChar(&buf
, '\n');
137 manifest
->first_file
= false;
140 appendStringInfoString(&buf
, ",\n");
143 * Write the relative pathname to this file out to the manifest. The
144 * manifest is always stored in UTF-8, so we have to encode paths that are
145 * not valid in that encoding.
147 pathlen
= strlen(pathname
);
148 if (!manifest
->force_encode
&&
149 pg_verify_mbstr(PG_UTF8
, pathname
, pathlen
, true))
151 appendStringInfoString(&buf
, "{ \"Path\": ");
152 escape_json_with_len(&buf
, pathname
, pathlen
);
153 appendStringInfoString(&buf
, ", ");
157 appendStringInfoString(&buf
, "{ \"Encoded-Path\": \"");
158 enlargeStringInfo(&buf
, 2 * pathlen
);
159 buf
.len
+= hex_encode(pathname
, pathlen
,
161 appendStringInfoString(&buf
, "\", ");
164 appendStringInfo(&buf
, "\"Size\": %zu, ", size
);
167 * Convert last modification time to a string and append it to the
168 * manifest. Since it's not clear what time zone to use and since time
169 * zone definitions can change, possibly causing confusion, use GMT
172 appendStringInfoString(&buf
, "\"Last-Modified\": \"");
173 enlargeStringInfo(&buf
, 128);
174 buf
.len
+= pg_strftime(&buf
.data
[buf
.len
], 128, "%Y-%m-%d %H:%M:%S %Z",
176 appendStringInfoChar(&buf
, '"');
178 /* Add checksum information. */
179 if (checksum_ctx
->type
!= CHECKSUM_TYPE_NONE
)
181 uint8 checksumbuf
[PG_CHECKSUM_MAX_LENGTH
];
184 checksumlen
= pg_checksum_final(checksum_ctx
, checksumbuf
);
186 elog(ERROR
, "could not finalize checksum of file \"%s\"",
189 appendStringInfo(&buf
,
190 ", \"Checksum-Algorithm\": \"%s\", \"Checksum\": \"",
191 pg_checksum_type_name(checksum_ctx
->type
));
192 enlargeStringInfo(&buf
, 2 * checksumlen
);
193 buf
.len
+= hex_encode((char *) checksumbuf
, checksumlen
,
195 appendStringInfoChar(&buf
, '"');
198 /* Close out the object. */
199 appendStringInfoString(&buf
, " }");
201 /* OK, add it to the manifest. */
202 AppendStringToManifest(manifest
, buf
.data
);
204 /* Avoid leaking memory. */
209 * Add information about the WAL that will need to be replayed when restoring
210 * this backup to the manifest.
213 AddWALInfoToBackupManifest(backup_manifest_info
*manifest
, XLogRecPtr startptr
,
214 TimeLineID starttli
, XLogRecPtr endptr
,
219 bool first_wal_range
= true;
220 bool found_start_timeline
= false;
222 if (!IsManifestEnabled(manifest
))
225 /* Terminate the list of files. */
226 AppendStringToManifest(manifest
, "\n],\n");
228 /* Read the timeline history for the ending timeline. */
229 timelines
= readTimeLineHistory(endtli
);
231 /* Start a list of LSN ranges. */
232 AppendStringToManifest(manifest
, "\"WAL-Ranges\": [\n");
234 foreach(lc
, timelines
)
236 TimeLineHistoryEntry
*entry
= lfirst(lc
);
237 XLogRecPtr tl_beginptr
;
240 * We only care about timelines that were active during the backup.
241 * Skip any that ended before the backup started. (Note that if
242 * entry->end is InvalidXLogRecPtr, it means that the timeline has not
245 if (!XLogRecPtrIsInvalid(entry
->end
) && entry
->end
< startptr
)
249 * Because the timeline history file lists newer timelines before
250 * older ones, the first timeline we encounter that is new enough to
251 * matter ought to match the ending timeline of the backup.
253 if (first_wal_range
&& endtli
!= entry
->tli
)
255 errmsg("expected end timeline %u but found timeline %u",
256 starttli
, entry
->tli
));
259 * If this timeline entry matches with the timeline on which the
260 * backup started, WAL needs to be checked from the start LSN of the
261 * backup. If this entry refers to a newer timeline, WAL needs to be
262 * checked since the beginning of this timeline, so use the LSN where
263 * the timeline began.
265 if (starttli
== entry
->tli
)
266 tl_beginptr
= startptr
;
269 tl_beginptr
= entry
->begin
;
272 * If we reach a TLI that has no valid beginning LSN, there can't
273 * be any more timelines in the history after this point, so we'd
274 * better have arrived at the expected starting TLI. If not,
275 * something's gone horribly wrong.
277 if (XLogRecPtrIsInvalid(entry
->begin
))
279 errmsg("expected start timeline %u but found timeline %u",
280 starttli
, entry
->tli
));
283 AppendToManifest(manifest
,
284 "%s{ \"Timeline\": %u, \"Start-LSN\": \"%X/%X\", \"End-LSN\": \"%X/%X\" }",
285 first_wal_range
? "" : ",\n",
287 LSN_FORMAT_ARGS(tl_beginptr
),
288 LSN_FORMAT_ARGS(endptr
));
290 if (starttli
== entry
->tli
)
292 found_start_timeline
= true;
296 endptr
= entry
->begin
;
297 first_wal_range
= false;
301 * The last entry in the timeline history for the ending timeline should
302 * be the ending timeline itself. Verify that this is what we observed.
304 if (!found_start_timeline
)
306 errmsg("start timeline %u not found in history of timeline %u",
309 /* Terminate the list of WAL ranges. */
310 AppendStringToManifest(manifest
, "\n],\n");
314 * Finalize the backup manifest, and send it to the client.
317 SendBackupManifest(backup_manifest_info
*manifest
, bbsink
*sink
)
319 uint8 checksumbuf
[PG_SHA256_DIGEST_LENGTH
];
320 char checksumstringbuf
[PG_SHA256_DIGEST_STRING_LENGTH
];
321 size_t manifest_bytes_done
= 0;
323 if (!IsManifestEnabled(manifest
))
327 * Append manifest checksum, so that the problems with the manifest itself
330 * We always use SHA-256 for this, regardless of what algorithm is chosen
331 * for checksumming the files. If we ever want to make the checksum
332 * algorithm used for the manifest file variable, the client will need a
333 * way to figure out which algorithm to use as close to the beginning of
334 * the manifest file as possible, to avoid having to read the whole thing
337 manifest
->still_checksumming
= false;
338 if (pg_cryptohash_final(manifest
->manifest_ctx
, checksumbuf
,
339 sizeof(checksumbuf
)) < 0)
340 elog(ERROR
, "failed to finalize checksum of backup manifest: %s",
341 pg_cryptohash_error(manifest
->manifest_ctx
));
342 AppendStringToManifest(manifest
, "\"Manifest-Checksum\": \"");
344 hex_encode((char *) checksumbuf
, sizeof checksumbuf
, checksumstringbuf
);
345 checksumstringbuf
[PG_SHA256_DIGEST_STRING_LENGTH
- 1] = '\0';
347 AppendStringToManifest(manifest
, checksumstringbuf
);
348 AppendStringToManifest(manifest
, "\"}\n");
351 * We've written all the data to the manifest file. Rewind the file so
352 * that we can read it all back.
354 if (BufFileSeek(manifest
->buffile
, 0, 0, SEEK_SET
))
356 (errcode_for_file_access(),
357 errmsg("could not rewind temporary file")));
361 * Send the backup manifest.
363 bbsink_begin_manifest(sink
);
364 while (manifest_bytes_done
< manifest
->manifest_size
)
366 size_t bytes_to_read
;
368 bytes_to_read
= Min(sink
->bbs_buffer_length
,
369 manifest
->manifest_size
- manifest_bytes_done
);
370 BufFileReadExact(manifest
->buffile
, sink
->bbs_buffer
, bytes_to_read
);
371 bbsink_manifest_contents(sink
, bytes_to_read
);
372 manifest_bytes_done
+= bytes_to_read
;
374 bbsink_end_manifest(sink
);
376 /* Release resources */
377 BufFileClose(manifest
->buffile
);
381 * Append a cstring to the manifest.
384 AppendStringToManifest(backup_manifest_info
*manifest
, const char *s
)
388 Assert(manifest
!= NULL
);
389 if (manifest
->still_checksumming
)
391 if (pg_cryptohash_update(manifest
->manifest_ctx
, (uint8
*) s
, len
) < 0)
392 elog(ERROR
, "failed to update checksum of backup manifest: %s",
393 pg_cryptohash_error(manifest
->manifest_ctx
));
395 BufFileWrite(manifest
->buffile
, s
, len
);
396 manifest
->manifest_size
+= len
;