jsonpath scanner: reentrant scanner
[pgsql.git] / src / common / parse_manifest.c
bloba3896eaebe2d556582d9f60cc14e816b3f0d5d94
/*-------------------------------------------------------------------------
 *
 * parse_manifest.c
 *	  Parse a backup manifest in JSON format.
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/common/parse_manifest.c
 *
 *-------------------------------------------------------------------------
 */
14 #include "postgres_fe.h"
16 #include "common/jsonapi.h"
17 #include "common/parse_manifest.h"
/*
 * Semantic states for JSON manifest parsing.
 *
 * Each value names what the semantic callbacks expect to see next.  Parsing
 * begins in JM_EXPECT_TOPLEVEL_START and a well-formed manifest leaves the
 * state machine in JM_EXPECT_EOF, which is only reachable after the
 * "Manifest-Checksum" value has been seen and the toplevel object closed.
 */
typedef enum
{
	JM_EXPECT_TOPLEVEL_START,
	JM_EXPECT_TOPLEVEL_END,
	JM_EXPECT_TOPLEVEL_FIELD,
	JM_EXPECT_VERSION_VALUE,
	JM_EXPECT_SYSTEM_IDENTIFIER_VALUE,
	JM_EXPECT_FILES_START,
	JM_EXPECT_FILES_NEXT,
	JM_EXPECT_THIS_FILE_FIELD,
	JM_EXPECT_THIS_FILE_VALUE,
	JM_EXPECT_WAL_RANGES_START,
	JM_EXPECT_WAL_RANGES_NEXT,
	JM_EXPECT_THIS_WAL_RANGE_FIELD,
	JM_EXPECT_THIS_WAL_RANGE_VALUE,
	JM_EXPECT_MANIFEST_CHECKSUM_VALUE,
	JM_EXPECT_EOF,
} JsonManifestSemanticState;
/*
 * Possible fields for one file as described by the manifest.
 *
 * Recorded by json_manifest_object_field_start() so that the scalar callback
 * knows which member of the parse state the next value belongs to.
 */
typedef enum
{
	JMFF_PATH,
	JMFF_ENCODED_PATH,
	JMFF_SIZE,
	JMFF_LAST_MODIFIED,
	JMFF_CHECKSUM_ALGORITHM,
	JMFF_CHECKSUM,
} JsonManifestFileField;
/*
 * Possible fields for one WAL range as described by the manifest.
 *
 * Recorded by json_manifest_object_field_start() so that the scalar callback
 * knows which member of the parse state the next value belongs to.
 */
typedef enum
{
	JMWRF_TIMELINE,
	JMWRF_START_LSN,
	JMWRF_END_LSN,
} JsonManifestWALRangeField;
65 * Internal state used while decoding the JSON-format backup manifest.
67 typedef struct
69 JsonManifestParseContext *context;
70 JsonManifestSemanticState state;
72 /* These fields are used for parsing objects in the list of files. */
73 JsonManifestFileField file_field;
74 char *pathname;
75 char *encoded_pathname;
76 char *size;
77 char *algorithm;
78 pg_checksum_type checksum_algorithm;
79 char *checksum;
81 /* These fields are used for parsing objects in the list of WAL ranges. */
82 JsonManifestWALRangeField wal_range_field;
83 char *timeline;
84 char *start_lsn;
85 char *end_lsn;
87 /* Miscellaneous other stuff. */
88 bool saw_version_field;
89 char *manifest_version;
90 char *manifest_system_identifier;
91 char *manifest_checksum;
92 } JsonManifestParseState;
94 /* typedef appears in parse_manifest.h */
95 struct JsonManifestParseIncrementalState
97 JsonLexContext lex;
98 JsonSemAction sem;
99 pg_cryptohash_ctx *manifest_ctx;
102 static JsonParseErrorType json_manifest_object_start(void *state);
103 static JsonParseErrorType json_manifest_object_end(void *state);
104 static JsonParseErrorType json_manifest_array_start(void *state);
105 static JsonParseErrorType json_manifest_array_end(void *state);
106 static JsonParseErrorType json_manifest_object_field_start(void *state, char *fname,
107 bool isnull);
108 static JsonParseErrorType json_manifest_scalar(void *state, char *token,
109 JsonTokenType tokentype);
110 static void json_manifest_finalize_version(JsonManifestParseState *parse);
111 static void json_manifest_finalize_system_identifier(JsonManifestParseState *parse);
112 static void json_manifest_finalize_file(JsonManifestParseState *parse);
113 static void json_manifest_finalize_wal_range(JsonManifestParseState *parse);
114 static void verify_manifest_checksum(JsonManifestParseState *parse,
115 const char *buffer, size_t size,
116 pg_cryptohash_ctx *incr_ctx);
117 static void json_manifest_parse_failure(JsonManifestParseContext *context,
118 char *msg);
120 static int hexdecode_char(char c);
121 static bool hexdecode_string(uint8 *result, char *input, int nbytes);
122 static bool parse_xlogrecptr(XLogRecPtr *result, char *input);
125 * Set up for incremental parsing of the manifest.
128 JsonManifestParseIncrementalState *
129 json_parse_manifest_incremental_init(JsonManifestParseContext *context)
131 JsonManifestParseIncrementalState *incstate;
132 JsonManifestParseState *parse;
133 pg_cryptohash_ctx *manifest_ctx;
135 incstate = palloc(sizeof(JsonManifestParseIncrementalState));
136 parse = palloc(sizeof(JsonManifestParseState));
138 parse->context = context;
139 parse->state = JM_EXPECT_TOPLEVEL_START;
140 parse->saw_version_field = false;
142 makeJsonLexContextIncremental(&(incstate->lex), PG_UTF8, true);
144 incstate->sem.semstate = parse;
145 incstate->sem.object_start = json_manifest_object_start;
146 incstate->sem.object_end = json_manifest_object_end;
147 incstate->sem.array_start = json_manifest_array_start;
148 incstate->sem.array_end = json_manifest_array_end;
149 incstate->sem.object_field_start = json_manifest_object_field_start;
150 incstate->sem.object_field_end = NULL;
151 incstate->sem.array_element_start = NULL;
152 incstate->sem.array_element_end = NULL;
153 incstate->sem.scalar = json_manifest_scalar;
155 manifest_ctx = pg_cryptohash_create(PG_SHA256);
156 if (manifest_ctx == NULL)
157 context->error_cb(context, "out of memory");
158 if (pg_cryptohash_init(manifest_ctx) < 0)
159 context->error_cb(context, "could not initialize checksum of manifest");
160 incstate->manifest_ctx = manifest_ctx;
162 return incstate;
166 * Free an incremental state object and its contents.
168 void
169 json_parse_manifest_incremental_shutdown(JsonManifestParseIncrementalState *incstate)
171 pfree(incstate->sem.semstate);
172 freeJsonLexContext(&(incstate->lex));
173 /* incstate->manifest_ctx has already been freed */
174 pfree(incstate);
178 * parse the manifest in pieces.
180 * The caller must ensure that the final piece contains the final lines
181 * with the complete checksum.
184 void
185 json_parse_manifest_incremental_chunk(JsonManifestParseIncrementalState *incstate,
186 const char *chunk, size_t size, bool is_last)
188 JsonParseErrorType res,
189 expected;
190 JsonManifestParseState *parse = incstate->sem.semstate;
191 JsonManifestParseContext *context = parse->context;
193 res = pg_parse_json_incremental(&(incstate->lex), &(incstate->sem),
194 chunk, size, is_last);
196 expected = is_last ? JSON_SUCCESS : JSON_INCOMPLETE;
198 if (res != expected)
199 json_manifest_parse_failure(context,
200 json_errdetail(res, &(incstate->lex)));
202 if (is_last && parse->state != JM_EXPECT_EOF)
203 json_manifest_parse_failure(context, "manifest ended unexpectedly");
205 if (!is_last)
207 if (pg_cryptohash_update(incstate->manifest_ctx,
208 (const uint8 *) chunk, size) < 0)
209 context->error_cb(context, "could not update checksum of manifest");
211 else
213 verify_manifest_checksum(parse, chunk, size, incstate->manifest_ctx);
219 * Main entrypoint to parse a JSON-format backup manifest.
221 * Caller should set up the parsing context and then invoke this function.
222 * For each file whose information is extracted from the manifest,
223 * context->per_file_cb is invoked. In case of trouble, context->error_cb is
224 * invoked and is expected not to return.
226 void
227 json_parse_manifest(JsonManifestParseContext *context, const char *buffer,
228 size_t size)
230 JsonLexContext *lex;
231 JsonParseErrorType json_error;
232 JsonSemAction sem;
233 JsonManifestParseState parse;
235 /* Set up our private parsing context. */
236 parse.context = context;
237 parse.state = JM_EXPECT_TOPLEVEL_START;
238 parse.saw_version_field = false;
240 /* Create a JSON lexing context. */
241 lex = makeJsonLexContextCstringLen(NULL, buffer, size, PG_UTF8, true);
243 /* Set up semantic actions. */
244 sem.semstate = &parse;
245 sem.object_start = json_manifest_object_start;
246 sem.object_end = json_manifest_object_end;
247 sem.array_start = json_manifest_array_start;
248 sem.array_end = json_manifest_array_end;
249 sem.object_field_start = json_manifest_object_field_start;
250 sem.object_field_end = NULL;
251 sem.array_element_start = NULL;
252 sem.array_element_end = NULL;
253 sem.scalar = json_manifest_scalar;
255 /* Run the actual JSON parser. */
256 json_error = pg_parse_json(lex, &sem);
257 if (json_error != JSON_SUCCESS)
258 json_manifest_parse_failure(context, json_errdetail(json_error, lex));
259 if (parse.state != JM_EXPECT_EOF)
260 json_manifest_parse_failure(context, "manifest ended unexpectedly");
262 /* Verify the manifest checksum. */
263 verify_manifest_checksum(&parse, buffer, size, NULL);
265 freeJsonLexContext(lex);
269 * Invoked at the start of each object in the JSON document.
271 * The document as a whole is expected to be an object; each file and each
272 * WAL range is also expected to be an object. If we're anywhere else in the
273 * document, it's an error.
275 static JsonParseErrorType
276 json_manifest_object_start(void *state)
278 JsonManifestParseState *parse = state;
280 switch (parse->state)
282 case JM_EXPECT_TOPLEVEL_START:
283 parse->state = JM_EXPECT_TOPLEVEL_FIELD;
284 break;
285 case JM_EXPECT_FILES_NEXT:
286 parse->state = JM_EXPECT_THIS_FILE_FIELD;
287 parse->pathname = NULL;
288 parse->encoded_pathname = NULL;
289 parse->size = NULL;
290 parse->algorithm = NULL;
291 parse->checksum = NULL;
292 break;
293 case JM_EXPECT_WAL_RANGES_NEXT:
294 parse->state = JM_EXPECT_THIS_WAL_RANGE_FIELD;
295 parse->timeline = NULL;
296 parse->start_lsn = NULL;
297 parse->end_lsn = NULL;
298 break;
299 default:
300 json_manifest_parse_failure(parse->context,
301 "unexpected object start");
302 break;
305 return JSON_SUCCESS;
309 * Invoked at the end of each object in the JSON document.
311 * The possible cases here are the same as for json_manifest_object_start.
312 * There's nothing special to do at the end of the document, but when we
313 * reach the end of an object representing a particular file or WAL range,
314 * we must call json_manifest_finalize_file() to save the associated details.
316 static JsonParseErrorType
317 json_manifest_object_end(void *state)
319 JsonManifestParseState *parse = state;
321 switch (parse->state)
323 case JM_EXPECT_TOPLEVEL_END:
324 parse->state = JM_EXPECT_EOF;
325 break;
326 case JM_EXPECT_THIS_FILE_FIELD:
327 json_manifest_finalize_file(parse);
328 parse->state = JM_EXPECT_FILES_NEXT;
329 break;
330 case JM_EXPECT_THIS_WAL_RANGE_FIELD:
331 json_manifest_finalize_wal_range(parse);
332 parse->state = JM_EXPECT_WAL_RANGES_NEXT;
333 break;
334 default:
335 json_manifest_parse_failure(parse->context,
336 "unexpected object end");
337 break;
340 return JSON_SUCCESS;
344 * Invoked at the start of each array in the JSON document.
346 * Within the toplevel object, the value associated with the "Files" key
347 * should be an array. Similarly for the "WAL-Ranges" key. No other arrays
348 * are expected.
350 static JsonParseErrorType
351 json_manifest_array_start(void *state)
353 JsonManifestParseState *parse = state;
355 switch (parse->state)
357 case JM_EXPECT_FILES_START:
358 parse->state = JM_EXPECT_FILES_NEXT;
359 break;
360 case JM_EXPECT_WAL_RANGES_START:
361 parse->state = JM_EXPECT_WAL_RANGES_NEXT;
362 break;
363 default:
364 json_manifest_parse_failure(parse->context,
365 "unexpected array start");
366 break;
369 return JSON_SUCCESS;
373 * Invoked at the end of each array in the JSON document.
375 * The cases here are analogous to those in json_manifest_array_start.
377 static JsonParseErrorType
378 json_manifest_array_end(void *state)
380 JsonManifestParseState *parse = state;
382 switch (parse->state)
384 case JM_EXPECT_FILES_NEXT:
385 case JM_EXPECT_WAL_RANGES_NEXT:
386 parse->state = JM_EXPECT_TOPLEVEL_FIELD;
387 break;
388 default:
389 json_manifest_parse_failure(parse->context,
390 "unexpected array end");
391 break;
394 return JSON_SUCCESS;
398 * Invoked at the start of each object field in the JSON document.
400 static JsonParseErrorType
401 json_manifest_object_field_start(void *state, char *fname, bool isnull)
403 JsonManifestParseState *parse = state;
405 switch (parse->state)
407 case JM_EXPECT_TOPLEVEL_FIELD:
410 * Inside toplevel object. The version indicator should always be
411 * the first field.
413 if (!parse->saw_version_field)
415 if (strcmp(fname, "PostgreSQL-Backup-Manifest-Version") != 0)
416 json_manifest_parse_failure(parse->context,
417 "expected version indicator");
418 parse->state = JM_EXPECT_VERSION_VALUE;
419 parse->saw_version_field = true;
420 break;
423 /* Is this the system identifier? */
424 if (strcmp(fname, "System-Identifier") == 0)
426 parse->state = JM_EXPECT_SYSTEM_IDENTIFIER_VALUE;
427 break;
430 /* Is this the list of files? */
431 if (strcmp(fname, "Files") == 0)
433 parse->state = JM_EXPECT_FILES_START;
434 break;
437 /* Is this the list of WAL ranges? */
438 if (strcmp(fname, "WAL-Ranges") == 0)
440 parse->state = JM_EXPECT_WAL_RANGES_START;
441 break;
444 /* Is this the manifest checksum? */
445 if (strcmp(fname, "Manifest-Checksum") == 0)
447 parse->state = JM_EXPECT_MANIFEST_CHECKSUM_VALUE;
448 break;
451 /* It's not a field we recognize. */
452 json_manifest_parse_failure(parse->context,
453 "unrecognized top-level field");
454 break;
456 case JM_EXPECT_THIS_FILE_FIELD:
457 /* Inside object for one file; which key have we got? */
458 if (strcmp(fname, "Path") == 0)
459 parse->file_field = JMFF_PATH;
460 else if (strcmp(fname, "Encoded-Path") == 0)
461 parse->file_field = JMFF_ENCODED_PATH;
462 else if (strcmp(fname, "Size") == 0)
463 parse->file_field = JMFF_SIZE;
464 else if (strcmp(fname, "Last-Modified") == 0)
465 parse->file_field = JMFF_LAST_MODIFIED;
466 else if (strcmp(fname, "Checksum-Algorithm") == 0)
467 parse->file_field = JMFF_CHECKSUM_ALGORITHM;
468 else if (strcmp(fname, "Checksum") == 0)
469 parse->file_field = JMFF_CHECKSUM;
470 else
471 json_manifest_parse_failure(parse->context,
472 "unexpected file field");
473 parse->state = JM_EXPECT_THIS_FILE_VALUE;
474 break;
476 case JM_EXPECT_THIS_WAL_RANGE_FIELD:
477 /* Inside object for one file; which key have we got? */
478 if (strcmp(fname, "Timeline") == 0)
479 parse->wal_range_field = JMWRF_TIMELINE;
480 else if (strcmp(fname, "Start-LSN") == 0)
481 parse->wal_range_field = JMWRF_START_LSN;
482 else if (strcmp(fname, "End-LSN") == 0)
483 parse->wal_range_field = JMWRF_END_LSN;
484 else
485 json_manifest_parse_failure(parse->context,
486 "unexpected WAL range field");
487 parse->state = JM_EXPECT_THIS_WAL_RANGE_VALUE;
488 break;
490 default:
491 json_manifest_parse_failure(parse->context,
492 "unexpected object field");
493 break;
496 pfree(fname);
498 return JSON_SUCCESS;
502 * Invoked at the start of each scalar in the JSON document.
504 * Object field names don't reach this code; those are handled by
505 * json_manifest_object_field_start. When we're inside of the object for
506 * a particular file or WAL range, that function will have noticed the name
507 * of the field, and we'll get the corresponding value here. When we're in
508 * the toplevel object, the parse state itself tells us which field this is.
510 * In all cases except for PostgreSQL-Backup-Manifest-Version, which we
511 * can just check on the spot, the goal here is just to save the value in
512 * the parse state for later use. We don't actually do anything until we
513 * reach either the end of the object representing this file, or the end
514 * of the manifest, as the case may be.
516 static JsonParseErrorType
517 json_manifest_scalar(void *state, char *token, JsonTokenType tokentype)
519 JsonManifestParseState *parse = state;
521 switch (parse->state)
523 case JM_EXPECT_VERSION_VALUE:
524 parse->manifest_version = token;
525 json_manifest_finalize_version(parse);
526 parse->state = JM_EXPECT_TOPLEVEL_FIELD;
527 break;
529 case JM_EXPECT_SYSTEM_IDENTIFIER_VALUE:
530 parse->manifest_system_identifier = token;
531 json_manifest_finalize_system_identifier(parse);
532 parse->state = JM_EXPECT_TOPLEVEL_FIELD;
533 break;
535 case JM_EXPECT_THIS_FILE_VALUE:
536 switch (parse->file_field)
538 case JMFF_PATH:
539 parse->pathname = token;
540 break;
541 case JMFF_ENCODED_PATH:
542 parse->encoded_pathname = token;
543 break;
544 case JMFF_SIZE:
545 parse->size = token;
546 break;
547 case JMFF_LAST_MODIFIED:
548 pfree(token); /* unused */
549 break;
550 case JMFF_CHECKSUM_ALGORITHM:
551 parse->algorithm = token;
552 break;
553 case JMFF_CHECKSUM:
554 parse->checksum = token;
555 break;
557 parse->state = JM_EXPECT_THIS_FILE_FIELD;
558 break;
560 case JM_EXPECT_THIS_WAL_RANGE_VALUE:
561 switch (parse->wal_range_field)
563 case JMWRF_TIMELINE:
564 parse->timeline = token;
565 break;
566 case JMWRF_START_LSN:
567 parse->start_lsn = token;
568 break;
569 case JMWRF_END_LSN:
570 parse->end_lsn = token;
571 break;
573 parse->state = JM_EXPECT_THIS_WAL_RANGE_FIELD;
574 break;
576 case JM_EXPECT_MANIFEST_CHECKSUM_VALUE:
577 parse->state = JM_EXPECT_TOPLEVEL_END;
578 parse->manifest_checksum = token;
579 break;
581 default:
582 json_manifest_parse_failure(parse->context, "unexpected scalar");
583 break;
586 return JSON_SUCCESS;
590 * Do additional parsing and sanity-checking of the manifest version, and invoke
591 * the callback so that the caller can gets that detail and take actions
592 * accordingly. This happens for each manifest when the corresponding JSON
593 * object is completely parsed.
595 static void
596 json_manifest_finalize_version(JsonManifestParseState *parse)
598 JsonManifestParseContext *context = parse->context;
599 int version;
600 char *ep;
602 Assert(parse->saw_version_field);
604 /* Parse version. */
605 version = strtoi64(parse->manifest_version, &ep, 10);
606 if (*ep)
607 json_manifest_parse_failure(parse->context,
608 "manifest version not an integer");
610 if (version != 1 && version != 2)
611 json_manifest_parse_failure(parse->context,
612 "unexpected manifest version");
614 /* Invoke the callback for version */
615 context->version_cb(context, version);
619 * Do additional parsing and sanity-checking of the system identifier, and
620 * invoke the callback so that the caller can gets that detail and take actions
621 * accordingly.
623 static void
624 json_manifest_finalize_system_identifier(JsonManifestParseState *parse)
626 JsonManifestParseContext *context = parse->context;
627 uint64 system_identifier;
628 char *ep;
630 Assert(parse->manifest_system_identifier != NULL);
632 /* Parse system identifier. */
633 system_identifier = strtou64(parse->manifest_system_identifier, &ep, 10);
634 if (*ep)
635 json_manifest_parse_failure(parse->context,
636 "system identifier in manifest not an integer");
638 /* Invoke the callback for system identifier */
639 context->system_identifier_cb(context, system_identifier);
643 * Do additional parsing and sanity-checking of the details gathered for one
644 * file, and invoke the per-file callback so that the caller gets those
645 * details. This happens for each file when the corresponding JSON object is
646 * completely parsed.
648 static void
649 json_manifest_finalize_file(JsonManifestParseState *parse)
651 JsonManifestParseContext *context = parse->context;
652 uint64 size;
653 char *ep;
654 int checksum_string_length;
655 pg_checksum_type checksum_type;
656 int checksum_length;
657 uint8 *checksum_payload;
659 /* Pathname and size are required. */
660 if (parse->pathname == NULL && parse->encoded_pathname == NULL)
661 json_manifest_parse_failure(parse->context, "missing path name");
662 if (parse->pathname != NULL && parse->encoded_pathname != NULL)
663 json_manifest_parse_failure(parse->context,
664 "both path name and encoded path name");
665 if (parse->size == NULL)
666 json_manifest_parse_failure(parse->context, "missing size");
667 if (parse->algorithm == NULL && parse->checksum != NULL)
668 json_manifest_parse_failure(parse->context,
669 "checksum without algorithm");
671 /* Decode encoded pathname, if that's what we have. */
672 if (parse->encoded_pathname != NULL)
674 int encoded_length = strlen(parse->encoded_pathname);
675 int raw_length = encoded_length / 2;
677 parse->pathname = palloc(raw_length + 1);
678 if (encoded_length % 2 != 0 ||
679 !hexdecode_string((uint8 *) parse->pathname,
680 parse->encoded_pathname,
681 raw_length))
682 json_manifest_parse_failure(parse->context,
683 "could not decode file name");
684 parse->pathname[raw_length] = '\0';
685 pfree(parse->encoded_pathname);
686 parse->encoded_pathname = NULL;
689 /* Parse size. */
690 size = strtou64(parse->size, &ep, 10);
691 if (*ep)
692 json_manifest_parse_failure(parse->context,
693 "file size is not an integer");
695 /* Parse the checksum algorithm, if it's present. */
696 if (parse->algorithm == NULL)
697 checksum_type = CHECKSUM_TYPE_NONE;
698 else if (!pg_checksum_parse_type(parse->algorithm, &checksum_type))
699 context->error_cb(context, "unrecognized checksum algorithm: \"%s\"",
700 parse->algorithm);
702 /* Parse the checksum payload, if it's present. */
703 checksum_string_length = parse->checksum == NULL ? 0
704 : strlen(parse->checksum);
705 if (checksum_string_length == 0)
707 checksum_length = 0;
708 checksum_payload = NULL;
710 else
712 checksum_length = checksum_string_length / 2;
713 checksum_payload = palloc(checksum_length);
714 if (checksum_string_length % 2 != 0 ||
715 !hexdecode_string(checksum_payload, parse->checksum,
716 checksum_length))
717 context->error_cb(context,
718 "invalid checksum for file \"%s\": \"%s\"",
719 parse->pathname, parse->checksum);
722 /* Invoke the callback with the details we've gathered. */
723 context->per_file_cb(context, parse->pathname, size,
724 checksum_type, checksum_length, checksum_payload);
726 /* Free memory we no longer need. */
727 if (parse->size != NULL)
729 pfree(parse->size);
730 parse->size = NULL;
732 if (parse->algorithm != NULL)
734 pfree(parse->algorithm);
735 parse->algorithm = NULL;
737 if (parse->checksum != NULL)
739 pfree(parse->checksum);
740 parse->checksum = NULL;
745 * Do additional parsing and sanity-checking of the details gathered for one
746 * WAL range, and invoke the per-WAL-range callback so that the caller gets
747 * those details. This happens for each WAL range when the corresponding JSON
748 * object is completely parsed.
750 static void
751 json_manifest_finalize_wal_range(JsonManifestParseState *parse)
753 JsonManifestParseContext *context = parse->context;
754 TimeLineID tli;
755 XLogRecPtr start_lsn,
756 end_lsn;
757 char *ep;
759 /* Make sure all fields are present. */
760 if (parse->timeline == NULL)
761 json_manifest_parse_failure(parse->context, "missing timeline");
762 if (parse->start_lsn == NULL)
763 json_manifest_parse_failure(parse->context, "missing start LSN");
764 if (parse->end_lsn == NULL)
765 json_manifest_parse_failure(parse->context, "missing end LSN");
767 /* Parse timeline. */
768 tli = strtoul(parse->timeline, &ep, 10);
769 if (*ep)
770 json_manifest_parse_failure(parse->context,
771 "timeline is not an integer");
772 if (!parse_xlogrecptr(&start_lsn, parse->start_lsn))
773 json_manifest_parse_failure(parse->context,
774 "could not parse start LSN");
775 if (!parse_xlogrecptr(&end_lsn, parse->end_lsn))
776 json_manifest_parse_failure(parse->context,
777 "could not parse end LSN");
779 /* Invoke the callback with the details we've gathered. */
780 context->per_wal_range_cb(context, tli, start_lsn, end_lsn);
782 /* Free memory we no longer need. */
783 if (parse->timeline != NULL)
785 pfree(parse->timeline);
786 parse->timeline = NULL;
788 if (parse->start_lsn != NULL)
790 pfree(parse->start_lsn);
791 parse->start_lsn = NULL;
793 if (parse->end_lsn != NULL)
795 pfree(parse->end_lsn);
796 parse->end_lsn = NULL;
801 * Verify that the manifest checksum is correct.
803 * The last line of the manifest file is excluded from the manifest checksum,
804 * because the last line is expected to contain the checksum that covers
805 * the rest of the file.
807 * For an incremental parse, this will just be called on the last chunk of the
808 * manifest, and the cryptohash context passed in. For a non-incremental
809 * parse incr_ctx will be NULL.
811 static void
812 verify_manifest_checksum(JsonManifestParseState *parse, const char *buffer,
813 size_t size, pg_cryptohash_ctx *incr_ctx)
815 JsonManifestParseContext *context = parse->context;
816 size_t i;
817 size_t number_of_newlines = 0;
818 size_t ultimate_newline = 0;
819 size_t penultimate_newline = 0;
820 pg_cryptohash_ctx *manifest_ctx;
821 uint8 manifest_checksum_actual[PG_SHA256_DIGEST_LENGTH];
822 uint8 manifest_checksum_expected[PG_SHA256_DIGEST_LENGTH];
824 /* Find the last two newlines in the file. */
825 for (i = 0; i < size; ++i)
827 if (buffer[i] == '\n')
829 ++number_of_newlines;
830 penultimate_newline = ultimate_newline;
831 ultimate_newline = i;
836 * Make sure that the last newline is right at the end, and that there are
837 * at least two lines total. We need this to be true in order for the
838 * following code, which computes the manifest checksum, to work properly.
840 if (number_of_newlines < 2)
841 json_manifest_parse_failure(parse->context,
842 "expected at least 2 lines");
843 if (ultimate_newline != size - 1)
844 json_manifest_parse_failure(parse->context,
845 "last line not newline-terminated");
847 /* Checksum the rest. */
848 if (incr_ctx == NULL)
850 manifest_ctx = pg_cryptohash_create(PG_SHA256);
851 if (manifest_ctx == NULL)
852 context->error_cb(context, "out of memory");
853 if (pg_cryptohash_init(manifest_ctx) < 0)
854 context->error_cb(context, "could not initialize checksum of manifest");
856 else
858 manifest_ctx = incr_ctx;
860 if (pg_cryptohash_update(manifest_ctx, (const uint8 *) buffer, penultimate_newline + 1) < 0)
861 context->error_cb(context, "could not update checksum of manifest");
862 if (pg_cryptohash_final(manifest_ctx, manifest_checksum_actual,
863 sizeof(manifest_checksum_actual)) < 0)
864 context->error_cb(context, "could not finalize checksum of manifest");
866 /* Now verify it. */
867 if (parse->manifest_checksum == NULL)
868 context->error_cb(parse->context, "manifest has no checksum");
869 if (strlen(parse->manifest_checksum) != PG_SHA256_DIGEST_LENGTH * 2 ||
870 !hexdecode_string(manifest_checksum_expected, parse->manifest_checksum,
871 PG_SHA256_DIGEST_LENGTH))
872 context->error_cb(context, "invalid manifest checksum: \"%s\"",
873 parse->manifest_checksum);
874 if (memcmp(manifest_checksum_actual, manifest_checksum_expected,
875 PG_SHA256_DIGEST_LENGTH) != 0)
876 context->error_cb(context, "manifest checksum mismatch");
877 pg_cryptohash_free(manifest_ctx);
881 * Report a parse error.
883 * This is intended to be used for fairly low-level failures that probably
884 * shouldn't occur unless somebody has deliberately constructed a bad manifest,
885 * or unless the server is generating bad manifests due to some bug. msg should
886 * be a short string giving some hint as to what the problem is.
888 static void
889 json_manifest_parse_failure(JsonManifestParseContext *context, char *msg)
891 context->error_cb(context, "could not parse backup manifest: %s", msg);
/*
 * Convert a character which represents a hexadecimal digit to an integer.
 *
 * Both lower-case and upper-case digits are accepted.
 *
 * Returns -1 if the character is not a hexadecimal digit.
 */
static int
hexdecode_char(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}
913 * Decode a hex string into a byte string, 2 hex chars per byte.
915 * Returns false if invalid characters are encountered; otherwise true.
917 static bool
918 hexdecode_string(uint8 *result, char *input, int nbytes)
920 int i;
922 for (i = 0; i < nbytes; ++i)
924 int n1 = hexdecode_char(input[i * 2]);
925 int n2 = hexdecode_char(input[i * 2 + 1]);
927 if (n1 < 0 || n2 < 0)
928 return false;
929 result[i] = n1 * 16 + n2;
932 return true;
936 * Parse an XLogRecPtr expressed using the usual string format.
938 static bool
939 parse_xlogrecptr(XLogRecPtr *result, char *input)
941 uint32 hi;
942 uint32 lo;
944 if (sscanf(input, "%X/%X", &hi, &lo) != 2)
945 return false;
946 *result = ((uint64) hi) << 32 | lo;
947 return true;