1 /* reps-strings.c : interpreting representations with respect to strings
3 * ====================================================================
4 * Copyright (c) 2000-2007 CollabNet. All rights reserved.
6 * This software is licensed as described in the file COPYING, which
7 * you should have received as part of this distribution. The terms
8 * are also available at http://subversion.tigris.org/license-1.html.
9 * If newer versions of this license are posted there, you may use a
10 * newer version instead, at your option.
12 * This software consists of voluntary contributions made by many
13 * individuals. For exact contribution history, see the revision
14 * history and logs, available at http://subversion.tigris.org/.
15 * ====================================================================
25 #include "svn_pools.h"
31 #include "reps-strings.h"
33 #include "bdb/reps-table.h"
34 #include "bdb/strings-table.h"
36 #include "../libsvn_fs/fs-loader.h"
37 #include "svn_private_config.h"
40 /*** Helper Functions ***/
43 /* Return non-zero iff REP is mutable under transaction TXN_ID. */
44 static svn_boolean_t
rep_is_mutable(representation_t
*rep
,
47 if ((! rep
->txn_id
) || (strcmp(rep
->txn_id
, txn_id
) != 0))
52 /* Helper macro that evaluates to an error message indicating that
53 the representation referred to by X has an unknown node kind. */
54 #define UNKNOWN_NODE_KIND(x) \
56 (SVN_ERR_FS_CORRUPT, NULL, \
57 _("Unknown node kind for representation '%s'"), x)
59 /* Return a `fulltext' representation, allocated in POOL, which
60 * references the string STR_KEY.
62 * If TXN_ID is non-NULL and non-empty, make the representation mutable
65 * If STR_KEY is non-null, copy it into an allocation from POOL.
67 * If CHECKSUM is non-null, use it as the checksum for the new rep;
68 * else initialize the rep with an all-zero (i.e., always successful) checksum.
71 static representation_t
*
72 make_fulltext_rep(const char *str_key
,
74 const unsigned char *checksum
,
78 representation_t
*rep
= apr_pcalloc(pool
, sizeof(*rep
));
79 if (txn_id
&& *txn_id
)
80 rep
->txn_id
= apr_pstrdup(pool
, txn_id
);
81 rep
->kind
= rep_kind_fulltext
;
84 memcpy(rep
->checksum
, checksum
, APR_MD5_DIGESTSIZE
);
86 memset(rep
->checksum
, 0, APR_MD5_DIGESTSIZE
);
88 rep
->contents
.fulltext
.string_key
89 = str_key
? apr_pstrdup(pool
, str_key
) : NULL
;
94 /* Set *KEYS to an array of string keys gleaned from `delta'
95 representation REP. Allocate *KEYS in POOL. */
97 delta_string_keys(apr_array_header_t
**keys
,
98 const representation_t
*rep
,
103 apr_array_header_t
*chunks
;
105 if (rep
->kind
!= rep_kind_delta
)
106 return svn_error_create
107 (SVN_ERR_FS_GENERAL
, NULL
,
108 _("Representation is not of type 'delta'"));
110 /* Set up a convenience variable. */
111 chunks
= rep
->contents
.delta
.chunks
;
113 /* Initialize *KEYS to an empty array. */
114 *keys
= apr_array_make(pool
, chunks
->nelts
, sizeof(key
));
118 /* Now, push the string keys for each window into *KEYS */
119 for (i
= 0; i
< chunks
->nelts
; i
++)
121 rep_delta_chunk_t
*chunk
= APR_ARRAY_IDX(chunks
, i
, rep_delta_chunk_t
*);
123 key
= apr_pstrdup(pool
, chunk
->string_key
);
124 APR_ARRAY_PUSH(*keys
, const char *) = key
;
131 /* Delete the strings associated with array KEYS in FS as part of TRAIL. */
133 delete_strings(apr_array_header_t
*keys
,
140 apr_pool_t
*subpool
= svn_pool_create(pool
);
142 for (i
= 0; i
< keys
->nelts
; i
++)
144 svn_pool_clear(subpool
);
145 str_key
= APR_ARRAY_IDX(keys
, i
, const char *);
146 SVN_ERR(svn_fs_bdb__string_delete(fs
, str_key
, trail
, subpool
));
148 svn_pool_destroy(subpool
);
154 /*** Reading the contents from a representation. ***/
156 struct compose_handler_baton
158 /* The combined window, and the pool it's allocated from. */
159 svn_txdelta_window_t
*window
;
160 apr_pool_t
*window_pool
;
162 /* If the incoming window was self-compressed, and the combined WINDOW
163 exists from previous iterations, SOURCE_BUF will point to the
164 expanded self-compressed window. */
167 /* The trail for this operation. WINDOW_POOL will be a child of
168 TRAIL->pool. No allocations will be made from TRAIL->pool itself. */
171 /* TRUE when no more windows have to be read/combined. */
174 /* TRUE if we've just started reading a new window. We need this
175 because the svndiff handler will push a NULL window at the end of
176 the stream, and we have to ignore that; but we must also know
177 when it's appropriate to push a NULL window at the combiner. */
182 /* Handle one window. If BATON is empty, copy the WINDOW into it;
183 otherwise, combine WINDOW with the one in BATON, unless WINDOW
184 is self-compressed (i.e., does not copy from the source view),
185 in which case expand. */
188 compose_handler(svn_txdelta_window_t
*window
, void *baton
)
190 struct compose_handler_baton
*cb
= baton
;
191 assert(!cb
->done
|| window
== NULL
);
192 assert(cb
->trail
&& cb
->trail
->pool
);
194 if (!cb
->init
&& !window
)
197 /* We should never get here if we've already expanded a
198 self-compressed window. */
199 assert(!cb
->source_buf
);
203 if (window
&& (window
->sview_len
== 0 || window
->src_ops
== 0))
205 /* This is a self-compressed window. Don't combine it with
206 the others, because the combiner may go quadratic. Instead,
207 expand it here and signal that the combination has
209 apr_size_t source_len
= window
->tview_len
;
210 assert(cb
->window
->sview_len
== source_len
);
211 cb
->source_buf
= apr_palloc(cb
->window_pool
, source_len
);
212 svn_txdelta_apply_instructions(window
, NULL
,
213 cb
->source_buf
, &source_len
);
218 /* Combine the incoming window with whatever's in the baton. */
219 apr_pool_t
*composite_pool
= svn_pool_create(cb
->trail
->pool
);
220 svn_txdelta_window_t
*composite
;
222 composite
= svn_txdelta_compose_windows(window
, cb
->window
,
224 svn_pool_destroy(cb
->window_pool
);
225 cb
->window
= composite
;
226 cb
->window_pool
= composite_pool
;
227 cb
->done
= (composite
->sview_len
== 0 || composite
->src_ops
== 0);
232 /* Copy the (first) window into the baton. */
233 apr_pool_t
*window_pool
= svn_pool_create(cb
->trail
->pool
);
234 assert(cb
->window_pool
== NULL
);
235 cb
->window
= svn_txdelta_window_dup(window
, window_pool
);
236 cb
->window_pool
= window_pool
;
237 cb
->done
= (window
->sview_len
== 0 || window
->src_ops
== 0);
248 /* Read one delta window from REP[CUR_CHUNK] and push it at the
249 composition handler. */
252 get_one_window(struct compose_handler_baton
*cb
,
254 representation_t
*rep
,
257 svn_stream_t
*wstream
;
258 char diffdata
[4096]; /* hunk of svndiff data */
259 svn_filesize_t off
; /* offset into svndiff data */
260 apr_size_t amt
; /* how much svndiff data to/was read */
263 apr_array_header_t
*chunks
= rep
->contents
.delta
.chunks
;
264 rep_delta_chunk_t
*this_chunk
, *first_chunk
;
267 if (chunks
->nelts
<= cur_chunk
)
268 return compose_handler(NULL
, cb
);
270 /* Set up a window handling stream for the svndiff data. */
271 wstream
= svn_txdelta_parse_svndiff(compose_handler
, cb
, TRUE
,
274 /* First things first: send the "SVN"{version} header through the
275 stream. ### For now, we will just use the version specified
276 in the first chunk, and then verify that no chunks have a
277 different version number than the one used. In the future,
278 we might simply convert chunks that use a different version
279 of the diff format -- or, heck, a different format
280 altogether -- to the format/version of the first chunk. */
281 first_chunk
= APR_ARRAY_IDX(chunks
, 0, rep_delta_chunk_t
*);
285 diffdata
[3] = (char) (first_chunk
->version
);
287 SVN_ERR(svn_stream_write(wstream
, diffdata
, &amt
));
288 /* FIXME: The stream write handler is borked; assert (amt == 4); */
290 /* Get this string key which holds this window's data.
291 ### todo: make sure this is an `svndiff' DIFF skel here. */
292 this_chunk
= APR_ARRAY_IDX(chunks
, cur_chunk
, rep_delta_chunk_t
*);
293 str_key
= this_chunk
->string_key
;
295 /* Run through the svndiff data, at least as far as necessary. */
299 amt
= sizeof(diffdata
);
300 SVN_ERR(svn_fs_bdb__string_read(fs
, str_key
, diffdata
,
301 off
, &amt
, cb
->trail
,
304 SVN_ERR(svn_stream_write(wstream
, diffdata
, &amt
));
307 SVN_ERR(svn_stream_close(wstream
));
310 assert(cb
->window
!= NULL
);
311 assert(cb
->window_pool
!= NULL
);
316 /* Undeltify a range of data. DELTAS is the set of delta windows to
317 combine, FULLTEXT is the source text, CUR_CHUNK is the index of the
318 delta chunk we're starting from. OFFSET is the relative offset of
319 the requested data within the chunk; BUF and LEN are what we're
323 rep_undeltify_range(svn_fs_t
*fs
,
324 apr_array_header_t
*deltas
,
325 representation_t
*fulltext
,
333 apr_size_t len_read
= 0;
337 struct compose_handler_baton cb
= { 0 };
338 char *source_buf
, *target_buf
;
339 apr_size_t target_len
;
344 for (cur_rep
= 0; !cb
.done
&& cur_rep
< deltas
->nelts
; ++cur_rep
)
346 representation_t
*const rep
=
347 APR_ARRAY_IDX(deltas
, cur_rep
, representation_t
*);
348 SVN_ERR(get_one_window(&cb
, fs
, rep
, cur_chunk
));
352 /* That's it, no more source data is available. */
355 /* The source view length should not be 0 if there are source
356 copy ops in the window. */
357 assert(cb
.window
->sview_len
> 0 || cb
.window
->src_ops
== 0);
359 /* cb.window is the combined delta window. Read the source text
363 /* The combiner already created the source text from a
364 self-compressed window. */
365 source_buf
= cb
.source_buf
;
367 else if (fulltext
&& cb
.window
->sview_len
> 0 && cb
.window
->src_ops
> 0)
369 apr_size_t source_len
= cb
.window
->sview_len
;
370 source_buf
= apr_palloc(cb
.window_pool
, source_len
);
371 SVN_ERR(svn_fs_bdb__string_read
372 (fs
, fulltext
->contents
.fulltext
.string_key
,
373 source_buf
, cb
.window
->sview_offset
, &source_len
,
375 if (source_len
!= cb
.window
->sview_len
)
376 return svn_error_create
377 (SVN_ERR_FS_CORRUPT
, NULL
,
378 _("Svndiff source length inconsistency"));
382 source_buf
= NULL
; /* Won't read anything from here. */
387 target_len
= *len
- len_read
+ offset
;
388 target_buf
= apr_palloc(cb
.window_pool
, target_len
);
392 target_len
= *len
- len_read
;
396 svn_txdelta_apply_instructions(cb
.window
, source_buf
,
397 target_buf
, &target_len
);
400 assert(target_len
> offset
);
401 target_len
-= offset
;
402 memcpy(buf
, target_buf
+ offset
, target_len
);
403 offset
= 0; /* Read from the beginning of the next chunk. */
405 /* Don't need this window any more. */
406 svn_pool_destroy(cb
.window_pool
);
408 len_read
+= target_len
;
412 while (len_read
< *len
);
420 /* Calculate the index of the chunk in REP that contains REP_OFFSET,
421 and find the relative CHUNK_OFFSET within the chunk.
422 Return -1 if offset is beyond the end of the represented data.
423 ### The basic assumption is that all delta windows are the same size
424 and aligned at the same offset, so this number is the same in all
425 dependent deltas. Oh, and the chunks in REP must be ordered. */
428 get_chunk_offset(representation_t
*rep
,
429 svn_filesize_t rep_offset
,
430 apr_size_t
*chunk_offset
)
432 const apr_array_header_t
*chunks
= rep
->contents
.delta
.chunks
;
434 assert(chunks
->nelts
);
436 /* ### Yes, this is a linear search. I'll change this to bisection
437 the very second we notice it's slowing us down. */
438 for (cur_chunk
= 0; cur_chunk
< chunks
->nelts
; ++cur_chunk
)
440 const rep_delta_chunk_t
*const this_chunk
441 = APR_ARRAY_IDX(chunks
, cur_chunk
, rep_delta_chunk_t
*);
443 if ((this_chunk
->offset
+ this_chunk
->size
) > rep_offset
)
445 assert(this_chunk
->offset
<= rep_offset
);
446 assert(rep_offset
- this_chunk
->offset
< SVN_MAX_OBJECT_SIZE
);
447 *chunk_offset
= (apr_size_t
) (rep_offset
- this_chunk
->offset
);
455 /* Copy into BUF *LEN bytes starting at OFFSET from the string
456 represented via REP_KEY in FS, as part of TRAIL.
457 The number of bytes actually copied is stored in *LEN. */
459 rep_read_range(svn_fs_t
*fs
,
461 svn_filesize_t offset
,
467 representation_t
*rep
;
468 apr_size_t chunk_offset
;
470 /* Read in our REP. */
471 SVN_ERR(svn_fs_bdb__read_rep(&rep
, fs
, rep_key
, trail
, pool
));
472 if (rep
->kind
== rep_kind_fulltext
)
474 SVN_ERR(svn_fs_bdb__string_read(fs
, rep
->contents
.fulltext
.string_key
,
475 buf
, offset
, len
, trail
, pool
));
477 else if (rep
->kind
== rep_kind_delta
)
479 const int cur_chunk
= get_chunk_offset(rep
, offset
, &chunk_offset
);
485 /* Preserve for potential use in error message. */
486 const char *first_rep_key
= rep_key
;
487 /* Make a list of all the rep's we need to undeltify this range.
488 We'll have to read them within this trail anyway, so we might
489 as well do it once and up front. */
490 apr_array_header_t
*reps
= /* ### what constant here? */
491 apr_array_make(pool
, 666, sizeof(rep
));
494 const rep_delta_chunk_t
*const first_chunk
495 = APR_ARRAY_IDX(rep
->contents
.delta
.chunks
,
496 0, rep_delta_chunk_t
*);
497 const rep_delta_chunk_t
*const chunk
498 = APR_ARRAY_IDX(rep
->contents
.delta
.chunks
,
499 cur_chunk
, rep_delta_chunk_t
*);
501 /* Verify that this chunk is of the same version as the first. */
502 if (first_chunk
->version
!= chunk
->version
)
503 return svn_error_createf
504 (SVN_ERR_FS_CORRUPT
, NULL
,
505 _("Diff version inconsistencies in representation '%s'"),
508 rep_key
= chunk
->rep_key
;
509 APR_ARRAY_PUSH(reps
, representation_t
*) = rep
;
510 SVN_ERR(svn_fs_bdb__read_rep(&rep
, fs
, rep_key
,
513 while (rep
->kind
== rep_kind_delta
514 && rep
->contents
.delta
.chunks
->nelts
> cur_chunk
);
516 /* Right. We've either just read the fulltext rep, or a rep that's
517 too short, in which case we'll undeltify without source data.*/
518 if (rep
->kind
!= rep_kind_delta
&& rep
->kind
!= rep_kind_fulltext
)
519 return UNKNOWN_NODE_KIND(rep_key
);
521 if (rep
->kind
== rep_kind_delta
)
522 rep
= NULL
; /* Don't use source data */
524 err
= rep_undeltify_range(fs
, reps
, rep
, cur_chunk
, buf
,
525 chunk_offset
, len
, trail
, pool
);
528 if (err
->apr_err
== SVN_ERR_FS_CORRUPT
)
529 return svn_error_createf
530 (SVN_ERR_FS_CORRUPT
, err
,
531 _("Corruption detected whilst reading delta chain from "
532 "representation '%s' to '%s'"), first_rep_key
, rep_key
);
538 else /* unknown kind */
539 return UNKNOWN_NODE_KIND(rep_key
);
546 svn_fs_base__get_mutable_rep(const char **new_rep_key
,
553 representation_t
*rep
= NULL
;
554 const char *new_str
= NULL
;
556 /* We were passed an existing REP_KEY, so examine it. If it is
557 mutable already, then just return REP_KEY as the mutable result
559 if (rep_key
&& (rep_key
[0] != '\0'))
561 SVN_ERR(svn_fs_bdb__read_rep(&rep
, fs
, rep_key
, trail
, pool
));
562 if (rep_is_mutable(rep
, txn_id
))
564 *new_rep_key
= rep_key
;
569 /* Either we weren't provided a base key to examine, or the base key
570 we were provided was not mutable. So, let's make a new
571 representation and return its key to the caller. */
572 SVN_ERR(svn_fs_bdb__string_append(fs
, &new_str
, 0, NULL
, trail
, pool
));
573 rep
= make_fulltext_rep(new_str
, txn_id
,
574 svn_md5_empty_string_digest(), pool
);
575 SVN_ERR(svn_fs_bdb__write_new_rep(new_rep_key
, fs
, rep
, trail
, pool
));
582 svn_fs_base__delete_rep_if_mutable(svn_fs_t
*fs
,
588 representation_t
*rep
;
590 SVN_ERR(svn_fs_bdb__read_rep(&rep
, fs
, rep_key
, trail
, pool
));
591 if (! rep_is_mutable(rep
, txn_id
))
594 if (rep
->kind
== rep_kind_fulltext
)
596 SVN_ERR(svn_fs_bdb__string_delete(fs
,
597 rep
->contents
.fulltext
.string_key
,
600 else if (rep
->kind
== rep_kind_delta
)
602 apr_array_header_t
*keys
;
603 SVN_ERR(delta_string_keys(&keys
, rep
, pool
));
604 SVN_ERR(delete_strings(keys
, fs
, trail
, pool
));
606 else /* unknown kind */
607 return UNKNOWN_NODE_KIND(rep_key
);
609 SVN_ERR(svn_fs_bdb__delete_rep(fs
, rep_key
, trail
, pool
));
615 /*** Reading and writing data via representations. ***/
619 struct rep_read_baton
621 /* The FS from which we're reading. */
624 /* The representation skel whose contents we want to read. If this
625 is NULL, the rep has never had any contents, so all reads fetch 0
628 Formerly, we cached the entire rep skel here, not just the key.
629 That way we didn't have to fetch the rep from the db every time
630 we want to read a little bit more of the file. Unfortunately,
631 this has a problem: if, say, a file's representation changes
632 while we're reading (changes from fulltext to delta, for
633 example), we'll never know it. So for correctness, we now
634 refetch the representation skel every time we want to read
638 /* How many bytes have been read already. */
639 svn_filesize_t offset
;
641 /* If present, the read will be done as part of this trail, and the
642 trail's pool will be used. Otherwise, see `pool' below. */
645 /* MD5 checksum. Initialized when the baton is created, updated as
646 we read data, and finalized when the stream is closed. */
647 struct apr_md5_ctx_t md5_context
;
649 /* The length of the rep's contents (as fulltext, that is,
650 independent of how the rep actually stores the data.) This is
651 retrieved when the baton is created, and used to determine when
652 we have read the last byte, at which point we compare checksums.
654 Getting this at baton creation time makes interleaved reads and
655 writes on the same rep in the same trail impossible. But we're
656 not doing that, and probably no one ever should. And anyway if
657 they do, they should see problems immediately. */
660 /* Set to FALSE when the baton is created, TRUE when the md5_context
662 svn_boolean_t checksum_finalized
;
664 /* Used for temporary allocations, iff `trail' (above) is null. */
671 rep_read_get_baton(struct rep_read_baton
**rb_p
,
674 svn_boolean_t use_trail_for_reads
,
678 struct rep_read_baton
*b
;
680 b
= apr_pcalloc(pool
, sizeof(*b
));
681 apr_md5_init(&(b
->md5_context
));
684 SVN_ERR(svn_fs_base__rep_contents_size(&(b
->size
), fs
, rep_key
,
689 b
->checksum_finalized
= FALSE
;
691 b
->trail
= use_trail_for_reads
? trail
: NULL
;
693 b
->rep_key
= rep_key
;
703 /*** Retrieving data. ***/
706 svn_fs_base__rep_contents_size(svn_filesize_t
*size_p
,
712 representation_t
*rep
;
714 SVN_ERR(svn_fs_bdb__read_rep(&rep
, fs
, rep_key
, trail
, pool
));
716 if (rep
->kind
== rep_kind_fulltext
)
718 /* Get the size by asking Berkeley for the string's length. */
719 SVN_ERR(svn_fs_bdb__string_size(size_p
, fs
,
720 rep
->contents
.fulltext
.string_key
,
723 else if (rep
->kind
== rep_kind_delta
)
725 /* Get the size by finding the last window pkg in the delta and
726 adding its offset to its size. This way, we won't even be
727 messed up by overlapping windows, as long as the window pkgs
728 are still ordered. */
729 apr_array_header_t
*chunks
= rep
->contents
.delta
.chunks
;
730 rep_delta_chunk_t
*last_chunk
;
732 assert(chunks
->nelts
);
734 last_chunk
= APR_ARRAY_IDX(chunks
, chunks
->nelts
- 1,
735 rep_delta_chunk_t
*);
736 *size_p
= last_chunk
->offset
+ last_chunk
->size
;
738 else /* unknown kind */
739 return UNKNOWN_NODE_KIND(rep_key
);
746 svn_fs_base__rep_contents_checksum(unsigned char digest
[],
752 representation_t
*rep
;
754 SVN_ERR(svn_fs_bdb__read_rep(&rep
, fs
, rep_key
, trail
, pool
));
755 memcpy(digest
, rep
->checksum
, APR_MD5_DIGESTSIZE
);
762 svn_fs_base__rep_contents(svn_string_t
*str
,
768 svn_filesize_t contents_size
;
772 SVN_ERR(svn_fs_base__rep_contents_size(&contents_size
, fs
, rep_key
,
775 /* What if the contents are larger than we can handle? */
776 if (contents_size
> SVN_MAX_OBJECT_SIZE
)
777 return svn_error_createf
778 (SVN_ERR_FS_GENERAL
, NULL
,
779 _("Rep contents are too large: "
780 "got %s, limit is %s"),
781 apr_psprintf(pool
, "%" SVN_FILESIZE_T_FMT
, contents_size
),
782 apr_psprintf(pool
, "%" APR_SIZE_T_FMT
, SVN_MAX_OBJECT_SIZE
));
784 str
->len
= (apr_size_t
) contents_size
;
786 data
= apr_palloc(pool
, str
->len
);
789 SVN_ERR(rep_read_range(fs
, rep_key
, 0, data
, &len
, trail
, pool
));
793 return svn_error_createf
794 (SVN_ERR_FS_CORRUPT
, NULL
,
795 _("Failure reading rep '%s'"), rep_key
);
797 /* Just the standard paranoia. */
799 representation_t
*rep
;
800 apr_md5_ctx_t md5_context
;
801 unsigned char checksum
[APR_MD5_DIGESTSIZE
];
803 apr_md5_init(&md5_context
);
804 apr_md5_update(&md5_context
, str
->data
, str
->len
);
805 apr_md5_final(checksum
, &md5_context
);
807 SVN_ERR(svn_fs_bdb__read_rep(&rep
, fs
, rep_key
, trail
, pool
));
808 if (! svn_md5_digests_match(checksum
, rep
->checksum
))
809 return svn_error_createf
810 (SVN_ERR_FS_CORRUPT
, NULL
,
811 _("Checksum mismatch on rep '%s':\n"
813 " actual: %s\n"), rep_key
,
814 svn_md5_digest_to_cstring_display(rep
->checksum
, pool
),
815 svn_md5_digest_to_cstring_display(checksum
, pool
));
824 struct rep_read_baton
*rb
; /* The data source. */
825 char *buf
; /* Where to put what we read. */
826 apr_size_t
*len
; /* How much to read / was read. */
830 /* BATON is of type `read_rep_args':
832 Read into BATON->buf the *(BATON->len) bytes starting at
833 BATON->rb->offset from the data represented at BATON->rb->rep_key
834 in BATON->rb->fs, as part of TRAIL.
836 Afterwards, *(BATON->len) is the number of bytes actually read, and
837 BATON->rb->offset is incremented by that amount.
839 If BATON->rb->rep_key is null, this is assumed to mean the file's
840 contents have no representation, i.e., the file has no contents.
841 In that case, if BATON->rb->offset > 0, return the error
842 SVN_ERR_FS_REP_CHANGED, else just set *(BATON->len) to
845 txn_body_read_rep(void *baton
, trail_t
*trail
)
847 struct read_rep_args
*args
= baton
;
849 if (args
->rb
->rep_key
)
851 SVN_ERR(rep_read_range(args
->rb
->fs
,
859 args
->rb
->offset
+= *(args
->len
);
861 /* We calculate the checksum just once, the moment we see the
862 * last byte of data. But we can't assume there was a short
863 * read. The caller may have known the length of the data and
864 * requested exactly that amount, so there would never be a
865 * short read. (That's why the read baton has to know the
866 * length of the data in advance.)
868 * On the other hand, some callers invoke the stream reader in a
869 * loop whose termination condition is that the read returned
870 * zero bytes of data -- which usually results in the read
871 * function being called one more time *after* the call that got
872 * a short read (indicating end-of-stream).
874 * The conditions below ensure that we compare checksums even
875 * when there is no short read associated with the last byte of
876 * data, while also ensuring that it's harmless to repeatedly
877 * read 0 bytes from the stream.
879 if (! args
->rb
->checksum_finalized
)
881 apr_md5_update(&(args
->rb
->md5_context
), args
->buf
, *(args
->len
));
883 if (args
->rb
->offset
== args
->rb
->size
)
885 representation_t
*rep
;
886 unsigned char checksum
[APR_MD5_DIGESTSIZE
];
888 apr_md5_final(checksum
, &(args
->rb
->md5_context
));
889 args
->rb
->checksum_finalized
= TRUE
;
891 SVN_ERR(svn_fs_bdb__read_rep(&rep
, args
->rb
->fs
,
893 trail
, trail
->pool
));
894 if (! svn_md5_digests_match(checksum
, rep
->checksum
))
895 return svn_error_createf
896 (SVN_ERR_FS_CORRUPT
, NULL
,
897 _("Checksum mismatch on rep '%s':\n"
899 " actual: %s\n"), args
->rb
->rep_key
,
900 svn_md5_digest_to_cstring_display(rep
->checksum
,
902 svn_md5_digest_to_cstring_display(checksum
, trail
->pool
));
906 else if (args
->rb
->offset
> 0)
910 (SVN_ERR_FS_REP_CHANGED
, NULL
,
911 _("Null rep, but offset past zero already"));
921 rep_read_contents(void *baton
, char *buf
, apr_size_t
*len
)
923 struct rep_read_baton
*rb
= baton
;
924 struct read_rep_args args
;
930 /* If we got a trail, use it; else make one. */
932 SVN_ERR(txn_body_read_rep(&args
, rb
->trail
));
935 /* Hey, guess what? trails don't clear their own subpools. In
936 the case of reading from the db, any returned data should
937 live in our pre-allocated buffer, so the whole operation can
938 happen within a single malloc/free cycle. This prevents us
939 from creating millions of unnecessary trail subpools when
940 reading a big file. */
941 apr_pool_t
*subpool
= svn_pool_create(rb
->pool
);
942 SVN_ERR(svn_fs_base__retry_txn(rb
->fs
,
946 svn_pool_destroy(subpool
);
955 struct rep_write_baton
957 /* The FS in which we're writing. */
960 /* The representation skel whose contents we want to write. */
963 /* The transaction id under which this write action will take
967 /* If present, do the write as part of this trail, and use trail's
968 pool. Otherwise, see `pool' below. */
971 /* MD5 checksum. Initialized when the baton is created, updated as
972 we write data, and finalized and stored when the stream is
974 struct apr_md5_ctx_t md5_context
;
975 unsigned char md5_digest
[APR_MD5_DIGESTSIZE
];
976 svn_boolean_t finalized
;
978 /* Used for temporary allocations, iff `trail' (above) is null. */
984 static struct rep_write_baton
*
985 rep_write_get_baton(svn_fs_t
*fs
,
991 struct rep_write_baton
*b
;
993 b
= apr_pcalloc(pool
, sizeof(*b
));
994 apr_md5_init(&(b
->md5_context
));
998 b
->rep_key
= rep_key
;
1005 /* Write LEN bytes from BUF into the end of the string represented via
1006 REP_KEY in FS, as part of TRAIL. If the representation is not
1007 mutable, return the error SVN_ERR_FS_REP_NOT_MUTABLE. */
1008 static svn_error_t
*
1009 rep_write(svn_fs_t
*fs
,
1010 const char *rep_key
,
1017 representation_t
*rep
;
1019 SVN_ERR(svn_fs_bdb__read_rep(&rep
, fs
, rep_key
, trail
, pool
));
1021 if (! rep_is_mutable(rep
, txn_id
))
1022 return svn_error_createf
1023 (SVN_ERR_FS_REP_NOT_MUTABLE
, NULL
,
1024 _("Rep '%s' is not mutable"), rep_key
);
1026 if (rep
->kind
== rep_kind_fulltext
)
1028 SVN_ERR(svn_fs_bdb__string_append
1029 (fs
, &(rep
->contents
.fulltext
.string_key
), len
, buf
,
1032 else if (rep
->kind
== rep_kind_delta
)
1034 /* There should never be a case when we have a mutable
1035 non-fulltext rep. The only code that creates mutable reps is
1036 in this file, and it creates them fulltext. */
1037 return svn_error_createf
1038 (SVN_ERR_FS_CORRUPT
, NULL
,
1039 _("Rep '%s' both mutable and non-fulltext"), rep_key
);
1041 else /* unknown kind */
1042 return UNKNOWN_NODE_KIND(rep_key
);
1044 return SVN_NO_ERROR
;
1048 struct write_rep_args
1050 struct rep_write_baton
*wb
; /* Destination. */
1051 const char *buf
; /* Data. */
1052 apr_size_t len
; /* How much to write. */
1056 /* BATON is of type `write_rep_args':
1057 Append onto BATON->wb->rep_key's contents BATON->len bytes of
1058 data from BATON->buf, in BATON->wb->fs, as part of TRAIL.
1060 If the representation is not mutable, return the error
1061 SVN_ERR_FS_REP_NOT_MUTABLE. */
1062 static svn_error_t
*
1063 txn_body_write_rep(void *baton
, trail_t
*trail
)
1065 struct write_rep_args
*args
= baton
;
1067 SVN_ERR(rep_write(args
->wb
->fs
,
1075 apr_md5_update(&(args
->wb
->md5_context
), args
->buf
, args
->len
);
1077 return SVN_NO_ERROR
;
1081 static svn_error_t
*
1082 rep_write_contents(void *baton
,
1086 struct rep_write_baton
*wb
= baton
;
1087 struct write_rep_args args
;
1089 /* We toss LEN's indirectness because if not all the bytes are
1090 written, it's an error, so we wouldn't be reporting anything back
1091 through *LEN anyway. */
1096 /* If we got a trail, use it; else make one. */
1098 SVN_ERR(txn_body_write_rep(&args
, wb
->trail
));
1101 /* Hey, guess what? trails don't clear their own subpools. In
1102 the case of simply writing the rep to the db, we're *certain*
1103 that there's no data coming back to us that needs to be
1104 preserved... so the whole operation can happen within a
1105 single malloc/free cycle. This prevents us from creating
1106 millions of unnecessary trail subpools when writing a big
1108 apr_pool_t
*subpool
= svn_pool_create(wb
->pool
);
1109 SVN_ERR(svn_fs_base__retry_txn(wb
->fs
,
1113 svn_pool_destroy(subpool
);
1116 return SVN_NO_ERROR
;
1120 /* Helper for rep_write_close_contents(); see that doc string for
1121 more. BATON is of type `struct rep_write_baton'. */
1122 static svn_error_t
*
1123 txn_body_write_close_rep(void *baton
, trail_t
*trail
)
1125 struct rep_write_baton
*wb
= baton
;
1126 representation_t
*rep
;
1128 SVN_ERR(svn_fs_bdb__read_rep(&rep
, wb
->fs
, wb
->rep_key
,
1129 trail
, trail
->pool
));
1130 memcpy(rep
->checksum
, wb
->md5_digest
, APR_MD5_DIGESTSIZE
);
1131 SVN_ERR(svn_fs_bdb__write_rep(wb
->fs
, wb
->rep_key
, rep
,
1132 trail
, trail
->pool
));
1134 return SVN_NO_ERROR
;
1138 /* BATON is of type `struct rep_write_baton'.
1140 * Finalize BATON->md5_context and store the resulting digest under
1143 static svn_error_t
*
1144 rep_write_close_contents(void *baton
)
1146 struct rep_write_baton
*wb
= baton
;
1148 /* ### Thought: if we fixed apr-util MD5 contexts to allow repeated
1149 digestification, then we wouldn't need a stream close function at
1150 all -- instead, we could update the stored checksum each time a
1151 write occurred, which would have the added advantage of making
1152 interleaving reads and writes work. Currently, they'd fail with
1153 a checksum mismatch, it just happens that our code never tries to
1156 if (! wb
->finalized
)
1158 apr_md5_final(wb
->md5_digest
, &wb
->md5_context
);
1159 wb
->finalized
= TRUE
;
1162 /* If we got a trail, use it; else make one. */
1165 SVN_ERR(txn_body_write_close_rep(wb
, wb
->trail
));
1169 SVN_ERR(svn_fs_base__retry_txn(wb
->fs
,
1170 txn_body_write_close_rep
,
1175 return SVN_NO_ERROR
;
1179 /** Public read and write stream constructors. **/
1182 svn_fs_base__rep_contents_read_stream(svn_stream_t
**rs_p
,
1184 const char *rep_key
,
1185 svn_boolean_t use_trail_for_reads
,
1189 struct rep_read_baton
*rb
;
1191 SVN_ERR(rep_read_get_baton(&rb
, fs
, rep_key
, use_trail_for_reads
,
1193 *rs_p
= svn_stream_create(rb
, pool
);
1194 svn_stream_set_read(*rs_p
, rep_read_contents
);
1196 return SVN_NO_ERROR
;
1200 /* Clear the contents of REP_KEY, so that it represents the empty
1201 string, as part of TRAIL. TXN_ID is the id of the Subversion
1202 transaction under which this occurs. If REP_KEY is not mutable,
1203 return the error SVN_ERR_FS_REP_NOT_MUTABLE. */
1204 static svn_error_t
*
1205 rep_contents_clear(svn_fs_t
*fs
,
1206 const char *rep_key
,
1211 representation_t
*rep
;
1212 const char *str_key
;
1214 SVN_ERR(svn_fs_bdb__read_rep(&rep
, fs
, rep_key
, trail
, pool
));
1216 /* Make sure it's mutable. */
1217 if (! rep_is_mutable(rep
, txn_id
))
1218 return svn_error_createf
1219 (SVN_ERR_FS_REP_NOT_MUTABLE
, NULL
,
1220 _("Rep '%s' is not mutable"), rep_key
);
1222 assert(rep
->kind
== rep_kind_fulltext
);
1224 /* If rep has no string, just return success. Else, clear the
1225 underlying string. */
1226 str_key
= rep
->contents
.fulltext
.string_key
;
1227 if (str_key
&& *str_key
)
1229 SVN_ERR(svn_fs_bdb__string_clear(fs
, str_key
, trail
, pool
));
1230 memcpy(rep
->checksum
, svn_md5_empty_string_digest(),
1231 APR_MD5_DIGESTSIZE
);
1232 SVN_ERR(svn_fs_bdb__write_rep(fs
, rep_key
, rep
, trail
, pool
));
1234 return SVN_NO_ERROR
;
1239 svn_fs_base__rep_contents_write_stream(svn_stream_t
**ws_p
,
1241 const char *rep_key
,
1243 svn_boolean_t use_trail_for_writes
,
1247 struct rep_write_baton
*wb
;
1249 /* Clear the current rep contents (free mutability check!). */
1250 SVN_ERR(rep_contents_clear(fs
, rep_key
, txn_id
, trail
, pool
));
1252 /* Now, generate the write baton and stream. */
1253 wb
= rep_write_get_baton(fs
, rep_key
, txn_id
,
1254 use_trail_for_writes
? trail
: NULL
, pool
);
1255 *ws_p
= svn_stream_create(wb
, pool
);
1256 svn_stream_set_write(*ws_p
, rep_write_contents
);
1257 svn_stream_set_close(*ws_p
, rep_write_close_contents
);
1259 return SVN_NO_ERROR
;
1264 /*** Deltified storage. ***/
1266 /* Baton for svn_write_fn_t write_svndiff_strings(). */
1267 struct write_svndiff_strings_baton
1269 /* The fs where lives the string we're writing. */
1272 /* The key of the string we're writing to. Typically this is
1273 initialized to NULL, so svn_fs_bdb__string_append() can fill in a
1277 /* The amount of txdelta data written to the current
1278 string-in-progress. */
1281 /* The amount of svndiff header information we've written thus far
1282 to the strings table. */
1283 apr_size_t header_read
;
1285 /* The version number of the svndiff data written. ### You'd better
1286 not count on this being populated after the first chunk is sent
1287 through the interface, since it lives at the 4th byte of the
1291 /* The trail we're writing in. */
1297 /* Function of type `svn_write_fn_t', for writing to a collection of
1298 strings; BATON is `struct write_svndiff_strings_baton *'.
1300 On the first call, BATON->key is null. A new string key in
1301 BATON->fs is chosen and stored in BATON->key; each call appends
1302 *LEN bytes from DATA onto the string. *LEN is never changed; if
1303 the write fails to write all *LEN bytes, an error is returned.
1304 BATON->size is used to track the total amount of data written via
1305 this handler, and must be reset by the caller to 0 when appropriate. */
1306 static svn_error_t
*
1307 write_svndiff_strings(void *baton
, const char *data
, apr_size_t
*len
)
1309 struct write_svndiff_strings_baton
*wb
= baton
;
1310 const char *buf
= data
;
1311 apr_size_t nheader
= 0;
1313 /* If we haven't stripped all the header information from this
1314 stream yet, keep stripping. If someone sends a first window
1315 through here that's shorter than 4 bytes long, this will probably
1316 cause a nuclear reactor meltdown somewhere in the American
1318 if (wb
->header_read
< 4)
1320 nheader
= 4 - wb
->header_read
;
1323 wb
->header_read
+= nheader
;
1325 /* If we have *now* read the full 4-byte header, check that
1326 least byte for the version number of the svndiff format. */
1327 if (wb
->header_read
== 4)
1328 wb
->version
= *(buf
- 1);
1331 /* Append to the current string we're writing (or create a new one
1332 if WB->key is NULL). */
1333 SVN_ERR(svn_fs_bdb__string_append(wb
->fs
, &(wb
->key
), *len
,
1334 buf
, wb
->trail
, wb
->trail
->pool
));
1336 /* Make sure we (still) have a key. */
1337 if (wb
->key
== NULL
)
1338 return svn_error_create(SVN_ERR_FS_GENERAL
, NULL
,
1339 _("Failed to get new string key"));
1341 /* Restore *LEN to the value it *would* have been were it not for
1342 header stripping. */
1345 /* Increment our running total of bytes written to this string. */
1348 return SVN_NO_ERROR
;
1352 typedef struct window_write_t
1354 const char *key
; /* string key for this window */
1355 apr_size_t svndiff_len
; /* amount of svndiff data written to the string */
1356 svn_filesize_t text_off
; /* offset of fulltext represented by this window */
1357 apr_size_t text_len
; /* amount of fulltext data represented by this window */
1363 svn_fs_base__rep_deltify(svn_fs_t
*fs
,
1369 base_fs_data_t
*bfd
= fs
->fsap_data
;
1370 svn_stream_t
*source_stream
; /* stream to read the source */
1371 svn_stream_t
*target_stream
; /* stream to read the target */
1372 svn_txdelta_stream_t
*txdelta_stream
; /* stream to read delta windows */
1374 /* window-y things, and an array to track them */
1376 apr_array_header_t
*windows
;
1378 /* stream to write new (deltified) target data and its baton */
1379 svn_stream_t
*new_target_stream
;
1380 struct write_svndiff_strings_baton new_target_baton
;
1382 /* window handler/baton for writing to above stream */
1383 svn_txdelta_window_handler_t new_target_handler
;
1384 void *new_target_handler_baton
;
1386 /* yes, we do windows */
1387 svn_txdelta_window_t
*window
;
1389 /* The current offset into the fulltext that our window is about to
1390 write. This doubles, after all windows are written, as the
1391 total size of the svndiff data for the deltification process. */
1392 svn_filesize_t tview_off
= 0;
1394 /* The total amount of diff data written while deltifying. */
1395 svn_filesize_t diffsize
= 0;
1397 /* TARGET's original string keys */
1398 apr_array_header_t
*orig_str_keys
;
1400 /* The digest for the representation's fulltext contents. */
1401 unsigned char rep_digest
[APR_MD5_DIGESTSIZE
];
1404 const unsigned char *digest
;
1406 /* pool for holding the windows */
1409 /* Paranoia: never allow a rep to be deltified against itself,
1410 because then there would be no fulltext reachable in the delta
1411 chain, and badness would ensue. */
1412 if (strcmp(target
, source
) == 0)
1413 return svn_error_createf
1414 (SVN_ERR_FS_CORRUPT
, NULL
,
1415 _("Attempt to deltify '%s' against itself"),
1418 /* Set up a handler for the svndiff data, which will write each
1419 window to its own string in the `strings' table. */
1420 new_target_baton
.fs
= fs
;
1421 new_target_baton
.trail
= trail
;
1422 new_target_baton
.header_read
= FALSE
;
1423 new_target_stream
= svn_stream_create(&new_target_baton
, pool
);
1424 svn_stream_set_write(new_target_stream
, write_svndiff_strings
);
1426 /* Get streams to our source and target text data. */
1427 SVN_ERR(svn_fs_base__rep_contents_read_stream(&source_stream
, fs
, source
,
1428 TRUE
, trail
, pool
));
1429 SVN_ERR(svn_fs_base__rep_contents_read_stream(&target_stream
, fs
, target
,
1430 TRUE
, trail
, pool
));
1432 /* Setup a stream to convert the textdelta data into svndiff windows. */
1433 svn_txdelta(&txdelta_stream
, source_stream
, target_stream
, pool
);
1435 if (bfd
->format
>= SVN_FS_BASE__MIN_SVNDIFF1_FORMAT
)
1436 svn_txdelta_to_svndiff2(&new_target_handler
, &new_target_handler_baton
,
1437 new_target_stream
, 1, pool
);
1439 svn_txdelta_to_svndiff2(&new_target_handler
, &new_target_handler_baton
,
1440 new_target_stream
, 0, pool
);
1442 /* subpool for the windows */
1443 wpool
= svn_pool_create(pool
);
1445 /* Now, loop, manufacturing and dispatching windows of svndiff data. */
1446 windows
= apr_array_make(pool
, 1, sizeof(ww
));
1449 /* Reset some baton variables. */
1450 new_target_baton
.size
= 0;
1451 new_target_baton
.key
= NULL
;
1453 /* Free the window. */
1454 svn_pool_clear(wpool
);
1456 /* Fetch the next window of txdelta data. */
1457 SVN_ERR(svn_txdelta_next_window(&window
, txdelta_stream
, wpool
));
1459 /* Send off this package to be written as svndiff data. */
1460 SVN_ERR(new_target_handler(window
, new_target_handler_baton
));
1463 /* Add a new window description to our array. */
1464 ww
= apr_pcalloc(pool
, sizeof(*ww
));
1465 ww
->key
= new_target_baton
.key
;
1466 ww
->svndiff_len
= new_target_baton
.size
;
1467 ww
->text_off
= tview_off
;
1468 ww
->text_len
= window
->tview_len
;
1469 APR_ARRAY_PUSH(windows
, window_write_t
*) = ww
;
1471 /* Update our recordkeeping variables. */
1472 tview_off
+= window
->tview_len
;
1473 diffsize
+= ww
->svndiff_len
;
1478 svn_pool_destroy(wpool
);
1480 /* Having processed all the windows, we can query the MD5 digest
1482 digest
= svn_txdelta_md5_digest(txdelta_stream
);
1484 return svn_error_createf
1485 (SVN_ERR_DELTA_MD5_CHECKSUM_ABSENT
, NULL
,
1486 _("Failed to calculate MD5 digest for '%s'"),
1489 /* Construct a list of the strings used by the old representation so
1490 that we can delete them later. While we are here, if the old
1491 representation was a fulltext, check to make sure the delta we're
1492 replacing it with is actually smaller. (Don't perform this check
1493 if we're replacing a delta; in that case, we're going for a time
1494 optimization, not a space optimization.) */
1496 representation_t
*old_rep
;
1497 const char *str_key
;
1499 SVN_ERR(svn_fs_bdb__read_rep(&old_rep
, fs
, target
, trail
, pool
));
1500 if (old_rep
->kind
== rep_kind_fulltext
)
1502 svn_filesize_t old_size
= 0;
1504 str_key
= old_rep
->contents
.fulltext
.string_key
;
1505 SVN_ERR(svn_fs_bdb__string_size(&old_size
, fs
, str_key
,
1507 orig_str_keys
= apr_array_make(pool
, 1, sizeof(str_key
));
1508 APR_ARRAY_PUSH(orig_str_keys
, const char *) = str_key
;
1510 /* If the new data is NOT an space optimization, destroy the
1511 string(s) we created, and get outta here. */
1512 if (diffsize
>= old_size
)
1515 for (i
= 0; i
< windows
->nelts
; i
++)
1517 ww
= APR_ARRAY_IDX(windows
, i
, window_write_t
*);
1518 SVN_ERR(svn_fs_bdb__string_delete(fs
, ww
->key
, trail
, pool
));
1520 return SVN_NO_ERROR
;
1523 else if (old_rep
->kind
== rep_kind_delta
)
1524 SVN_ERR(delta_string_keys(&orig_str_keys
, old_rep
, pool
));
1525 else /* unknown kind */
1526 return UNKNOWN_NODE_KIND(target
);
1528 /* Save the checksum, since the new rep needs it. */
1529 memcpy(rep_digest
, old_rep
->checksum
, APR_MD5_DIGESTSIZE
);
1532 /* Hook the new strings we wrote into the rest of the filesystem by
1533 building a new representation to replace our old one. */
1535 representation_t new_rep
;
1536 rep_delta_chunk_t
*chunk
;
1537 apr_array_header_t
*chunks
;
1540 new_rep
.kind
= rep_kind_delta
;
1541 new_rep
.txn_id
= NULL
;
1543 /* Migrate the old rep's checksum to the new rep. */
1544 memcpy(new_rep
.checksum
, rep_digest
, APR_MD5_DIGESTSIZE
);
1546 chunks
= apr_array_make(pool
, windows
->nelts
, sizeof(chunk
));
1548 /* Loop through the windows we wrote, creating and adding new
1549 chunks to the representation. */
1550 for (i
= 0; i
< windows
->nelts
; i
++)
1552 ww
= APR_ARRAY_IDX(windows
, i
, window_write_t
*);
1554 /* Allocate a chunk and its window */
1555 chunk
= apr_palloc(pool
, sizeof(*chunk
));
1556 chunk
->offset
= ww
->text_off
;
1558 /* Populate the window */
1559 chunk
->version
= new_target_baton
.version
;
1560 chunk
->string_key
= ww
->key
;
1561 chunk
->size
= ww
->text_len
;
1562 chunk
->rep_key
= source
;
1564 /* Add this chunk to the array. */
1565 APR_ARRAY_PUSH(chunks
, rep_delta_chunk_t
*) = chunk
;
1568 /* Put the chunks array into the representation. */
1569 new_rep
.contents
.delta
.chunks
= chunks
;
1571 /* Write out the new representation. */
1572 SVN_ERR(svn_fs_bdb__write_rep(fs
, target
, &new_rep
, trail
, pool
));
1574 /* Delete the original pre-deltified strings. */
1575 SVN_ERR(delete_strings(orig_str_keys
, fs
, trail
, pool
));
1578 return SVN_NO_ERROR
;