Mark many merge tests as skip-against-old-server.
[svn.git] / subversion / libsvn_fs_base / reps-strings.c
blob33863a9be8eedbb3c1c1af9ada112d2a88951bff
1 /* reps-strings.c : interpreting representations with respect to strings
3 * ====================================================================
4 * Copyright (c) 2000-2007 CollabNet. All rights reserved.
6 * This software is licensed as described in the file COPYING, which
7 * you should have received as part of this distribution. The terms
8 * are also available at http://subversion.tigris.org/license-1.html.
9 * If newer versions of this license are posted there, you may use a
10 * newer version instead, at your option.
12 * This software consists of voluntary contributions made by many
13 * individuals. For exact contribution history, see the revision
14 * history and logs, available at http://subversion.tigris.org/.
15 * ====================================================================
18 #include <assert.h>
19 #include <apr_md5.h>
21 #define APU_WANT_DB
22 #include <apu_want.h>
24 #include "svn_fs.h"
25 #include "svn_pools.h"
26 #include "svn_md5.h"
28 #include "fs.h"
29 #include "err.h"
30 #include "trail.h"
31 #include "reps-strings.h"
33 #include "bdb/reps-table.h"
34 #include "bdb/strings-table.h"
36 #include "../libsvn_fs/fs-loader.h"
37 #include "svn_private_config.h"
40 /*** Helper Functions ***/
43 /* Return non-zero iff REP is mutable under transaction TXN_ID. */
44 static svn_boolean_t rep_is_mutable(representation_t *rep,
45 const char *txn_id)
47 if ((! rep->txn_id) || (strcmp(rep->txn_id, txn_id) != 0))
48 return FALSE;
49 return TRUE;
/* Expand to a call that creates an SVN_ERR_FS_CORRUPT error whose
   message says the representation keyed by X has an unknown node
   kind.  X is substituted for the message's `%s'. */
#define UNKNOWN_NODE_KIND(x)                                        \
  svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,                       \
                    _("Unknown node kind for representation '%s'"), x)
59 /* Return a `fulltext' representation, allocated in POOL, which
60 * references the string STR_KEY.
62 * If TXN_ID is non-zero and non-NULL, make the representation mutable
63 * under that TXN_ID.
65 * If STR_KEY is non-null, copy it into an allocation from POOL.
67 * If CHECKSUM is non-null, use it as the checksum for the new rep;
68 * else initialize the rep with an all-zero (i.e., always successful)
69 * checksum.
71 static representation_t *
72 make_fulltext_rep(const char *str_key,
73 const char *txn_id,
74 const unsigned char *checksum,
75 apr_pool_t *pool)
78 representation_t *rep = apr_pcalloc(pool, sizeof(*rep));
79 if (txn_id && *txn_id)
80 rep->txn_id = apr_pstrdup(pool, txn_id);
81 rep->kind = rep_kind_fulltext;
83 if (checksum)
84 memcpy(rep->checksum, checksum, APR_MD5_DIGESTSIZE);
85 else
86 memset(rep->checksum, 0, APR_MD5_DIGESTSIZE);
88 rep->contents.fulltext.string_key
89 = str_key ? apr_pstrdup(pool, str_key) : NULL;
90 return rep;
94 /* Set *KEYS to an array of string keys gleaned from `delta'
95 representation REP. Allocate *KEYS in POOL. */
96 static svn_error_t *
97 delta_string_keys(apr_array_header_t **keys,
98 const representation_t *rep,
99 apr_pool_t *pool)
101 const char *key;
102 int i;
103 apr_array_header_t *chunks;
105 if (rep->kind != rep_kind_delta)
106 return svn_error_create
107 (SVN_ERR_FS_GENERAL, NULL,
108 _("Representation is not of type 'delta'"));
110 /* Set up a convenience variable. */
111 chunks = rep->contents.delta.chunks;
113 /* Initialize *KEYS to an empty array. */
114 *keys = apr_array_make(pool, chunks->nelts, sizeof(key));
115 if (! chunks->nelts)
116 return SVN_NO_ERROR;
118 /* Now, push the string keys for each window into *KEYS */
119 for (i = 0; i < chunks->nelts; i++)
121 rep_delta_chunk_t *chunk = APR_ARRAY_IDX(chunks, i, rep_delta_chunk_t *);
123 key = apr_pstrdup(pool, chunk->string_key);
124 APR_ARRAY_PUSH(*keys, const char *) = key;
127 return SVN_NO_ERROR;
131 /* Delete the strings associated with array KEYS in FS as part of TRAIL. */
132 static svn_error_t *
133 delete_strings(apr_array_header_t *keys,
134 svn_fs_t *fs,
135 trail_t *trail,
136 apr_pool_t *pool)
138 int i;
139 const char *str_key;
140 apr_pool_t *subpool = svn_pool_create(pool);
142 for (i = 0; i < keys->nelts; i++)
144 svn_pool_clear(subpool);
145 str_key = APR_ARRAY_IDX(keys, i, const char *);
146 SVN_ERR(svn_fs_bdb__string_delete(fs, str_key, trail, subpool));
148 svn_pool_destroy(subpool);
149 return SVN_NO_ERROR;
154 /*** Reading the contents from a representation. ***/
156 struct compose_handler_baton
158 /* The combined window, and the pool it's allocated from. */
159 svn_txdelta_window_t *window;
160 apr_pool_t *window_pool;
162 /* If the incoming window was self-compressed, and the combined WINDOW
163 exists from previous iterations, SOURCE_BUF will point to the
164 expanded self-compressed window. */
165 char *source_buf;
167 /* The trail for this operation. WINDOW_POOL will be a child of
168 TRAIL->pool. No allocations will be made from TRAIL->pool itself. */
169 trail_t *trail;
171 /* TRUE when no more windows have to be read/combined. */
172 svn_boolean_t done;
174 /* TRUE if we've just started reading a new window. We need this
175 because the svndiff handler will push a NULL window at the end of
176 the stream, and we have to ignore that; but we must also know
177 when it's appropriate to push a NULL window at the combiner. */
178 svn_boolean_t init;
182 /* Handle one window. If BATON is emtpy, copy the WINDOW into it;
183 otherwise, combine WINDOW with the one in BATON, unless WINDOW
184 is self-compressed (i.e., does not copy from the source view),
185 in which case expand. */
187 static svn_error_t *
188 compose_handler(svn_txdelta_window_t *window, void *baton)
190 struct compose_handler_baton *cb = baton;
191 assert(!cb->done || window == NULL);
192 assert(cb->trail && cb->trail->pool);
194 if (!cb->init && !window)
195 return SVN_NO_ERROR;
197 /* We should never get here if we've already expanded a
198 self-compressed window. */
199 assert(!cb->source_buf);
201 if (cb->window)
203 if (window && (window->sview_len == 0 || window->src_ops == 0))
205 /* This is a self-compressed window. Don't combine it with
206 the others, because the combiner may go quadratic. Instead,
207 expand it here and signal that the combination has
208 ended. */
209 apr_size_t source_len = window->tview_len;
210 assert(cb->window->sview_len == source_len);
211 cb->source_buf = apr_palloc(cb->window_pool, source_len);
212 svn_txdelta_apply_instructions(window, NULL,
213 cb->source_buf, &source_len);
214 cb->done = TRUE;
216 else
218 /* Combine the incoming window with whatever's in the baton. */
219 apr_pool_t *composite_pool = svn_pool_create(cb->trail->pool);
220 svn_txdelta_window_t *composite;
222 composite = svn_txdelta_compose_windows(window, cb->window,
223 composite_pool);
224 svn_pool_destroy(cb->window_pool);
225 cb->window = composite;
226 cb->window_pool = composite_pool;
227 cb->done = (composite->sview_len == 0 || composite->src_ops == 0);
230 else if (window)
232 /* Copy the (first) window into the baton. */
233 apr_pool_t *window_pool = svn_pool_create(cb->trail->pool);
234 assert(cb->window_pool == NULL);
235 cb->window = svn_txdelta_window_dup(window, window_pool);
236 cb->window_pool = window_pool;
237 cb->done = (window->sview_len == 0 || window->src_ops == 0);
239 else
240 cb->done = TRUE;
242 cb->init = FALSE;
243 return SVN_NO_ERROR;
248 /* Read one delta window from REP[CUR_CHUNK] and push it at the
249 composition handler. */
251 static svn_error_t *
252 get_one_window(struct compose_handler_baton *cb,
253 svn_fs_t *fs,
254 representation_t *rep,
255 int cur_chunk)
257 svn_stream_t *wstream;
258 char diffdata[4096]; /* hunk of svndiff data */
259 svn_filesize_t off; /* offset into svndiff data */
260 apr_size_t amt; /* how much svndiff data to/was read */
261 const char *str_key;
263 apr_array_header_t *chunks = rep->contents.delta.chunks;
264 rep_delta_chunk_t *this_chunk, *first_chunk;
266 cb->init = TRUE;
267 if (chunks->nelts <= cur_chunk)
268 return compose_handler(NULL, cb);
270 /* Set up a window handling stream for the svndiff data. */
271 wstream = svn_txdelta_parse_svndiff(compose_handler, cb, TRUE,
272 cb->trail->pool);
274 /* First things first: send the "SVN"{version} header through the
275 stream. ### For now, we will just use the version specified
276 in the first chunk, and then verify that no chunks have a
277 different version number than the one used. In the future,
278 we might simply convert chunks that use a different version
279 of the diff format -- or, heck, a different format
280 altogether -- to the format/version of the first chunk. */
281 first_chunk = APR_ARRAY_IDX(chunks, 0, rep_delta_chunk_t*);
282 diffdata[0] = 'S';
283 diffdata[1] = 'V';
284 diffdata[2] = 'N';
285 diffdata[3] = (char) (first_chunk->version);
286 amt = 4;
287 SVN_ERR(svn_stream_write(wstream, diffdata, &amt));
288 /* FIXME: The stream write handler is borked; assert (amt == 4); */
290 /* Get this string key which holds this window's data.
291 ### todo: make sure this is an `svndiff' DIFF skel here. */
292 this_chunk = APR_ARRAY_IDX(chunks, cur_chunk, rep_delta_chunk_t*);
293 str_key = this_chunk->string_key;
295 /* Run through the svndiff data, at least as far as necessary. */
296 off = 0;
299 amt = sizeof(diffdata);
300 SVN_ERR(svn_fs_bdb__string_read(fs, str_key, diffdata,
301 off, &amt, cb->trail,
302 cb->trail->pool));
303 off += amt;
304 SVN_ERR(svn_stream_write(wstream, diffdata, &amt));
306 while (amt != 0);
307 SVN_ERR(svn_stream_close(wstream));
309 assert(!cb->init);
310 assert(cb->window != NULL);
311 assert(cb->window_pool != NULL);
312 return SVN_NO_ERROR;
316 /* Undeltify a range of data. DELTAS is the set of delta windows to
317 combine, FULLTEXT is the source text, CUR_CHUNK is the index of the
318 delta chunk we're starting from. OFFSET is the relative offset of
319 the requested data within the chunk; BUF and LEN are what we're
320 undeltifying to. */
322 static svn_error_t *
323 rep_undeltify_range(svn_fs_t *fs,
324 apr_array_header_t *deltas,
325 representation_t *fulltext,
326 int cur_chunk,
327 char *buf,
328 apr_size_t offset,
329 apr_size_t *len,
330 trail_t *trail,
331 apr_pool_t *pool)
333 apr_size_t len_read = 0;
337 struct compose_handler_baton cb = { 0 };
338 char *source_buf, *target_buf;
339 apr_size_t target_len;
340 int cur_rep;
342 cb.trail = trail;
343 cb.done = FALSE;
344 for (cur_rep = 0; !cb.done && cur_rep < deltas->nelts; ++cur_rep)
346 representation_t *const rep =
347 APR_ARRAY_IDX(deltas, cur_rep, representation_t*);
348 SVN_ERR(get_one_window(&cb, fs, rep, cur_chunk));
351 if (!cb.window)
352 /* That's it, no more source data is available. */
353 break;
355 /* The source view length should not be 0 if there are source
356 copy ops in the window. */
357 assert(cb.window->sview_len > 0 || cb.window->src_ops == 0);
359 /* cb.window is the combined delta window. Read the source text
360 into a buffer. */
361 if (cb.source_buf)
363 /* The combiner already created the source text from a
364 self-compressed window. */
365 source_buf = cb.source_buf;
367 else if (fulltext && cb.window->sview_len > 0 && cb.window->src_ops > 0)
369 apr_size_t source_len = cb.window->sview_len;
370 source_buf = apr_palloc(cb.window_pool, source_len);
371 SVN_ERR(svn_fs_bdb__string_read
372 (fs, fulltext->contents.fulltext.string_key,
373 source_buf, cb.window->sview_offset, &source_len,
374 trail, pool));
375 if (source_len != cb.window->sview_len)
376 return svn_error_create
377 (SVN_ERR_FS_CORRUPT, NULL,
378 _("Svndiff source length inconsistency"));
380 else
382 source_buf = NULL; /* Won't read anything from here. */
385 if (offset > 0)
387 target_len = *len - len_read + offset;
388 target_buf = apr_palloc(cb.window_pool, target_len);
390 else
392 target_len = *len - len_read;
393 target_buf = buf;
396 svn_txdelta_apply_instructions(cb.window, source_buf,
397 target_buf, &target_len);
398 if (offset > 0)
400 assert(target_len > offset);
401 target_len -= offset;
402 memcpy(buf, target_buf + offset, target_len);
403 offset = 0; /* Read from the beginning of the next chunk. */
405 /* Don't need this window any more. */
406 svn_pool_destroy(cb.window_pool);
408 len_read += target_len;
409 buf += target_len;
410 ++cur_chunk;
412 while (len_read < *len);
414 *len = len_read;
415 return SVN_NO_ERROR;
420 /* Calculate the index of the chunk in REP that contains REP_OFFSET,
421 and find the relative CHUNK_OFFSET within the chunk.
422 Return -1 if offset is beyond the end of the represented data.
423 ### The basic assumption is that all delta windows are the same size
424 and aligned at the same offset, so this number is the same in all
425 dependent deltas. Oh, and the chunks in REP must be ordered. */
427 static int
428 get_chunk_offset(representation_t *rep,
429 svn_filesize_t rep_offset,
430 apr_size_t *chunk_offset)
432 const apr_array_header_t *chunks = rep->contents.delta.chunks;
433 int cur_chunk;
434 assert(chunks->nelts);
436 /* ### Yes, this is a linear search. I'll change this to bisection
437 the very second we notice it's slowing us down. */
438 for (cur_chunk = 0; cur_chunk < chunks->nelts; ++cur_chunk)
440 const rep_delta_chunk_t *const this_chunk
441 = APR_ARRAY_IDX(chunks, cur_chunk, rep_delta_chunk_t*);
443 if ((this_chunk->offset + this_chunk->size) > rep_offset)
445 assert(this_chunk->offset <= rep_offset);
446 assert(rep_offset - this_chunk->offset < SVN_MAX_OBJECT_SIZE);
447 *chunk_offset = (apr_size_t) (rep_offset - this_chunk->offset);
448 return cur_chunk;
452 return -1;
455 /* Copy into BUF *LEN bytes starting at OFFSET from the string
456 represented via REP_KEY in FS, as part of TRAIL.
457 The number of bytes actually copied is stored in *LEN. */
458 static svn_error_t *
459 rep_read_range(svn_fs_t *fs,
460 const char *rep_key,
461 svn_filesize_t offset,
462 char *buf,
463 apr_size_t *len,
464 trail_t *trail,
465 apr_pool_t *pool)
467 representation_t *rep;
468 apr_size_t chunk_offset;
470 /* Read in our REP. */
471 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
472 if (rep->kind == rep_kind_fulltext)
474 SVN_ERR(svn_fs_bdb__string_read(fs, rep->contents.fulltext.string_key,
475 buf, offset, len, trail, pool));
477 else if (rep->kind == rep_kind_delta)
479 const int cur_chunk = get_chunk_offset(rep, offset, &chunk_offset);
480 if (cur_chunk < 0)
481 *len = 0;
482 else
484 svn_error_t *err;
485 /* Preserve for potential use in error message. */
486 const char *first_rep_key = rep_key;
487 /* Make a list of all the rep's we need to undeltify this range.
488 We'll have to read them within this trail anyway, so we might
489 as well do it once and up front. */
490 apr_array_header_t *reps = /* ### what constant here? */
491 apr_array_make(pool, 666, sizeof(rep));
494 const rep_delta_chunk_t *const first_chunk
495 = APR_ARRAY_IDX(rep->contents.delta.chunks,
496 0, rep_delta_chunk_t*);
497 const rep_delta_chunk_t *const chunk
498 = APR_ARRAY_IDX(rep->contents.delta.chunks,
499 cur_chunk, rep_delta_chunk_t*);
501 /* Verify that this chunk is of the same version as the first. */
502 if (first_chunk->version != chunk->version)
503 return svn_error_createf
504 (SVN_ERR_FS_CORRUPT, NULL,
505 _("Diff version inconsistencies in representation '%s'"),
506 rep_key);
508 rep_key = chunk->rep_key;
509 APR_ARRAY_PUSH(reps, representation_t *) = rep;
510 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key,
511 trail, pool));
513 while (rep->kind == rep_kind_delta
514 && rep->contents.delta.chunks->nelts > cur_chunk);
516 /* Right. We've either just read the fulltext rep, or a rep that's
517 too short, in which case we'll undeltify without source data.*/
518 if (rep->kind != rep_kind_delta && rep->kind != rep_kind_fulltext)
519 return UNKNOWN_NODE_KIND(rep_key);
521 if (rep->kind == rep_kind_delta)
522 rep = NULL; /* Don't use source data */
524 err = rep_undeltify_range(fs, reps, rep, cur_chunk, buf,
525 chunk_offset, len, trail, pool);
526 if (err)
528 if (err->apr_err == SVN_ERR_FS_CORRUPT)
529 return svn_error_createf
530 (SVN_ERR_FS_CORRUPT, err,
531 _("Corruption detected whilst reading delta chain from "
532 "representation '%s' to '%s'"), first_rep_key, rep_key);
533 else
534 return err;
538 else /* unknown kind */
539 return UNKNOWN_NODE_KIND(rep_key);
541 return SVN_NO_ERROR;
545 svn_error_t *
546 svn_fs_base__get_mutable_rep(const char **new_rep_key,
547 const char *rep_key,
548 svn_fs_t *fs,
549 const char *txn_id,
550 trail_t *trail,
551 apr_pool_t *pool)
553 representation_t *rep = NULL;
554 const char *new_str = NULL;
556 /* We were passed an existing REP_KEY, so examine it. If it is
557 mutable already, then just return REP_KEY as the mutable result
558 key. */
559 if (rep_key && (rep_key[0] != '\0'))
561 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
562 if (rep_is_mutable(rep, txn_id))
564 *new_rep_key = rep_key;
565 return SVN_NO_ERROR;
569 /* Either we weren't provided a base key to examine, or the base key
570 we were provided was not mutable. So, let's make a new
571 representation and return its key to the caller. */
572 SVN_ERR(svn_fs_bdb__string_append(fs, &new_str, 0, NULL, trail, pool));
573 rep = make_fulltext_rep(new_str, txn_id,
574 svn_md5_empty_string_digest(), pool);
575 SVN_ERR(svn_fs_bdb__write_new_rep(new_rep_key, fs, rep, trail, pool));
577 return SVN_NO_ERROR;
581 svn_error_t *
582 svn_fs_base__delete_rep_if_mutable(svn_fs_t *fs,
583 const char *rep_key,
584 const char *txn_id,
585 trail_t *trail,
586 apr_pool_t *pool)
588 representation_t *rep;
590 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
591 if (! rep_is_mutable(rep, txn_id))
592 return SVN_NO_ERROR;
594 if (rep->kind == rep_kind_fulltext)
596 SVN_ERR(svn_fs_bdb__string_delete(fs,
597 rep->contents.fulltext.string_key,
598 trail, pool));
600 else if (rep->kind == rep_kind_delta)
602 apr_array_header_t *keys;
603 SVN_ERR(delta_string_keys(&keys, rep, pool));
604 SVN_ERR(delete_strings(keys, fs, trail, pool));
606 else /* unknown kind */
607 return UNKNOWN_NODE_KIND(rep_key);
609 SVN_ERR(svn_fs_bdb__delete_rep(fs, rep_key, trail, pool));
610 return SVN_NO_ERROR;
615 /*** Reading and writing data via representations. ***/
617 /** Reading. **/
619 struct rep_read_baton
621 /* The FS from which we're reading. */
622 svn_fs_t *fs;
624 /* The representation skel whose contents we want to read. If this
625 is NULL, the rep has never had any contents, so all reads fetch 0
626 bytes.
628 Formerly, we cached the entire rep skel here, not just the key.
629 That way we didn't have to fetch the rep from the db every time
630 we want to read a little bit more of the file. Unfortunately,
631 this has a problem: if, say, a file's representation changes
632 while we're reading (changes from fulltext to delta, for
633 example), we'll never know it. So for correctness, we now
634 refetch the representation skel every time we want to read
635 another chunk. */
636 const char *rep_key;
638 /* How many bytes have been read already. */
639 svn_filesize_t offset;
641 /* If present, the read will be done as part of this trail, and the
642 trail's pool will be used. Otherwise, see `pool' below. */
643 trail_t *trail;
645 /* MD5 checksum. Initialized when the baton is created, updated as
646 we read data, and finalized when the stream is closed. */
647 struct apr_md5_ctx_t md5_context;
649 /* The length of the rep's contents (as fulltext, that is,
650 independent of how the rep actually stores the data.) This is
651 retrieved when the baton is created, and used to determine when
652 we have read the last byte, at which point we compare checksums.
654 Getting this at baton creation time makes interleaved reads and
655 writes on the same rep in the same trail impossible. But we're
656 not doing that, and probably no one ever should. And anyway if
657 they do, they should see problems immediately. */
658 svn_filesize_t size;
660 /* Set to FALSE when the baton is created, TRUE when the md5_context
661 is digestified. */
662 svn_boolean_t checksum_finalized;
664 /* Used for temporary allocations, iff `trail' (above) is null. */
665 apr_pool_t *pool;
670 static svn_error_t *
671 rep_read_get_baton(struct rep_read_baton **rb_p,
672 svn_fs_t *fs,
673 const char *rep_key,
674 svn_boolean_t use_trail_for_reads,
675 trail_t *trail,
676 apr_pool_t *pool)
678 struct rep_read_baton *b;
680 b = apr_pcalloc(pool, sizeof(*b));
681 apr_md5_init(&(b->md5_context));
683 if (rep_key)
684 SVN_ERR(svn_fs_base__rep_contents_size(&(b->size), fs, rep_key,
685 trail, pool));
686 else
687 b->size = 0;
689 b->checksum_finalized = FALSE;
690 b->fs = fs;
691 b->trail = use_trail_for_reads ? trail : NULL;
692 b->pool = pool;
693 b->rep_key = rep_key;
694 b->offset = 0;
696 *rb_p = b;
698 return SVN_NO_ERROR;
703 /*** Retrieving data. ***/
705 svn_error_t *
706 svn_fs_base__rep_contents_size(svn_filesize_t *size_p,
707 svn_fs_t *fs,
708 const char *rep_key,
709 trail_t *trail,
710 apr_pool_t *pool)
712 representation_t *rep;
714 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
716 if (rep->kind == rep_kind_fulltext)
718 /* Get the size by asking Berkeley for the string's length. */
719 SVN_ERR(svn_fs_bdb__string_size(size_p, fs,
720 rep->contents.fulltext.string_key,
721 trail, pool));
723 else if (rep->kind == rep_kind_delta)
725 /* Get the size by finding the last window pkg in the delta and
726 adding its offset to its size. This way, we won't even be
727 messed up by overlapping windows, as long as the window pkgs
728 are still ordered. */
729 apr_array_header_t *chunks = rep->contents.delta.chunks;
730 rep_delta_chunk_t *last_chunk;
732 assert(chunks->nelts);
734 last_chunk = APR_ARRAY_IDX(chunks, chunks->nelts - 1,
735 rep_delta_chunk_t *);
736 *size_p = last_chunk->offset + last_chunk->size;
738 else /* unknown kind */
739 return UNKNOWN_NODE_KIND(rep_key);
741 return SVN_NO_ERROR;
745 svn_error_t *
746 svn_fs_base__rep_contents_checksum(unsigned char digest[],
747 svn_fs_t *fs,
748 const char *rep_key,
749 trail_t *trail,
750 apr_pool_t *pool)
752 representation_t *rep;
754 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
755 memcpy(digest, rep->checksum, APR_MD5_DIGESTSIZE);
757 return SVN_NO_ERROR;
761 svn_error_t *
762 svn_fs_base__rep_contents(svn_string_t *str,
763 svn_fs_t *fs,
764 const char *rep_key,
765 trail_t *trail,
766 apr_pool_t *pool)
768 svn_filesize_t contents_size;
769 apr_size_t len;
770 char *data;
772 SVN_ERR(svn_fs_base__rep_contents_size(&contents_size, fs, rep_key,
773 trail, pool));
775 /* What if the contents are larger than we can handle? */
776 if (contents_size > SVN_MAX_OBJECT_SIZE)
777 return svn_error_createf
778 (SVN_ERR_FS_GENERAL, NULL,
779 _("Rep contents are too large: "
780 "got %s, limit is %s"),
781 apr_psprintf(pool, "%" SVN_FILESIZE_T_FMT, contents_size),
782 apr_psprintf(pool, "%" APR_SIZE_T_FMT, SVN_MAX_OBJECT_SIZE));
783 else
784 str->len = (apr_size_t) contents_size;
786 data = apr_palloc(pool, str->len);
787 str->data = data;
788 len = str->len;
789 SVN_ERR(rep_read_range(fs, rep_key, 0, data, &len, trail, pool));
791 /* Paranoia. */
792 if (len != str->len)
793 return svn_error_createf
794 (SVN_ERR_FS_CORRUPT, NULL,
795 _("Failure reading rep '%s'"), rep_key);
797 /* Just the standard paranoia. */
799 representation_t *rep;
800 apr_md5_ctx_t md5_context;
801 unsigned char checksum[APR_MD5_DIGESTSIZE];
803 apr_md5_init(&md5_context);
804 apr_md5_update(&md5_context, str->data, str->len);
805 apr_md5_final(checksum, &md5_context);
807 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
808 if (! svn_md5_digests_match(checksum, rep->checksum))
809 return svn_error_createf
810 (SVN_ERR_FS_CORRUPT, NULL,
811 _("Checksum mismatch on rep '%s':\n"
812 " expected: %s\n"
813 " actual: %s\n"), rep_key,
814 svn_md5_digest_to_cstring_display(rep->checksum, pool),
815 svn_md5_digest_to_cstring_display(checksum, pool));
818 return SVN_NO_ERROR;
822 struct read_rep_args
824 struct rep_read_baton *rb; /* The data source. */
825 char *buf; /* Where to put what we read. */
826 apr_size_t *len; /* How much to read / was read. */
830 /* BATON is of type `read_rep_args':
832 Read into BATON->rb->buf the *(BATON->len) bytes starting at
833 BATON->rb->offset from the data represented at BATON->rb->rep_key
834 in BATON->rb->fs, as part of TRAIL.
836 Afterwards, *(BATON->len) is the number of bytes actually read, and
837 BATON->rb->offset is incremented by that amount.
839 If BATON->rb->rep_key is null, this is assumed to mean the file's
840 contents have no representation, i.e., the file has no contents.
841 In that case, if BATON->rb->offset > 0, return the error
842 SVN_ERR_FS_FILE_CONTENTS_CHANGED, else just set *(BATON->len) to
843 zero and return. */
844 static svn_error_t *
845 txn_body_read_rep(void *baton, trail_t *trail)
847 struct read_rep_args *args = baton;
849 if (args->rb->rep_key)
851 SVN_ERR(rep_read_range(args->rb->fs,
852 args->rb->rep_key,
853 args->rb->offset,
854 args->buf,
855 args->len,
856 trail,
857 trail->pool));
859 args->rb->offset += *(args->len);
861 /* We calculate the checksum just once, the moment we see the
862 * last byte of data. But we can't assume there was a short
863 * read. The caller may have known the length of the data and
864 * requested exactly that amount, so there would never be a
865 * short read. (That's why the read baton has to know the
866 * length of the data in advance.)
868 * On the other hand, some callers invoke the stream reader in a
869 * loop whose termination condition is that the read returned
870 * zero bytes of data -- which usually results in the read
871 * function being called one more time *after* the call that got
872 * a short read (indicating end-of-stream).
874 * The conditions below ensure that we compare checksums even
875 * when there is no short read associated with the last byte of
876 * data, while also ensuring that it's harmless to repeatedly
877 * read 0 bytes from the stream.
879 if (! args->rb->checksum_finalized)
881 apr_md5_update(&(args->rb->md5_context), args->buf, *(args->len));
883 if (args->rb->offset == args->rb->size)
885 representation_t *rep;
886 unsigned char checksum[APR_MD5_DIGESTSIZE];
888 apr_md5_final(checksum, &(args->rb->md5_context));
889 args->rb->checksum_finalized = TRUE;
891 SVN_ERR(svn_fs_bdb__read_rep(&rep, args->rb->fs,
892 args->rb->rep_key,
893 trail, trail->pool));
894 if (! svn_md5_digests_match(checksum, rep->checksum))
895 return svn_error_createf
896 (SVN_ERR_FS_CORRUPT, NULL,
897 _("Checksum mismatch on rep '%s':\n"
898 " expected: %s\n"
899 " actual: %s\n"), args->rb->rep_key,
900 svn_md5_digest_to_cstring_display(rep->checksum,
901 trail->pool),
902 svn_md5_digest_to_cstring_display(checksum, trail->pool));
906 else if (args->rb->offset > 0)
908 return
909 svn_error_create
910 (SVN_ERR_FS_REP_CHANGED, NULL,
911 _("Null rep, but offset past zero already"));
913 else
914 *(args->len) = 0;
916 return SVN_NO_ERROR;
920 static svn_error_t *
921 rep_read_contents(void *baton, char *buf, apr_size_t *len)
923 struct rep_read_baton *rb = baton;
924 struct read_rep_args args;
926 args.rb = rb;
927 args.buf = buf;
928 args.len = len;
930 /* If we got a trail, use it; else make one. */
931 if (rb->trail)
932 SVN_ERR(txn_body_read_rep(&args, rb->trail));
933 else
935 /* Hey, guess what? trails don't clear their own subpools. In
936 the case of reading from the db, any returned data should
937 live in our pre-allocated buffer, so the whole operation can
938 happen within a single malloc/free cycle. This prevents us
939 from creating millions of unnecessary trail subpools when
940 reading a big file. */
941 apr_pool_t *subpool = svn_pool_create(rb->pool);
942 SVN_ERR(svn_fs_base__retry_txn(rb->fs,
943 txn_body_read_rep,
944 &args,
945 subpool));
946 svn_pool_destroy(subpool);
948 return SVN_NO_ERROR;
952 /** Writing. **/
955 struct rep_write_baton
957 /* The FS in which we're writing. */
958 svn_fs_t *fs;
960 /* The representation skel whose contents we want to write. */
961 const char *rep_key;
963 /* The transaction id under which this write action will take
964 place. */
965 const char *txn_id;
967 /* If present, do the write as part of this trail, and use trail's
968 pool. Otherwise, see `pool' below. */
969 trail_t *trail;
971 /* MD5 checksum. Initialized when the baton is created, updated as
972 we write data, and finalized and stored when the stream is
973 closed. */
974 struct apr_md5_ctx_t md5_context;
975 unsigned char md5_digest[APR_MD5_DIGESTSIZE];
976 svn_boolean_t finalized;
978 /* Used for temporary allocations, iff `trail' (above) is null. */
979 apr_pool_t *pool;
984 static struct rep_write_baton *
985 rep_write_get_baton(svn_fs_t *fs,
986 const char *rep_key,
987 const char *txn_id,
988 trail_t *trail,
989 apr_pool_t *pool)
991 struct rep_write_baton *b;
993 b = apr_pcalloc(pool, sizeof(*b));
994 apr_md5_init(&(b->md5_context));
995 b->fs = fs;
996 b->trail = trail;
997 b->pool = pool;
998 b->rep_key = rep_key;
999 b->txn_id = txn_id;
1000 return b;
1005 /* Write LEN bytes from BUF into the end of the string represented via
1006 REP_KEY in FS, as part of TRAIL. If the representation is not
1007 mutable, return the error SVN_FS_REP_NOT_MUTABLE. */
1008 static svn_error_t *
1009 rep_write(svn_fs_t *fs,
1010 const char *rep_key,
1011 const char *buf,
1012 apr_size_t len,
1013 const char *txn_id,
1014 trail_t *trail,
1015 apr_pool_t *pool)
1017 representation_t *rep;
1019 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
1021 if (! rep_is_mutable(rep, txn_id))
1022 return svn_error_createf
1023 (SVN_ERR_FS_REP_NOT_MUTABLE, NULL,
1024 _("Rep '%s' is not mutable"), rep_key);
1026 if (rep->kind == rep_kind_fulltext)
1028 SVN_ERR(svn_fs_bdb__string_append
1029 (fs, &(rep->contents.fulltext.string_key), len, buf,
1030 trail, pool));
1032 else if (rep->kind == rep_kind_delta)
1034 /* There should never be a case when we have a mutable
1035 non-fulltext rep. The only code that creates mutable reps is
1036 in this file, and it creates them fulltext. */
1037 return svn_error_createf
1038 (SVN_ERR_FS_CORRUPT, NULL,
1039 _("Rep '%s' both mutable and non-fulltext"), rep_key);
1041 else /* unknown kind */
1042 return UNKNOWN_NODE_KIND(rep_key);
1044 return SVN_NO_ERROR;
1048 struct write_rep_args
1050 struct rep_write_baton *wb; /* Destination. */
1051 const char *buf; /* Data. */
1052 apr_size_t len; /* How much to write. */
1056 /* BATON is of type `write_rep_args':
1057 Append onto BATON->wb->rep_key's contents BATON->len bytes of
1058 data from BATON->wb->buf, in BATON->rb->fs, as part of TRAIL.
1060 If the representation is not mutable, return the error
1061 SVN_FS_REP_NOT_MUTABLE. */
1062 static svn_error_t *
1063 txn_body_write_rep(void *baton, trail_t *trail)
1065 struct write_rep_args *args = baton;
1067 SVN_ERR(rep_write(args->wb->fs,
1068 args->wb->rep_key,
1069 args->buf,
1070 args->len,
1071 args->wb->txn_id,
1072 trail,
1073 trail->pool));
1075 apr_md5_update(&(args->wb->md5_context), args->buf, args->len);
1077 return SVN_NO_ERROR;
1081 static svn_error_t *
1082 rep_write_contents(void *baton,
1083 const char *buf,
1084 apr_size_t *len)
1086 struct rep_write_baton *wb = baton;
1087 struct write_rep_args args;
1089 /* We toss LEN's indirectness because if not all the bytes are
1090 written, it's an error, so we wouldn't be reporting anything back
1091 through *LEN anyway. */
1092 args.wb = wb;
1093 args.buf = buf;
1094 args.len = *len;
1096 /* If we got a trail, use it; else make one. */
1097 if (wb->trail)
1098 SVN_ERR(txn_body_write_rep(&args, wb->trail));
1099 else
1101 /* Hey, guess what? trails don't clear their own subpools. In
1102 the case of simply writing the rep to the db, we're *certain*
1103 that there's no data coming back to us that needs to be
1104 preserved... so the whole operation can happen within a
1105 single malloc/free cycle. This prevents us from creating
1106 millions of unnecessary trail subpools when writing a big
1107 file. */
1108 apr_pool_t *subpool = svn_pool_create(wb->pool);
1109 SVN_ERR(svn_fs_base__retry_txn(wb->fs,
1110 txn_body_write_rep,
1111 &args,
1112 subpool));
1113 svn_pool_destroy(subpool);
1116 return SVN_NO_ERROR;
1120 /* Helper for rep_write_close_contents(); see that doc string for
1121 more. BATON is of type `struct rep_write_baton'. */
1122 static svn_error_t *
1123 txn_body_write_close_rep(void *baton, trail_t *trail)
1125 struct rep_write_baton *wb = baton;
1126 representation_t *rep;
1128 SVN_ERR(svn_fs_bdb__read_rep(&rep, wb->fs, wb->rep_key,
1129 trail, trail->pool));
1130 memcpy(rep->checksum, wb->md5_digest, APR_MD5_DIGESTSIZE);
1131 SVN_ERR(svn_fs_bdb__write_rep(wb->fs, wb->rep_key, rep,
1132 trail, trail->pool));
1134 return SVN_NO_ERROR;
1138 /* BATON is of type `struct rep_write_baton'.
1140 * Finalize BATON->md5_context and store the resulting digest under
1141 * BATON->rep_key.
1143 static svn_error_t *
1144 rep_write_close_contents(void *baton)
1146 struct rep_write_baton *wb = baton;
1148 /* ### Thought: if we fixed apr-util MD5 contexts to allow repeated
1149 digestification, then we wouldn't need a stream close function at
1150 all -- instead, we could update the stored checksum each time a
1151 write occurred, which would have the added advantage of making
1152 interleaving reads and writes work. Currently, they'd fail with
1153 a checksum mismatch, it just happens that our code never tries to
1154 do that anyway. */
1156 if (! wb->finalized)
1158 apr_md5_final(wb->md5_digest, &wb->md5_context);
1159 wb->finalized = TRUE;
1162 /* If we got a trail, use it; else make one. */
1163 if (wb->trail)
1165 SVN_ERR(txn_body_write_close_rep(wb, wb->trail));
1167 else
1169 SVN_ERR(svn_fs_base__retry_txn(wb->fs,
1170 txn_body_write_close_rep,
1172 wb->pool));
1175 return SVN_NO_ERROR;
/** Public read and write stream constructors. **/
1181 svn_error_t *
1182 svn_fs_base__rep_contents_read_stream(svn_stream_t **rs_p,
1183 svn_fs_t *fs,
1184 const char *rep_key,
1185 svn_boolean_t use_trail_for_reads,
1186 trail_t *trail,
1187 apr_pool_t *pool)
1189 struct rep_read_baton *rb;
1191 SVN_ERR(rep_read_get_baton(&rb, fs, rep_key, use_trail_for_reads,
1192 trail, pool));
1193 *rs_p = svn_stream_create(rb, pool);
1194 svn_stream_set_read(*rs_p, rep_read_contents);
1196 return SVN_NO_ERROR;
1200 /* Clear the contents of REP_KEY, so that it represents the empty
1201 string, as part of TRAIL. TXN_ID is the id of the Subversion
1202 transaction under which this occurs. If REP_KEY is not mutable,
1203 return the error SVN_ERR_FS_REP_NOT_MUTABLE. */
1204 static svn_error_t *
1205 rep_contents_clear(svn_fs_t *fs,
1206 const char *rep_key,
1207 const char *txn_id,
1208 trail_t *trail,
1209 apr_pool_t *pool)
1211 representation_t *rep;
1212 const char *str_key;
1214 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
1216 /* Make sure it's mutable. */
1217 if (! rep_is_mutable(rep, txn_id))
1218 return svn_error_createf
1219 (SVN_ERR_FS_REP_NOT_MUTABLE, NULL,
1220 _("Rep '%s' is not mutable"), rep_key);
1222 assert(rep->kind == rep_kind_fulltext);
1224 /* If rep has no string, just return success. Else, clear the
1225 underlying string. */
1226 str_key = rep->contents.fulltext.string_key;
1227 if (str_key && *str_key)
1229 SVN_ERR(svn_fs_bdb__string_clear(fs, str_key, trail, pool));
1230 memcpy(rep->checksum, svn_md5_empty_string_digest(),
1231 APR_MD5_DIGESTSIZE);
1232 SVN_ERR(svn_fs_bdb__write_rep(fs, rep_key, rep, trail, pool));
1234 return SVN_NO_ERROR;
1238 svn_error_t *
1239 svn_fs_base__rep_contents_write_stream(svn_stream_t **ws_p,
1240 svn_fs_t *fs,
1241 const char *rep_key,
1242 const char *txn_id,
1243 svn_boolean_t use_trail_for_writes,
1244 trail_t *trail,
1245 apr_pool_t *pool)
1247 struct rep_write_baton *wb;
1249 /* Clear the current rep contents (free mutability check!). */
1250 SVN_ERR(rep_contents_clear(fs, rep_key, txn_id, trail, pool));
1252 /* Now, generate the write baton and stream. */
1253 wb = rep_write_get_baton(fs, rep_key, txn_id,
1254 use_trail_for_writes ? trail : NULL, pool);
1255 *ws_p = svn_stream_create(wb, pool);
1256 svn_stream_set_write(*ws_p, rep_write_contents);
1257 svn_stream_set_close(*ws_p, rep_write_close_contents);
1259 return SVN_NO_ERROR;
/*** Deltified storage. ***/
1266 /* Baton for svn_write_fn_t write_string_set(). */
1267 struct write_svndiff_strings_baton
1269 /* The fs where lives the string we're writing. */
1270 svn_fs_t *fs;
1272 /* The key of the string we're writing to. Typically this is
1273 initialized to NULL, so svn_fs_base__string_append() can fill in a
1274 value. */
1275 const char *key;
1277 /* The amount of txdelta data written to the current
1278 string-in-progress. */
1279 apr_size_t size;
1281 /* The amount of svndiff header information we've written thus far
1282 to the strings table. */
1283 apr_size_t header_read;
1285 /* The version number of the svndiff data written. ### You'd better
1286 not count on this being populated after the first chunk is sent
1287 through the interface, since it lives at the 4th byte of the
1288 stream. */
1289 apr_byte_t version;
1291 /* The trail we're writing in. */
1292 trail_t *trail;
1297 /* Function of type `svn_write_fn_t', for writing to a collection of
1298 strings; BATON is `struct write_svndiff_strings_baton *'.
1300 On the first call, BATON->key is null. A new string key in
1301 BATON->fs is chosen and stored in BATON->key; each call appends
1302 *LEN bytes from DATA onto the string. *LEN is never changed; if
1303 the write fails to write all *LEN bytes, an error is returned.
1304 BATON->size is used to track the total amount of data written via
1305 this handler, and must be reset by the caller to 0 when appropriate. */
1306 static svn_error_t *
1307 write_svndiff_strings(void *baton, const char *data, apr_size_t *len)
1309 struct write_svndiff_strings_baton *wb = baton;
1310 const char *buf = data;
1311 apr_size_t nheader = 0;
1313 /* If we haven't stripped all the header information from this
1314 stream yet, keep stripping. If someone sends a first window
1315 through here that's shorter than 4 bytes long, this will probably
1316 cause a nuclear reactor meltdown somewhere in the American
1317 midwest. */
1318 if (wb->header_read < 4)
1320 nheader = 4 - wb->header_read;
1321 *len -= nheader;
1322 buf += nheader;
1323 wb->header_read += nheader;
1325 /* If we have *now* read the full 4-byte header, check that
1326 least byte for the version number of the svndiff format. */
1327 if (wb->header_read == 4)
1328 wb->version = *(buf - 1);
1331 /* Append to the current string we're writing (or create a new one
1332 if WB->key is NULL). */
1333 SVN_ERR(svn_fs_bdb__string_append(wb->fs, &(wb->key), *len,
1334 buf, wb->trail, wb->trail->pool));
1336 /* Make sure we (still) have a key. */
1337 if (wb->key == NULL)
1338 return svn_error_create(SVN_ERR_FS_GENERAL, NULL,
1339 _("Failed to get new string key"));
1341 /* Restore *LEN to the value it *would* have been were it not for
1342 header stripping. */
1343 *len += nheader;
1345 /* Increment our running total of bytes written to this string. */
1346 wb->size += *len;
1348 return SVN_NO_ERROR;
1352 typedef struct window_write_t
1354 const char *key; /* string key for this window */
1355 apr_size_t svndiff_len; /* amount of svndiff data written to the string */
1356 svn_filesize_t text_off; /* offset of fulltext represented by this window */
1357 apr_size_t text_len; /* amount of fulltext data represented by this window */
1359 } window_write_t;
1362 svn_error_t *
1363 svn_fs_base__rep_deltify(svn_fs_t *fs,
1364 const char *target,
1365 const char *source,
1366 trail_t *trail,
1367 apr_pool_t *pool)
1369 base_fs_data_t *bfd = fs->fsap_data;
1370 svn_stream_t *source_stream; /* stream to read the source */
1371 svn_stream_t *target_stream; /* stream to read the target */
1372 svn_txdelta_stream_t *txdelta_stream; /* stream to read delta windows */
1374 /* window-y things, and an array to track them */
1375 window_write_t *ww;
1376 apr_array_header_t *windows;
1378 /* stream to write new (deltified) target data and its baton */
1379 svn_stream_t *new_target_stream;
1380 struct write_svndiff_strings_baton new_target_baton;
1382 /* window handler/baton for writing to above stream */
1383 svn_txdelta_window_handler_t new_target_handler;
1384 void *new_target_handler_baton;
1386 /* yes, we do windows */
1387 svn_txdelta_window_t *window;
1389 /* The current offset into the fulltext that our window is about to
1390 write. This doubles, after all windows are written, as the
1391 total size of the svndiff data for the deltification process. */
1392 svn_filesize_t tview_off = 0;
1394 /* The total amount of diff data written while deltifying. */
1395 svn_filesize_t diffsize = 0;
1397 /* TARGET's original string keys */
1398 apr_array_header_t *orig_str_keys;
1400 /* The digest for the representation's fulltext contents. */
1401 unsigned char rep_digest[APR_MD5_DIGESTSIZE];
1403 /* MD5 digest */
1404 const unsigned char *digest;
1406 /* pool for holding the windows */
1407 apr_pool_t *wpool;
1409 /* Paranoia: never allow a rep to be deltified against itself,
1410 because then there would be no fulltext reachable in the delta
1411 chain, and badness would ensue. */
1412 if (strcmp(target, source) == 0)
1413 return svn_error_createf
1414 (SVN_ERR_FS_CORRUPT, NULL,
1415 _("Attempt to deltify '%s' against itself"),
1416 target);
1418 /* Set up a handler for the svndiff data, which will write each
1419 window to its own string in the `strings' table. */
1420 new_target_baton.fs = fs;
1421 new_target_baton.trail = trail;
1422 new_target_baton.header_read = FALSE;
1423 new_target_stream = svn_stream_create(&new_target_baton, pool);
1424 svn_stream_set_write(new_target_stream, write_svndiff_strings);
1426 /* Get streams to our source and target text data. */
1427 SVN_ERR(svn_fs_base__rep_contents_read_stream(&source_stream, fs, source,
1428 TRUE, trail, pool));
1429 SVN_ERR(svn_fs_base__rep_contents_read_stream(&target_stream, fs, target,
1430 TRUE, trail, pool));
1432 /* Setup a stream to convert the textdelta data into svndiff windows. */
1433 svn_txdelta(&txdelta_stream, source_stream, target_stream, pool);
1435 if (bfd->format >= SVN_FS_BASE__MIN_SVNDIFF1_FORMAT)
1436 svn_txdelta_to_svndiff2(&new_target_handler, &new_target_handler_baton,
1437 new_target_stream, 1, pool);
1438 else
1439 svn_txdelta_to_svndiff2(&new_target_handler, &new_target_handler_baton,
1440 new_target_stream, 0, pool);
1442 /* subpool for the windows */
1443 wpool = svn_pool_create(pool);
1445 /* Now, loop, manufacturing and dispatching windows of svndiff data. */
1446 windows = apr_array_make(pool, 1, sizeof(ww));
1449 /* Reset some baton variables. */
1450 new_target_baton.size = 0;
1451 new_target_baton.key = NULL;
1453 /* Free the window. */
1454 svn_pool_clear(wpool);
1456 /* Fetch the next window of txdelta data. */
1457 SVN_ERR(svn_txdelta_next_window(&window, txdelta_stream, wpool));
1459 /* Send off this package to be written as svndiff data. */
1460 SVN_ERR(new_target_handler(window, new_target_handler_baton));
1461 if (window)
1463 /* Add a new window description to our array. */
1464 ww = apr_pcalloc(pool, sizeof(*ww));
1465 ww->key = new_target_baton.key;
1466 ww->svndiff_len = new_target_baton.size;
1467 ww->text_off = tview_off;
1468 ww->text_len = window->tview_len;
1469 APR_ARRAY_PUSH(windows, window_write_t *) = ww;
1471 /* Update our recordkeeping variables. */
1472 tview_off += window->tview_len;
1473 diffsize += ww->svndiff_len;
1476 } while (window);
1478 svn_pool_destroy(wpool);
1480 /* Having processed all the windows, we can query the MD5 digest
1481 from the stream. */
1482 digest = svn_txdelta_md5_digest(txdelta_stream);
1483 if (! digest)
1484 return svn_error_createf
1485 (SVN_ERR_DELTA_MD5_CHECKSUM_ABSENT, NULL,
1486 _("Failed to calculate MD5 digest for '%s'"),
1487 source);
1489 /* Construct a list of the strings used by the old representation so
1490 that we can delete them later. While we are here, if the old
1491 representation was a fulltext, check to make sure the delta we're
1492 replacing it with is actually smaller. (Don't perform this check
1493 if we're replacing a delta; in that case, we're going for a time
1494 optimization, not a space optimization.) */
1496 representation_t *old_rep;
1497 const char *str_key;
1499 SVN_ERR(svn_fs_bdb__read_rep(&old_rep, fs, target, trail, pool));
1500 if (old_rep->kind == rep_kind_fulltext)
1502 svn_filesize_t old_size = 0;
1504 str_key = old_rep->contents.fulltext.string_key;
1505 SVN_ERR(svn_fs_bdb__string_size(&old_size, fs, str_key,
1506 trail, pool));
1507 orig_str_keys = apr_array_make(pool, 1, sizeof(str_key));
1508 APR_ARRAY_PUSH(orig_str_keys, const char *) = str_key;
1510 /* If the new data is NOT an space optimization, destroy the
1511 string(s) we created, and get outta here. */
1512 if (diffsize >= old_size)
1514 int i;
1515 for (i = 0; i < windows->nelts; i++)
1517 ww = APR_ARRAY_IDX(windows, i, window_write_t *);
1518 SVN_ERR(svn_fs_bdb__string_delete(fs, ww->key, trail, pool));
1520 return SVN_NO_ERROR;
1523 else if (old_rep->kind == rep_kind_delta)
1524 SVN_ERR(delta_string_keys(&orig_str_keys, old_rep, pool));
1525 else /* unknown kind */
1526 return UNKNOWN_NODE_KIND(target);
1528 /* Save the checksum, since the new rep needs it. */
1529 memcpy(rep_digest, old_rep->checksum, APR_MD5_DIGESTSIZE);
1532 /* Hook the new strings we wrote into the rest of the filesystem by
1533 building a new representation to replace our old one. */
1535 representation_t new_rep;
1536 rep_delta_chunk_t *chunk;
1537 apr_array_header_t *chunks;
1538 int i;
1540 new_rep.kind = rep_kind_delta;
1541 new_rep.txn_id = NULL;
1543 /* Migrate the old rep's checksum to the new rep. */
1544 memcpy(new_rep.checksum, rep_digest, APR_MD5_DIGESTSIZE);
1546 chunks = apr_array_make(pool, windows->nelts, sizeof(chunk));
1548 /* Loop through the windows we wrote, creating and adding new
1549 chunks to the representation. */
1550 for (i = 0; i < windows->nelts; i++)
1552 ww = APR_ARRAY_IDX(windows, i, window_write_t *);
1554 /* Allocate a chunk and its window */
1555 chunk = apr_palloc(pool, sizeof(*chunk));
1556 chunk->offset = ww->text_off;
1558 /* Populate the window */
1559 chunk->version = new_target_baton.version;
1560 chunk->string_key = ww->key;
1561 chunk->size = ww->text_len;
1562 chunk->rep_key = source;
1564 /* Add this chunk to the array. */
1565 APR_ARRAY_PUSH(chunks, rep_delta_chunk_t *) = chunk;
1568 /* Put the chunks array into the representation. */
1569 new_rep.contents.delta.chunks = chunks;
1571 /* Write out the new representation. */
1572 SVN_ERR(svn_fs_bdb__write_rep(fs, target, &new_rep, trail, pool));
1574 /* Delete the original pre-deltified strings. */
1575 SVN_ERR(delete_strings(orig_str_keys, fs, trail, pool));
1578 return SVN_NO_ERROR;