2 * text-delta.c -- Internal text delta representation
4 * ====================================================================
5 * Copyright (c) 2000-2006 CollabNet. All rights reserved.
7 * This software is licensed as described in the file COPYING, which
8 * you should have received as part of this distribution. The terms
9 * are also available at http://subversion.tigris.org/license-1.html.
10 * If newer versions of this license are posted there, you may use a
11 * newer version instead, at your option.
13 * This software consists of voluntary contributions made by many
14 * individuals. For exact contribution history, see the revision
15 * history and logs, available at http://subversion.tigris.org/.
16 * ====================================================================
23 #include <apr_general.h> /* for APR_INLINE */
24 #include <apr_md5.h> /* for, um...MD5 stuff */
26 #include "svn_delta.h"
28 #include "svn_pools.h"
32 /* Text delta stream descriptor. */
34 struct svn_txdelta_stream_t
{
35 /* Copied from parameters to svn_txdelta_stream_create. */
37 svn_txdelta_next_window_fn_t next_window
;
38 svn_txdelta_md5_digest_fn_t md5_digest
;
41 /* Delta stream baton. */
42 struct txdelta_baton
{
43 /* These are copied from parameters passed to svn_txdelta. */
48 svn_boolean_t more_source
; /* FALSE if source stream hit EOF. */
49 svn_boolean_t more
; /* TRUE if there are more data in the pool. */
50 svn_filesize_t pos
; /* Offset of next read in source file. */
51 char *buf
; /* Buffer for input data. */
53 apr_md5_ctx_t context
; /* APR's MD5 context container. */
55 /* Calculated digest from MD5 operations.
56 NOTE: This is only valid after this stream has returned the NULL
58 unsigned char digest
[APR_MD5_DIGESTSIZE
];
62 /* Target-push stream descriptor. */
65 /* These are copied from parameters passed to svn_txdelta_target_push. */
67 svn_txdelta_window_handler_t wh
;
73 svn_filesize_t source_offset
;
74 apr_size_t source_len
;
75 svn_boolean_t source_done
;
76 apr_size_t target_len
;
80 /* Text delta applicator. */
83 /* These are copied from parameters passed to svn_txdelta_apply. */
87 /* Private data. Between calls, SBUF contains the data from the
88 * last window's source view, as specified by SBUF_OFFSET and
89 * SBUF_LEN. The contents of TBUF are not interesting between
91 apr_pool_t
*pool
; /* Pool to allocate data from */
92 char *sbuf
; /* Source buffer */
93 apr_size_t sbuf_size
; /* Allocated source buffer space */
94 svn_filesize_t sbuf_offset
; /* Offset of SBUF data in source stream */
95 apr_size_t sbuf_len
; /* Length of SBUF data */
96 char *tbuf
; /* Target buffer */
97 apr_size_t tbuf_size
; /* Allocated target buffer space */
99 apr_md5_ctx_t md5_context
; /* Leads to result_digest below. */
100 unsigned char *result_digest
; /* MD5 digest of resultant fulltext;
101 must point to at least APR_MD5_DIGESTSIZE
104 const char *error_info
; /* Optional extra info for error returns. */
109 svn_txdelta_window_t
*
110 svn_txdelta__make_window(const svn_txdelta__ops_baton_t
*build_baton
,
113 svn_txdelta_window_t
*window
;
114 svn_string_t
*new_data
= apr_palloc(pool
, sizeof(*new_data
));
116 window
= apr_palloc(pool
, sizeof(*window
));
117 window
->sview_offset
= 0;
118 window
->sview_len
= 0;
119 window
->tview_len
= 0;
121 window
->num_ops
= build_baton
->num_ops
;
122 window
->src_ops
= build_baton
->src_ops
;
123 window
->ops
= build_baton
->ops
;
125 /* just copy the fields over, rather than alloc/copying into a whole new
126 svn_string_t structure. */
127 /* ### would be much nicer if window->new_data were not a ptr... */
128 new_data
->data
= build_baton
->new_data
->data
;
129 new_data
->len
= build_baton
->new_data
->len
;
130 window
->new_data
= new_data
;
136 /* Compute and return a delta window using the xdelta algorithm on
137 DATA, which contains SOURCE_LEN bytes of source data and TARGET_LEN
138 bytes of target data. SOURCE_OFFSET gives the offset of the source
139 data, and is simply copied into the window's sview_offset field. */
140 static svn_txdelta_window_t
*
141 compute_window(const char *data
, apr_size_t source_len
, apr_size_t target_len
,
142 svn_filesize_t source_offset
, apr_pool_t
*pool
)
144 svn_txdelta__ops_baton_t build_baton
= { 0 };
145 svn_txdelta_window_t
*window
;
147 /* Compute the delta operations. */
148 build_baton
.new_data
= svn_stringbuf_create("", pool
);
151 svn_txdelta__insert_op(&build_baton
, svn_txdelta_new
, 0, target_len
, data
,
154 svn_txdelta__xdelta(&build_baton
, data
, source_len
, target_len
, pool
);
156 /* Create and return the delta window. */
157 window
= svn_txdelta__make_window(&build_baton
, pool
);
158 window
->sview_offset
= source_offset
;
159 window
->sview_len
= source_len
;
160 window
->tview_len
= target_len
;
166 svn_txdelta_window_t
*
167 svn_txdelta_window_dup(const svn_txdelta_window_t
*window
,
170 svn_txdelta__ops_baton_t build_baton
= { 0 };
171 svn_txdelta_window_t
*new_window
;
172 const apr_size_t ops_size
= (window
->num_ops
* sizeof(*build_baton
.ops
));
174 build_baton
.num_ops
= window
->num_ops
;
175 build_baton
.src_ops
= window
->src_ops
;
176 build_baton
.ops_size
= window
->num_ops
;
177 build_baton
.ops
= apr_palloc(pool
, ops_size
);
178 memcpy(build_baton
.ops
, window
->ops
, ops_size
);
179 build_baton
.new_data
=
180 svn_stringbuf_create_from_string(window
->new_data
, pool
);
182 new_window
= svn_txdelta__make_window(&build_baton
, pool
);
183 new_window
->sview_offset
= window
->sview_offset
;
184 new_window
->sview_len
= window
->sview_len
;
185 new_window
->tview_len
= window
->tview_len
;
189 /* This is a private interlibrary compatibility wrapper. */
190 svn_txdelta_window_t
*
191 svn_txdelta__copy_window(const svn_txdelta_window_t
*window
,
193 svn_txdelta_window_t
*
194 svn_txdelta__copy_window(const svn_txdelta_window_t
*window
,
197 return svn_txdelta_window_dup(window
, pool
);
201 /* Insert a delta op into a delta window. */
204 svn_txdelta__insert_op(svn_txdelta__ops_baton_t
*build_baton
,
205 enum svn_delta_action opcode
,
208 const char *new_data
,
211 svn_txdelta_op_t
*op
;
213 /* Check if this op can be merged with the previous op. The delta
214 combiner sometimes generates such ops, and this is the obvious
215 place to make the check. */
216 if (build_baton
->num_ops
> 0)
218 op
= &build_baton
->ops
[build_baton
->num_ops
- 1];
219 if (op
->action_code
== opcode
220 && (opcode
== svn_txdelta_new
221 || op
->offset
+ op
->length
== offset
))
223 op
->length
+= length
;
224 if (opcode
== svn_txdelta_new
)
225 svn_stringbuf_appendbytes(build_baton
->new_data
,
231 /* Create space for the new op. */
232 if (build_baton
->num_ops
== build_baton
->ops_size
)
234 svn_txdelta_op_t
*const old_ops
= build_baton
->ops
;
235 int const new_ops_size
= (build_baton
->ops_size
== 0
236 ? 16 : 2 * build_baton
->ops_size
);
238 apr_palloc(pool
, new_ops_size
* sizeof(*build_baton
->ops
));
240 /* Copy any existing ops into the new array */
242 memcpy(build_baton
->ops
, old_ops
,
243 build_baton
->ops_size
* sizeof(*build_baton
->ops
));
244 build_baton
->ops_size
= new_ops_size
;
247 /* Insert the op. svn_delta_source and svn_delta_target are
248 just inserted. For svn_delta_new, the new data must be
249 copied into the window. */
250 op
= &build_baton
->ops
[build_baton
->num_ops
];
253 case svn_txdelta_source
:
254 ++build_baton
->src_ops
;
256 case svn_txdelta_target
:
257 op
->action_code
= opcode
;
261 case svn_txdelta_new
:
262 op
->action_code
= opcode
;
263 op
->offset
= build_baton
->new_data
->len
;
265 svn_stringbuf_appendbytes(build_baton
->new_data
, new_data
, length
);
268 assert(!"unknown delta op.");
271 ++build_baton
->num_ops
;
276 /* Generic delta stream functions. */
278 svn_txdelta_stream_t
*
279 svn_txdelta_stream_create(void *baton
,
280 svn_txdelta_next_window_fn_t next_window
,
281 svn_txdelta_md5_digest_fn_t md5_digest
,
284 svn_txdelta_stream_t
*stream
= apr_palloc(pool
, sizeof(*stream
));
286 stream
->baton
= baton
;
287 stream
->next_window
= next_window
;
288 stream
->md5_digest
= md5_digest
;
294 svn_txdelta_next_window(svn_txdelta_window_t
**window
,
295 svn_txdelta_stream_t
*stream
,
298 return stream
->next_window(window
, stream
->baton
, pool
);
301 const unsigned char *
302 svn_txdelta_md5_digest(svn_txdelta_stream_t
*stream
)
304 return stream
->md5_digest(stream
->baton
);
310 txdelta_next_window(svn_txdelta_window_t
**window
,
314 struct txdelta_baton
*b
= baton
;
315 apr_size_t source_len
= SVN_DELTA_WINDOW_SIZE
;
316 apr_size_t target_len
= SVN_DELTA_WINDOW_SIZE
;
318 /* Read the source stream. */
321 SVN_ERR(svn_stream_read(b
->source
, b
->buf
, &source_len
));
322 b
->more_source
= (source_len
== SVN_DELTA_WINDOW_SIZE
);
327 /* Read the target stream. */
328 SVN_ERR(svn_stream_read(b
->target
, b
->buf
+ source_len
,
330 b
->pos
+= source_len
;
332 /* ### The apr_md5 functions always return APR_SUCCESS. At one
333 point, we proposed to APR folks that the interfaces change to
334 return void, but for some people that was apparently not a good
335 idea, and we didn't bother pressing the matter. In the meantime,
336 we just ignore their return values below. */
339 /* No target data? We're done; return the final window. */
340 apr_md5_final(b
->digest
, &(b
->context
));
347 apr_md5_update(&(b
->context
), b
->buf
+ source_len
,
351 *window
= compute_window(b
->buf
, source_len
, target_len
,
352 b
->pos
- source_len
, pool
);
359 static const unsigned char *
360 txdelta_md5_digest(void *baton
)
362 struct txdelta_baton
*b
= baton
;
363 /* If there are more windows for this stream, the digest has not yet
373 svn_txdelta(svn_txdelta_stream_t
**stream
,
374 svn_stream_t
*source
,
375 svn_stream_t
*target
,
378 struct txdelta_baton
*b
= apr_palloc(pool
, sizeof(*b
));
381 b
->more_source
= TRUE
;
384 b
->buf
= apr_palloc(pool
, 2 * SVN_DELTA_WINDOW_SIZE
);
386 /* Initialize MD5 digest calculation. */
387 apr_md5_init(&(b
->context
));
389 *stream
= svn_txdelta_stream_create(b
, txdelta_next_window
,
390 txdelta_md5_digest
, pool
);
395 /* Functions for implementing a "target push" delta. */
397 /* This is the write handler for a target-push delta stream. It reads
398 * source data, buffers target data, and fires off delta windows when
399 * the target data buffer is full. */
401 tpush_write_handler(void *baton
, const char *data
, apr_size_t
*len
)
403 struct tpush_baton
*tb
= baton
;
404 apr_size_t chunk_len
, data_len
= *len
;
405 apr_pool_t
*pool
= svn_pool_create(tb
->pool
);
406 svn_txdelta_window_t
*window
;
410 svn_pool_clear(pool
);
412 /* Make sure we're all full up on source data, if possible. */
413 if (tb
->source_len
== 0 && !tb
->source_done
)
415 tb
->source_len
= SVN_DELTA_WINDOW_SIZE
;
416 SVN_ERR(svn_stream_read(tb
->source
, tb
->buf
, &tb
->source_len
));
417 if (tb
->source_len
< SVN_DELTA_WINDOW_SIZE
)
418 tb
->source_done
= TRUE
;
421 /* Copy in the target data, up to SVN_DELTA_WINDOW_SIZE. */
422 chunk_len
= SVN_DELTA_WINDOW_SIZE
- tb
->target_len
;
423 if (chunk_len
> data_len
)
424 chunk_len
= data_len
;
425 memcpy(tb
->buf
+ tb
->source_len
+ tb
->target_len
, data
, chunk_len
);
427 data_len
-= chunk_len
;
428 tb
->target_len
+= chunk_len
;
430 /* If we're full of target data, compute and fire off a window. */
431 if (tb
->target_len
== SVN_DELTA_WINDOW_SIZE
)
433 window
= compute_window(tb
->buf
, tb
->source_len
, tb
->target_len
,
434 tb
->source_offset
, pool
);
435 SVN_ERR(tb
->wh(window
, tb
->whb
));
436 tb
->source_offset
+= tb
->source_len
;
442 svn_pool_destroy(pool
);
447 /* This is the close handler for a target-push delta stream. It sends
448 * a final window if there is any buffered target data, and then sends
449 * a NULL window signifying the end of the window stream. */
451 tpush_close_handler(void *baton
)
453 struct tpush_baton
*tb
= baton
;
454 svn_txdelta_window_t
*window
;
456 /* Send a final window if we have any residual target data. */
457 if (tb
->target_len
> 0)
459 window
= compute_window(tb
->buf
, tb
->source_len
, tb
->target_len
,
460 tb
->source_offset
, tb
->pool
);
461 SVN_ERR(tb
->wh(window
, tb
->whb
));
464 /* Send a final NULL window signifying the end. */
465 SVN_ERR(tb
->wh(NULL
, tb
->whb
));
471 svn_txdelta_target_push(svn_txdelta_window_handler_t handler
,
472 void *handler_baton
, svn_stream_t
*source
,
475 struct tpush_baton
*tb
;
476 svn_stream_t
*stream
;
478 /* Initialize baton. */
479 tb
= apr_palloc(pool
, sizeof(*tb
));
482 tb
->whb
= handler_baton
;
484 tb
->buf
= apr_palloc(pool
, 2 * SVN_DELTA_WINDOW_SIZE
);
485 tb
->source_offset
= 0;
487 tb
->source_done
= FALSE
;
490 /* Create and return writable stream. */
491 stream
= svn_stream_create(tb
, pool
);
492 svn_stream_set_write(stream
, tpush_write_handler
);
493 svn_stream_set_close(stream
, tpush_close_handler
);
499 /* Functions for applying deltas. */
501 /* Ensure that BUF has enough space for VIEW_LEN bytes. */
502 static APR_INLINE
void
503 size_buffer(char **buf
, apr_size_t
*buf_size
,
504 apr_size_t view_len
, apr_pool_t
*pool
)
506 if (view_len
> *buf_size
)
509 if (*buf_size
< view_len
)
510 *buf_size
= view_len
;
511 *buf
= apr_palloc(pool
, *buf_size
);
517 svn_txdelta_apply_instructions(svn_txdelta_window_t
*window
,
518 const char *sbuf
, char *tbuf
,
521 const svn_txdelta_op_t
*op
;
522 apr_size_t i
, j
, tpos
= 0;
524 for (op
= window
->ops
; op
< window
->ops
+ window
->num_ops
; op
++)
526 const apr_size_t buf_len
= (op
->length
< *tlen
- tpos
527 ? op
->length
: *tlen
- tpos
);
529 /* Check some invariants common to all instructions. */
530 assert(tpos
+ op
->length
<= window
->tview_len
);
532 switch (op
->action_code
)
534 case svn_txdelta_source
:
535 /* Copy from source area. */
536 assert(op
->offset
+ op
->length
<= window
->sview_len
);
537 memcpy(tbuf
+ tpos
, sbuf
+ op
->offset
, buf_len
);
540 case svn_txdelta_target
:
541 /* Copy from target area. Don't use memcpy() since its
542 semantics aren't guaranteed for overlapping memory areas,
543 and target copies are allowed to overlap to generate
545 assert(op
->offset
< tpos
);
546 for (i
= op
->offset
, j
= tpos
; i
< op
->offset
+ buf_len
; i
++)
550 case svn_txdelta_new
:
551 /* Copy from window new area. */
552 assert(op
->offset
+ op
->length
<= window
->new_data
->len
);
554 window
->new_data
->data
+ op
->offset
,
559 assert(!"Invalid delta instruction code");
564 return; /* The buffer is full. */
567 /* Check that we produced the right amount of data. */
568 assert(tpos
== window
->tview_len
);
572 /* This is a private interlibrary compatibility wrapper. */
574 svn_txdelta__apply_instructions(svn_txdelta_window_t
*window
,
575 const char *sbuf
, char *tbuf
,
578 svn_txdelta__apply_instructions(svn_txdelta_window_t
*window
,
579 const char *sbuf
, char *tbuf
,
582 svn_txdelta_apply_instructions(window
, sbuf
, tbuf
, tlen
);
586 /* Apply WINDOW to the streams given by APPL. */
588 apply_window(svn_txdelta_window_t
*window
, void *baton
)
590 struct apply_baton
*ab
= (struct apply_baton
*) baton
;
596 /* We're done; just clean up. */
597 if (ab
->result_digest
)
598 apr_md5_final(ab
->result_digest
, &(ab
->md5_context
));
600 err
= svn_stream_close(ab
->target
);
601 svn_pool_destroy(ab
->pool
);
606 /* Make sure the source view didn't slide backwards. */
607 assert(window
->sview_len
== 0
608 || (window
->sview_offset
>= ab
->sbuf_offset
609 && (window
->sview_offset
+ window
->sview_len
610 >= ab
->sbuf_offset
+ ab
->sbuf_len
)));
612 /* Make sure there's enough room in the target buffer. */
613 size_buffer(&ab
->tbuf
, &ab
->tbuf_size
, window
->tview_len
, ab
->pool
);
615 /* Prepare the source buffer for reading from the input stream. */
616 if (window
->sview_offset
!= ab
->sbuf_offset
617 || window
->sview_len
> ab
->sbuf_size
)
619 char *old_sbuf
= ab
->sbuf
;
621 /* Make sure there's enough room. */
622 size_buffer(&ab
->sbuf
, &ab
->sbuf_size
, window
->sview_len
, ab
->pool
);
624 /* If the existing view overlaps with the new view, copy the
625 * overlap to the beginning of the new buffer. */
626 if (ab
->sbuf_offset
+ ab
->sbuf_len
> window
->sview_offset
)
629 (apr_size_t
)(window
->sview_offset
- ab
->sbuf_offset
);
630 memmove(ab
->sbuf
, old_sbuf
+ start
, ab
->sbuf_len
- start
);
631 ab
->sbuf_len
-= start
;
635 ab
->sbuf_offset
= window
->sview_offset
;
638 /* Read the remainder of the source view into the buffer. */
639 if (ab
->sbuf_len
< window
->sview_len
)
641 len
= window
->sview_len
- ab
->sbuf_len
;
642 err
= svn_stream_read(ab
->source
, ab
->sbuf
+ ab
->sbuf_len
, &len
);
643 if (err
== SVN_NO_ERROR
&& len
!= window
->sview_len
- ab
->sbuf_len
)
644 err
= svn_error_create(SVN_ERR_INCOMPLETE_DATA
, NULL
,
645 "Delta source ended unexpectedly");
646 if (err
!= SVN_NO_ERROR
)
648 ab
->sbuf_len
= window
->sview_len
;
651 /* Apply the window instructions to the source view to generate
653 len
= window
->tview_len
;
654 svn_txdelta_apply_instructions(window
, ab
->sbuf
, ab
->tbuf
, &len
);
655 assert(len
== window
->tview_len
);
657 /* Write out the output. */
659 /* ### We've also considered just adding two (optionally null)
660 arguments to svn_stream_create(): read_checksum and
661 write_checksum. Then instead of every caller updating an md5
662 context when it calls svn_stream_write() or svn_stream_read(),
663 streams would do it automatically, and verify the checksum in
664 svn_stream_closed(). But this might be overkill for issue #689;
665 so for now we just update the context here. */
666 if (ab
->result_digest
)
667 apr_md5_update(&(ab
->md5_context
), ab
->tbuf
, len
);
669 return svn_stream_write(ab
->target
, ab
->tbuf
, &len
);
674 svn_txdelta_apply(svn_stream_t
*source
,
675 svn_stream_t
*target
,
676 unsigned char *result_digest
,
677 const char *error_info
,
679 svn_txdelta_window_handler_t
*handler
,
680 void **handler_baton
)
682 apr_pool_t
*subpool
= svn_pool_create(pool
);
683 struct apply_baton
*ab
;
685 ab
= apr_palloc(subpool
, sizeof(*ab
));
695 ab
->result_digest
= result_digest
;
698 apr_md5_init(&(ab
->md5_context
));
701 ab
->error_info
= apr_pstrdup(subpool
, error_info
);
703 ab
->error_info
= NULL
;
705 *handler
= apply_window
;
711 /* Convenience routines */
714 svn_txdelta_send_string(const svn_string_t
*string
,
715 svn_txdelta_window_handler_t handler
,
719 svn_txdelta_window_t window
= { 0 };
722 /* Build a single `new' op */
723 op
.action_code
= svn_txdelta_new
;
725 op
.length
= string
->len
;
727 /* Build a single window containing a ptr to the string. */
728 window
.tview_len
= string
->len
;
731 window
.new_data
= string
;
733 /* Push the one window at the handler. */
734 SVN_ERR((*handler
)(&window
, handler_baton
));
736 /* Push a NULL at the handler, because we're done. */
737 SVN_ERR((*handler
)(NULL
, handler_baton
));
742 svn_error_t
*svn_txdelta_send_stream(svn_stream_t
*stream
,
743 svn_txdelta_window_handler_t handler
,
745 unsigned char *digest
,
748 svn_txdelta_stream_t
*txstream
;
751 /* ### this is a hack. we should simply read from the stream, construct
752 ### some windows, and pass those to the handler. there isn't any reason
753 ### to crank up a full "diff" algorithm just to copy a stream.
757 /* Create a delta stream which converts an *empty* bytestream into the
758 target bytestream. */
759 svn_txdelta(&txstream
, svn_stream_empty(pool
), stream
, pool
);
760 err
= svn_txdelta_send_txstream(txstream
, handler
, handler_baton
, pool
);
762 if (digest
&& (! err
))
764 const unsigned char *result_md5
;
765 result_md5
= svn_txdelta_md5_digest(txstream
);
766 /* Since err is null, result_md5 "cannot" be null. */
767 memcpy(digest
, result_md5
, APR_MD5_DIGESTSIZE
);
773 svn_error_t
*svn_txdelta_send_txstream(svn_txdelta_stream_t
*txstream
,
774 svn_txdelta_window_handler_t handler
,
778 svn_txdelta_window_t
*window
;
780 /* create a pool just for the windows */
781 apr_pool_t
*wpool
= svn_pool_create(pool
);
785 /* free the window (if any) */
786 svn_pool_clear(wpool
);
788 /* read in a single delta window */
789 SVN_ERR(svn_txdelta_next_window(&window
, txstream
, wpool
));
791 /* shove it at the handler */
792 SVN_ERR((*handler
)(window
, handler_baton
));
794 while (window
!= NULL
);
796 svn_pool_destroy(wpool
);