2 * text-delta.c -- Internal text delta representation
4 * ====================================================================
5 * Copyright (c) 2000-2006 CollabNet. All rights reserved.
7 * This software is licensed as described in the file COPYING, which
8 * you should have received as part of this distribution. The terms
9 * are also available at http://subversion.tigris.org/license-1.html.
10 * If newer versions of this license are posted there, you may use a
11 * newer version instead, at your option.
13 * This software consists of voluntary contributions made by many
14 * individuals. For exact contribution history, see the revision
15 * history and logs, available at http://subversion.tigris.org/.
16 * ====================================================================
23 #include <apr_general.h> /* for APR_INLINE */
24 #include <apr_md5.h> /* for, um...MD5 stuff */
26 #include "svn_delta.h"
28 #include "svn_pools.h"
32 /* Text delta stream descriptor. */
34 struct svn_txdelta_stream_t
{
35 /* Copied from parameters to svn_txdelta_stream_create. */
37 svn_txdelta_next_window_fn_t next_window
;
38 svn_txdelta_md5_digest_fn_t md5_digest
;
41 /* Delta stream baton. */
42 struct txdelta_baton
{
43 /* These are copied from parameters passed to svn_txdelta. */
48 svn_boolean_t more_source
; /* FALSE if source stream hit EOF. */
49 svn_boolean_t more
; /* TRUE if there are more data in the pool. */
50 svn_filesize_t pos
; /* Offset of next read in source file. */
51 char *buf
; /* Buffer for vdelta data. */
53 apr_md5_ctx_t context
; /* APR's MD5 context container. */
55 /* Calculated digest from MD5 operations.
56 NOTE: This is only valid after this stream has returned the NULL
58 unsigned char digest
[APR_MD5_DIGESTSIZE
];
62 /* Target-push stream descriptor. */
65 /* These are copied from parameters passed to svn_txdelta_target_push. */
67 svn_txdelta_window_handler_t wh
;
73 svn_filesize_t source_offset
;
74 apr_size_t source_len
;
75 svn_boolean_t source_done
;
76 apr_size_t target_len
;
80 /* Text delta applicator. */
83 /* These are copied from parameters passed to svn_txdelta_apply. */
87 /* Private data. Between calls, SBUF contains the data from the
88 * last window's source view, as specified by SBUF_OFFSET and
89 * SBUF_LEN. The contents of TBUF are not interesting between
91 apr_pool_t
*pool
; /* Pool to allocate data from */
92 char *sbuf
; /* Source buffer */
93 apr_size_t sbuf_size
; /* Allocated source buffer space */
94 svn_filesize_t sbuf_offset
; /* Offset of SBUF data in source stream */
95 apr_size_t sbuf_len
; /* Length of SBUF data */
96 char *tbuf
; /* Target buffer */
97 apr_size_t tbuf_size
; /* Allocated target buffer space */
99 apr_md5_ctx_t md5_context
; /* Leads to result_digest below. */
100 unsigned char *result_digest
; /* MD5 digest of resultant fulltext;
101 must point to at least APR_MD5_DIGESTSIZE
104 const char *error_info
; /* Optional extra info for error returns. */
109 svn_txdelta_window_t
*
110 svn_txdelta__make_window(const svn_txdelta__ops_baton_t
*build_baton
,
113 svn_txdelta_window_t
*window
;
114 svn_string_t
*new_data
= apr_palloc(pool
, sizeof(*new_data
));
116 window
= apr_palloc(pool
, sizeof(*window
));
117 window
->sview_offset
= 0;
118 window
->sview_len
= 0;
119 window
->tview_len
= 0;
121 window
->num_ops
= build_baton
->num_ops
;
122 window
->src_ops
= build_baton
->src_ops
;
123 window
->ops
= build_baton
->ops
;
125 /* just copy the fields over, rather than alloc/copying into a whole new
126 svn_string_t structure. */
127 /* ### would be much nicer if window->new_data were not a ptr... */
128 new_data
->data
= build_baton
->new_data
->data
;
129 new_data
->len
= build_baton
->new_data
->len
;
130 window
->new_data
= new_data
;
136 /* Compute and return a delta window using the vdelta or xdelta algorithm on
137 DATA, which contains SOURCE_LEN bytes of source data and TARGET_LEN
138 bytes of target data. SOURCE_OFFSET gives the offset of the source
139 data, and is simply copied into the window's sview_offset field. */
140 static svn_txdelta_window_t
*
141 compute_window(const char *data
, apr_size_t source_len
, apr_size_t target_len
,
142 svn_filesize_t source_offset
, apr_pool_t
*pool
)
144 svn_txdelta__ops_baton_t build_baton
= { 0 };
145 svn_txdelta_window_t
*window
;
147 /* Compute the delta operations. */
148 build_baton
.new_data
= svn_stringbuf_create("", pool
);
151 svn_txdelta__vdelta(&build_baton
, data
, source_len
, target_len
, pool
);
153 svn_txdelta__xdelta(&build_baton
, data
, source_len
, target_len
, pool
);
155 /* Create and return the delta window. */
156 window
= svn_txdelta__make_window(&build_baton
, pool
);
157 window
->sview_offset
= source_offset
;
158 window
->sview_len
= source_len
;
159 window
->tview_len
= target_len
;
165 svn_txdelta_window_t
*
166 svn_txdelta_window_dup(const svn_txdelta_window_t
*window
,
169 svn_txdelta__ops_baton_t build_baton
= { 0 };
170 svn_txdelta_window_t
*new_window
;
171 const apr_size_t ops_size
= (window
->num_ops
* sizeof(*build_baton
.ops
));
173 build_baton
.num_ops
= window
->num_ops
;
174 build_baton
.src_ops
= window
->src_ops
;
175 build_baton
.ops_size
= window
->num_ops
;
176 build_baton
.ops
= apr_palloc(pool
, ops_size
);
177 memcpy(build_baton
.ops
, window
->ops
, ops_size
);
178 build_baton
.new_data
=
179 svn_stringbuf_create_from_string(window
->new_data
, pool
);
181 new_window
= svn_txdelta__make_window(&build_baton
, pool
);
182 new_window
->sview_offset
= window
->sview_offset
;
183 new_window
->sview_len
= window
->sview_len
;
184 new_window
->tview_len
= window
->tview_len
;
188 /* This is a private interlibrary compatibility wrapper. */
189 svn_txdelta_window_t
*
190 svn_txdelta__copy_window(const svn_txdelta_window_t
*window
,
192 svn_txdelta_window_t
*
193 svn_txdelta__copy_window(const svn_txdelta_window_t
*window
,
196 return svn_txdelta_window_dup(window
, pool
);
200 /* Insert a delta op into a delta window. */
203 svn_txdelta__insert_op(svn_txdelta__ops_baton_t
*build_baton
,
204 enum svn_delta_action opcode
,
207 const char *new_data
,
210 svn_txdelta_op_t
*op
;
212 /* Check if this op can be merged with the previous op. The vdelta
213 algorithm will never generate such ops, but the delta combiner
214 can, and this is the obvious place to make the check. */
215 if (build_baton
->num_ops
> 0)
217 op
= &build_baton
->ops
[build_baton
->num_ops
- 1];
218 if (op
->action_code
== opcode
219 && (opcode
== svn_txdelta_new
220 || op
->offset
+ op
->length
== offset
))
222 op
->length
+= length
;
223 if (opcode
== svn_txdelta_new
)
224 svn_stringbuf_appendbytes(build_baton
->new_data
,
230 /* Create space for the new op. */
231 if (build_baton
->num_ops
== build_baton
->ops_size
)
233 svn_txdelta_op_t
*const old_ops
= build_baton
->ops
;
234 int const new_ops_size
= (build_baton
->ops_size
== 0
235 ? 16 : 2 * build_baton
->ops_size
);
237 apr_palloc(pool
, new_ops_size
* sizeof(*build_baton
->ops
));
239 /* Copy any existing ops into the new array */
241 memcpy(build_baton
->ops
, old_ops
,
242 build_baton
->ops_size
* sizeof(*build_baton
->ops
));
243 build_baton
->ops_size
= new_ops_size
;
246 /* Insert the op. svn_delta_source and svn_delta_target are
247 just inserted. For svn_delta_new, the new data must be
248 copied into the window. */
249 op
= &build_baton
->ops
[build_baton
->num_ops
];
252 case svn_txdelta_source
:
253 ++build_baton
->src_ops
;
255 case svn_txdelta_target
:
256 op
->action_code
= opcode
;
260 case svn_txdelta_new
:
261 op
->action_code
= opcode
;
262 op
->offset
= build_baton
->new_data
->len
;
264 svn_stringbuf_appendbytes(build_baton
->new_data
, new_data
, length
);
267 assert(!"unknown delta op.");
270 ++build_baton
->num_ops
;
275 /* Generic delta stream functions. */
277 svn_txdelta_stream_t
*
278 svn_txdelta_stream_create(void *baton
,
279 svn_txdelta_next_window_fn_t next_window
,
280 svn_txdelta_md5_digest_fn_t md5_digest
,
283 svn_txdelta_stream_t
*stream
= apr_palloc(pool
, sizeof(*stream
));
285 stream
->baton
= baton
;
286 stream
->next_window
= next_window
;
287 stream
->md5_digest
= md5_digest
;
293 svn_txdelta_next_window(svn_txdelta_window_t
**window
,
294 svn_txdelta_stream_t
*stream
,
297 return stream
->next_window(window
, stream
->baton
, pool
);
300 const unsigned char *
301 svn_txdelta_md5_digest(svn_txdelta_stream_t
*stream
)
303 return stream
->md5_digest(stream
->baton
);
309 txdelta_next_window(svn_txdelta_window_t
**window
,
313 struct txdelta_baton
*b
= baton
;
314 apr_size_t source_len
= SVN_DELTA_WINDOW_SIZE
;
315 apr_size_t target_len
= SVN_DELTA_WINDOW_SIZE
;
317 /* Read the source stream. */
320 SVN_ERR(svn_stream_read(b
->source
, b
->buf
, &source_len
));
321 b
->more_source
= (source_len
== SVN_DELTA_WINDOW_SIZE
);
326 /* Read the target stream. */
327 SVN_ERR(svn_stream_read(b
->target
, b
->buf
+ source_len
,
329 b
->pos
+= source_len
;
331 /* ### The apr_md5 functions always return APR_SUCCESS. At one
332 point, we proposed to APR folks that the interfaces change to
333 return void, but for some people that was apparently not a good
334 idea, and we didn't bother pressing the matter. In the meantime,
335 we just ignore their return values below. */
338 /* No target data? We're done; return the final window. */
339 apr_md5_final(b
->digest
, &(b
->context
));
346 apr_md5_update(&(b
->context
), b
->buf
+ source_len
,
350 *window
= compute_window(b
->buf
, source_len
, target_len
,
351 b
->pos
- source_len
, pool
);
358 static const unsigned char *
359 txdelta_md5_digest(void *baton
)
361 struct txdelta_baton
*b
= baton
;
362 /* If there are more windows for this stream, the digest has not yet
372 svn_txdelta(svn_txdelta_stream_t
**stream
,
373 svn_stream_t
*source
,
374 svn_stream_t
*target
,
377 struct txdelta_baton
*b
= apr_palloc(pool
, sizeof(*b
));
380 b
->more_source
= TRUE
;
383 b
->buf
= apr_palloc(pool
, 2 * SVN_DELTA_WINDOW_SIZE
);
385 /* Initialize MD5 digest calculation. */
386 apr_md5_init(&(b
->context
));
388 *stream
= svn_txdelta_stream_create(b
, txdelta_next_window
,
389 txdelta_md5_digest
, pool
);
394 /* Functions for implementing a "target push" delta. */
396 /* This is the write handler for a target-push delta stream. It reads
397 * source data, buffers target data, and fires off delta windows when
398 * the target data buffer is full. */
400 tpush_write_handler(void *baton
, const char *data
, apr_size_t
*len
)
402 struct tpush_baton
*tb
= baton
;
403 apr_size_t chunk_len
, data_len
= *len
;
404 apr_pool_t
*pool
= svn_pool_create(tb
->pool
);
405 svn_txdelta_window_t
*window
;
409 svn_pool_clear(pool
);
411 /* Make sure we're all full up on source data, if possible. */
412 if (tb
->source_len
== 0 && !tb
->source_done
)
414 tb
->source_len
= SVN_DELTA_WINDOW_SIZE
;
415 SVN_ERR(svn_stream_read(tb
->source
, tb
->buf
, &tb
->source_len
));
416 if (tb
->source_len
< SVN_DELTA_WINDOW_SIZE
)
417 tb
->source_done
= TRUE
;
420 /* Copy in the target data, up to SVN_DELTA_WINDOW_SIZE. */
421 chunk_len
= SVN_DELTA_WINDOW_SIZE
- tb
->target_len
;
422 if (chunk_len
> data_len
)
423 chunk_len
= data_len
;
424 memcpy(tb
->buf
+ tb
->source_len
+ tb
->target_len
, data
, chunk_len
);
426 data_len
-= chunk_len
;
427 tb
->target_len
+= chunk_len
;
429 /* If we're full of target data, compute and fire off a window. */
430 if (tb
->target_len
== SVN_DELTA_WINDOW_SIZE
)
432 window
= compute_window(tb
->buf
, tb
->source_len
, tb
->target_len
,
433 tb
->source_offset
, pool
);
434 SVN_ERR(tb
->wh(window
, tb
->whb
));
435 tb
->source_offset
+= tb
->source_len
;
441 svn_pool_destroy(pool
);
446 /* This is the close handler for a target-push delta stream. It sends
447 * a final window if there is any buffered target data, and then sends
448 * a NULL window signifying the end of the window stream. */
450 tpush_close_handler(void *baton
)
452 struct tpush_baton
*tb
= baton
;
453 svn_txdelta_window_t
*window
;
455 /* Send a final window if we have any residual target data. */
456 if (tb
->target_len
> 0)
458 window
= compute_window(tb
->buf
, tb
->source_len
, tb
->target_len
,
459 tb
->source_offset
, tb
->pool
);
460 SVN_ERR(tb
->wh(window
, tb
->whb
));
463 /* Send a final NULL window signifying the end. */
464 SVN_ERR(tb
->wh(NULL
, tb
->whb
));
470 svn_txdelta_target_push(svn_txdelta_window_handler_t handler
,
471 void *handler_baton
, svn_stream_t
*source
,
474 struct tpush_baton
*tb
;
475 svn_stream_t
*stream
;
477 /* Initialize baton. */
478 tb
= apr_palloc(pool
, sizeof(*tb
));
481 tb
->whb
= handler_baton
;
483 tb
->buf
= apr_palloc(pool
, 2 * SVN_DELTA_WINDOW_SIZE
);
484 tb
->source_offset
= 0;
486 tb
->source_done
= FALSE
;
489 /* Create and return writable stream. */
490 stream
= svn_stream_create(tb
, pool
);
491 svn_stream_set_write(stream
, tpush_write_handler
);
492 svn_stream_set_close(stream
, tpush_close_handler
);
498 /* Functions for applying deltas. */
500 /* Ensure that BUF has enough space for VIEW_LEN bytes. */
501 static APR_INLINE
void
502 size_buffer(char **buf
, apr_size_t
*buf_size
,
503 apr_size_t view_len
, apr_pool_t
*pool
)
505 if (view_len
> *buf_size
)
508 if (*buf_size
< view_len
)
509 *buf_size
= view_len
;
510 *buf
= apr_palloc(pool
, *buf_size
);
516 svn_txdelta_apply_instructions(svn_txdelta_window_t
*window
,
517 const char *sbuf
, char *tbuf
,
520 const svn_txdelta_op_t
*op
;
521 apr_size_t i
, j
, tpos
= 0;
523 for (op
= window
->ops
; op
< window
->ops
+ window
->num_ops
; op
++)
525 const apr_size_t buf_len
= (op
->length
< *tlen
- tpos
526 ? op
->length
: *tlen
- tpos
);
528 /* Check some invariants common to all instructions. */
529 assert(tpos
+ op
->length
<= window
->tview_len
);
531 switch (op
->action_code
)
533 case svn_txdelta_source
:
534 /* Copy from source area. */
535 assert(op
->offset
+ op
->length
<= window
->sview_len
);
536 memcpy(tbuf
+ tpos
, sbuf
+ op
->offset
, buf_len
);
539 case svn_txdelta_target
:
540 /* Copy from target area. Don't use memcpy() since its
541 semantics aren't guaranteed for overlapping memory areas,
542 and target copies are allowed to overlap to generate
544 assert(op
->offset
< tpos
);
545 for (i
= op
->offset
, j
= tpos
; i
< op
->offset
+ buf_len
; i
++)
549 case svn_txdelta_new
:
550 /* Copy from window new area. */
551 assert(op
->offset
+ op
->length
<= window
->new_data
->len
);
553 window
->new_data
->data
+ op
->offset
,
558 assert(!"Invalid delta instruction code");
563 return; /* The buffer is full. */
566 /* Check that we produced the right amount of data. */
567 assert(tpos
== window
->tview_len
);
571 /* This is a private interlibrary compatibility wrapper. */
573 svn_txdelta__apply_instructions(svn_txdelta_window_t
*window
,
574 const char *sbuf
, char *tbuf
,
577 svn_txdelta__apply_instructions(svn_txdelta_window_t
*window
,
578 const char *sbuf
, char *tbuf
,
581 svn_txdelta_apply_instructions(window
, sbuf
, tbuf
, tlen
);
585 /* Apply WINDOW to the streams given by APPL. */
587 apply_window(svn_txdelta_window_t
*window
, void *baton
)
589 struct apply_baton
*ab
= (struct apply_baton
*) baton
;
595 /* We're done; just clean up. */
596 if (ab
->result_digest
)
597 apr_md5_final(ab
->result_digest
, &(ab
->md5_context
));
599 err
= svn_stream_close(ab
->target
);
600 svn_pool_destroy(ab
->pool
);
605 /* Make sure the source view didn't slide backwards. */
606 assert(window
->sview_len
== 0
607 || (window
->sview_offset
>= ab
->sbuf_offset
608 && (window
->sview_offset
+ window
->sview_len
609 >= ab
->sbuf_offset
+ ab
->sbuf_len
)));
611 /* Make sure there's enough room in the target buffer. */
612 size_buffer(&ab
->tbuf
, &ab
->tbuf_size
, window
->tview_len
, ab
->pool
);
614 /* Prepare the source buffer for reading from the input stream. */
615 if (window
->sview_offset
!= ab
->sbuf_offset
616 || window
->sview_len
> ab
->sbuf_size
)
618 char *old_sbuf
= ab
->sbuf
;
620 /* Make sure there's enough room. */
621 size_buffer(&ab
->sbuf
, &ab
->sbuf_size
, window
->sview_len
, ab
->pool
);
623 /* If the existing view overlaps with the new view, copy the
624 * overlap to the beginning of the new buffer. */
625 if (ab
->sbuf_offset
+ ab
->sbuf_len
> window
->sview_offset
)
628 (apr_size_t
)(window
->sview_offset
- ab
->sbuf_offset
);
629 memmove(ab
->sbuf
, old_sbuf
+ start
, ab
->sbuf_len
- start
);
630 ab
->sbuf_len
-= start
;
634 ab
->sbuf_offset
= window
->sview_offset
;
637 /* Read the remainder of the source view into the buffer. */
638 if (ab
->sbuf_len
< window
->sview_len
)
640 len
= window
->sview_len
- ab
->sbuf_len
;
641 err
= svn_stream_read(ab
->source
, ab
->sbuf
+ ab
->sbuf_len
, &len
);
642 if (err
== SVN_NO_ERROR
&& len
!= window
->sview_len
- ab
->sbuf_len
)
643 err
= svn_error_create(SVN_ERR_INCOMPLETE_DATA
, NULL
,
644 "Delta source ended unexpectedly");
645 if (err
!= SVN_NO_ERROR
)
647 ab
->sbuf_len
= window
->sview_len
;
650 /* Apply the window instructions to the source view to generate
652 len
= window
->tview_len
;
653 svn_txdelta_apply_instructions(window
, ab
->sbuf
, ab
->tbuf
, &len
);
654 assert(len
== window
->tview_len
);
656 /* Write out the output. */
658 /* ### We've also considered just adding two (optionally null)
659 arguments to svn_stream_create(): read_checksum and
660 write_checksum. Then instead of every caller updating an md5
661 context when it calls svn_stream_write() or svn_stream_read(),
662 streams would do it automatically, and verify the checksum in
663 svn_stream_closed(). But this might be overkill for issue #689;
664 so for now we just update the context here. */
665 if (ab
->result_digest
)
666 apr_md5_update(&(ab
->md5_context
), ab
->tbuf
, len
);
668 return svn_stream_write(ab
->target
, ab
->tbuf
, &len
);
673 svn_txdelta_apply(svn_stream_t
*source
,
674 svn_stream_t
*target
,
675 unsigned char *result_digest
,
676 const char *error_info
,
678 svn_txdelta_window_handler_t
*handler
,
679 void **handler_baton
)
681 apr_pool_t
*subpool
= svn_pool_create(pool
);
682 struct apply_baton
*ab
;
684 ab
= apr_palloc(subpool
, sizeof(*ab
));
694 ab
->result_digest
= result_digest
;
697 apr_md5_init(&(ab
->md5_context
));
700 ab
->error_info
= apr_pstrdup(subpool
, error_info
);
702 ab
->error_info
= NULL
;
704 *handler
= apply_window
;
710 /* Convenience routines */
713 svn_txdelta_send_string(const svn_string_t
*string
,
714 svn_txdelta_window_handler_t handler
,
718 svn_txdelta_window_t window
= { 0 };
721 /* Build a single `new' op */
722 op
.action_code
= svn_txdelta_new
;
724 op
.length
= string
->len
;
726 /* Build a single window containing a ptr to the string. */
727 window
.tview_len
= string
->len
;
730 window
.new_data
= string
;
732 /* Push the one window at the handler. */
733 SVN_ERR((*handler
)(&window
, handler_baton
));
735 /* Push a NULL at the handler, because we're done. */
736 SVN_ERR((*handler
)(NULL
, handler_baton
));
741 svn_error_t
*svn_txdelta_send_stream(svn_stream_t
*stream
,
742 svn_txdelta_window_handler_t handler
,
744 unsigned char *digest
,
747 svn_txdelta_stream_t
*txstream
;
750 /* ### this is a hack. we should simply read from the stream, construct
751 ### some windows, and pass those to the handler. there isn't any reason
752 ### to crank up a full "diff" algorithm just to copy a stream.
756 /* Create a delta stream which converts an *empty* bytestream into the
757 target bytestream. */
758 svn_txdelta(&txstream
, svn_stream_empty(pool
), stream
, pool
);
759 err
= svn_txdelta_send_txstream(txstream
, handler
, handler_baton
, pool
);
761 if (digest
&& (! err
))
763 const unsigned char *result_md5
;
764 result_md5
= svn_txdelta_md5_digest(txstream
);
765 /* Since err is null, result_md5 "cannot" be null. */
766 memcpy(digest
, result_md5
, APR_MD5_DIGESTSIZE
);
772 svn_error_t
*svn_txdelta_send_txstream(svn_txdelta_stream_t
*txstream
,
773 svn_txdelta_window_handler_t handler
,
777 svn_txdelta_window_t
*window
;
779 /* create a pool just for the windows */
780 apr_pool_t
*wpool
= svn_pool_create(pool
);
784 /* free the window (if any) */
785 svn_pool_clear(wpool
);
787 /* read in a single delta window */
788 SVN_ERR(svn_txdelta_next_window(&window
, txstream
, wpool
));
790 /* shove it at the handler */
791 SVN_ERR((*handler
)(window
, handler_baton
));
793 while (window
!= NULL
);
795 svn_pool_destroy(wpool
);