/* compose_delta.c:  Delta window composition.
4 * ====================================================================
5 * Copyright (c) 2000-2004 CollabNet. All rights reserved.
7 * This software is licensed as described in the file COPYING, which
8 * you should have received as part of this distribution. The terms
9 * are also available at http://subversion.tigris.org/license-1.html.
10 * If newer versions of this license are posted there, you may use a
11 * newer version instead, at your option.
13 * This software consists of voluntary contributions made by many
14 * individuals. For exact contribution history, see the revision
15 * history and logs, available at http://subversion.tigris.org/.
16 * ====================================================================
22 #include <apr_general.h> /* For APR_INLINE */
24 #include "svn_delta.h"
25 #include "svn_pools.h"
/* Define a MIN macro if this platform doesn't already have one. */
#ifndef MIN
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
34 /* ==================================================================== */
35 /* Support for efficient small-block allocation from pools. */
37 /* The following structs will be allocated and freed often: */
39 /* A node in the range index tree. */
40 typedef struct range_index_node_t range_index_node_t
;
41 struct range_index_node_t
43 /* 'offset' and 'limit' define the range in the source window. */
47 /* 'target_offset' is where that range is represented in the target. */
48 apr_size_t target_offset
;
50 /* 'left' and 'right' link the node into a splay tree. */
51 range_index_node_t
*left
, *right
;
53 /* 'prev' and 'next' link it into an ordered, doubly-linked list. */
54 range_index_node_t
*prev
, *next
;
57 /* A node in a list of ranges for source and target op copies. */
64 typedef struct range_list_node_t range_list_node_t
;
65 struct range_list_node_t
67 /* Where does the range come from?
68 'offset' and 'limit' always refer to the "virtual" source data
69 for the second delta window. For a target range, the actual
70 offset to use for generating the target op is 'target_offset';
71 that field isn't used by source ranges. */
74 /* 'offset' and 'limit' define the range. */
78 /* 'target_offset' is the start of the range in the target. */
79 apr_size_t target_offset
;
81 /* 'prev' and 'next' link the node into an ordered, doubly-linked list. */
82 range_list_node_t
*prev
, *next
;
86 /* This is what will be allocated: */
87 typedef union alloc_block_t alloc_block_t
;
90 range_index_node_t index_node
;
91 range_list_node_t list_node
;
93 /* Links free blocks into a freelist. */
94 alloc_block_t
*next_free
;
98 /* Allocate a block. */
99 static APR_INLINE
void *
100 alloc_block(apr_pool_t
*pool
, alloc_block_t
**free_list
)
102 alloc_block_t
*block
;
103 if (*free_list
== NULL
)
104 block
= apr_palloc(pool
, sizeof(*block
));
108 *free_list
= block
->next_free
;
113 /* Return the block back to the free list. */
114 static APR_INLINE
void
115 free_block(void *ptr
, alloc_block_t
**free_list
)
117 /* Wrapper functions take care of type safety. */
118 alloc_block_t
*const block
= ptr
;
119 block
->next_free
= *free_list
;
125 /* ==================================================================== */
126 /* Mapping offsets in the target streem to txdelta ops. */
128 typedef struct offset_index_t
134 /* Create an index mapping target stream offsets to delta ops in
135 WINDOW. Allocate from POOL. */
137 static offset_index_t
*
138 create_offset_index(const svn_txdelta_window_t
*window
, apr_pool_t
*pool
)
140 offset_index_t
*ndx
= apr_palloc(pool
, sizeof(*ndx
));
141 apr_size_t offset
= 0;
144 ndx
->length
= window
->num_ops
;
145 ndx
->offs
= apr_palloc(pool
, (ndx
->length
+ 1) * sizeof(*ndx
->offs
));
147 for (i
= 0; i
< ndx
->length
; ++i
)
149 ndx
->offs
[i
] = offset
;
150 offset
+= window
->ops
[i
].length
;
152 ndx
->offs
[ndx
->length
] = offset
;
157 /* Find the index of the delta op thet defines that data at OFFSET in
161 search_offset_index(const offset_index_t
*ndx
, apr_size_t offset
)
165 assert(offset
< ndx
->offs
[ndx
->length
]);
167 for (lo
= 0, hi
= ndx
->length
, op
= (lo
+ hi
)/2;
171 const apr_size_t this_offset
= ndx
->offs
[op
];
172 const apr_size_t next_offset
= ndx
->offs
[op
+ 1];
173 if (offset
< this_offset
)
175 else if (offset
> next_offset
)
179 /* this_offset <= offset <= next_offset */
180 if (offset
== next_offset
)
186 assert(ndx
->offs
[op
] <= offset
&& offset
< ndx
->offs
[op
+ 1]);
192 /* ==================================================================== */
193 /* Mapping ranges in the source stream to ranges in the composed delta. */
195 /* The range index tree. */
196 typedef struct range_index_t
198 range_index_node_t
*tree
;
199 alloc_block_t
*free_list
;
203 /* Create a range index tree. Allocate from POOL. */
204 static range_index_t
*
205 create_range_index(apr_pool_t
*pool
)
207 range_index_t
*ndx
= apr_palloc(pool
, sizeof(*ndx
));
210 ndx
->free_list
= NULL
;
214 /* Allocate a node for the range index tree. */
215 static range_index_node_t
*
216 alloc_range_index_node(range_index_t
*ndx
,
219 apr_size_t target_offset
)
221 range_index_node_t
*const node
= alloc_block(ndx
->pool
, &ndx
->free_list
);
222 node
->offset
= offset
;
224 node
->target_offset
= target_offset
;
225 node
->left
= node
->right
= NULL
;
226 node
->prev
= node
->next
= NULL
;
230 /* Free a node from the range index tree. */
232 free_range_index_node(range_index_t
*ndx
, range_index_node_t
*node
)
235 node
->next
->prev
= node
->prev
;
237 node
->prev
->next
= node
->next
;
238 free_block(node
, &ndx
->free_list
);
242 /* Splay the index tree, using OFFSET as the key. */
245 splay_range_index(apr_size_t offset
, range_index_t
*ndx
)
247 range_index_node_t
*tree
= ndx
->tree
;
248 range_index_node_t scratch_node
;
249 range_index_node_t
*left
, *right
;
254 scratch_node
.left
= scratch_node
.right
= NULL
;
255 left
= right
= &scratch_node
;
259 if (offset
< tree
->offset
)
261 if (tree
->left
!= NULL
262 && offset
< tree
->left
->offset
)
265 range_index_node_t
*const node
= tree
->left
;
266 tree
->left
= node
->right
;
270 if (tree
->left
== NULL
)
273 /* Remember the right subtree */
278 else if (offset
> tree
->offset
)
280 if (tree
->right
!= NULL
281 && offset
> tree
->right
->offset
)
284 range_index_node_t
*const node
= tree
->right
;
285 tree
->right
= node
->left
;
289 if (tree
->right
== NULL
)
292 /* Remember the left subtree */
301 /* Link in the left and right subtrees */
302 left
->right
= tree
->left
;
303 right
->left
= tree
->right
;
304 tree
->left
= scratch_node
.right
;
305 tree
->right
= scratch_node
.left
;
307 /* The basic top-down splay is finished, but we may still need to
308 turn the tree around. What we want is to put the node with the
309 largest offset where node->offset <= offset at the top of the
310 tree, so that we can insert the new data (or search for existing
311 ranges) to the right of the root. This makes cleaning up the
312 tree after an insert much simpler, and -- incidentally -- makes
313 the whole range index magic work. */
314 if (offset
< tree
->offset
&& tree
->left
!= NULL
)
316 if (tree
->left
->right
== NULL
)
318 /* A single right rotation is enough. */
319 range_index_node_t
*const node
= tree
->left
;
320 tree
->left
= node
->right
; /* Which is always NULL. */
326 /* Slide down to the rightmost node in the left subtree. */
327 range_index_node_t
**nodep
= &tree
->left
;
328 while ((*nodep
)->right
!= NULL
)
329 nodep
= &(*nodep
)->right
;
331 /* Now move this node to root in one giant promotion. */
336 right
->left
= tree
->right
; /* Which is always NULL, too. */
342 /* Sanity check ... */
343 assert((offset
>= tree
->offset
)
344 || ((tree
->left
== NULL
)
345 && (tree
->prev
== NULL
)));
350 /* Remove all ranges from NDX that fall into the root's range. To
351 keep the range index as small as possible, we must also remove
352 nodes that don't fall into the new range, but have become redundant
353 because the new range overlaps the beginning of the next range.
356 new-range: |-----------------|
357 range-1: |-----------------|
358 range-2: |--------------------|
360 Before new-range was inserted, range-1 and range-2 were both
361 necessary. Now the union of new-range and range-2 completely covers
362 range-1, which has become redundant now.
   FIXME: But, of course, there's a catch. range-1 must still remain
   in the tree if we want to optimize the number of target copy ops in
   the case where a copy falls within range-1, but starts before
   range-2 and ends after new-range. */
370 delete_subtree(range_index_t
*ndx
, range_index_node_t
*node
)
374 delete_subtree(ndx
, node
->left
);
375 delete_subtree(ndx
, node
->right
);
376 free_range_index_node(ndx
, node
);
381 clean_tree(range_index_t
*ndx
, apr_size_t limit
)
383 apr_size_t top_offset
= limit
+ 1;
384 range_index_node_t
**nodep
= &ndx
->tree
->right
;
385 while (*nodep
!= NULL
)
387 range_index_node_t
*const node
= *nodep
;
388 apr_size_t
const offset
=
389 (node
->right
!= NULL
&& node
->right
->offset
< top_offset
390 ? node
->right
->offset
393 if (node
->limit
<= limit
394 || (node
->offset
< limit
&& offset
< limit
))
396 *nodep
= node
->right
;
398 delete_subtree(ndx
, node
);
402 top_offset
= node
->offset
;
409 /* Add a range [OFFSET, LIMIT) into NDX. If NDX already contains a
410 range that encloses [OFFSET, LIMIT), do nothing. Otherwise, remove
411 all ranges from NDX that are superseded by the new range.
412 NOTE: The range index must be splayed to OFFSET! */
415 insert_range(apr_size_t offset
, apr_size_t limit
, apr_size_t target_offset
,
418 range_index_node_t
*node
= NULL
;
420 if (ndx
->tree
== NULL
)
422 node
= alloc_range_index_node(ndx
, offset
, limit
, target_offset
);
427 if (offset
== ndx
->tree
->offset
428 && limit
> ndx
->tree
->limit
)
430 ndx
->tree
->limit
= limit
;
431 ndx
->tree
->target_offset
= target_offset
;
432 clean_tree(ndx
, limit
);
434 else if (offset
> ndx
->tree
->offset
435 && limit
> ndx
->tree
->limit
)
437 /* We have to make the same sort of checks as clean_tree()
438 does for superseded ranges. Have to merge these someday. */
440 const svn_boolean_t insert_range_p
=
442 || ndx
->tree
->limit
< ndx
->tree
->next
->offset
443 || limit
> ndx
->tree
->next
->limit
);
447 /* Again, we have to check if the new node and the one
448 to the left of the root override root's range. */
449 if (ndx
->tree
->prev
&& ndx
->tree
->prev
->limit
> offset
)
451 /* Replace the data in the splayed node. */
452 ndx
->tree
->offset
= offset
;
453 ndx
->tree
->limit
= limit
;
454 ndx
->tree
->target_offset
= target_offset
;
458 /* Insert the range to the right of the splayed node. */
459 node
= alloc_range_index_node(ndx
, offset
, limit
,
461 if ((node
->next
= ndx
->tree
->next
) != NULL
)
462 node
->next
->prev
= node
;
463 ndx
->tree
->next
= node
;
464 node
->prev
= ndx
->tree
;
466 node
->right
= ndx
->tree
->right
;
467 ndx
->tree
->right
= NULL
;
468 node
->left
= ndx
->tree
;
471 clean_tree(ndx
, limit
);
474 /* Ignore the range */;
476 else if (offset
< ndx
->tree
->offset
)
478 assert(ndx
->tree
->left
== NULL
);
480 /* Insert the range left of the splayed node */
481 node
= alloc_range_index_node(ndx
, offset
, limit
, target_offset
);
482 node
->left
= node
->prev
= NULL
;
483 node
->right
= node
->next
= ndx
->tree
;
484 ndx
->tree
= node
->next
->prev
= node
;
485 clean_tree(ndx
, limit
);
488 /* Ignore the range */;
494 /* ==================================================================== */
495 /* Juggling with lists of ranges. */
497 /* Allocate a node and add it to the range list. LIST is the head of
498 the range list, TAIL is the last node in the list. NDX holds the
499 freelist; OFFSET, LIMIT and KIND are node data. */
500 static range_list_node_t
*
501 alloc_range_list(range_list_node_t
**list
,
502 range_list_node_t
**tail
,
504 enum range_kind kind
,
507 apr_size_t target_offset
)
509 range_list_node_t
*const node
= alloc_block(ndx
->pool
, &ndx
->free_list
);
511 node
->offset
= offset
;
513 node
->target_offset
= target_offset
;
516 node
->prev
= node
->next
= NULL
;
517 *list
= *tail
= node
;
523 (*tail
)->next
= node
;
529 /* Free a range list. LIST is the head of the list, NDX holds the freelist. */
531 free_range_list(range_list_node_t
*list
, range_index_t
*ndx
)
535 range_list_node_t
*const node
= list
;
537 free_block(node
, &ndx
->free_list
);
542 /* Based on the data in NDX, build a list of ranges that cover
543 [OFFSET, LIMIT) in the "virtual" source data.
544 NOTE: The range index must be splayed to OFFSET! */
546 static range_list_node_t
*
547 build_range_list(apr_size_t offset
, apr_size_t limit
, range_index_t
*ndx
)
549 range_list_node_t
*range_list
= NULL
;
550 range_list_node_t
*last_range
= NULL
;
551 range_index_node_t
*node
= ndx
->tree
;
553 while (offset
< limit
)
556 return alloc_range_list(&range_list
, &last_range
, ndx
,
560 if (offset
< node
->offset
)
562 if (limit
<= node
->offset
)
563 return alloc_range_list(&range_list
, &last_range
, ndx
,
568 alloc_range_list(&range_list
, &last_range
, ndx
,
570 offset
, node
->offset
, 0);
571 offset
= node
->offset
;
576 /* TODO: (Potential optimization) Investigate if it would
577 make sense to forbid range_from_target lengths shorter
578 than, say, VD_KEY_SIZE (see vdelta.c) */
580 if (offset
>= node
->limit
)
584 const apr_size_t target_offset
=
585 offset
- node
->offset
+ node
->target_offset
;
587 if (limit
<= node
->limit
)
588 return alloc_range_list(&range_list
, &last_range
, ndx
,
590 offset
, limit
, target_offset
);
593 alloc_range_list(&range_list
, &last_range
, ndx
,
595 offset
, node
->limit
, target_offset
);
596 offset
= node
->limit
;
603 assert(!"A range's offset isn't smaller than its limit? Impossible!");
608 /* Copy the instructions from WINDOW that define the range [OFFSET,
609 LIMIT) in WINDOW's target stream to TARGET_OFFSET in the window
610 represented by BUILD_BATON. Use NDX to find the instructions in
611 WINDOW. Allocate space in BUILD_BATON from POOL. */
614 copy_source_ops(apr_size_t offset
, apr_size_t limit
,
615 apr_size_t target_offset
,
616 svn_txdelta__ops_baton_t
*build_baton
,
617 const svn_txdelta_window_t
*window
,
618 const offset_index_t
*ndx
,
621 const int first_op
= search_offset_index(ndx
, offset
);
622 const int last_op
= search_offset_index(ndx
, limit
- 1);
625 for (op_ndx
= first_op
; op_ndx
<= last_op
; ++op_ndx
)
627 const svn_txdelta_op_t
*const op
= &window
->ops
[op_ndx
];
628 const apr_size_t
*const off
= &ndx
->offs
[op_ndx
];
630 const apr_size_t fix_offset
= (offset
> off
[0] ? offset
- off
[0] : 0);
631 const apr_size_t fix_limit
= (off
[1] > limit
? off
[1] - limit
: 0);
633 /* It would be extremely weird if the fixed-up op had zero length. */
634 assert(fix_offset
+ fix_limit
< op
->length
);
636 if (op
->action_code
!= svn_txdelta_target
)
638 /* Delta ops that don't depend on the virtual target can be
639 copied to the composite unchanged. */
640 const char *const new_data
= (op
->action_code
== svn_txdelta_new
641 ? (window
->new_data
->data
642 + op
->offset
+ fix_offset
)
645 svn_txdelta__insert_op(build_baton
, op
->action_code
,
646 op
->offset
+ fix_offset
,
647 op
->length
- fix_offset
- fix_limit
,
652 /* The source of a target copy must start before the current
653 offset in the (virtual) target stream. */
654 assert(op
->offset
< off
[0]);
656 if (op
->offset
+ op
->length
- fix_limit
<= off
[0])
658 /* The recursion _must_ end, otherwise the delta has
659 circular references, and that is not possible. */
660 copy_source_ops(op
->offset
+ fix_offset
,
661 op
->offset
+ op
->length
- fix_limit
,
663 build_baton
, window
, ndx
, pool
);
667 /* This is an overlapping target copy.
668 The idea here is to transpose the pattern, then generate
669 another overlapping copy. */
670 const apr_size_t ptn_length
= off
[0] - op
->offset
;
671 const apr_size_t ptn_overlap
= fix_offset
% ptn_length
;
672 apr_size_t fix_off
= fix_offset
;
673 apr_size_t tgt_off
= target_offset
;
674 assert(ptn_length
> ptn_overlap
);
676 /* ### FIXME: ptn_overlap is unsigned, so the if() condition
677 below is always true! Either it should be '> 0', or the
678 code block should be unconditional. See also r2288. */
679 if (ptn_overlap
>= 0)
681 /* Issue second subrange in the pattern. */
682 const apr_size_t length
=
683 MIN(op
->length
- fix_off
- fix_limit
,
684 ptn_length
- ptn_overlap
);
685 copy_source_ops(op
->offset
+ ptn_overlap
,
686 op
->offset
+ ptn_overlap
+ length
,
688 build_baton
, window
, ndx
, pool
);
693 assert(fix_off
+ fix_limit
<= op
->length
);
695 && fix_off
+ fix_limit
< op
->length
)
697 /* Issue the first subrange in the pattern. */
698 const apr_size_t length
=
699 MIN(op
->length
- fix_off
- fix_limit
, ptn_overlap
);
700 copy_source_ops(op
->offset
,
703 build_baton
, window
, ndx
, pool
);
708 assert(fix_off
+ fix_limit
<= op
->length
);
709 if (fix_off
+ fix_limit
< op
->length
)
711 /* Now multiply the pattern */
712 svn_txdelta__insert_op(build_baton
, svn_txdelta_target
,
713 tgt_off
- ptn_length
,
714 op
->length
- fix_off
- fix_limit
,
720 /* Adjust the target offset for the next op in the list. */
721 target_offset
+= op
->length
- fix_offset
- fix_limit
;
727 /* ==================================================================== */
728 /* Bringing it all together. */
731 svn_txdelta_window_t
*
732 svn_txdelta_compose_windows(const svn_txdelta_window_t
*window_A
,
733 const svn_txdelta_window_t
*window_B
,
736 svn_txdelta__ops_baton_t build_baton
= { 0 };
737 svn_txdelta_window_t
*composite
;
738 apr_pool_t
*subpool
= svn_pool_create(pool
);
739 offset_index_t
*offset_index
= create_offset_index(window_A
, subpool
);
740 range_index_t
*range_index
= create_range_index(subpool
);
741 apr_size_t target_offset
= 0;
744 /* Read the description of the delta composition algorithm in
745 notes/fs-improvements.txt before going any further.
746 You have been warned. */
747 build_baton
.new_data
= svn_stringbuf_create("", pool
);
748 for (i
= 0; i
< window_B
->num_ops
; ++i
)
750 const svn_txdelta_op_t
*const op
= &window_B
->ops
[i
];
751 if (op
->action_code
!= svn_txdelta_source
)
753 /* Delta ops that don't depend on the source can be copied
754 to the composite unchanged. */
755 const char *const new_data
=
756 (op
->action_code
== svn_txdelta_new
757 ? window_B
->new_data
->data
+ op
->offset
759 svn_txdelta__insert_op(&build_baton
, op
->action_code
,
760 op
->offset
, op
->length
,
765 /* NOTE: Remember that `offset' and `limit' refer to
766 positions in window_B's _source_ stream, which is the
767 same as window_A's _target_ stream! */
768 const apr_size_t offset
= op
->offset
;
769 const apr_size_t limit
= op
->offset
+ op
->length
;
770 range_list_node_t
*range_list
, *range
;
771 apr_size_t tgt_off
= target_offset
;
773 splay_range_index(offset
, range_index
);
774 range_list
= build_range_list(offset
, limit
, range_index
);
776 for (range
= range_list
; range
; range
= range
->next
)
778 if (range
->kind
== range_from_target
)
779 svn_txdelta__insert_op(&build_baton
, svn_txdelta_target
,
780 range
->target_offset
,
781 range
->limit
- range
->offset
,
784 copy_source_ops(range
->offset
, range
->limit
, tgt_off
,
785 &build_baton
, window_A
, offset_index
,
788 tgt_off
+= range
->limit
- range
->offset
;
790 assert(tgt_off
== target_offset
+ op
->length
);
792 free_range_list(range_list
, range_index
);
793 insert_range(offset
, limit
, target_offset
, range_index
);
796 /* Remember the new offset in the would-be target stream. */
797 target_offset
+= op
->length
;
800 svn_pool_destroy(subpool
);
802 composite
= svn_txdelta__make_window(&build_baton
, pool
);
803 composite
->sview_offset
= window_A
->sview_offset
;
804 composite
->sview_len
= window_A
->sview_len
;
805 composite
->tview_len
= window_B
->tview_len
;
809 /* This is a private interlibrary compatibility wrapper. */
810 svn_txdelta_window_t
*
811 svn_txdelta__compose_windows(const svn_txdelta_window_t
*window_A
,
812 const svn_txdelta_window_t
*window_B
,
814 svn_txdelta_window_t
*
815 svn_txdelta__compose_windows(const svn_txdelta_window_t
*window_A
,
816 const svn_txdelta_window_t
*window_B
,
819 return svn_txdelta_compose_windows(window_A
, window_B
, pool
);