/*
 * compose_delta.c:  Delta window composition.
 *
 * ====================================================================
 * Copyright (c) 2000-2004 CollabNet.  All rights reserved.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution.  The terms
 * are also available at http://subversion.tigris.org/license-1.html.
 * If newer versions of this license are posted there, you may use a
 * newer version instead, at your option.
 *
 * This software consists of voluntary contributions made by many
 * individuals.  For exact contribution history, see the revision
 * history and logs, available at http://subversion.tigris.org/.
 * ====================================================================
 */
#include <assert.h>

#include <apr_general.h>        /* For APR_INLINE */

#include "svn_delta.h"
#include "svn_pools.h"
#include "delta.h"
/* Define a MIN macro if this platform doesn't already have one. */
#ifndef MIN
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
/* ==================================================================== */
/* Support for efficient small-block allocation from pools. */

/* The following structs will be allocated and freed often: */
39 /* A node in the range index tree. */
40 typedef struct range_index_node_t range_index_node_t
;
41 struct range_index_node_t
43 /* 'offset' and 'limit' define the range in the source window. */
47 /* 'target_offset' is where that range is represented in the target. */
48 apr_size_t target_offset
;
50 /* 'left' and 'right' link the node into a splay tree. */
51 range_index_node_t
*left
, *right
;
53 /* 'prev' and 'next' link it into an ordered, doubly-linked list. */
54 range_index_node_t
*prev
, *next
;
57 /* A node in a list of ranges for source and target op copies. */
64 typedef struct range_list_node_t range_list_node_t
;
65 struct range_list_node_t
67 /* Where does the range come from?
68 'offset' and 'limit' always refer to the "virtual" source data
69 for the second delta window. For a target range, the actual
70 offset to use for generating the target op is 'target_offset';
71 that field isn't used by source ranges. */
74 /* 'offset' and 'limit' define the range. */
78 /* 'target_offset' is the start of the range in the target. */
79 apr_size_t target_offset
;
81 /* 'prev' and 'next' link the node into an ordered, doubly-linked list. */
82 range_list_node_t
*prev
, *next
;
86 /* This is what will be allocated: */
87 typedef union alloc_block_t alloc_block_t
;
90 range_index_node_t index_node
;
91 range_list_node_t list_node
;
93 /* Links free blocks into a freelist. */
94 alloc_block_t
*next_free
;
98 /* Allocate a block. */
99 static APR_INLINE
void *
100 alloc_block(apr_pool_t
*pool
, alloc_block_t
**free_list
)
102 alloc_block_t
*block
;
103 if (*free_list
== NULL
)
104 block
= apr_palloc(pool
, sizeof(*block
));
108 *free_list
= block
->next_free
;
113 /* Return the block back to the free list. */
114 static APR_INLINE
void
115 free_block(void *ptr
, alloc_block_t
**free_list
)
117 /* Wrapper functions take care of type safety. */
118 alloc_block_t
*const block
= ptr
;
119 block
->next_free
= *free_list
;
/* ==================================================================== */
/* Mapping offsets in the target stream to txdelta ops. */
128 typedef struct offset_index_t
134 /* Create an index mapping target stream offsets to delta ops in
135 WINDOW. Allocate from POOL. */
137 static offset_index_t
*
138 create_offset_index(const svn_txdelta_window_t
*window
, apr_pool_t
*pool
)
140 offset_index_t
*ndx
= apr_palloc(pool
, sizeof(*ndx
));
141 apr_size_t offset
= 0;
144 ndx
->length
= window
->num_ops
;
145 ndx
->offs
= apr_palloc(pool
, (ndx
->length
+ 1) * sizeof(*ndx
->offs
));
147 for (i
= 0; i
< ndx
->length
; ++i
)
149 ndx
->offs
[i
] = offset
;
150 offset
+= window
->ops
[i
].length
;
152 ndx
->offs
[ndx
->length
] = offset
;
157 /* Find the index of the delta op thet defines that data at OFFSET in
161 search_offset_index(const offset_index_t
*ndx
, apr_size_t offset
)
165 assert(offset
< ndx
->offs
[ndx
->length
]);
167 for (lo
= 0, hi
= ndx
->length
, op
= (lo
+ hi
)/2;
171 const apr_size_t this_offset
= ndx
->offs
[op
];
172 const apr_size_t next_offset
= ndx
->offs
[op
+ 1];
173 if (offset
< this_offset
)
175 else if (offset
> next_offset
)
179 /* this_offset <= offset <= next_offset */
180 if (offset
== next_offset
)
186 assert(ndx
->offs
[op
] <= offset
&& offset
< ndx
->offs
[op
+ 1]);
/* ==================================================================== */
/* Mapping ranges in the source stream to ranges in the composed delta. */

/* The range index tree. */
196 typedef struct range_index_t
198 range_index_node_t
*tree
;
199 alloc_block_t
*free_list
;
203 /* Create a range index tree. Allocate from POOL. */
204 static range_index_t
*
205 create_range_index(apr_pool_t
*pool
)
207 range_index_t
*ndx
= apr_palloc(pool
, sizeof(*ndx
));
210 ndx
->free_list
= NULL
;
214 /* Allocate a node for the range index tree. */
215 static range_index_node_t
*
216 alloc_range_index_node(range_index_t
*ndx
,
219 apr_size_t target_offset
)
221 range_index_node_t
*const node
= alloc_block(ndx
->pool
, &ndx
->free_list
);
222 node
->offset
= offset
;
224 node
->target_offset
= target_offset
;
225 node
->left
= node
->right
= NULL
;
226 node
->prev
= node
->next
= NULL
;
230 /* Free a node from the range index tree. */
232 free_range_index_node(range_index_t
*ndx
, range_index_node_t
*node
)
235 node
->next
->prev
= node
->prev
;
237 node
->prev
->next
= node
->next
;
238 free_block(node
, &ndx
->free_list
);
242 /* Splay the index tree, using OFFSET as the key. */
245 splay_range_index(apr_size_t offset
, range_index_t
*ndx
)
247 range_index_node_t
*tree
= ndx
->tree
;
248 range_index_node_t scratch_node
;
249 range_index_node_t
*left
, *right
;
254 scratch_node
.left
= scratch_node
.right
= NULL
;
255 left
= right
= &scratch_node
;
259 if (offset
< tree
->offset
)
261 if (tree
->left
!= NULL
262 && offset
< tree
->left
->offset
)
265 range_index_node_t
*const node
= tree
->left
;
266 tree
->left
= node
->right
;
270 if (tree
->left
== NULL
)
273 /* Remember the right subtree */
278 else if (offset
> tree
->offset
)
280 if (tree
->right
!= NULL
281 && offset
> tree
->right
->offset
)
284 range_index_node_t
*const node
= tree
->right
;
285 tree
->right
= node
->left
;
289 if (tree
->right
== NULL
)
292 /* Remember the left subtree */
301 /* Link in the left and right subtrees */
302 left
->right
= tree
->left
;
303 right
->left
= tree
->right
;
304 tree
->left
= scratch_node
.right
;
305 tree
->right
= scratch_node
.left
;
307 /* The basic top-down splay is finished, but we may still need to
308 turn the tree around. What we want is to put the node with the
309 largest offset where node->offset <= offset at the top of the
310 tree, so that we can insert the new data (or search for existing
311 ranges) to the right of the root. This makes cleaning up the
312 tree after an insert much simpler, and -- incidentally -- makes
313 the whole range index magic work. */
314 if (offset
< tree
->offset
&& tree
->left
!= NULL
)
316 if (tree
->left
->right
== NULL
)
318 /* A single right rotation is enough. */
319 range_index_node_t
*const node
= tree
->left
;
320 tree
->left
= node
->right
; /* Which is always NULL. */
326 /* Slide down to the rightmost node in the left subtree. */
327 range_index_node_t
**nodep
= &tree
->left
;
328 while ((*nodep
)->right
!= NULL
)
329 nodep
= &(*nodep
)->right
;
331 /* Now move this node to root in one giant promotion. */
336 right
->left
= tree
->right
; /* Which is always NULL, too. */
342 /* Sanity check ... */
343 assert((offset
>= tree
->offset
)
344 || ((tree
->left
== NULL
)
345 && (tree
->prev
== NULL
)));
350 /* Remove all ranges from NDX that fall into the root's range. To
351 keep the range index as small as possible, we must also remove
352 nodes that don't fall into the new range, but have become redundant
353 because the new range overlaps the beginning of the next range.
356 new-range: |-----------------|
357 range-1: |-----------------|
358 range-2: |--------------------|
360 Before new-range was inserted, range-1 and range-2 were both
361 necessary. Now the union of new-range and range-2 completely covers
362 range-1, which has become redundant now.
364 FIXME: But, of course, there's a catch. range-1 must still remain
365 in the tree if we want to optimize the number of target copy ops in
366 the case were a copy falls within range-1, but starts before
367 range-2 and ends after new-range. */
370 delete_subtree(range_index_t
*ndx
, range_index_node_t
*node
)
374 delete_subtree(ndx
, node
->left
);
375 delete_subtree(ndx
, node
->right
);
376 free_range_index_node(ndx
, node
);
381 clean_tree(range_index_t
*ndx
, apr_size_t limit
)
383 apr_size_t top_offset
= limit
+ 1;
384 range_index_node_t
**nodep
= &ndx
->tree
->right
;
385 while (*nodep
!= NULL
)
387 range_index_node_t
*const node
= *nodep
;
388 apr_size_t
const offset
=
389 (node
->right
!= NULL
&& node
->right
->offset
< top_offset
390 ? node
->right
->offset
393 if (node
->limit
<= limit
394 || (node
->offset
< limit
&& offset
< limit
))
396 *nodep
= node
->right
;
398 delete_subtree(ndx
, node
);
402 top_offset
= node
->offset
;
409 /* Add a range [OFFSET, LIMIT) into NDX. If NDX already contains a
410 range that encloses [OFFSET, LIMIT), do nothing. Otherwise, remove
411 all ranges from NDX that are superseded by the new range.
412 NOTE: The range index must be splayed to OFFSET! */
415 insert_range(apr_size_t offset
, apr_size_t limit
, apr_size_t target_offset
,
418 range_index_node_t
*node
= NULL
;
420 if (ndx
->tree
== NULL
)
422 node
= alloc_range_index_node(ndx
, offset
, limit
, target_offset
);
427 if (offset
== ndx
->tree
->offset
428 && limit
> ndx
->tree
->limit
)
430 ndx
->tree
->limit
= limit
;
431 ndx
->tree
->target_offset
= target_offset
;
432 clean_tree(ndx
, limit
);
434 else if (offset
> ndx
->tree
->offset
435 && limit
> ndx
->tree
->limit
)
437 /* We have to make the same sort of checks as clean_tree()
438 does for superseded ranges. Have to merge these someday. */
440 const svn_boolean_t insert_range_p
=
442 || ndx
->tree
->limit
< ndx
->tree
->next
->offset
443 || limit
> ndx
->tree
->next
->limit
);
447 /* Again, we have to check if the new node and the one
448 to the left of the root override root's range. */
449 if (ndx
->tree
->prev
&& ndx
->tree
->prev
->limit
> offset
)
451 /* Replace the data in the splayed node. */
452 ndx
->tree
->offset
= offset
;
453 ndx
->tree
->limit
= limit
;
454 ndx
->tree
->target_offset
= target_offset
;
458 /* Insert the range to the right of the splayed node. */
459 node
= alloc_range_index_node(ndx
, offset
, limit
,
461 if ((node
->next
= ndx
->tree
->next
) != NULL
)
462 node
->next
->prev
= node
;
463 ndx
->tree
->next
= node
;
464 node
->prev
= ndx
->tree
;
466 node
->right
= ndx
->tree
->right
;
467 ndx
->tree
->right
= NULL
;
468 node
->left
= ndx
->tree
;
471 clean_tree(ndx
, limit
);
474 /* Ignore the range */;
476 else if (offset
< ndx
->tree
->offset
)
478 assert(ndx
->tree
->left
== NULL
);
480 /* Insert the range left of the splayed node */
481 node
= alloc_range_index_node(ndx
, offset
, limit
, target_offset
);
482 node
->left
= node
->prev
= NULL
;
483 node
->right
= node
->next
= ndx
->tree
;
484 ndx
->tree
= node
->next
->prev
= node
;
485 clean_tree(ndx
, limit
);
488 /* Ignore the range */;
/* ==================================================================== */
/* Juggling with lists of ranges. */
497 /* Allocate a node and add it to the range list. LIST is the head of
498 the range list, TAIL is the last node in the list. NDX holds the
499 freelist; OFFSET, LIMIT and KIND are node data. */
500 static range_list_node_t
*
501 alloc_range_list(range_list_node_t
**list
,
502 range_list_node_t
**tail
,
504 enum range_kind kind
,
507 apr_size_t target_offset
)
509 range_list_node_t
*const node
= alloc_block(ndx
->pool
, &ndx
->free_list
);
511 node
->offset
= offset
;
513 node
->target_offset
= target_offset
;
516 node
->prev
= node
->next
= NULL
;
517 *list
= *tail
= node
;
523 (*tail
)->next
= node
;
529 /* Free a range list. LIST is the head of the list, NDX holds the freelist. */
531 free_range_list(range_list_node_t
*list
, range_index_t
*ndx
)
535 range_list_node_t
*const node
= list
;
537 free_block(node
, &ndx
->free_list
);
542 /* Based on the data in NDX, build a list of ranges that cover
543 [OFFSET, LIMIT) in the "virtual" source data.
544 NOTE: The range index must be splayed to OFFSET! */
546 static range_list_node_t
*
547 build_range_list(apr_size_t offset
, apr_size_t limit
, range_index_t
*ndx
)
549 range_list_node_t
*range_list
= NULL
;
550 range_list_node_t
*last_range
= NULL
;
551 range_index_node_t
*node
= ndx
->tree
;
553 while (offset
< limit
)
556 return alloc_range_list(&range_list
, &last_range
, ndx
,
560 if (offset
< node
->offset
)
562 if (limit
<= node
->offset
)
563 return alloc_range_list(&range_list
, &last_range
, ndx
,
568 alloc_range_list(&range_list
, &last_range
, ndx
,
570 offset
, node
->offset
, 0);
571 offset
= node
->offset
;
576 /* TODO: (Potential optimization) Investigate if it would
577 make sense to forbid short range_from_target lengths
578 (this comment originally said "shorter than, say,
579 VD_KEY_SIZE (see vdelta.c)", but Subversion no longer
582 if (offset
>= node
->limit
)
586 const apr_size_t target_offset
=
587 offset
- node
->offset
+ node
->target_offset
;
589 if (limit
<= node
->limit
)
590 return alloc_range_list(&range_list
, &last_range
, ndx
,
592 offset
, limit
, target_offset
);
595 alloc_range_list(&range_list
, &last_range
, ndx
,
597 offset
, node
->limit
, target_offset
);
598 offset
= node
->limit
;
605 assert(!"A range's offset isn't smaller than its limit? Impossible!");
610 /* Copy the instructions from WINDOW that define the range [OFFSET,
611 LIMIT) in WINDOW's target stream to TARGET_OFFSET in the window
612 represented by BUILD_BATON. Use NDX to find the instructions in
613 WINDOW. Allocate space in BUILD_BATON from POOL. */
616 copy_source_ops(apr_size_t offset
, apr_size_t limit
,
617 apr_size_t target_offset
,
618 svn_txdelta__ops_baton_t
*build_baton
,
619 const svn_txdelta_window_t
*window
,
620 const offset_index_t
*ndx
,
623 const int first_op
= search_offset_index(ndx
, offset
);
624 const int last_op
= search_offset_index(ndx
, limit
- 1);
627 for (op_ndx
= first_op
; op_ndx
<= last_op
; ++op_ndx
)
629 const svn_txdelta_op_t
*const op
= &window
->ops
[op_ndx
];
630 const apr_size_t
*const off
= &ndx
->offs
[op_ndx
];
632 const apr_size_t fix_offset
= (offset
> off
[0] ? offset
- off
[0] : 0);
633 const apr_size_t fix_limit
= (off
[1] > limit
? off
[1] - limit
: 0);
635 /* It would be extremely weird if the fixed-up op had zero length. */
636 assert(fix_offset
+ fix_limit
< op
->length
);
638 if (op
->action_code
!= svn_txdelta_target
)
640 /* Delta ops that don't depend on the virtual target can be
641 copied to the composite unchanged. */
642 const char *const new_data
= (op
->action_code
== svn_txdelta_new
643 ? (window
->new_data
->data
644 + op
->offset
+ fix_offset
)
647 svn_txdelta__insert_op(build_baton
, op
->action_code
,
648 op
->offset
+ fix_offset
,
649 op
->length
- fix_offset
- fix_limit
,
654 /* The source of a target copy must start before the current
655 offset in the (virtual) target stream. */
656 assert(op
->offset
< off
[0]);
658 if (op
->offset
+ op
->length
- fix_limit
<= off
[0])
660 /* The recursion _must_ end, otherwise the delta has
661 circular references, and that is not possible. */
662 copy_source_ops(op
->offset
+ fix_offset
,
663 op
->offset
+ op
->length
- fix_limit
,
665 build_baton
, window
, ndx
, pool
);
669 /* This is an overlapping target copy.
670 The idea here is to transpose the pattern, then generate
671 another overlapping copy. */
672 const apr_size_t ptn_length
= off
[0] - op
->offset
;
673 const apr_size_t ptn_overlap
= fix_offset
% ptn_length
;
674 apr_size_t fix_off
= fix_offset
;
675 apr_size_t tgt_off
= target_offset
;
676 assert(ptn_length
> ptn_overlap
);
678 /* ### FIXME: ptn_overlap is unsigned, so the if() condition
679 below is always true! Either it should be '> 0', or the
680 code block should be unconditional. See also r2288. */
681 if (ptn_overlap
>= 0)
683 /* Issue second subrange in the pattern. */
684 const apr_size_t length
=
685 MIN(op
->length
- fix_off
- fix_limit
,
686 ptn_length
- ptn_overlap
);
687 copy_source_ops(op
->offset
+ ptn_overlap
,
688 op
->offset
+ ptn_overlap
+ length
,
690 build_baton
, window
, ndx
, pool
);
695 assert(fix_off
+ fix_limit
<= op
->length
);
697 && fix_off
+ fix_limit
< op
->length
)
699 /* Issue the first subrange in the pattern. */
700 const apr_size_t length
=
701 MIN(op
->length
- fix_off
- fix_limit
, ptn_overlap
);
702 copy_source_ops(op
->offset
,
705 build_baton
, window
, ndx
, pool
);
710 assert(fix_off
+ fix_limit
<= op
->length
);
711 if (fix_off
+ fix_limit
< op
->length
)
713 /* Now multiply the pattern */
714 svn_txdelta__insert_op(build_baton
, svn_txdelta_target
,
715 tgt_off
- ptn_length
,
716 op
->length
- fix_off
- fix_limit
,
722 /* Adjust the target offset for the next op in the list. */
723 target_offset
+= op
->length
- fix_offset
- fix_limit
;
/* ==================================================================== */
/* Bringing it all together. */
733 svn_txdelta_window_t
*
734 svn_txdelta_compose_windows(const svn_txdelta_window_t
*window_A
,
735 const svn_txdelta_window_t
*window_B
,
738 svn_txdelta__ops_baton_t build_baton
= { 0 };
739 svn_txdelta_window_t
*composite
;
740 apr_pool_t
*subpool
= svn_pool_create(pool
);
741 offset_index_t
*offset_index
= create_offset_index(window_A
, subpool
);
742 range_index_t
*range_index
= create_range_index(subpool
);
743 apr_size_t target_offset
= 0;
746 /* Read the description of the delta composition algorithm in
747 notes/fs-improvements.txt before going any further.
748 You have been warned. */
749 build_baton
.new_data
= svn_stringbuf_create("", pool
);
750 for (i
= 0; i
< window_B
->num_ops
; ++i
)
752 const svn_txdelta_op_t
*const op
= &window_B
->ops
[i
];
753 if (op
->action_code
!= svn_txdelta_source
)
755 /* Delta ops that don't depend on the source can be copied
756 to the composite unchanged. */
757 const char *const new_data
=
758 (op
->action_code
== svn_txdelta_new
759 ? window_B
->new_data
->data
+ op
->offset
761 svn_txdelta__insert_op(&build_baton
, op
->action_code
,
762 op
->offset
, op
->length
,
767 /* NOTE: Remember that `offset' and `limit' refer to
768 positions in window_B's _source_ stream, which is the
769 same as window_A's _target_ stream! */
770 const apr_size_t offset
= op
->offset
;
771 const apr_size_t limit
= op
->offset
+ op
->length
;
772 range_list_node_t
*range_list
, *range
;
773 apr_size_t tgt_off
= target_offset
;
775 splay_range_index(offset
, range_index
);
776 range_list
= build_range_list(offset
, limit
, range_index
);
778 for (range
= range_list
; range
; range
= range
->next
)
780 if (range
->kind
== range_from_target
)
781 svn_txdelta__insert_op(&build_baton
, svn_txdelta_target
,
782 range
->target_offset
,
783 range
->limit
- range
->offset
,
786 copy_source_ops(range
->offset
, range
->limit
, tgt_off
,
787 &build_baton
, window_A
, offset_index
,
790 tgt_off
+= range
->limit
- range
->offset
;
792 assert(tgt_off
== target_offset
+ op
->length
);
794 free_range_list(range_list
, range_index
);
795 insert_range(offset
, limit
, target_offset
, range_index
);
798 /* Remember the new offset in the would-be target stream. */
799 target_offset
+= op
->length
;
802 svn_pool_destroy(subpool
);
804 composite
= svn_txdelta__make_window(&build_baton
, pool
);
805 composite
->sview_offset
= window_A
->sview_offset
;
806 composite
->sview_len
= window_A
->sview_len
;
807 composite
->tview_len
= window_B
->tview_len
;
811 /* This is a private interlibrary compatibility wrapper. */
812 svn_txdelta_window_t
*
813 svn_txdelta__compose_windows(const svn_txdelta_window_t
*window_A
,
814 const svn_txdelta_window_t
*window_B
,
816 svn_txdelta_window_t
*
817 svn_txdelta__compose_windows(const svn_txdelta_window_t
*window_A
,
818 const svn_txdelta_window_t
*window_B
,
821 return svn_txdelta_compose_windows(window_A
, window_B
, pool
);