Mark many merge tests as skip-against-old-server.
[svn.git] / subversion / libsvn_delta / compose_delta.c
blobc65c77aa58204e64f0185c42c5dbb104ca826a15
/*
 * compose_delta.c:  Delta window composition.
 *
 * ====================================================================
 * Copyright (c) 2000-2004 CollabNet.  All rights reserved.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution.  The terms
 * are also available at http://subversion.tigris.org/license-1.html.
 * If newer versions of this license are posted there, you may use a
 * newer version instead, at your option.
 *
 * This software consists of voluntary contributions made by many
 * individuals.  For exact contribution history, see the revision
 * history and logs, available at http://subversion.tigris.org/.
 * ====================================================================
 */
20 #include <assert.h>
22 #include <apr_general.h> /* For APR_INLINE */
24 #include "svn_delta.h"
25 #include "svn_pools.h"
26 #include "delta.h"
28 /* Define a MIN macro if this platform doesn't already have one. */
29 #ifndef MIN
30 #define MIN(a, b) ((a) < (b) ? (a) : (b))
31 #endif
34 /* ==================================================================== */
35 /* Support for efficient small-block allocation from pools. */
37 /* The following structs will be allocated and freed often: */
39 /* A node in the range index tree. */
40 typedef struct range_index_node_t range_index_node_t;
41 struct range_index_node_t
43 /* 'offset' and 'limit' define the range in the source window. */
44 apr_size_t offset;
45 apr_size_t limit;
47 /* 'target_offset' is where that range is represented in the target. */
48 apr_size_t target_offset;
50 /* 'left' and 'right' link the node into a splay tree. */
51 range_index_node_t *left, *right;
53 /* 'prev' and 'next' link it into an ordered, doubly-linked list. */
54 range_index_node_t *prev, *next;
57 /* A node in a list of ranges for source and target op copies. */
58 enum range_kind
60 range_from_source,
61 range_from_target
64 typedef struct range_list_node_t range_list_node_t;
65 struct range_list_node_t
67 /* Where does the range come from?
68 'offset' and 'limit' always refer to the "virtual" source data
69 for the second delta window. For a target range, the actual
70 offset to use for generating the target op is 'target_offset';
71 that field isn't used by source ranges. */
72 enum range_kind kind;
74 /* 'offset' and 'limit' define the range. */
75 apr_size_t offset;
76 apr_size_t limit;
78 /* 'target_offset' is the start of the range in the target. */
79 apr_size_t target_offset;
81 /* 'prev' and 'next' link the node into an ordered, doubly-linked list. */
82 range_list_node_t *prev, *next;
86 /* This is what will be allocated: */
87 typedef union alloc_block_t alloc_block_t;
88 union alloc_block_t
90 range_index_node_t index_node;
91 range_list_node_t list_node;
93 /* Links free blocks into a freelist. */
94 alloc_block_t *next_free;
98 /* Allocate a block. */
99 static APR_INLINE void *
100 alloc_block(apr_pool_t *pool, alloc_block_t **free_list)
102 alloc_block_t *block;
103 if (*free_list == NULL)
104 block = apr_palloc(pool, sizeof(*block));
105 else
107 block = *free_list;
108 *free_list = block->next_free;
110 return block;
113 /* Return the block back to the free list. */
114 static APR_INLINE void
115 free_block(void *ptr, alloc_block_t **free_list)
117 /* Wrapper functions take care of type safety. */
118 alloc_block_t *const block = ptr;
119 block->next_free = *free_list;
120 *free_list = block;
125 /* ==================================================================== */
126 /* Mapping offsets in the target streem to txdelta ops. */
128 typedef struct offset_index_t
130 int length;
131 apr_size_t *offs;
132 } offset_index_t;
134 /* Create an index mapping target stream offsets to delta ops in
135 WINDOW. Allocate from POOL. */
137 static offset_index_t *
138 create_offset_index(const svn_txdelta_window_t *window, apr_pool_t *pool)
140 offset_index_t *ndx = apr_palloc(pool, sizeof(*ndx));
141 apr_size_t offset = 0;
142 int i;
144 ndx->length = window->num_ops;
145 ndx->offs = apr_palloc(pool, (ndx->length + 1) * sizeof(*ndx->offs));
147 for (i = 0; i < ndx->length; ++i)
149 ndx->offs[i] = offset;
150 offset += window->ops[i].length;
152 ndx->offs[ndx->length] = offset;
154 return ndx;
157 /* Find the index of the delta op thet defines that data at OFFSET in
158 NDX. */
160 static int
161 search_offset_index(const offset_index_t *ndx, apr_size_t offset)
163 int lo, hi, op;
165 assert(offset < ndx->offs[ndx->length]);
167 for (lo = 0, hi = ndx->length, op = (lo + hi)/2;
168 lo < hi;
169 op = (lo + hi)/2)
171 const apr_size_t this_offset = ndx->offs[op];
172 const apr_size_t next_offset = ndx->offs[op + 1];
173 if (offset < this_offset)
174 hi = op;
175 else if (offset > next_offset)
176 lo = op;
177 else
179 /* this_offset <= offset <= next_offset */
180 if (offset == next_offset)
181 ++op;
182 break;
186 assert(ndx->offs[op] <= offset && offset < ndx->offs[op + 1]);
187 return op;
192 /* ==================================================================== */
193 /* Mapping ranges in the source stream to ranges in the composed delta. */
195 /* The range index tree. */
196 typedef struct range_index_t
198 range_index_node_t *tree;
199 alloc_block_t *free_list;
200 apr_pool_t *pool;
201 } range_index_t;
203 /* Create a range index tree. Allocate from POOL. */
204 static range_index_t *
205 create_range_index(apr_pool_t *pool)
207 range_index_t *ndx = apr_palloc(pool, sizeof(*ndx));
208 ndx->tree = NULL;
209 ndx->pool = pool;
210 ndx->free_list = NULL;
211 return ndx;
214 /* Allocate a node for the range index tree. */
215 static range_index_node_t *
216 alloc_range_index_node(range_index_t *ndx,
217 apr_size_t offset,
218 apr_size_t limit,
219 apr_size_t target_offset)
221 range_index_node_t *const node = alloc_block(ndx->pool, &ndx->free_list);
222 node->offset = offset;
223 node->limit = limit;
224 node->target_offset = target_offset;
225 node->left = node->right = NULL;
226 node->prev = node->next = NULL;
227 return node;
230 /* Free a node from the range index tree. */
231 static void
232 free_range_index_node(range_index_t *ndx, range_index_node_t *node)
234 if (node->next)
235 node->next->prev = node->prev;
236 if (node->prev)
237 node->prev->next = node->next;
238 free_block(node, &ndx->free_list);
242 /* Splay the index tree, using OFFSET as the key. */
244 static void
245 splay_range_index(apr_size_t offset, range_index_t *ndx)
247 range_index_node_t *tree = ndx->tree;
248 range_index_node_t scratch_node;
249 range_index_node_t *left, *right;
251 if (tree == NULL)
252 return;
254 scratch_node.left = scratch_node.right = NULL;
255 left = right = &scratch_node;
257 for (;;)
259 if (offset < tree->offset)
261 if (tree->left != NULL
262 && offset < tree->left->offset)
264 /* Right rotation */
265 range_index_node_t *const node = tree->left;
266 tree->left = node->right;
267 node->right = tree;
268 tree = node;
270 if (tree->left == NULL)
271 break;
273 /* Remember the right subtree */
274 right->left = tree;
275 right = tree;
276 tree = tree->left;
278 else if (offset > tree->offset)
280 if (tree->right != NULL
281 && offset > tree->right->offset)
283 /* Left rotation */
284 range_index_node_t *const node = tree->right;
285 tree->right = node->left;
286 node->left = tree;
287 tree = node;
289 if (tree->right == NULL)
290 break;
292 /* Remember the left subtree */
293 left->right = tree;
294 left = tree;
295 tree = tree->right;
297 else
298 break;
301 /* Link in the left and right subtrees */
302 left->right = tree->left;
303 right->left = tree->right;
304 tree->left = scratch_node.right;
305 tree->right = scratch_node.left;
307 /* The basic top-down splay is finished, but we may still need to
308 turn the tree around. What we want is to put the node with the
309 largest offset where node->offset <= offset at the top of the
310 tree, so that we can insert the new data (or search for existing
311 ranges) to the right of the root. This makes cleaning up the
312 tree after an insert much simpler, and -- incidentally -- makes
313 the whole range index magic work. */
314 if (offset < tree->offset && tree->left != NULL)
316 if (tree->left->right == NULL)
318 /* A single right rotation is enough. */
319 range_index_node_t *const node = tree->left;
320 tree->left = node->right; /* Which is always NULL. */
321 node->right = tree;
322 tree = node;
324 else
326 /* Slide down to the rightmost node in the left subtree. */
327 range_index_node_t **nodep = &tree->left;
328 while ((*nodep)->right != NULL)
329 nodep = &(*nodep)->right;
331 /* Now move this node to root in one giant promotion. */
332 right = tree;
333 left = tree->left;
334 tree = *nodep;
335 *nodep = tree->left;
336 right->left = tree->right; /* Which is always NULL, too. */
337 tree->left = left;
338 tree->right = right;
342 /* Sanity check ... */
343 assert((offset >= tree->offset)
344 || ((tree->left == NULL)
345 && (tree->prev == NULL)));
346 ndx->tree = tree;
350 /* Remove all ranges from NDX that fall into the root's range. To
351 keep the range index as small as possible, we must also remove
352 nodes that don't fall into the new range, but have become redundant
353 because the new range overlaps the beginning of the next range.
354 Like this:
356 new-range: |-----------------|
357 range-1: |-----------------|
358 range-2: |--------------------|
360 Before new-range was inserted, range-1 and range-2 were both
361 necessary. Now the union of new-range and range-2 completely covers
362 range-1, which has become redundant now.
364 FIXME: But, of course, there's a catch. range-1 must still remain
365 in the tree if we want to optimize the number of target copy ops in
366 the case were a copy falls within range-1, but starts before
367 range-2 and ends after new-range. */
369 static void
370 delete_subtree(range_index_t *ndx, range_index_node_t *node)
372 if (node != NULL)
374 delete_subtree(ndx, node->left);
375 delete_subtree(ndx, node->right);
376 free_range_index_node(ndx, node);
380 static void
381 clean_tree(range_index_t *ndx, apr_size_t limit)
383 apr_size_t top_offset = limit + 1;
384 range_index_node_t **nodep = &ndx->tree->right;
385 while (*nodep != NULL)
387 range_index_node_t *const node = *nodep;
388 apr_size_t const offset =
389 (node->right != NULL && node->right->offset < top_offset
390 ? node->right->offset
391 : top_offset);
393 if (node->limit <= limit
394 || (node->offset < limit && offset < limit))
396 *nodep = node->right;
397 node->right = NULL;
398 delete_subtree(ndx, node);
400 else
402 top_offset = node->offset;
403 nodep = &node->left;
409 /* Add a range [OFFSET, LIMIT) into NDX. If NDX already contains a
410 range that encloses [OFFSET, LIMIT), do nothing. Otherwise, remove
411 all ranges from NDX that are superseded by the new range.
412 NOTE: The range index must be splayed to OFFSET! */
414 static void
415 insert_range(apr_size_t offset, apr_size_t limit, apr_size_t target_offset,
416 range_index_t *ndx)
418 range_index_node_t *node = NULL;
420 if (ndx->tree == NULL)
422 node = alloc_range_index_node(ndx, offset, limit, target_offset);
423 ndx->tree = node;
425 else
427 if (offset == ndx->tree->offset
428 && limit > ndx->tree->limit)
430 ndx->tree->limit = limit;
431 ndx->tree->target_offset = target_offset;
432 clean_tree(ndx, limit);
434 else if (offset > ndx->tree->offset
435 && limit > ndx->tree->limit)
437 /* We have to make the same sort of checks as clean_tree()
438 does for superseded ranges. Have to merge these someday. */
440 const svn_boolean_t insert_range_p =
441 (!ndx->tree->next
442 || ndx->tree->limit < ndx->tree->next->offset
443 || limit > ndx->tree->next->limit);
445 if (insert_range_p)
447 /* Again, we have to check if the new node and the one
448 to the left of the root override root's range. */
449 if (ndx->tree->prev && ndx->tree->prev->limit > offset)
451 /* Replace the data in the splayed node. */
452 ndx->tree->offset = offset;
453 ndx->tree->limit = limit;
454 ndx->tree->target_offset = target_offset;
456 else
458 /* Insert the range to the right of the splayed node. */
459 node = alloc_range_index_node(ndx, offset, limit,
460 target_offset);
461 if ((node->next = ndx->tree->next) != NULL)
462 node->next->prev = node;
463 ndx->tree->next = node;
464 node->prev = ndx->tree;
466 node->right = ndx->tree->right;
467 ndx->tree->right = NULL;
468 node->left = ndx->tree;
469 ndx->tree = node;
471 clean_tree(ndx, limit);
473 else
474 /* Ignore the range */;
476 else if (offset < ndx->tree->offset)
478 assert(ndx->tree->left == NULL);
480 /* Insert the range left of the splayed node */
481 node = alloc_range_index_node(ndx, offset, limit, target_offset);
482 node->left = node->prev = NULL;
483 node->right = node->next = ndx->tree;
484 ndx->tree = node->next->prev = node;
485 clean_tree(ndx, limit);
487 else
488 /* Ignore the range */;
494 /* ==================================================================== */
495 /* Juggling with lists of ranges. */
497 /* Allocate a node and add it to the range list. LIST is the head of
498 the range list, TAIL is the last node in the list. NDX holds the
499 freelist; OFFSET, LIMIT and KIND are node data. */
500 static range_list_node_t *
501 alloc_range_list(range_list_node_t **list,
502 range_list_node_t **tail,
503 range_index_t *ndx,
504 enum range_kind kind,
505 apr_size_t offset,
506 apr_size_t limit,
507 apr_size_t target_offset)
509 range_list_node_t *const node = alloc_block(ndx->pool, &ndx->free_list);
510 node->kind = kind;
511 node->offset = offset;
512 node->limit = limit;
513 node->target_offset = target_offset;
514 if (*list == NULL)
516 node->prev = node->next = NULL;
517 *list = *tail = node;
519 else
521 node->prev = *tail;
522 node->next = NULL;
523 (*tail)->next = node;
524 *tail = node;
526 return *list;
529 /* Free a range list. LIST is the head of the list, NDX holds the freelist. */
530 static void
531 free_range_list(range_list_node_t *list, range_index_t *ndx)
533 while (list)
535 range_list_node_t *const node = list;
536 list = node->next;
537 free_block(node, &ndx->free_list);
542 /* Based on the data in NDX, build a list of ranges that cover
543 [OFFSET, LIMIT) in the "virtual" source data.
544 NOTE: The range index must be splayed to OFFSET! */
546 static range_list_node_t *
547 build_range_list(apr_size_t offset, apr_size_t limit, range_index_t *ndx)
549 range_list_node_t *range_list = NULL;
550 range_list_node_t *last_range = NULL;
551 range_index_node_t *node = ndx->tree;
553 while (offset < limit)
555 if (node == NULL)
556 return alloc_range_list(&range_list, &last_range, ndx,
557 range_from_source,
558 offset, limit, 0);
560 if (offset < node->offset)
562 if (limit <= node->offset)
563 return alloc_range_list(&range_list, &last_range, ndx,
564 range_from_source,
565 offset, limit, 0);
566 else
568 alloc_range_list(&range_list, &last_range, ndx,
569 range_from_source,
570 offset, node->offset, 0);
571 offset = node->offset;
574 else
576 /* TODO: (Potential optimization) Investigate if it would
577 make sense to forbid range_from_target lengths shorter
578 than, say, VD_KEY_SIZE (see vdelta.c) */
580 if (offset >= node->limit)
581 node = node->next;
582 else
584 const apr_size_t target_offset =
585 offset - node->offset + node->target_offset;
587 if (limit <= node->limit)
588 return alloc_range_list(&range_list, &last_range, ndx,
589 range_from_target,
590 offset, limit, target_offset);
591 else
593 alloc_range_list(&range_list, &last_range, ndx,
594 range_from_target,
595 offset, node->limit, target_offset);
596 offset = node->limit;
597 node = node->next;
603 assert(!"A range's offset isn't smaller than its limit? Impossible!");
604 return range_list;
608 /* Copy the instructions from WINDOW that define the range [OFFSET,
609 LIMIT) in WINDOW's target stream to TARGET_OFFSET in the window
610 represented by BUILD_BATON. Use NDX to find the instructions in
611 WINDOW. Allocate space in BUILD_BATON from POOL. */
613 static void
614 copy_source_ops(apr_size_t offset, apr_size_t limit,
615 apr_size_t target_offset,
616 svn_txdelta__ops_baton_t *build_baton,
617 const svn_txdelta_window_t *window,
618 const offset_index_t *ndx,
619 apr_pool_t *pool)
621 const int first_op = search_offset_index(ndx, offset);
622 const int last_op = search_offset_index(ndx, limit - 1);
623 int op_ndx;
625 for (op_ndx = first_op; op_ndx <= last_op; ++op_ndx)
627 const svn_txdelta_op_t *const op = &window->ops[op_ndx];
628 const apr_size_t *const off = &ndx->offs[op_ndx];
630 const apr_size_t fix_offset = (offset > off[0] ? offset - off[0] : 0);
631 const apr_size_t fix_limit = (off[1] > limit ? off[1] - limit : 0);
633 /* It would be extremely weird if the fixed-up op had zero length. */
634 assert(fix_offset + fix_limit < op->length);
636 if (op->action_code != svn_txdelta_target)
638 /* Delta ops that don't depend on the virtual target can be
639 copied to the composite unchanged. */
640 const char *const new_data = (op->action_code == svn_txdelta_new
641 ? (window->new_data->data
642 + op->offset + fix_offset)
643 : NULL);
645 svn_txdelta__insert_op(build_baton, op->action_code,
646 op->offset + fix_offset,
647 op->length - fix_offset - fix_limit,
648 new_data, pool);
650 else
652 /* The source of a target copy must start before the current
653 offset in the (virtual) target stream. */
654 assert(op->offset < off[0]);
656 if (op->offset + op->length - fix_limit <= off[0])
658 /* The recursion _must_ end, otherwise the delta has
659 circular references, and that is not possible. */
660 copy_source_ops(op->offset + fix_offset,
661 op->offset + op->length - fix_limit,
662 target_offset,
663 build_baton, window, ndx, pool);
665 else
667 /* This is an overlapping target copy.
668 The idea here is to transpose the pattern, then generate
669 another overlapping copy. */
670 const apr_size_t ptn_length = off[0] - op->offset;
671 const apr_size_t ptn_overlap = fix_offset % ptn_length;
672 apr_size_t fix_off = fix_offset;
673 apr_size_t tgt_off = target_offset;
674 assert(ptn_length > ptn_overlap);
676 /* ### FIXME: ptn_overlap is unsigned, so the if() condition
677 below is always true! Either it should be '> 0', or the
678 code block should be unconditional. See also r2288. */
679 if (ptn_overlap >= 0)
681 /* Issue second subrange in the pattern. */
682 const apr_size_t length =
683 MIN(op->length - fix_off - fix_limit,
684 ptn_length - ptn_overlap);
685 copy_source_ops(op->offset + ptn_overlap,
686 op->offset + ptn_overlap + length,
687 tgt_off,
688 build_baton, window, ndx, pool);
689 fix_off += length;
690 tgt_off += length;
693 assert(fix_off + fix_limit <= op->length);
694 if (ptn_overlap > 0
695 && fix_off + fix_limit < op->length)
697 /* Issue the first subrange in the pattern. */
698 const apr_size_t length =
699 MIN(op->length - fix_off - fix_limit, ptn_overlap);
700 copy_source_ops(op->offset,
701 op->offset + length,
702 tgt_off,
703 build_baton, window, ndx, pool);
704 fix_off += length;
705 tgt_off += length;
708 assert(fix_off + fix_limit <= op->length);
709 if (fix_off + fix_limit < op->length)
711 /* Now multiply the pattern */
712 svn_txdelta__insert_op(build_baton, svn_txdelta_target,
713 tgt_off - ptn_length,
714 op->length - fix_off - fix_limit,
715 NULL, pool);
720 /* Adjust the target offset for the next op in the list. */
721 target_offset += op->length - fix_offset - fix_limit;
727 /* ==================================================================== */
728 /* Bringing it all together. */
731 svn_txdelta_window_t *
732 svn_txdelta_compose_windows(const svn_txdelta_window_t *window_A,
733 const svn_txdelta_window_t *window_B,
734 apr_pool_t *pool)
736 svn_txdelta__ops_baton_t build_baton = { 0 };
737 svn_txdelta_window_t *composite;
738 apr_pool_t *subpool = svn_pool_create(pool);
739 offset_index_t *offset_index = create_offset_index(window_A, subpool);
740 range_index_t *range_index = create_range_index(subpool);
741 apr_size_t target_offset = 0;
742 int i;
744 /* Read the description of the delta composition algorithm in
745 notes/fs-improvements.txt before going any further.
746 You have been warned. */
747 build_baton.new_data = svn_stringbuf_create("", pool);
748 for (i = 0; i < window_B->num_ops; ++i)
750 const svn_txdelta_op_t *const op = &window_B->ops[i];
751 if (op->action_code != svn_txdelta_source)
753 /* Delta ops that don't depend on the source can be copied
754 to the composite unchanged. */
755 const char *const new_data =
756 (op->action_code == svn_txdelta_new
757 ? window_B->new_data->data + op->offset
758 : NULL);
759 svn_txdelta__insert_op(&build_baton, op->action_code,
760 op->offset, op->length,
761 new_data, pool);
763 else
765 /* NOTE: Remember that `offset' and `limit' refer to
766 positions in window_B's _source_ stream, which is the
767 same as window_A's _target_ stream! */
768 const apr_size_t offset = op->offset;
769 const apr_size_t limit = op->offset + op->length;
770 range_list_node_t *range_list, *range;
771 apr_size_t tgt_off = target_offset;
773 splay_range_index(offset, range_index);
774 range_list = build_range_list(offset, limit, range_index);
776 for (range = range_list; range; range = range->next)
778 if (range->kind == range_from_target)
779 svn_txdelta__insert_op(&build_baton, svn_txdelta_target,
780 range->target_offset,
781 range->limit - range->offset,
782 NULL, pool);
783 else
784 copy_source_ops(range->offset, range->limit, tgt_off,
785 &build_baton, window_A, offset_index,
786 pool);
788 tgt_off += range->limit - range->offset;
790 assert(tgt_off == target_offset + op->length);
792 free_range_list(range_list, range_index);
793 insert_range(offset, limit, target_offset, range_index);
796 /* Remember the new offset in the would-be target stream. */
797 target_offset += op->length;
800 svn_pool_destroy(subpool);
802 composite = svn_txdelta__make_window(&build_baton, pool);
803 composite->sview_offset = window_A->sview_offset;
804 composite->sview_len = window_A->sview_len;
805 composite->tview_len = window_B->tview_len;
806 return composite;
809 /* This is a private interlibrary compatibility wrapper. */
810 svn_txdelta_window_t *
811 svn_txdelta__compose_windows(const svn_txdelta_window_t *window_A,
812 const svn_txdelta_window_t *window_B,
813 apr_pool_t *pool);
814 svn_txdelta_window_t *
815 svn_txdelta__compose_windows(const svn_txdelta_window_t *window_A,
816 const svn_txdelta_window_t *window_B,
817 apr_pool_t *pool)
819 return svn_txdelta_compose_windows(window_A, window_B, pool);