Fix compiler warning due to missing function prototype.
[svn.git] / subversion / libsvn_delta / compose_delta.c
blob492afb98d4ae5969dcc03baadf4c5e98263b92dc
1 /*
2 * compose_delta.c: Delta window composition.
4 * ====================================================================
5 * Copyright (c) 2000-2004 CollabNet. All rights reserved.
7 * This software is licensed as described in the file COPYING, which
8 * you should have received as part of this distribution. The terms
9 * are also available at http://subversion.tigris.org/license-1.html.
10 * If newer versions of this license are posted there, you may use a
11 * newer version instead, at your option.
13 * This software consists of voluntary contributions made by many
14 * individuals. For exact contribution history, see the revision
15 * history and logs, available at http://subversion.tigris.org/.
16 * ====================================================================
20 #include <assert.h>
22 #include <apr_general.h> /* For APR_INLINE */
24 #include "svn_delta.h"
25 #include "svn_pools.h"
26 #include "delta.h"
28 /* Define a MIN macro if this platform doesn't already have one. */
29 #ifndef MIN
30 #define MIN(a, b) ((a) < (b) ? (a) : (b))
31 #endif
34 /* ==================================================================== */
35 /* Support for efficient small-block allocation from pools. */
37 /* The following structs will be allocated and freed often: */
39 /* A node in the range index tree. */
40 typedef struct range_index_node_t range_index_node_t;
41 struct range_index_node_t
43 /* 'offset' and 'limit' define the range in the source window. */
44 apr_size_t offset;
45 apr_size_t limit;
47 /* 'target_offset' is where that range is represented in the target. */
48 apr_size_t target_offset;
50 /* 'left' and 'right' link the node into a splay tree. */
51 range_index_node_t *left, *right;
53 /* 'prev' and 'next' link it into an ordered, doubly-linked list. */
54 range_index_node_t *prev, *next;
57 /* A node in a list of ranges for source and target op copies. */
/* Where a composed range's bytes come from. */
enum range_kind
{
  range_from_source,
  range_from_target
};
64 typedef struct range_list_node_t range_list_node_t;
65 struct range_list_node_t
67 /* Where does the range come from?
68 'offset' and 'limit' always refer to the "virtual" source data
69 for the second delta window. For a target range, the actual
70 offset to use for generating the target op is 'target_offset';
71 that field isn't used by source ranges. */
72 enum range_kind kind;
74 /* 'offset' and 'limit' define the range. */
75 apr_size_t offset;
76 apr_size_t limit;
78 /* 'target_offset' is the start of the range in the target. */
79 apr_size_t target_offset;
81 /* 'prev' and 'next' link the node into an ordered, doubly-linked list. */
82 range_list_node_t *prev, *next;
86 /* This is what will be allocated: */
87 typedef union alloc_block_t alloc_block_t;
88 union alloc_block_t
90 range_index_node_t index_node;
91 range_list_node_t list_node;
93 /* Links free blocks into a freelist. */
94 alloc_block_t *next_free;
98 /* Allocate a block. */
99 static APR_INLINE void *
100 alloc_block(apr_pool_t *pool, alloc_block_t **free_list)
102 alloc_block_t *block;
103 if (*free_list == NULL)
104 block = apr_palloc(pool, sizeof(*block));
105 else
107 block = *free_list;
108 *free_list = block->next_free;
110 return block;
113 /* Return the block back to the free list. */
114 static APR_INLINE void
115 free_block(void *ptr, alloc_block_t **free_list)
117 /* Wrapper functions take care of type safety. */
118 alloc_block_t *const block = ptr;
119 block->next_free = *free_list;
120 *free_list = block;
/* ==================================================================== */
/* Mapping offsets in the target stream to txdelta ops. */
128 typedef struct offset_index_t
130 int length;
131 apr_size_t *offs;
132 } offset_index_t;
134 /* Create an index mapping target stream offsets to delta ops in
135 WINDOW. Allocate from POOL. */
137 static offset_index_t *
138 create_offset_index(const svn_txdelta_window_t *window, apr_pool_t *pool)
140 offset_index_t *ndx = apr_palloc(pool, sizeof(*ndx));
141 apr_size_t offset = 0;
142 int i;
144 ndx->length = window->num_ops;
145 ndx->offs = apr_palloc(pool, (ndx->length + 1) * sizeof(*ndx->offs));
147 for (i = 0; i < ndx->length; ++i)
149 ndx->offs[i] = offset;
150 offset += window->ops[i].length;
152 ndx->offs[ndx->length] = offset;
154 return ndx;
157 /* Find the index of the delta op thet defines that data at OFFSET in
158 NDX. */
160 static int
161 search_offset_index(const offset_index_t *ndx, apr_size_t offset)
163 int lo, hi, op;
165 assert(offset < ndx->offs[ndx->length]);
167 for (lo = 0, hi = ndx->length, op = (lo + hi)/2;
168 lo < hi;
169 op = (lo + hi)/2)
171 const apr_size_t this_offset = ndx->offs[op];
172 const apr_size_t next_offset = ndx->offs[op + 1];
173 if (offset < this_offset)
174 hi = op;
175 else if (offset > next_offset)
176 lo = op;
177 else
179 /* this_offset <= offset <= next_offset */
180 if (offset == next_offset)
181 ++op;
182 break;
186 assert(ndx->offs[op] <= offset && offset < ndx->offs[op + 1]);
187 return op;
192 /* ==================================================================== */
193 /* Mapping ranges in the source stream to ranges in the composed delta. */
195 /* The range index tree. */
196 typedef struct range_index_t
198 range_index_node_t *tree;
199 alloc_block_t *free_list;
200 apr_pool_t *pool;
201 } range_index_t;
203 /* Create a range index tree. Allocate from POOL. */
204 static range_index_t *
205 create_range_index(apr_pool_t *pool)
207 range_index_t *ndx = apr_palloc(pool, sizeof(*ndx));
208 ndx->tree = NULL;
209 ndx->pool = pool;
210 ndx->free_list = NULL;
211 return ndx;
214 /* Allocate a node for the range index tree. */
215 static range_index_node_t *
216 alloc_range_index_node(range_index_t *ndx,
217 apr_size_t offset,
218 apr_size_t limit,
219 apr_size_t target_offset)
221 range_index_node_t *const node = alloc_block(ndx->pool, &ndx->free_list);
222 node->offset = offset;
223 node->limit = limit;
224 node->target_offset = target_offset;
225 node->left = node->right = NULL;
226 node->prev = node->next = NULL;
227 return node;
230 /* Free a node from the range index tree. */
231 static void
232 free_range_index_node(range_index_t *ndx, range_index_node_t *node)
234 if (node->next)
235 node->next->prev = node->prev;
236 if (node->prev)
237 node->prev->next = node->next;
238 free_block(node, &ndx->free_list);
242 /* Splay the index tree, using OFFSET as the key. */
244 static void
245 splay_range_index(apr_size_t offset, range_index_t *ndx)
247 range_index_node_t *tree = ndx->tree;
248 range_index_node_t scratch_node;
249 range_index_node_t *left, *right;
251 if (tree == NULL)
252 return;
254 scratch_node.left = scratch_node.right = NULL;
255 left = right = &scratch_node;
257 for (;;)
259 if (offset < tree->offset)
261 if (tree->left != NULL
262 && offset < tree->left->offset)
264 /* Right rotation */
265 range_index_node_t *const node = tree->left;
266 tree->left = node->right;
267 node->right = tree;
268 tree = node;
270 if (tree->left == NULL)
271 break;
273 /* Remember the right subtree */
274 right->left = tree;
275 right = tree;
276 tree = tree->left;
278 else if (offset > tree->offset)
280 if (tree->right != NULL
281 && offset > tree->right->offset)
283 /* Left rotation */
284 range_index_node_t *const node = tree->right;
285 tree->right = node->left;
286 node->left = tree;
287 tree = node;
289 if (tree->right == NULL)
290 break;
292 /* Remember the left subtree */
293 left->right = tree;
294 left = tree;
295 tree = tree->right;
297 else
298 break;
301 /* Link in the left and right subtrees */
302 left->right = tree->left;
303 right->left = tree->right;
304 tree->left = scratch_node.right;
305 tree->right = scratch_node.left;
307 /* The basic top-down splay is finished, but we may still need to
308 turn the tree around. What we want is to put the node with the
309 largest offset where node->offset <= offset at the top of the
310 tree, so that we can insert the new data (or search for existing
311 ranges) to the right of the root. This makes cleaning up the
312 tree after an insert much simpler, and -- incidentally -- makes
313 the whole range index magic work. */
314 if (offset < tree->offset && tree->left != NULL)
316 if (tree->left->right == NULL)
318 /* A single right rotation is enough. */
319 range_index_node_t *const node = tree->left;
320 tree->left = node->right; /* Which is always NULL. */
321 node->right = tree;
322 tree = node;
324 else
326 /* Slide down to the rightmost node in the left subtree. */
327 range_index_node_t **nodep = &tree->left;
328 while ((*nodep)->right != NULL)
329 nodep = &(*nodep)->right;
331 /* Now move this node to root in one giant promotion. */
332 right = tree;
333 left = tree->left;
334 tree = *nodep;
335 *nodep = tree->left;
336 right->left = tree->right; /* Which is always NULL, too. */
337 tree->left = left;
338 tree->right = right;
342 /* Sanity check ... */
343 assert((offset >= tree->offset)
344 || ((tree->left == NULL)
345 && (tree->prev == NULL)));
346 ndx->tree = tree;
/* Remove all ranges from NDX that fall into the root's range.  To
   keep the range index as small as possible, we must also remove
   nodes that don't fall into the new range, but have become redundant
   because the new range overlaps the beginning of the next range.
   Like this:

       new-range: |-----------------|
         range-1:         |-----------------|
         range-2:                 |--------------------|

   Before new-range was inserted, range-1 and range-2 were both
   necessary.  Now the union of new-range and range-2 completely covers
   range-1, which has become redundant now.

   FIXME: But, of course, there's a catch.  range-1 must still remain
   in the tree if we want to optimize the number of target copy ops in
   the case where a copy falls within range-1, but starts before
   range-2 and ends after new-range. */
369 static void
370 delete_subtree(range_index_t *ndx, range_index_node_t *node)
372 if (node != NULL)
374 delete_subtree(ndx, node->left);
375 delete_subtree(ndx, node->right);
376 free_range_index_node(ndx, node);
380 static void
381 clean_tree(range_index_t *ndx, apr_size_t limit)
383 apr_size_t top_offset = limit + 1;
384 range_index_node_t **nodep = &ndx->tree->right;
385 while (*nodep != NULL)
387 range_index_node_t *const node = *nodep;
388 apr_size_t const offset =
389 (node->right != NULL && node->right->offset < top_offset
390 ? node->right->offset
391 : top_offset);
393 if (node->limit <= limit
394 || (node->offset < limit && offset < limit))
396 *nodep = node->right;
397 node->right = NULL;
398 delete_subtree(ndx, node);
400 else
402 top_offset = node->offset;
403 nodep = &node->left;
409 /* Add a range [OFFSET, LIMIT) into NDX. If NDX already contains a
410 range that encloses [OFFSET, LIMIT), do nothing. Otherwise, remove
411 all ranges from NDX that are superseded by the new range.
412 NOTE: The range index must be splayed to OFFSET! */
414 static void
415 insert_range(apr_size_t offset, apr_size_t limit, apr_size_t target_offset,
416 range_index_t *ndx)
418 range_index_node_t *node = NULL;
420 if (ndx->tree == NULL)
422 node = alloc_range_index_node(ndx, offset, limit, target_offset);
423 ndx->tree = node;
425 else
427 if (offset == ndx->tree->offset
428 && limit > ndx->tree->limit)
430 ndx->tree->limit = limit;
431 ndx->tree->target_offset = target_offset;
432 clean_tree(ndx, limit);
434 else if (offset > ndx->tree->offset
435 && limit > ndx->tree->limit)
437 /* We have to make the same sort of checks as clean_tree()
438 does for superseded ranges. Have to merge these someday. */
440 const svn_boolean_t insert_range_p =
441 (!ndx->tree->next
442 || ndx->tree->limit < ndx->tree->next->offset
443 || limit > ndx->tree->next->limit);
445 if (insert_range_p)
447 /* Again, we have to check if the new node and the one
448 to the left of the root override root's range. */
449 if (ndx->tree->prev && ndx->tree->prev->limit > offset)
451 /* Replace the data in the splayed node. */
452 ndx->tree->offset = offset;
453 ndx->tree->limit = limit;
454 ndx->tree->target_offset = target_offset;
456 else
458 /* Insert the range to the right of the splayed node. */
459 node = alloc_range_index_node(ndx, offset, limit,
460 target_offset);
461 if ((node->next = ndx->tree->next) != NULL)
462 node->next->prev = node;
463 ndx->tree->next = node;
464 node->prev = ndx->tree;
466 node->right = ndx->tree->right;
467 ndx->tree->right = NULL;
468 node->left = ndx->tree;
469 ndx->tree = node;
471 clean_tree(ndx, limit);
473 else
474 /* Ignore the range */;
476 else if (offset < ndx->tree->offset)
478 assert(ndx->tree->left == NULL);
480 /* Insert the range left of the splayed node */
481 node = alloc_range_index_node(ndx, offset, limit, target_offset);
482 node->left = node->prev = NULL;
483 node->right = node->next = ndx->tree;
484 ndx->tree = node->next->prev = node;
485 clean_tree(ndx, limit);
487 else
488 /* Ignore the range */;
494 /* ==================================================================== */
495 /* Juggling with lists of ranges. */
497 /* Allocate a node and add it to the range list. LIST is the head of
498 the range list, TAIL is the last node in the list. NDX holds the
499 freelist; OFFSET, LIMIT and KIND are node data. */
500 static range_list_node_t *
501 alloc_range_list(range_list_node_t **list,
502 range_list_node_t **tail,
503 range_index_t *ndx,
504 enum range_kind kind,
505 apr_size_t offset,
506 apr_size_t limit,
507 apr_size_t target_offset)
509 range_list_node_t *const node = alloc_block(ndx->pool, &ndx->free_list);
510 node->kind = kind;
511 node->offset = offset;
512 node->limit = limit;
513 node->target_offset = target_offset;
514 if (*list == NULL)
516 node->prev = node->next = NULL;
517 *list = *tail = node;
519 else
521 node->prev = *tail;
522 node->next = NULL;
523 (*tail)->next = node;
524 *tail = node;
526 return *list;
529 /* Free a range list. LIST is the head of the list, NDX holds the freelist. */
530 static void
531 free_range_list(range_list_node_t *list, range_index_t *ndx)
533 while (list)
535 range_list_node_t *const node = list;
536 list = node->next;
537 free_block(node, &ndx->free_list);
542 /* Based on the data in NDX, build a list of ranges that cover
543 [OFFSET, LIMIT) in the "virtual" source data.
544 NOTE: The range index must be splayed to OFFSET! */
546 static range_list_node_t *
547 build_range_list(apr_size_t offset, apr_size_t limit, range_index_t *ndx)
549 range_list_node_t *range_list = NULL;
550 range_list_node_t *last_range = NULL;
551 range_index_node_t *node = ndx->tree;
553 while (offset < limit)
555 if (node == NULL)
556 return alloc_range_list(&range_list, &last_range, ndx,
557 range_from_source,
558 offset, limit, 0);
560 if (offset < node->offset)
562 if (limit <= node->offset)
563 return alloc_range_list(&range_list, &last_range, ndx,
564 range_from_source,
565 offset, limit, 0);
566 else
568 alloc_range_list(&range_list, &last_range, ndx,
569 range_from_source,
570 offset, node->offset, 0);
571 offset = node->offset;
574 else
576 /* TODO: (Potential optimization) Investigate if it would
577 make sense to forbid short range_from_target lengths
578 (this comment originally said "shorter than, say,
579 VD_KEY_SIZE (see vdelta.c)", but Subversion no longer
580 uses vdelta). */
582 if (offset >= node->limit)
583 node = node->next;
584 else
586 const apr_size_t target_offset =
587 offset - node->offset + node->target_offset;
589 if (limit <= node->limit)
590 return alloc_range_list(&range_list, &last_range, ndx,
591 range_from_target,
592 offset, limit, target_offset);
593 else
595 alloc_range_list(&range_list, &last_range, ndx,
596 range_from_target,
597 offset, node->limit, target_offset);
598 offset = node->limit;
599 node = node->next;
605 assert(!"A range's offset isn't smaller than its limit? Impossible!");
606 return range_list;
610 /* Copy the instructions from WINDOW that define the range [OFFSET,
611 LIMIT) in WINDOW's target stream to TARGET_OFFSET in the window
612 represented by BUILD_BATON. Use NDX to find the instructions in
613 WINDOW. Allocate space in BUILD_BATON from POOL. */
615 static void
616 copy_source_ops(apr_size_t offset, apr_size_t limit,
617 apr_size_t target_offset,
618 svn_txdelta__ops_baton_t *build_baton,
619 const svn_txdelta_window_t *window,
620 const offset_index_t *ndx,
621 apr_pool_t *pool)
623 const int first_op = search_offset_index(ndx, offset);
624 const int last_op = search_offset_index(ndx, limit - 1);
625 int op_ndx;
627 for (op_ndx = first_op; op_ndx <= last_op; ++op_ndx)
629 const svn_txdelta_op_t *const op = &window->ops[op_ndx];
630 const apr_size_t *const off = &ndx->offs[op_ndx];
632 const apr_size_t fix_offset = (offset > off[0] ? offset - off[0] : 0);
633 const apr_size_t fix_limit = (off[1] > limit ? off[1] - limit : 0);
635 /* It would be extremely weird if the fixed-up op had zero length. */
636 assert(fix_offset + fix_limit < op->length);
638 if (op->action_code != svn_txdelta_target)
640 /* Delta ops that don't depend on the virtual target can be
641 copied to the composite unchanged. */
642 const char *const new_data = (op->action_code == svn_txdelta_new
643 ? (window->new_data->data
644 + op->offset + fix_offset)
645 : NULL);
647 svn_txdelta__insert_op(build_baton, op->action_code,
648 op->offset + fix_offset,
649 op->length - fix_offset - fix_limit,
650 new_data, pool);
652 else
654 /* The source of a target copy must start before the current
655 offset in the (virtual) target stream. */
656 assert(op->offset < off[0]);
658 if (op->offset + op->length - fix_limit <= off[0])
660 /* The recursion _must_ end, otherwise the delta has
661 circular references, and that is not possible. */
662 copy_source_ops(op->offset + fix_offset,
663 op->offset + op->length - fix_limit,
664 target_offset,
665 build_baton, window, ndx, pool);
667 else
669 /* This is an overlapping target copy.
670 The idea here is to transpose the pattern, then generate
671 another overlapping copy. */
672 const apr_size_t ptn_length = off[0] - op->offset;
673 const apr_size_t ptn_overlap = fix_offset % ptn_length;
674 apr_size_t fix_off = fix_offset;
675 apr_size_t tgt_off = target_offset;
676 assert(ptn_length > ptn_overlap);
678 /* ### FIXME: ptn_overlap is unsigned, so the if() condition
679 below is always true! Either it should be '> 0', or the
680 code block should be unconditional. See also r2288. */
681 if (ptn_overlap >= 0)
683 /* Issue second subrange in the pattern. */
684 const apr_size_t length =
685 MIN(op->length - fix_off - fix_limit,
686 ptn_length - ptn_overlap);
687 copy_source_ops(op->offset + ptn_overlap,
688 op->offset + ptn_overlap + length,
689 tgt_off,
690 build_baton, window, ndx, pool);
691 fix_off += length;
692 tgt_off += length;
695 assert(fix_off + fix_limit <= op->length);
696 if (ptn_overlap > 0
697 && fix_off + fix_limit < op->length)
699 /* Issue the first subrange in the pattern. */
700 const apr_size_t length =
701 MIN(op->length - fix_off - fix_limit, ptn_overlap);
702 copy_source_ops(op->offset,
703 op->offset + length,
704 tgt_off,
705 build_baton, window, ndx, pool);
706 fix_off += length;
707 tgt_off += length;
710 assert(fix_off + fix_limit <= op->length);
711 if (fix_off + fix_limit < op->length)
713 /* Now multiply the pattern */
714 svn_txdelta__insert_op(build_baton, svn_txdelta_target,
715 tgt_off - ptn_length,
716 op->length - fix_off - fix_limit,
717 NULL, pool);
722 /* Adjust the target offset for the next op in the list. */
723 target_offset += op->length - fix_offset - fix_limit;
729 /* ==================================================================== */
730 /* Bringing it all together. */
733 svn_txdelta_window_t *
734 svn_txdelta_compose_windows(const svn_txdelta_window_t *window_A,
735 const svn_txdelta_window_t *window_B,
736 apr_pool_t *pool)
738 svn_txdelta__ops_baton_t build_baton = { 0 };
739 svn_txdelta_window_t *composite;
740 apr_pool_t *subpool = svn_pool_create(pool);
741 offset_index_t *offset_index = create_offset_index(window_A, subpool);
742 range_index_t *range_index = create_range_index(subpool);
743 apr_size_t target_offset = 0;
744 int i;
746 /* Read the description of the delta composition algorithm in
747 notes/fs-improvements.txt before going any further.
748 You have been warned. */
749 build_baton.new_data = svn_stringbuf_create("", pool);
750 for (i = 0; i < window_B->num_ops; ++i)
752 const svn_txdelta_op_t *const op = &window_B->ops[i];
753 if (op->action_code != svn_txdelta_source)
755 /* Delta ops that don't depend on the source can be copied
756 to the composite unchanged. */
757 const char *const new_data =
758 (op->action_code == svn_txdelta_new
759 ? window_B->new_data->data + op->offset
760 : NULL);
761 svn_txdelta__insert_op(&build_baton, op->action_code,
762 op->offset, op->length,
763 new_data, pool);
765 else
767 /* NOTE: Remember that `offset' and `limit' refer to
768 positions in window_B's _source_ stream, which is the
769 same as window_A's _target_ stream! */
770 const apr_size_t offset = op->offset;
771 const apr_size_t limit = op->offset + op->length;
772 range_list_node_t *range_list, *range;
773 apr_size_t tgt_off = target_offset;
775 splay_range_index(offset, range_index);
776 range_list = build_range_list(offset, limit, range_index);
778 for (range = range_list; range; range = range->next)
780 if (range->kind == range_from_target)
781 svn_txdelta__insert_op(&build_baton, svn_txdelta_target,
782 range->target_offset,
783 range->limit - range->offset,
784 NULL, pool);
785 else
786 copy_source_ops(range->offset, range->limit, tgt_off,
787 &build_baton, window_A, offset_index,
788 pool);
790 tgt_off += range->limit - range->offset;
792 assert(tgt_off == target_offset + op->length);
794 free_range_list(range_list, range_index);
795 insert_range(offset, limit, target_offset, range_index);
798 /* Remember the new offset in the would-be target stream. */
799 target_offset += op->length;
802 svn_pool_destroy(subpool);
804 composite = svn_txdelta__make_window(&build_baton, pool);
805 composite->sview_offset = window_A->sview_offset;
806 composite->sview_len = window_A->sview_len;
807 composite->tview_len = window_B->tview_len;
808 return composite;
811 /* This is a private interlibrary compatibility wrapper. */
812 svn_txdelta_window_t *
813 svn_txdelta__compose_windows(const svn_txdelta_window_t *window_A,
814 const svn_txdelta_window_t *window_B,
815 apr_pool_t *pool);
816 svn_txdelta_window_t *
817 svn_txdelta__compose_windows(const svn_txdelta_window_t *window_A,
818 const svn_txdelta_window_t *window_B,
819 apr_pool_t *pool)
821 return svn_txdelta_compose_windows(window_A, window_B, pool);