2 * replay.c: an editor driver for changes made in a given revision
5 * ====================================================================
6 * Copyright (c) 2000-2006 CollabNet. All rights reserved.
8 * This software is licensed as described in the file COPYING, which
9 * you should have received as part of this distribution. The terms
10 * are also available at http://subversion.tigris.org/license-1.html.
11 * If newer versions of this license are posted there, you may use a
12 * newer version instead, at your option.
14 * This software consists of voluntary contributions made by many
15 * individuals. For exact contribution history, see the revision
16 * history and logs, available at http://subversion.tigris.org/.
17 * ====================================================================
25 #include "svn_types.h"
26 #include "svn_delta.h"
29 #include "svn_repos.h"
30 #include "svn_props.h"
31 #include "svn_pools.h"
33 #include "svn_private_config.h"
/* The year was 2003.  Subversion usage was rampant in the world, and
   there was a rapidly growing issues database to prove it.  To make
   matters worse, svn_repos_dir_delta() had simply outgrown itself.
   No longer content to simply describe the differences between two
   trees, the function had been slowly bearing the added
   responsibility of representing the actions that had been taken to
   cause those differences -- a burden it was never meant to bear.
   Now grown into a twisted mess of razor-sharp metal and glass, and
   trembling with a sort of momentarily stayed spring force,
   svn_repos_dir_delta was a timebomb poised for total annihilation of
   the American Midwest.

   Subversion needed a change.

   Changes, in fact.  And not just in the literary segue sense.  What
   Subversion desperately needed was a new mechanism solely
   responsible for replaying repository actions back to some
   interested party -- to translate and retransmit the contents of the
   Berkeley 'changes' database file. */
/* The filesystem keeps a record of high-level actions that affect the
   files and directories in itself.  The 'changes' table records
   additions, deletions, textual and property modifications, and so
   on.  The goal of the functions in this file is to examine those
   change records, and use them to drive an editor interface in such a
   way as to effectively replay those actions.

   This is critically different from what svn_repos_dir_delta() was
   designed to do.  That function describes, in the simplest way it
   can, how to transform one tree into another.  It doesn't care
   whether or not this was the same way a user might have done this
   transformation.  More to the point, it doesn't care if this is how
   those differences *did* come into being.  And it is for this reason
   that it cannot be relied upon for tasks such as the repository
   dumpfile-generation code, which is supposed to represent not
   changes, but actions that cause changes.

   So, what's the plan here?

   First, we fetch the changes for a particular revision or
   transaction.  We get these as an array, sorted chronologically.
   From this array we will build a hash, keyed on the path associated
   with each change item, and whose values are arrays of changes made
   to that path, again preserving the chronological ordering.

   Once our hash is built, we then sort all the keys of the hash (the
   paths) using a depth-first directory sort routine.

   Finally, we drive an editor, moving down our list of sorted paths,
   and manufacturing any intermediate editor calls (directory openings
   and closures) needed to navigate between each successive path.  For
   each path, we replay the sorted actions that occurred at that path.

   When we've finished the editor drive, we should have fully replayed
   the filesystem events that occurred in that revision or transaction
   (though not necessarily in the same order in which they
   occurred). */
100 /*** Helper functions. ***/
103 /* Information for an active copy, that is a directory which we are currently
104 working on and which was added with history. */
107 /* Destination path. */
109 /* Copy source. NULL/invalid if this is an add without history,
110 nested inside an add with history. */
111 const char *copyfrom_path
;
112 svn_revnum_t copyfrom_rev
;
115 struct path_driver_cb_baton
117 const svn_delta_editor_t
*editor
;
120 /* The root of the revision we're replaying. */
123 /* The root of the previous revision. If this is non-NULL it means that
124 we are supposed to generate props and text deltas relative to it. */
125 svn_fs_root_t
*compare_root
;
127 apr_hash_t
*changed_paths
;
129 svn_repos_authz_func_t authz_read_func
;
130 void *authz_read_baton
;
132 const char *base_path
;
135 svn_revnum_t low_water_mark
;
136 /* Stack of active copy operations. */
137 apr_array_header_t
*copies
;
139 /* The global pool for this replay operation. */
143 /* Recursively traverse PATH (as it exists under SOURCE_ROOT) emitting
144 the appropriate editor calls to add it and its children without any
145 history. This is meant to be used when either a subset of the tree
146 has been ignored and we need to copy something from that subset to
147 the part of the tree we do care about, or if a subset of the tree is
148 unavailable because of authz and we need to use it as the source of
151 add_subdir(svn_fs_root_t
*source_root
,
152 svn_fs_root_t
*target_root
,
153 const svn_delta_editor_t
*editor
,
157 const char *source_path
,
158 svn_repos_authz_func_t authz_read_func
,
159 void *authz_read_baton
,
160 apr_hash_t
*changed_paths
,
164 apr_pool_t
*subpool
= svn_pool_create(pool
);
165 apr_hash_index_t
*hi
, *phi
;
169 SVN_ERR(editor
->add_directory(path
, parent_baton
, NULL
,
170 SVN_INVALID_REVNUM
, pool
, dir_baton
));
172 SVN_ERR(svn_fs_node_proplist(&props
, target_root
, path
, pool
));
174 for (phi
= apr_hash_first(pool
, props
); phi
; phi
= apr_hash_next(phi
))
179 svn_pool_clear(subpool
);
181 apr_hash_this(phi
, &key
, NULL
, &val
);
183 SVN_ERR(editor
->change_dir_prop(*dir_baton
,
189 /* We have to get the dirents from the source path, not the target,
190 because we want nested copies from *readable* paths to be handled by
191 path_driver_cb_func, not add_subdir (in order to preserve history). */
192 SVN_ERR(svn_fs_dir_entries(&dirents
, source_root
, source_path
, pool
));
194 for (hi
= apr_hash_first(pool
, dirents
); hi
; hi
= apr_hash_next(hi
))
196 svn_fs_path_change_t
*change
;
197 svn_boolean_t readable
= TRUE
;
198 svn_fs_dirent_t
*dent
;
199 const char *new_path
;
202 svn_pool_clear(subpool
);
204 apr_hash_this(hi
, NULL
, NULL
, &val
);
208 new_path
= svn_path_join(path
, dent
->name
, subpool
);
210 /* If a file or subdirectory of the copied directory is listed as a
211 changed path (because it was modified after the copy but before the
212 commit), we remove it from the changed_paths hash so that future
213 calls to path_driver_cb_func will ignore it. */
214 change
= apr_hash_get(changed_paths
, new_path
, APR_HASH_KEY_STRING
);
217 apr_hash_set(changed_paths
, new_path
, APR_HASH_KEY_STRING
, NULL
);
218 /* If it's a delete, skip this entry. */
219 if (change
->change_kind
== svn_fs_path_change_delete
)
224 SVN_ERR(authz_read_func(&readable
, target_root
, new_path
,
225 authz_read_baton
, pool
));
230 if (dent
->kind
== svn_node_dir
)
234 SVN_ERR(add_subdir(source_root
, target_root
, editor
, edit_baton
,
235 new_path
, *dir_baton
,
236 svn_path_join(source_path
, dent
->name
,
238 authz_read_func
, authz_read_baton
,
239 changed_paths
, subpool
, &new_dir_baton
));
241 SVN_ERR(editor
->close_directory(new_dir_baton
, subpool
));
243 else if (dent
->kind
== svn_node_file
)
245 svn_txdelta_window_handler_t delta_handler
;
246 void *delta_handler_baton
, *file_baton
;
247 svn_txdelta_stream_t
*delta_stream
;
248 unsigned char digest
[APR_MD5_DIGESTSIZE
];
250 SVN_ERR(editor
->add_file(new_path
, *dir_baton
, NULL
,
251 SVN_INVALID_REVNUM
, pool
, &file_baton
));
253 SVN_ERR(svn_fs_node_proplist(&props
, target_root
, new_path
, subpool
));
255 for (phi
= apr_hash_first(pool
, props
);
257 phi
= apr_hash_next(phi
))
261 apr_hash_this(phi
, &key
, NULL
, &val
);
263 SVN_ERR(editor
->change_file_prop(file_baton
,
269 SVN_ERR(editor
->apply_textdelta(file_baton
, NULL
, pool
,
271 &delta_handler_baton
));
273 SVN_ERR(svn_fs_get_file_delta_stream
274 (&delta_stream
, NULL
, NULL
, target_root
, new_path
,
277 SVN_ERR(svn_txdelta_send_txstream(delta_stream
,
282 SVN_ERR(svn_fs_file_md5_checksum(digest
,
286 SVN_ERR(editor
->close_file(file_baton
,
287 svn_md5_digest_to_cstring(digest
, pool
),
294 svn_pool_destroy(subpool
);
300 is_within_base_path(const char *path
, const char *base_path
, int base_len
)
302 if (base_path
[0] == '\0')
305 if (strncmp(base_path
, path
, base_len
) == 0
306 && (path
[base_len
] == '/' || path
[base_len
] == '\0'))
313 path_driver_cb_func(void **dir_baton
,
315 void *callback_baton
,
319 struct path_driver_cb_baton
*cb
= callback_baton
;
320 const svn_delta_editor_t
*editor
= cb
->editor
;
321 void *edit_baton
= cb
->edit_baton
;
322 svn_fs_root_t
*root
= cb
->root
;
323 svn_fs_path_change_t
*change
;
324 svn_boolean_t do_add
= FALSE
, do_delete
= FALSE
;
325 svn_node_kind_t kind
;
326 void *file_baton
= NULL
;
327 const char *copyfrom_path
= NULL
;
328 const char *real_copyfrom_path
= NULL
;
329 svn_revnum_t copyfrom_rev
;
330 svn_boolean_t src_readable
= TRUE
;
331 svn_fs_root_t
*source_root
= cb
->compare_root
;
332 const char *source_path
= source_root
? path
: NULL
;
333 const char *base_path
= cb
->base_path
;
334 int base_path_len
= cb
->base_path_len
;
338 /* First, flush the copies stack so it only contains ancestors of path. */
339 while (cb
->copies
->nelts
> 0
340 && ! svn_path_is_ancestor(APR_ARRAY_IDX(cb
->copies
,
341 cb
->copies
->nelts
- 1,
342 struct copy_info
).path
,
346 change
= apr_hash_get(cb
->changed_paths
, path
, APR_HASH_KEY_STRING
);
349 /* This can only happen if the path was removed from cb->changed_paths
350 by an earlier call to add_subdir, which means the path was already
351 handled and we should simply ignore it. */
354 switch (change
->change_kind
)
356 case svn_fs_path_change_add
:
360 case svn_fs_path_change_delete
:
364 case svn_fs_path_change_replace
:
369 case svn_fs_path_change_modify
:
375 /* Handle any deletions. */
377 SVN_ERR(editor
->delete_entry(path
, SVN_INVALID_REVNUM
,
378 parent_baton
, pool
));
380 /* Fetch the node kind if it makes sense to do so. */
381 if (! do_delete
|| do_add
)
383 SVN_ERR(svn_fs_check_path(&kind
, root
, path
, pool
));
384 if ((kind
!= svn_node_dir
) && (kind
!= svn_node_file
))
385 return svn_error_createf
386 (SVN_ERR_FS_NOT_FOUND
, NULL
,
387 _("Filesystem path '%s' is neither a file nor a directory"), path
);
390 /* Handle any adds/opens. */
393 svn_fs_root_t
*copyfrom_root
= NULL
;
394 /* Was this node copied? */
395 SVN_ERR(svn_fs_copied_from(©from_rev
, ©from_path
,
398 if (copyfrom_path
&& SVN_IS_VALID_REVNUM(copyfrom_rev
))
400 SVN_ERR(svn_fs_revision_root(©from_root
,
401 svn_fs_root_fs(root
),
402 copyfrom_rev
, pool
));
404 if (cb
->authz_read_func
)
406 SVN_ERR(cb
->authz_read_func(&src_readable
, copyfrom_root
,
408 cb
->authz_read_baton
, pool
));
412 /* Save away the copyfrom path in case we null it out below. */
413 real_copyfrom_path
= copyfrom_path
;
414 /* If we have a copyfrom path, and we can't read it or we're just
415 ignoring it, or the copyfrom rev is prior to the low water mark
416 then we just null them out and do a raw add with no history at
420 || ! is_within_base_path(copyfrom_path
+ 1, base_path
,
422 || cb
->low_water_mark
> copyfrom_rev
))
424 copyfrom_path
= NULL
;
425 copyfrom_rev
= SVN_INVALID_REVNUM
;
428 /* Do the right thing based on the path KIND. */
429 if (kind
== svn_node_dir
)
431 /* If this is a copy, but we can't represent it as such,
432 then we just do a recursive add of the source path
434 if (real_copyfrom_path
&& ! copyfrom_path
)
436 SVN_ERR(add_subdir(copyfrom_root
, root
, editor
, edit_baton
,
437 path
, parent_baton
, real_copyfrom_path
,
438 cb
->authz_read_func
, cb
->authz_read_baton
,
439 cb
->changed_paths
, pool
, dir_baton
));
443 SVN_ERR(editor
->add_directory(path
, parent_baton
,
444 copyfrom_path
, copyfrom_rev
,
450 SVN_ERR(editor
->add_file(path
, parent_baton
, copyfrom_path
,
451 copyfrom_rev
, pool
, &file_baton
));
454 /* If we represent this as a copy... */
457 /* If it is a directory, make sure descendants get the correct
458 delta source by remembering that we are operating inside a
459 (possibly nested) copy operation. */
460 if (kind
== svn_node_dir
)
462 struct copy_info
*info
= &APR_ARRAY_PUSH(cb
->copies
,
464 info
->path
= apr_pstrdup(cb
->pool
, path
);
465 info
->copyfrom_path
= apr_pstrdup(cb
->pool
, copyfrom_path
);
466 info
->copyfrom_rev
= copyfrom_rev
;
469 /* Save the source so that we can use it later, when we
470 need to generate text and prop deltas. */
471 source_root
= copyfrom_root
;
472 source_path
= copyfrom_path
;
475 /* Else, we are an add without history... */
477 /* If an ancestor is added with history, we need to forget about
478 that here, go on with life and repeat all the mistakes of our
480 if (kind
== svn_node_dir
&& cb
->copies
->nelts
> 0)
482 struct copy_info
*info
= &APR_ARRAY_PUSH(cb
->copies
,
484 info
->path
= apr_pstrdup(cb
->pool
, path
);
485 info
->copyfrom_path
= NULL
;
486 info
->copyfrom_rev
= SVN_INVALID_REVNUM
;
492 else if (! do_delete
)
494 /* Do the right thing based on the path KIND (and the presence
495 of a PARENT_BATON). */
496 if (kind
== svn_node_dir
)
500 SVN_ERR(editor
->open_directory(path
, parent_baton
,
506 SVN_ERR(editor
->open_root(edit_baton
, SVN_INVALID_REVNUM
,
512 SVN_ERR(editor
->open_file(path
, parent_baton
, SVN_INVALID_REVNUM
,
515 /* If we are inside an add with history, we need to adjust the
517 if (cb
->copies
->nelts
> 0)
519 struct copy_info
*info
= &APR_ARRAY_IDX(cb
->copies
,
520 cb
->copies
->nelts
- 1,
522 if (info
->copyfrom_path
)
524 SVN_ERR(svn_fs_revision_root(&source_root
,
525 svn_fs_root_fs(root
),
526 info
->copyfrom_rev
, pool
));
527 source_path
= svn_path_join(info
->copyfrom_path
,
528 svn_path_is_child(info
->path
, path
,
533 /* This is an add without history, nested inside an
534 add with history. We have no delta source in this case. */
541 /* Handle property modifications. */
542 if (! do_delete
|| do_add
)
544 if (change
->prop_mod
)
546 if (cb
->compare_root
)
548 apr_array_header_t
*prop_diffs
;
549 apr_hash_t
*old_props
;
550 apr_hash_t
*new_props
;
554 SVN_ERR(svn_fs_node_proplist
555 (&old_props
, source_root
, source_path
, pool
));
557 old_props
= apr_hash_make(pool
);
559 SVN_ERR(svn_fs_node_proplist(&new_props
, root
, path
, pool
));
561 SVN_ERR(svn_prop_diffs(&prop_diffs
, new_props
, old_props
,
564 for (i
= 0; i
< prop_diffs
->nelts
; ++i
)
566 svn_prop_t
*pc
= &APR_ARRAY_IDX(prop_diffs
, i
, svn_prop_t
);
567 if (kind
== svn_node_dir
)
568 SVN_ERR(editor
->change_dir_prop(*dir_baton
, pc
->name
,
570 else if (kind
== svn_node_file
)
571 SVN_ERR(editor
->change_file_prop(file_baton
, pc
->name
,
577 /* Just do a dummy prop change to signal that there are *any*
579 if (kind
== svn_node_dir
)
580 SVN_ERR(editor
->change_dir_prop(*dir_baton
, "", NULL
,
582 else if (kind
== svn_node_file
)
583 SVN_ERR(editor
->change_file_prop(file_baton
, "", NULL
,
588 /* Handle textual modifications.
590 Note that this needs to happen in the "copy from a file we
591 aren't allowed to see" case since otherwise the caller will
592 have no way to actually get the new file's contents, which
593 they are apparently allowed to see. */
594 if (kind
== svn_node_file
595 && (change
->text_mod
|| (real_copyfrom_path
&& ! copyfrom_path
)))
597 svn_txdelta_window_handler_t delta_handler
;
598 void *delta_handler_baton
;
599 const char *checksum
= NULL
;
601 if (cb
->compare_root
&& source_root
&& source_path
)
603 unsigned char digest
[APR_MD5_DIGESTSIZE
];
604 SVN_ERR(svn_fs_file_md5_checksum(digest
,
608 checksum
= svn_md5_digest_to_cstring(digest
, pool
);
611 SVN_ERR(editor
->apply_textdelta(file_baton
, checksum
, pool
,
613 &delta_handler_baton
));
614 if (cb
->compare_root
)
616 svn_txdelta_stream_t
*delta_stream
;
618 SVN_ERR(svn_fs_get_file_delta_stream
619 (&delta_stream
, source_root
, source_path
,
622 SVN_ERR(svn_txdelta_send_txstream(delta_stream
,
628 SVN_ERR(delta_handler(NULL
, delta_handler_baton
));
632 /* Close the file baton if we opened it. */
635 unsigned char digest
[APR_MD5_DIGESTSIZE
];
636 SVN_ERR(svn_fs_file_md5_checksum(digest
, root
, path
, pool
));
637 SVN_ERR(editor
->close_file(file_baton
,
638 svn_md5_digest_to_cstring(digest
, pool
),
649 svn_repos_replay2(svn_fs_root_t
*root
,
650 const char *base_path
,
651 svn_revnum_t low_water_mark
,
652 svn_boolean_t send_deltas
,
653 const svn_delta_editor_t
*editor
,
655 svn_repos_authz_func_t authz_read_func
,
656 void *authz_read_baton
,
659 apr_hash_t
*fs_changes
;
660 apr_hash_t
*changed_paths
;
661 apr_hash_index_t
*hi
;
662 apr_array_header_t
*paths
;
663 struct path_driver_cb_baton cb_baton
;
666 /* Fetch the paths changed under ROOT. */
667 SVN_ERR(svn_fs_paths_changed(&fs_changes
, root
, pool
));
671 else if (base_path
[0] == '/')
674 base_path_len
= strlen(base_path
);
676 /* Make an array from the keys of our CHANGED_PATHS hash, and copy
677 the values into a new hash whose keys have no leading slashes. */
678 paths
= apr_array_make(pool
, apr_hash_count(fs_changes
),
679 sizeof(const char *));
680 changed_paths
= apr_hash_make(pool
);
681 for (hi
= apr_hash_first(pool
, fs_changes
); hi
; hi
= apr_hash_next(hi
))
687 svn_fs_path_change_t
*change
;
688 svn_boolean_t allowed
= TRUE
;
690 apr_hash_this(hi
, &key
, &keylen
, &val
);
695 SVN_ERR(authz_read_func(&allowed
, root
, path
, authz_read_baton
,
706 /* If the base_path doesn't match the top directory of this path
707 we don't want anything to do with it... */
708 if (is_within_base_path(path
, base_path
, base_path_len
))
710 APR_ARRAY_PUSH(paths
, const char *) = path
;
711 apr_hash_set(changed_paths
, path
, keylen
, change
);
713 /* ...unless this was a change to one of the parent directories of
715 else if (is_within_base_path(base_path
, path
, keylen
))
717 APR_ARRAY_PUSH(paths
, const char *) = path
;
718 apr_hash_set(changed_paths
, path
, keylen
, change
);
723 /* If we were not given a low water mark, assume that everything is there,
724 all the way back to revision 0. */
725 if (! SVN_IS_VALID_REVNUM(low_water_mark
))
728 /* Initialize our callback baton. */
729 cb_baton
.editor
= editor
;
730 cb_baton
.edit_baton
= edit_baton
;
731 cb_baton
.root
= root
;
732 cb_baton
.changed_paths
= changed_paths
;
733 cb_baton
.authz_read_func
= authz_read_func
;
734 cb_baton
.authz_read_baton
= authz_read_baton
;
735 cb_baton
.base_path
= base_path
;
736 cb_baton
.base_path_len
= base_path_len
;
737 cb_baton
.low_water_mark
= low_water_mark
;
738 cb_baton
.compare_root
= NULL
;
742 SVN_ERR(svn_fs_revision_root(&cb_baton
.compare_root
,
743 svn_fs_root_fs(root
),
744 svn_fs_is_revision_root(root
)
745 ? svn_fs_revision_root_revision(root
) - 1
746 : svn_fs_txn_root_base_revision(root
),
750 cb_baton
.copies
= apr_array_make(pool
, 4, sizeof(struct copy_info
));
751 cb_baton
.pool
= pool
;
753 /* Determine the revision to use throughout the edit, and call
754 EDITOR's set_target_revision() function. */
755 if (svn_fs_is_revision_root(root
))
757 svn_revnum_t revision
= svn_fs_revision_root_revision(root
);
758 SVN_ERR(editor
->set_target_revision(edit_baton
, revision
, pool
));
761 /* Call the path-based editor driver. */
762 SVN_ERR(svn_delta_path_driver(editor
, edit_baton
,
763 SVN_INVALID_REVNUM
, paths
,
764 path_driver_cb_func
, &cb_baton
, pool
));
770 svn_repos_replay(svn_fs_root_t
*root
,
771 const svn_delta_editor_t
*editor
,
775 return svn_repos_replay2(root
,
776 "" /* the whole tree */,
777 SVN_INVALID_REVNUM
, /* no low water mark */
778 FALSE
/* no text deltas */,
780 NULL
/* no authz func */,
781 NULL
/* no authz baton */,