* Path for renames during restore and renames during share (thanks to Bryan Aldrich...
[vss2svn.git] / script / vss2svn.pl
blob2b758ebf7b12d6bbf8a7ed34e6584fb895eb9d80
#!/usr/bin/perl

use warnings;
use strict;

use Getopt::Long;
use DBI;
use DBD::SQLite2;
use XML::Simple;
use File::Find;
use File::Path;
use Time::CTime;
use Data::Dumper;
use Benchmark ':hireswallclock';

use lib '.';
use Vss2Svn::ActionHandler;
use Vss2Svn::DataCache;
use Vss2Svn::SvnRevHandler;
use Vss2Svn::Dumpfile;

require Encode;

# Script-wide state shared by the subs below (configuration, statement
# handles, captured command output, lookup tables, ...).
our(%gCfg, %gSth, %gErr, %gFh, $gSysOut, %gActionType, %gNameLookup, %gId);

our $VERSION = '0.11.0-nightly.$LastChangedRevision$';
$VERSION =~ s/\$.*?(\d+).*\$/$1/; # get only the number out of the svn revision

# Main program: set up, run the task chain, report, tear down.
&Initialize;
&ConnectDatabase;

&SetupGlobals;
&ShowHeader;

&RunConversion;

&ShowSummary;
&DisconnectDatabase;
###############################################################################
#  RunConversion
#
#  Runs the task named in $gCfg{task}, then the task listed as its 'next',
#  and so on until the current task is 'DONE'. Each executed task name is
#  appended to @{ $gCfg{tasks} }. Dies on an unknown task name, or when the
#  user answers "quit" at the optional prompt.
###############################################################################
sub RunConversion {

    # store a hash of actions to take; allows restarting in case of failed
    # migration
    my %joblist =
        (
         INIT            => {handler => sub{ 1; },
                             next    => 'LOADVSSNAMES'},

         # Load the "real" names associated with the stored "short" names
         LOADVSSNAMES    => {handler => \&LoadVssNames,
                             next    => 'FINDDBFILES'},

         # Add a stub entry into the Physical table for each physical
         # file in the VSS DB
         FINDDBFILES     => {handler => \&FindPhysDbFiles,
                             next    => 'GETPHYSHIST'},

         # Load the history of what happened to the physical files. This
         # only gets us halfway there because we don't know what the real
         # filenames are yet
         GETPHYSHIST     => {handler => \&GetPhysVssHistory,
                             next    => 'MERGEPARENTDATA'},

         # Merge data from parent records into child records where possible
         MERGEPARENTDATA => {handler => \&MergeParentData,
                             next    => 'MERGEMOVEDATA'},

         # Merge data from move actions
         MERGEMOVEDATA   => {handler => \&MergeMoveData,
                             next    => 'REMOVETMPCHECKIN'},

         # Remove temporary check ins
         REMOVETMPCHECKIN => {handler => \&RemoveTemporaryCheckIns,
                              next    => 'MERGEUNPINPIN'},

         # Remove unnecessary Unpin/pin activities
         MERGEUNPINPIN   => {handler => \&MergeUnpinPinData,
                             next    => 'BUILDCOMMENTS'},

         # Rebuild possible missing comments
         BUILDCOMMENTS   => {handler => \&BuildComments,
                             next    => 'BUILDACTIONHIST'},

         # Take the history of physical actions and convert them to VSS
         # file actions
         BUILDACTIONHIST => {handler => \&BuildVssActionHistory,
                             next    => 'IMPORTSVN'},

         # Create a dumpfile or import to repository
         IMPORTSVN       => {handler => \&ImportToSvn,
                             next    => 'DONE'},
        );

    my $info;

    while ($gCfg{task} ne 'DONE') {
        $info = $joblist{ $gCfg{task} }
            or die "FATAL ERROR: Unknown task '$gCfg{task}'\n";

        print "TASK: $gCfg{task}\n";
        push @{ $gCfg{tasks} }, $gCfg{task};

        if ($gCfg{prompt}) {
            print "Press ENTER to continue...\n";
            my $temp = <STDIN>;

            # <STDIN> yields undef at end-of-file; only match real input
            die "Conversion aborted at task '$gCfg{task}'\n"
                if defined $temp && $temp =~ m/^quit/i;
        }

        # The handlers take no arguments (this sub's own @_ is empty)
        $info->{handler}->();

        # NOTE(review): SetSystemTask is defined outside this section; it is
        # presumably what advances $gCfg{task} -- confirm.
        &SetSystemTask( $info->{next} );
    }

} # End RunConversion
###############################################################################
#  LoadVssNames
#
#  Runs the ss "info" command on names.dat, parses the XML left in $gSysOut
#  and stores (offset, full name) pairs in the 'NameLookup' cache.
#  Returns 1 early when the XML holds no NameCacheEntry elements.
###############################################################################
sub LoadVssNames {
    &DoSsCmd("info -e$gCfg{encoding} \"$gCfg{vssdatadir}/names.dat\"");

    my $xs = XML::Simple->new(KeyAttr => [],
                              ForceArray => [qw(NameCacheEntry Entry)],);

    my $xml = $xs->XMLin($gSysOut);

    my $namesref = $xml->{NameCacheEntry} || return 1;

    my $cache = Vss2Svn::DataCache->new('NameLookup')
        || &ThrowError("Could not create cache 'NameLookup'");

    foreach my $entry (@$namesref) {
        my $offset = $entry->{offset};

        # The cache can contain 4 different entries:
        #   id=1: abbreviated DOS 8.3 name for file items
        #   id=2: full name for file items
        #   id=3: abbreviated 27.3 name for file items
        #   id=10: full name for project items
        # Both ids 1 and 3 are not of any interest for us, since they only
        # provide abbreviated names for different scenarios. We are only
        # interested if we have id=2 for file items, or id=10 for project
        # items.
        foreach my $name (@{ $entry->{Entry} }) {
            if ($name->{id} == 10 || $name->{id} == 2) {
                $cache->add($offset, $name->{content});
            }
        }
    }

    $cache->commit();
} # End LoadVssNames
###############################################################################
#  FindPhysDbFiles
#
#  Walks $gCfg{vssdatadir} with File::Find, handing every entry to
#  FoundSsFile together with the 'Physical' cache, then commits the cache.
###############################################################################
sub FindPhysDbFiles {

    my $cache = Vss2Svn::DataCache->new('Physical')
        || &ThrowError("Could not create cache 'Physical'");

    find(sub{ &FoundSsFile($cache) }, $gCfg{vssdatadir});

    $cache->commit();

} # End FindPhysDbFiles
###############################################################################
#  FoundSsFile
#
#  File::Find callback. Adds the upper-cased name of every non-directory
#  entry whose path is <vssdatadir>/<one char>/<eight letters> to $cache.
###############################################################################
sub FoundSsFile {
    my($cache) = @_;

    # File::Find (default settings) has already chdir()'d into the entry's
    # directory, so the entry must be tested through $_; testing
    # $File::Find::name would resolve a relative top directory against the
    # wrong working directory.
    return if (-d $_);

    my $path = $File::Find::name;
    my $vssdatadir = quotemeta($gCfg{vssdatadir});

    if ($path =~ m:^$vssdatadir/./([a-z]{8})$:i) {
        $cache->add(uc($1));
    }

} # End FoundSsFile
###############################################################################
#  GetPhysVssHistory
#
#  For every row of the Physical table, calls GetVssPhysInfo so that the
#  item's versions are added to the 'PhysicalAction' cache, then commits
#  the cache.
###############################################################################
sub GetPhysVssHistory {
    my($sql, $sth, $row, $physname);

    &LoadNameLookup;
    my $cache = Vss2Svn::DataCache->new('PhysicalAction', 1)
        || &ThrowError("Could not create cache 'PhysicalAction'");

    $sql = "SELECT * FROM Physical";
    $sth = $gCfg{dbh}->prepare($sql);
    $sth->execute();

    my $xs = XML::Simple->new(ForceArray => [qw(Version)]);

    # same for every row, so compute it once
    my $physdir = "$gCfg{vssdir}/data";

    while (defined($row = $sth->fetchrow_hashref() )) {
        $physname = $row->{physname};

        # physical files live in a folder named after their first letter
        my $physfolder = substr($physname, 0, 1);

        &GetVssPhysInfo($cache, $physdir, $physfolder, $physname, $xs);
    }

    $cache->commit();

} # End GetPhysVssHistory
###############################################################################
#  FindPhysnameFile
#
#  Locates "$physdir/$physfolder/$physname" while tolerating lower-cased
#  folder and/or file names on disk.
#  Returns ($physdir, $folder, $name) for the first spelling that exists as
#  a plain file, or (undef, undef, undef) when none does.
###############################################################################
sub FindPhysnameFile {
    my($physdir, $physfolder, $physname) = @_;

    my $lcphysname = lc($physname);
    my $lcphysfolder = lc($physfolder);

    # Spellings to probe, in order of preference
    my @candidates =
        (
         [$physfolder,   $physname],    # exactly as given
         [$lcphysfolder, $lcphysname],  # lowercase folder/filename
         [$lcphysfolder, $physname],    # lowercase folder/original filename
         [$physfolder,   $lcphysname],  # haven't seen this one, but try it...
        );

    foreach my $candidate (@candidates) {
        my($folder, $name) = @$candidate;
        return ($physdir, $folder, $name) if -f "$physdir/$folder/$name";
    }

    # no idea what to return...
    return (undef, undef, undef);
} # End FindPhysnameFile
###############################################################################
#  GetVssPhysInfo
#
#  Finds the physical file for $physname, runs the ss "info" command on it,
#  parses the XML left in $gSysOut with $xs and passes the result to
#  GetVssItemVersions. Warns and returns without adding anything when the
#  file cannot be found or its ItemInfo Type is neither 1 nor 2.
###############################################################################
sub GetVssPhysInfo {
    my($cache, $physdir, $physfolder, $physname, $xs) = @_;

    my @filesegment = &FindPhysnameFile($physdir, $physfolder, $physname);

    if (!defined $filesegment[0] || !defined $filesegment[1]
        || !defined $filesegment[2]) {
        # physical file doesn't exist; it must have been destroyed later
        &ThrowWarning("Can't retrieve info from physical file "
                     . "'$physname'; it was either destroyed or corrupted");
        return;
    }

    # Printed only after the check above so that no undefined value is
    # interpolated when the file is missing
    print "physdir: \"$filesegment[0]\", physfolder: \"$filesegment[1]\" physname: \"$filesegment[2]\"\n" if $gCfg{debug};

    &DoSsCmd("info -e$gCfg{encoding} \"$filesegment[0]/$filesegment[1]/$filesegment[2]\"");

    my $xml = $xs->XMLin($gSysOut);
    my $parentphys;

    my $iteminfo = $xml->{ItemInfo};

    # A missing or non-scalar Type is handled like an unknown Type
    my $type = (defined($iteminfo) && defined($iteminfo->{Type})
                && !ref($iteminfo->{Type})) ? $iteminfo->{Type} : undef;

    if (defined($type) && $type == 1) {
        # Type 1: the item 'AAAAAAAA' gets an empty parent, every other one
        # takes its parent from the XML
        $parentphys = (uc($physname) eq 'AAAAAAAA')?
            '' : &GetProjectParent($xml);
    } elsif (defined($type) && $type == 2) {
        $parentphys = undef;
    } else {
        &ThrowWarning("Can't handle file '$physname'; not a project or file\n");
        return;
    }

    &GetVssItemVersions($cache, $physname, $parentphys, $xml);

} # End GetVssPhysInfo
###############################################################################
#  GetProjectParent
#
#  Returns $xml->{ItemInfo}->{ParentPhys}, or undef when that value is
#  missing or false (empty string, 0).
###############################################################################
sub GetProjectParent {
    my($xml) = @_;

    # a missing ItemInfo/ParentPhys is an expected case, not worth a warning
    no warnings 'uninitialized';
    return $xml->{ItemInfo}->{ParentPhys} || undef;

} # End GetProjectParent
###############################################################################
#  GetVssItemVersions
#
#  Adds one PhysicalAction cache row per <Version> element of $xml, plus a
#  secondary LABEL row for versions that carry a version label.
#    $cache      - cache object receiving the rows via add()
#    $physname   - physical name of the file whose XML is being read
#    $parentphys - parent physical name supplied by the caller; reassigned
#                  per version below
#    $xml        - parsed XML (ItemInfo and Version elements are read)
#  Returns 0 when $xml has no Version element.
###############################################################################
sub GetVssItemVersions {
    my($cache, $physname, $parentphys, $xml) = @_;

    return 0 unless defined $xml->{Version};

    my($parentdata, $version, $vernum, $action, $name, $actionid, $actiontype,
       $tphysname, $itemname, $itemtype, $user, $timestamp, $comment,
       $is_binary, $info, $priority, $sortkey, $label);

    my $last_timestamp = 0;

VERSION:
    foreach $version (@{ $xml->{Version} }) {
        $action = $version->{Action};
        $name = $action->{SSName};
        $tphysname = $action->{Physical} || $physname;
        $user = $version->{UserName};

        $itemname = &GetItemName($name);

        $actionid = $action->{ActionId};
        $info = $gActionType{$actionid};

        if (!$info) {
            &ThrowWarning ("'$physname': Unknown action '$actionid'\n");
            next VERSION;
        }

        # check the linear order of timestamps. It could be done better, for
        # example checking the next version and calculate the middle time stamp
        # but regardless of what we do here, the result is erroneous, since it
        # will mess up the labeling.
        $timestamp = $version->{Date};
        if ($timestamp < $last_timestamp) {
            $timestamp = $last_timestamp + 1;
            &ThrowWarning ("'$physname': wrong timestamp at version "
                           . "'$version->{VersionNumber}'; setting timestamp to "
                           . "'$timestamp'");
        }
        $last_timestamp = $timestamp;

        $itemtype = $info->{type};
        $actiontype = $info->{action};

        if ($actiontype eq 'IGNORE') {
            next VERSION;
        }

        # per-version defaults; $info is reused below for the row's info field
        $comment = undef;
        $is_binary = 0;
        $info = undef;
        $parentdata = 0;
        $priority = 5;
        $label = undef;

        if ($version->{Comment} && !ref($version->{Comment})) {
            $comment = $version->{Comment};
        }

        # In case of Label the itemtype is the type of the item currently
        # under investigation
        if ($actiontype eq 'LABEL') {
            my $iteminfo = $xml->{ItemInfo};
            $itemtype = $iteminfo->{Type};
        }

        # we can have label actions and labels attached to versions
        if (defined $action->{Label} && !ref($action->{Label})) {
            $label = $action->{Label};

            # append the label comment to a possible version comment
            if ($action->{LabelComment} && !ref($action->{LabelComment})) {
                if (defined $comment) {
                    print "Merging LabelComment and Comment for "
                        . "'$tphysname;$version->{VersionNumber}'\n"; # if $gCfg{verbose};
                    $comment .= "\n";
                }

                $comment .= $action->{LabelComment};
            }
        }

        if (defined($comment)) {
            $comment =~ s/^\s+//s;
            $comment =~ s/\s+$//s;
        }

        if ($itemtype == 1 && uc($physname) eq 'AAAAAAAA'
            && ref($tphysname)) {

            # 'AAAAAAAA' with a non-scalar Physical value: use the file's own
            # physname, an empty item name, and leave $parentphys untouched
            $tphysname = $physname;
            $itemname = '';
        } elsif ($physname ne $tphysname) {
            # If version's physical name and file's physical name are different,
            # this is a project describing an action on a child item. Most of
            # the time, this very same data will be in the child's physical
            # file and with more detail (such as check-in comment).
            #
            # However, in some cases (such as renames, or when the child's
            # physical file was later purged), this is the only place we'll
            # have the data; also, sometimes the child record doesn't even
            # have enough information about itself (such as which project it
            # was created in and which project(s) it's shared in).
            #
            # So, for a parent record describing a child action, we'll set a
            # flag, then combine them in the next phase.

            $parentdata = 1;

            # OK, since we're describing an action in the child, the parent is
            # actually this (project) item

            $parentphys = $physname;
        } else {
            $parentphys = undef;
        }

        if ($itemtype == 1) {
            $itemname .= '/';
        } elsif (defined($xml->{ItemInfo}) &&
                 defined($xml->{ItemInfo}->{Binary}) &&
                 $xml->{ItemInfo}->{Binary}) {

            $is_binary = 1;
        }

        if ($actiontype eq 'RENAME') {
            # if a rename, we store the new name in the action's 'info' field

            $info = &GetItemName($action->{NewSSName});

            if ($itemtype == 1) {
                $info .= '/';
            }
        } elsif ($actiontype eq 'BRANCH') {
            $info = $action->{Parent};
        }

        $vernum = ($parentdata)? undef : $version->{VersionNumber};

        # since there is no corresponding client action for PIN, we need to
        # enter the concrete version number here manually
        # In a share action the pinnedToVersion attribute can also be set
        $vernum = $action->{PinnedToVersion} if (defined $action->{PinnedToVersion});

        # for unpin actions also remember the unpinned version
        $info = $action->{UnpinnedFromVersion} if (defined $action->{UnpinnedFromVersion});

        $priority -= 4 if $actiontype eq 'ADD'; # Adds are always first
        $priority -= 3 if $actiontype eq 'SHARE';
        $priority -= 3 if $actiontype eq 'PIN';
        $priority -= 2 if $actiontype eq 'BRANCH';

        # store the reversed physname as a sortkey; a bit wasteful but makes
        # debugging easier for the time being...
        $sortkey = reverse($tphysname);

        $cache->add($tphysname, $vernum, $parentphys, $actiontype, $itemname,
                    $itemtype, $timestamp, $user, $is_binary, $info, $priority,
                    $sortkey, $parentdata, $label, $comment);

        # Handle version labels as a secondary action for the same version
        # version labels and label action use the same location to store the
        # label. Therefore it is not possible to assign a version label to
        # version where the actiontype was LABEL. But ssphys will report the
        # same label twice. Therefore filter the Labeling versions here.
        if (defined $version->{Label} && !ref($version->{Label})
            && $actiontype ne 'LABEL') {
            my ($labelComment);

            if ($version->{LabelComment} && !ref($version->{LabelComment})) {
                $labelComment = $version->{LabelComment};
            }
            else {
                $labelComment = "assigned label '$version->{Label}' to version $vernum of physical file '$tphysname'";
            }
            $cache->add($tphysname, $vernum, $parentphys, 'LABEL', $itemname,
                        $itemtype, $timestamp, $user, $is_binary, $info, 5,
                        $sortkey, $parentdata, $version->{Label}, $labelComment);
        }

    }

} # End GetVssItemVersions
###############################################################################
#  GetItemName
#
#  Returns the name held in $nameelem->{content}, unless %gNameLookup has an
#  entry for $nameelem->{offset}, in which case that cached name is returned.
###############################################################################
sub GetItemName {
    my($nameelem) = @_;

    my $itemname = $nameelem->{content};

    if (defined($nameelem->{offset})) {
        # see if we have a better name in the cache
        my $cachename = $gNameLookup{ $nameelem->{offset} };

        if (defined($cachename)) {
            print "Changing name of '$itemname' to '$cachename' from "
                . "name cache\n" if $gCfg{debug};
            $itemname = $cachename;
        }
    }

    return $itemname;

} # End GetItemName
###############################################################################
#  LoadNameLookup
#
#  Fills %gNameLookup (offset => name) from the NameLookup table, decoding
#  every stored name from UTF-8.
###############################################################################
sub LoadNameLookup {
    my($sth, $row);

    $sth = $gCfg{dbh}->prepare('SELECT offset, name FROM NameLookup');
    $sth->execute();

    while(defined($row = $sth->fetchrow_hashref() )) {
        $gNameLookup{ $row->{offset} } = Encode::decode_utf8( $row->{name} );
    }
} # End LoadNameLookup
###############################################################################
#  MergeParentData
###############################################################################
sub MergeParentData {
    # VSS has a funny way of not placing enough information to rebuild history
    # in one data file; for example, renames are stored in the parent project
    # rather than in that item's data file. Also, it's sometimes impossible to
    # tell from a child record which was eventually shared to multiple folders,
    # which folder it was originally created in.

    # So, at this stage we look for any parent records which described child
    # actions, then update those records with data from the child objects. We
    # then delete the separate child objects to avoid duplication.

    my($sth, $rows, $row);
    $sth = $gCfg{dbh}->prepare('SELECT * FROM PhysicalAction '
                               . 'WHERE parentdata > 0');
    $sth->execute();

    # need to pull in all recs at once, since we'll be updating/deleting data
    $rows = $sth->fetchall_arrayref( {} );

    my($childrecs, $child, $id);
    my @delchild = ();

    foreach $row (@$rows) {
        $childrecs = &GetChildRecs($row);

        if (scalar @$childrecs > 1) {
            &ThrowWarning("Multiple child recs for parent rec "
                          . "'$row->{action_id}'");
        }

        # Called for its side effect only: it stores the computed depth in
        # the row's parentdata column (via UpdateDepth)
        &GetPathDepth($row);

        foreach $child (@$childrecs) {
            &UpdateParentRec($row, $child);
            push(@delchild, $child->{action_id});
        }
    }

    # children are removed only after every parent has been processed
    foreach $id (@delchild) {
        &DeleteChildRec($id);
    }

    return 1;

} # End MergeParentData
###############################################################################
#  GetPathDepth
#
#  Returns the depth of a parent-data row: one more than the largest depth
#  among the matching parent-data rows of its parentphys (1 when there are
#  none). The result is written to the row's parentdata column, so a
#  parentdata value above 1 means "already computed".
###############################################################################
sub GetPathDepth {
    my($row) = @_;

    # If we've already worked out the depth of this row, return it immediately
    if ($row->{parentdata} > 1) {
        return $row->{parentdata};
    }

    my($maxParentDepth, $depth, $parents, $parent);

    # Get the row(s) corresponding to the parent(s) of this row, and work out
    # the maximum depth

    my $sql = <<"EOSQL";
SELECT
    *
FROM
    PhysicalAction
WHERE
    parentdata > 0
    AND physname = ?
    AND actiontype = ?
    AND timestamp <= ?
EOSQL

    my $sth = $gCfg{dbh}->prepare($sql);
    $sth->execute( @{ $row }{qw(parentphys actiontype timestamp)} );

    $parents =  $sth->fetchall_arrayref( {} );
    $maxParentDepth = 0;
    foreach $parent (@$parents) {
        $depth = &GetPathDepth($parent);
        $maxParentDepth = ($depth > $maxParentDepth) ? $depth : $maxParentDepth;
    }

    # Depth of this path becomes one more than the maximum parent depth
    $depth = $maxParentDepth + 1;

    # Update the row for this record
    &UpdateDepth($row, $depth);

    return $depth;
} # End GetPathDepth
###############################################################################
#  UpdateDepth
#
#  Stores $depth in the parentdata column of the PhysicalAction row whose
#  action_id equals $row->{action_id}.
###############################################################################
sub UpdateDepth {
    my($row, $depth) = @_;

    my $sql = <<"EOSQL";
UPDATE
    PhysicalAction
SET
    parentdata = ?
WHERE
    action_id = ?
EOSQL

    my $sth = $gCfg{dbh}->prepare($sql);
    $sth->execute( $depth, $row->{action_id} );

} # End UpdateDepth
###############################################################################
#  GetChildRecs
#
#  Returns an array ref of PhysicalAction rows (as hash refs) that share
#  physname, actiontype and author with $parentrec, closest timestamp first.
#    $parentdata - optional; false/omitted selects rows with parentdata = 0,
#                  any non-zero value selects rows with parentdata >= 1
###############################################################################
sub GetChildRecs {
    my($parentrec, $parentdata) = @_;

    # Here we need to find any child rows which give us additional info on the
    # parent rows. There's no definitive way to find matching rows, but joining
    # on physname, actiontype, timestamp, and author gets us close. The problem
    # is that the "two" actions may not have happened in the exact same second,
    # so we need to also look for any that are some time apart and hope
    # we don't get the wrong row.

    # normalise the flag to 0 or 1 so it can be compared with MIN(parentdata, 1)
    $parentdata = 0 unless defined $parentdata;
    $parentdata = 1 if $parentdata != 0;

    my $sql = <<"EOSQL";
SELECT
    *
FROM
    PhysicalAction
WHERE
    MIN(parentdata, 1) = ?
    AND physname = ?
    AND actiontype = ?
    AND author = ?
ORDER BY
    ABS(? - timestamp)
EOSQL

    my $sth = $gCfg{dbh}->prepare($sql);
    $sth->execute( $parentdata, @{ $parentrec }{qw(physname actiontype author timestamp)} );

    return $sth->fetchall_arrayref( {} );
} # End GetChildRecs
###############################################################################
#  UpdateParentRec
#
#  Copies version and is_binary from $child into the PhysicalAction row of
#  $row, and sets its comment to the parent comment followed by the child
#  comment (newline-separated, leading/trailing newlines removed).
###############################################################################
sub UpdateParentRec {
    my($row, $child) = @_;

    # The child record has the "correct" version number (relative to the child
    # and not the parent), as well as the comment info and whether the file is
    # binary

    my $comment;

    {
        # either comment may be undefined; treat it as empty
        no warnings 'uninitialized';
        $comment = "$row->{comment}\n$child->{comment}";
        $comment =~ s/^\n+//;
        $comment =~ s/\n+$//;
    }

    my $sql = <<"EOSQL";
UPDATE
    PhysicalAction
SET
    version = ?,
    is_binary = ?,
    comment = ?
WHERE
    action_id = ?
EOSQL

    my $sth = $gCfg{dbh}->prepare($sql);
    $sth->execute( $child->{version}, $child->{is_binary}, $comment,
                   $row->{action_id} );

} # End UpdateParentRec
###############################################################################
#  MergeMoveData
###############################################################################
sub MergeMoveData {
    # Similar to the MergeParentData, the MergeMove Data combines two the src
    # and target move actions into one move action. Since both items are parents
    # the MergeParentData function can not deal with this specific problem

    my $dbh = $gCfg{dbh};
    my($sth, $rows, $row);

    # Prepared once and reused for both kinds of MOVE rewrite below
    my $movesql = <<"EOSQL";
UPDATE
    PhysicalAction
SET
    actiontype = 'MOVE',
    parentphys = ?,
    info = ?
WHERE
    action_id = ?
EOSQL
    my $tomove = $dbh->prepare($movesql);

    my $restoresql = <<"EOSQL";
UPDATE
    PhysicalAction
SET
    actiontype = 'RESTORE'
WHERE
    action_id = ?
EOSQL
    my $torestore = $dbh->prepare($restoresql);

    $sth = $dbh->prepare('SELECT * FROM PhysicalAction '
                         . "WHERE actiontype = 'MOVE_FROM'");
    $sth->execute();

    # need to pull in all recs at once, since we'll be updating/deleting data
    $rows = $sth->fetchall_arrayref( {} );

    my $childrecs;

    foreach $row (@$rows) {
        # look for the counterpart: same item, but recorded as MOVE_TO
        $row->{actiontype} = 'MOVE_TO';
        $childrecs = &GetChildRecs($row, 1);

        my $source = undef;
        my $target = $row->{parentphys};

        # first MOVE_TO with the same timestamp but a different parent
        my $chosenChildRecord;

        foreach my $childRecord (@$childrecs) {
            if (!(defined $chosenChildRecord)
                && $childRecord->{timestamp} == $row->{timestamp}
                && !($childRecord->{parentphys} eq $row->{parentphys})) {

                $chosenChildRecord = $childRecord;
            }
        }

        if (defined $chosenChildRecord) {
            $source = $chosenChildRecord->{parentphys};
            &DeleteChildRec($chosenChildRecord->{action_id});

            $tomove->execute( $target, $source, $row->{action_id});
        } else {
            #the record did not have a matching MOVE_TO. call it a RESTORE
            print "Changing $row->{action_id} to a RESTORE\n";

            $torestore->execute( $row->{action_id});
        }
    }

    # change all remaining MOVE_TO records into MOVE records and swap the src and target
    $sth = $dbh->prepare('SELECT * FROM PhysicalAction '
                         . "WHERE actiontype = 'MOVE_TO'");
    $sth->execute();
    $rows = $sth->fetchall_arrayref( {} );

    foreach $row (@$rows) {
        $tomove->execute($row->{info}, $row->{parentphys}, $row->{action_id});
    }

    $sth = $dbh->prepare("SELECT * FROM PhysicalAction WHERE actiontype = 'RESTORE'");
    $sth->execute();
    $rows = $sth->fetchall_arrayref( {} );

    my $older = $dbh->prepare('SELECT * FROM PhysicalAction WHERE physname = ? '
                              . 'AND timestamp < ? ORDER BY timestamp DESC');
    my $setinfo = $dbh->prepare('UPDATE PhysicalAction SET info = ? WHERE action_id = ?');

    foreach $row (@$rows) {
        #calculate last name of this file. Store it in $info

        $older->execute( $row->{physname}, $row->{timestamp} );

        my $myOlderRecords = $older->fetchall_arrayref( {} );

        if (scalar @$myOlderRecords > 0) {
            # the newest earlier record carries the item's last known name
            $setinfo->execute($myOlderRecords->[0]->{itemname}, $row->{action_id});
        }
    }

    return 1;

} # End MergeMoveData
###############################################################################
#  RemoveTemporaryCheckIns
#  remove temporary checkins that were created to detect MS VSS capabilities
#
#  Every file (itemtype 2) whose ADD action carries the Visual Studio .NET
#  probe comment has ALL of its PhysicalAction rows deleted.
###############################################################################
sub RemoveTemporaryCheckIns {
    my($sth, $rows, $row);
    $sth = $gCfg{dbh}->prepare('SELECT * FROM PhysicalAction '
                               . 'WHERE comment = ?'
                               . " AND actiontype = 'ADD'"
                               . ' AND itemtype = 2');      # only delete files, not projects
    $sth->execute('Temporary file created by Visual Studio .NET to detect Microsoft Visual SourceSafe capabilities.');

    # need to pull in all recs at once, since we'll be updating/deleting data
    $rows = $sth->fetchall_arrayref( {} );

    my $byphysname = $gCfg{dbh}->prepare('SELECT * FROM PhysicalAction WHERE physname = ?');

    foreach $row (@$rows) {
        my $physname = $row->{physname};

        $byphysname->execute( $physname );

        # need to pull in all recs at once, since we'll be updating/deleting data
        my $recs = $byphysname->fetchall_arrayref( {} );

        foreach my $rec (@$recs) {
            print "Remove action_id $rec->{action_id}, $rec->{physname}, $rec->{actiontype}, $rec->{itemname}\n";
            print "       $rec->{comment}\n" if defined ($rec->{comment});
            &DeleteChildRec($rec->{action_id});
        }
    }

    return 1;
} # End RemoveTemporaryCheckIns
###############################################################################
#  MergeUnpinPinData
#
#  Deletes UNPIN rows (actiontype 'PIN' without a version) that are followed,
#  through an unbroken run of PIN rows in conversion order, by a PIN row for
#  the same physname and parentphys. The UNPIN's info value, when present,
#  is copied to the matching PIN row first.
###############################################################################
sub MergeUnpinPinData {
    my($sth, $rows, $row, $r, $next_row);
    my $sql = 'SELECT * FROM PhysicalAction ORDER BY timestamp ASC, '
            . 'itemtype ASC, priority ASC, parentdata ASC, sortkey ASC, action_id ASC';
    $sth = $gCfg{dbh}->prepare($sql);
    $sth->execute();

    # need to pull in all recs at once, since we'll be updating/deleting data
    $rows = $sth->fetchall_arrayref( {} );

    # nothing to pair up without at least two rows
    return if (!$rows || @$rows < 2);

    my @delchild = ();

    for $r (0 .. $#$rows - 1) {
        $row = $rows->[$r];

        next unless $row->{actiontype} eq 'PIN' && !defined $row->{version}; # UNPIN

        # Search for a matching pin action. The scan runs through the LAST
        # row; stopping one row early would miss a PIN stored there.
        for (my $u = $r + 1; $u <= $#$rows; $u++) {
            $next_row = $rows->[$u];

            if (   $next_row->{actiontype} eq 'PIN'
                && defined $next_row->{version}   # PIN
                && $row->{physname} eq $next_row->{physname}
                && $row->{parentphys} eq $next_row->{parentphys}
#                && $next_row->{timestamp} - $row->{timestamp} < 60
#                && $next_row->{action_id} - $row->{action_id} == 1
                ) {
                print "found UNPIN/PIN combination for $row->{parentphys}/$row->{physname}"
                    . "($row->{itemname}) @ ID $row->{action_id}\n"  if $gCfg{verbose};

                # if we have a unpinFromVersion number copy this one to the PIN handler
                if (defined $row->{info})
                {
                    my $sql2 = "UPDATE PhysicalAction SET info = ? WHERE action_id = ?";
                    my $sth2 = $gCfg{dbh}->prepare($sql2);
                    $sth2->execute($row->{info}, $next_row->{action_id});
                }

                push (@delchild, $row->{action_id});
            }

            # if the next action is anything else than a pin stop the search
            last if ($next_row->{actiontype} ne 'PIN' );
        }
    }

    my $id;
    foreach $id (@delchild) {
        &DeleteChildRec($id);
    }

    return 1;

} # End MergeUnpinPinData
###############################################################################
#  BuildComments
#
#  For every file PIN row that has no comment of its own, builds one from
#  the check-in comments of the versions lying between the unpinned and the
#  pinned version, one comment per line. All values are passed to the
#  database as bound parameters.
###############################################################################
sub BuildComments {
    my($sth, $rows, $row);
    my $sql = "SELECT * FROM PhysicalAction WHERE actiontype='PIN' AND itemtype=2 ORDER BY physname ASC";
    $sth = $gCfg{dbh}->prepare($sql);
    $sth->execute();

    # need to pull in all recs at once, since we'll be updating/deleting data
    $rows = $sth->fetchall_arrayref( {} );

    # common part of the three version queries below
    my $select = 'SELECT * FROM PhysicalAction'
        . ' WHERE physname = ?'
        . ' AND parentphys ISNULL'
        . ' AND itemtype=2';

    my $update = $gCfg{dbh}->prepare('UPDATE PhysicalAction SET comment = ? WHERE action_id = ?');

    foreach $row (@$rows) {

        # technically we have the following situations:
        # PIN only: we come from the younger version and PIN to a older one: the
        #     younger version is the current version of the timestamp of the PIN action
        # UNPIN only: we unpin from a older version to the current version, the
        #     timestamp of the action will again define the younger version
        # UNPIN/PIN with known UNPIN version: we merge from UNPIN version to PIN version
        # UNPIN/PIN with unknown UNPIN version: we are lost in this case and we
        #     can not distinguish this case from the PIN only case.

        my $sql2;
        my @bind;
        my $prefix;

        my $pinned   = defined $row->{version};   # PIN version number
        my $unpinned = defined $row->{info};      # UNPIN version number

        if ($pinned && !$unpinned) {
            # PIN only
            $sql2 = $select
                . ' AND version >= ?'
                . ' AND timestamp <= ?'
                . ' ORDER BY version DESC';
            @bind = ($row->{physname}, $row->{version}, $row->{timestamp});
            $prefix = "reverted changes for: \n";
        }
        elsif (!$pinned && $unpinned) {
            # UNPIN only
            $sql2 = $select
                . ' AND timestamp <= ?'
                . ' AND version > ?'
                . ' ORDER BY version ASC';
            @bind = ($row->{physname}, $row->{timestamp}, $row->{info});
        }
        elsif ($pinned && $unpinned) {
            # UNPIN/PIN
            $sql2 = $select
                . ' AND version > ?'
                . ' AND version <= ?'
                . ' ORDER BY version ';
            @bind = ($row->{physname}, $row->{info}, $row->{version});

            if ($row->{info} > $row->{version}) {
                $sql2 .= "DESC";
                $prefix = "reverted changes for: \n";
            }
            else {
                $sql2 .= "ASC";
            }
        }

        next if !defined $sql2;

        my $sth2 = $gCfg{dbh}->prepare($sql2);
        $sth2->execute(@bind);

        my $comments = $sth2->fetchall_arrayref( {} );
        print "merging comments for $row->{physname}" if $gCfg{verbose};
        print " from $row->{info}" if ($gCfg{verbose} && defined $row->{info});
        print " to $row->{version}" if ($gCfg{verbose} && defined $row->{version});
        print "\n" if $gCfg{verbose};

        # Collect the non-empty comments first and join them afterwards, so
        # that consecutive comments stay separated by a newline
        my @parts;
        foreach my $c (@$comments) {
            next unless defined $c->{comment};
            print "  $c->{version}: $c->{comment}\n" if $gCfg{verbose};
            push @parts, $c->{comment} if length $c->{comment};
        }

        my $comment;
        if (@parts) {
            $comment = join("\n", @parts);
            $comment =~ s/^\n+//;
            $comment =~ s/\n+$//;
        }

        if (defined $comment && !defined $row->{comment}) {
            $comment = $prefix . $comment if defined $prefix;
            $update->execute($comment, $row->{action_id});
        }
    }

    return 1;

} # End BuildComments
###############################################################################
#  DeleteChildRec
#
#  Deletes the PhysicalAction row whose action_id equals $id.
###############################################################################
sub DeleteChildRec {
    my($id) = @_;

    my $sql = "DELETE FROM PhysicalAction WHERE action_id = ?";

    my $sth = $gCfg{dbh}->prepare($sql);
    $sth->execute($id);
} # End DeleteChildRec
1047 ###############################################################################
1048 # BuildVssActionHistory
1049 ###############################################################################
# BuildVssActionHistory
#
# Reads every PhysicalAction row in chronological order, asks a
# Vss2Svn::ActionHandler to resolve it to item paths, and records the result
# in the 'VssAction', 'SvnRevisionVssAction' and 'Label' data caches. Rows the
# handler cannot process are reported through ThrowWarning and skipped.
sub BuildVssActionHistory {
    my $actioncache = Vss2Svn::DataCache->new('VssAction', 1)
        || ThrowError("Could not create cache 'VssAction'");

    my $revactioncache = Vss2Svn::DataCache->new('SvnRevisionVssAction')
        || ThrowError("Could not create cache 'SvnRevisionVssAction'");

    my $labelcache = Vss2Svn::DataCache->new('Label')
        || ThrowError("Could not create cache 'Label'");

    # This will keep track of the current SVN revision, and increment it when
    # the author or comment changes, the timestamps span more than an hour
    # (by default), or the same physical file is affected twice
    my $svnrevs = Vss2Svn::SvnRevHandler->new()
        || ThrowError("Could not create SVN revision handler");
    $svnrevs->{verbose} = $gCfg{verbose};

    my $query = $gCfg{dbh}->prepare(
          'SELECT * FROM PhysicalAction ORDER BY timestamp ASC, '
        . 'itemtype ASC, priority ASC, parentdata ASC, sortkey ASC, action_id ASC');
    $query->execute();

    # Text columns that are decoded from UTF-8 before further processing.
    my @textfields = qw(itemname info comment author label);

ROW:
    while (my $row = $query->fetchrow_hashref()) {
        my $action = $row->{actiontype};

        my $handler = Vss2Svn::ActionHandler->new($row);
        $handler->{verbose}  = $gCfg{verbose};
        $handler->{trunkdir} = $gCfg{trunkdir};

        my $physinfo = $handler->physinfo();

        if (defined($physinfo) && $physinfo->{type} != $row->{itemtype}) {
            ThrowWarning("Inconsistent item type for '$row->{physname}'; "
                       . "'$row->{itemtype}' unexpected");
            next ROW;
        }

        foreach my $field (@textfields) {
            $row->{$field} = Encode::decode_utf8( $row->{$field} );
        }

        # The handler's job is to keep track of physical-to-real name mappings
        # and return the full item paths corresponding to the physical item. In
        # case of a rename, it will return the old name, so we then do another
        # lookup on the new name.

        # Commits and renames can apply to multiple items if that item is
        # shared; since SVN has no notion of such shares, we keep track of
        # those ourselves and replicate the functionality using multiple
        # actions.
        unless ($handler->handle($action)) {
            ThrowWarning($handler->{errmsg}) if $handler->{errmsg};
            next ROW;
        }

        my $itempaths = $handler->{itempaths};

        # In cases of a corrupted share source, the handler may change the
        # action from 'SHARE' to 'ADD'
        $row->{actiontype} = $handler->{action};

        unless (defined $itempaths) {
            # Couldn't determine name of item
            ThrowWarning($handler->{errmsg}) if $handler->{errmsg};

            # If we were adding or modifying a file, commit it to lost+found;
            # otherwise give up on it
            my $salvageable = $row->{itemtype} == 2
                && (   $row->{actiontype} eq 'ADD'
                    || $row->{actiontype} eq 'COMMIT');

            next ROW unless $salvageable;

            $itempaths = [undef];
        }

        # The next revision number is checked only after every path that can
        # skip to the next row prematurely; otherwise we would get an empty
        # revision.
        $svnrevs->check($row);

        # May contain add'l info for the action depending on type:
        # RENAME: the new name (without path)
        # SHARE: the source path which was shared
        # MOVE: the old path
        # PIN: the path of the version that was pinned
        # LABEL: the name of the label
        $row->{info} = $handler->{info};

        # The version may have changed
        $row->{version} = $handler->{version}
            if defined $handler->{version};

        $row->{itempaths} = join("\t", @$itempaths);

        $actioncache->add(@$row{ qw(parentphys physname version actiontype itempaths
                                    itemtype is_binary info) });
        $revactioncache->add( $svnrevs->{revnum}, $actioncache->{pkey} );

        if (defined $row->{label}) {
            $labelcache->add(@$row{ qw(physname version label itempaths) });
        }
    }

    $actioncache->commit();
    $svnrevs->commit();
    $revactioncache->commit();
    $labelcache->commit();

}  #  End BuildVssActionHistory
1171 ###############################################################################
1172 # ImportToSvn
1173 ###############################################################################
# ImportToSvn
#
# Entry point for the import step. Only dumpfile creation is supported for
# now, so this simply delegates to CreateSvnDumpfile. Direct import into SVN
# could be added by making the CreateSvnDumpfile logic more generic and
# swapping the Vss2Svn::Dumpfile object for one that performs imports.
sub ImportToSvn {
    CreateSvnDumpfile();
}  #  End ImportToSvn
1183 ###############################################################################
1184 # CreateSvnDumpfile
1185 ###############################################################################
# CreateSvnDumpfile
#
# Writes the Subversion dumpfile named by $gCfg{dumpfile}. For each row of
# SvnRevision (in revision_id order) it begins a revision on a
# Vss2Svn::Dumpfile object, fetches the VssAction rows linked to that revision
# through SvnRevisionVssAction, exports the needed physical file version for
# file items, and hands each action to the dumpfile. Errors collected by the
# dumpfile object are reported through ThrowWarning at the end.
sub CreateSvnDumpfile {
    my $file = $gCfg{dumpfile};

    # Three-argument open: the filename is never interpreted as a mode.
    open my $fh, '>', $file
        or &ThrowError("Could not create dumpfile '$file': $!");

    my($sql, $sth, $action_sth, $row, $revision, $actions, $action, $physname, $itemtype);

    my %exported = ();

    $sql = 'SELECT * FROM SvnRevision ORDER BY revision_id ASC';

    $sth = $gCfg{dbh}->prepare($sql);
    $sth->execute();

    $sql = <<"EOSQL";
SELECT * FROM
    VssAction
WHERE action_id IN
    (SELECT action_id FROM SvnRevisionVssAction WHERE revision_id = ?)
ORDER BY action_id
EOSQL

    $action_sth = $gCfg{dbh}->prepare($sql);

    # Declared separately from the conditional assignment: "my $x = ... if
    # COND" has undefined behaviour according to perlsyn.
    my ($autoprops, $labelmapper);
    $autoprops = Vss2Svn::Dumpfile::AutoProps->new($gCfg{auto_props})
        if $gCfg{auto_props};
    $labelmapper = Vss2Svn::Dumpfile::LabelMapper->new($gCfg{label_mapper})
        if $gCfg{label_mapper};

    my $dumpfile = Vss2Svn::Dumpfile->new($fh, $autoprops, $gCfg{md5}, $labelmapper);
    Vss2Svn::Dumpfile->SetTempDir($gCfg{tempdir});

REVISION:
    while(defined($row = $sth->fetchrow_hashref() )) {

        my $t0 = Benchmark->new;

        $revision = $row->{revision_id};
        $dumpfile->begin_revision($row);

#        next REVISION if $revision == 0;

        $action_sth->execute($revision);
        $actions = $action_sth->fetchall_arrayref( {} );

ACTION:
        foreach $action (@$actions) {
            $physname = $action->{physname};
            $itemtype = $action->{itemtype};

            my $version = $action->{version};
            if (   !defined $version
                && (   $action->{action} eq 'ADD'
                    || $action->{action} eq 'COMMIT')) {
                &ThrowWarning("'$physname': no version specified for retrieval");

                # fall through and try with version 1.
                $version = 1;
            }

            # Only file items (itemtype 2) with a known version are exported.
            if ($itemtype == 2 && defined $version) {
                $exported{$physname} = &ExportVssPhysFile($physname, $version);
            } else {
                $exported{$physname} = undef;
            }

            # do_action needs to know the revision_id, so paste it on
            $action->{revision_id} = $revision;
            $dumpfile->do_action($action, $exported{$physname});
        }

        print "revision $revision: ", timestr(timediff(Benchmark->new, $t0)),"\n"
            if $gCfg{timing};
    }

    # Report everything the dumpfile object collected while writing.
    foreach my $err (@{ $dumpfile->{errors} }) {
        &ThrowWarning($err);
    }

    $dumpfile->finish();
    close $fh;

}  #  End CreateSvnDumpfile
1272 ###############################################################################
1273 # ExportVssPhysFile
1274 ###############################################################################
# ExportVssPhysFile($physname, $version)
#
# Makes sure revision $version of the physical VSS file $physname has been
# extracted beneath $gCfg{vssdata}/<first two characters of $physname>, by
# running "ssphys get" when "<exportdir>/<physname>.<version>" does not exist
# yet.
#
# Returns the export directory, or undef (after a warning) when the physical
# file cannot be located.
sub ExportVssPhysFile {
    my($physname, $version) = @_;

    # Capture the two- and one-character prefixes explicitly, so that a failed
    # match can never leave us working with stale $1/$2 from an earlier regex.
    my ($prefix2, $prefix1);
    ($prefix2, $prefix1) = $physname =~ m/^((.).)/ if defined $physname;

    if (!defined $prefix2) {
        my $shown = defined $physname ? $physname : '';
        &ThrowWarning("Can't retrieve revisions from physical file "
                    . "'$shown'; it was either destroyed or corrupted");
        return undef;
    }

    my $exportdir = "$gCfg{vssdata}/$prefix2";
    my @filesegment = &FindPhysnameFile("$gCfg{vssdir}/data", $prefix1, $physname);

    if (!defined $filesegment[0] || !defined $filesegment[1] || !defined $filesegment[2]) {
        # physical file doesn't exist; it must have been destroyed later
        &ThrowWarning("Can't retrieve revisions from physical file "
                    . "'$physname'; it was either destroyed or corrupted");
        return undef;
    }

    my $physpath = "$filesegment[0]/$filesegment[1]/$filesegment[2]";

    if (! -f $physpath) {
        # physical file doesn't exist; it must have been destroyed later
        &ThrowWarning("Can't retrieve revisions from physical file "
                    . "'$physname'; it was either destroyed or corrupted");
        return undef;
    }

    mkpath($exportdir) if ! -e $exportdir;

    # MergeParentData normally will merge two corresponding item and parent
    # actions. But if the actions are further apart than the maximum allowed
    # timespan, we will end up with an undefined version in an ADD action here.
    # As a hot fix, we define the version to 1, which will also revert to the
    # alpha 1 version behavior.
    if (! defined $version) {
        &ThrowWarning("'$physname': no version specified for retrieval");

        # fall through and try with version 1.
        $version = 1;
    }

    if (! -e "$exportdir/$physname.$version" ) {
        # Both paths are quoted so that directories containing spaces survive
        # the shell.
        &DoSsCmd("get -b -v$version --force-overwrite -e$gCfg{encoding} "
               . "\"$physpath\" \"$exportdir/$physname\"");
    }

    return $exportdir;
}  #  End ExportVssPhysFile
1319 ###############################################################################
1320 # ShowHeader
1321 ###############################################################################
# ShowHeader
#
# Prints the start-up banner with the effective configuration and the
# detected ssphys version, then aborts through ThrowError when that version
# is older than 0.22.
sub ShowHeader {
    my $info;
    if ($gCfg{task} eq 'INIT') {
        $info = 'BEGINNING CONVERSION...';
    }
    else {
        $info = "RESUMING CONVERSION FROM TASK '$gCfg{task}' AT STEP $gCfg{step}...";
    }

    my $starttime = ctime($^T);
    my $ssversion = GetSsVersion();

    # Optional settings are shown as empty strings when unset.
    my $auto_props   = defined $gCfg{auto_props}   ? $gCfg{auto_props}   : "";
    my $label_mapper = defined $gCfg{label_mapper} ? $gCfg{label_mapper} : "";

    print <<"EOTXT";
======== VSS2SVN ========
$info
Start Time : $starttime

VSS Dir : $gCfg{vssdir}
Temp Dir : $gCfg{tempdir}
Dumpfile : $gCfg{dumpfile}
VSS Encoding : $gCfg{encoding}
Auto Props : $auto_props
trunk dir : $gCfg{trunkdir}
md5 : $gCfg{md5}
label dir : $gCfg{labeldir}
label mapper : $label_mapper

VSS2SVN ver : $VERSION
SSPHYS exe : $gCfg{ssphys}
SSPHYS ver : $ssversion
XML Parser : $gCfg{xmlParser}

EOTXT

    # we need at least ssphys 0.22
    my @version = split '\.', $ssversion;
    my $too_old = $version[0] == 0 && $version[1] < 22;

    ThrowError("The conversion needs at least ssphys version 0.22")
        if $too_old;

}  #  End ShowHeader
1361 ###############################################################################
1362 # ShowSummary
1363 ###############################################################################
# ShowSummary
#
# Prints the end-of-run report: the collected errors grouped by task (when
# there are any, or when the run was resumed), the closing instructions for
# loading the dumpfile, and timing plus the counters returned by GetStats.
sub ShowSummary {

    my $have_errors = keys(%gErr) || $gCfg{resume};

    if ($have_errors) {
        print <<"EOTXT";
=============================================================================
ERROR SUMMARY

EOTXT

        if ($gCfg{resume}) {
            print <<"EOTXT";
**NOTICE** Because this run was resumed from a previous run, this may be only
a partial list; other errors may have been reported during previous run.

EOTXT
        }

        # Tasks are listed in the order in which they first reported an error.
        for my $task (@{ $gCfg{errortasks} }) {
            print "\n$task:\n ";
            print join("\n ", @{ $gErr{$task} }), "\n";
        }
    }

    print <<"EOTXT";
=============================================================================
END OF CONVERSION

The VSS to SVN conversion is complete. You should now use the "svnadmin load"
command to load the generated dumpfile '$gCfg{dumpfile}'. The "svnadmin"
utility is provided as part of the Subversion command-line toolset; use a
command such as the following:
svnadmin load <repodir> < "$gCfg{dumpfile}"

You may need to precede this with "svnadmin create <repodir>" if you have not
yet created a repository. Type "svnadmin help <cmd>" for more information on
"create" and/or "load".

If any errors occurred during the conversion, they are summarized above.

For more information on the vss2svn project, see:
http://www.pumacode.org/projects/vss2svn/

EOTXT

    my $starttime = ctime($^T);
    chomp $starttime;

    my $endtime = ctime(time);
    chomp $endtime;

    my $elapsed;

    {
        # Integer arithmetic keeps the H:M:S breakdown free of fractions.
        use integer;

        my $remaining = time - $^T;

        my $hours = $remaining / 3600;
        $remaining -= ($hours * 3600);

        my $mins = $remaining / 60;
        $remaining -= ($mins * 60);

        $elapsed = sprintf("%2.2i:%2.2i:%2.2i", $hours, $mins, $remaining);
    }

    my($actions, $revisions, $mintime, $maxtime) = GetStats();

    print <<"EOTXT";
Started at : $starttime
Ended at : $endtime
Elapsed time : $elapsed (H:M:S)

VSS Actions read : $actions
SVN Revisions converted : $revisions
Date range (YYYY/MM/DD) : $mintime to $maxtime

EOTXT

}  #  End ShowSummary
1442 ###############################################################################
1443 # GetStats
1444 ###############################################################################
# GetStats
#
# Returns a four-element list: the VssAction row count minus one, the
# SvnRevision row count minus one, and the earliest and latest PhysicalAction
# timestamps. Each timestamp is passed through
# Vss2Svn::Dumpfile::SvnTimestamp, cut at the first "T" and has its dashes
# replaced by slashes.
sub GetStats {
    my $dbh = $gCfg{dbh};

    my ($actions) = $dbh->selectrow_array(<<"EOSQL");
SELECT
    COUNT(*)
FROM
    VssAction
EOSQL

    my ($revisions) = $dbh->selectrow_array(<<"EOSQL");
SELECT
    COUNT(*)
FROM
    SvnRevision
EOSQL

    my ($mintime, $maxtime) = $dbh->selectrow_array(<<"EOSQL");
SELECT
    MIN(timestamp), MAX(timestamp)
FROM
    PhysicalAction
EOSQL

    # Keep only the date part of each stamp and use "/" as separator.
    ($mintime, $maxtime) = map {
        my $stamp = Vss2Svn::Dumpfile::SvnTimestamp($_);
        $stamp =~ s:T.*::;
        $stamp =~ s:-:/:g;
        $stamp;
    } ($mintime, $maxtime);

    # initial creation of the repo wasn't considered an action or revision
    return($actions - 1, $revisions - 1, $mintime, $maxtime);

}  #  End GetStats
1486 ###############################################################################
1487 # DoSsCmd
1488 ###############################################################################
# DoSsCmd($cmd)
#
# Runs the ssphys executable ($gCfg{ssphys}) with the argument string $cmd
# through DoSysCmd, tolerating failure, and sanitizes the captured output in
# $gSysOut.
#
# Returns the success flag reported by DoSysCmd (1 on success, 0 on failure).
# Previously that flag was computed but discarded, and the sub returned the
# result of its last substitution instead.
sub DoSsCmd {
    my($cmd) = @_;

    my $ok = &DoSysCmd("\"$gCfg{ssphys}\" $cmd", 1);

    # $gSysOut is undefined when the command could not be executed at all;
    # skip the clean-up then instead of substituting on undef.
    if (defined $gSysOut) {
        $gSysOut =~ s/\x00//g;   # remove null bytes
        $gSysOut =~ s/.\x08//g;  # yes, I've seen VSS store backspaces in names!

        # allow all characters in the windows-1252 codepage: see
        # http://de.wikipedia.org/wiki/Windows-1252
        # NOTE(review): \x0A-\x10 and \x13 are deliberately or accidentally
        # left untouched by this class -- confirm which was intended.
        $gSysOut =~ s/[\x00-\x09\x11\x12\x14-\x1F\x81\x8D\x8F\x90\x9D]/_/g; # just to be sure
    }

    return $ok;
}  #  End DoSsCmd
1501 ###############################################################################
1502 # DoSysCmd
1503 ###############################################################################
# DoSysCmd($cmd, $allowfail)
#
# Runs $cmd through backticks and stores its output in the global $gSysOut.
# The command is echoed when $gCfg{verbose} is set and the output when
# $gCfg{debug} is set.
#
# Returns 1 when the command exited with status zero. On failure a warning is
# recorded through ThrowWarning; the sub then dies unless $allowfail is true,
# in which case it returns 0.
sub DoSysCmd {
    my($cmd, $allowfail) = @_;

    print "$cmd\n" if $gCfg{verbose};
    $gSysOut = `$cmd`;

    print $gSysOut if $gCfg{debug};

    my $failure;

    if ($? == -1) {
        # The command could not be started at all.
        $failure = "FAILED to execute: $!";
    }
    elsif ($?) {
        $failure = sprintf "FAILED with non-zero exit status %d (cmd: %s)", $? >> 8, $cmd;
    }

    return 1 unless defined $failure;

    ThrowWarning($failure);
    die unless $allowfail;

    return 0;

}  #  End DoSysCmd
1530 ###############################################################################
1531 # GetSsVersion
1532 ###############################################################################
# GetSsVersion
#
# Runs "<ssphys> --version" and returns the version as
# "<version>.<build>", or the string 'unknown' when the program produced no
# recognizable version line.
#
# The original expression `$1 . "." . $2 || 'unknown'` could never yield
# 'unknown': "." binds tighter than "||", so the concatenation (at least ".")
# was always true, and after a failed match $1/$2 were undefined or stale.
sub GetSsVersion {
    my $out = `\"$gCfg{ssphys}\" --version 2>&1`;

    return 'unknown' unless defined $out;

    # Build numbers look like:
    #  a.) ssphys 0.20.0, Build 123
    #  b.) ssphys 0.20.0, Build 123:150
    #  c.) ssphys 0.20.0, Build 123:150 (locally modified)
    #
    # The build is taken up to the first whitespace, which also copes with a
    # missing final newline or a CR/LF line ending.
    if ($out =~ m/^ssphys (.*?), Build (\S+)/m) {
        # turn it into
        #  a.) 0.20.0.123
        #  b.) 0.20.0.123:150
        #  c.) 0.20.0.123:150
        return $1 . "." . $2;
    }

    return 'unknown';
}  #  End GetSsVersion
1548 ###############################################################################
1549 # ThrowWarning
1550 ###############################################################################
# ThrowWarning($msg, $callinfo)
#
# Emits "ERROR -- <msg>" on STDERR, with the file and line of the call site
# appended, and stores the message in %gErr under the current task
# ($gCfg{task}). $callinfo is an optional array ref in caller() layout; the
# immediate caller is used when it is omitted. The first error for a task
# also appends that task to $gCfg{errortasks}.
sub ThrowWarning {
    my($msg, $callinfo) = @_;

    $callinfo = [caller()] unless $callinfo;

    my ($file, $line) = @{$callinfo}[1, 2];
    $msg .= "\nat $file line $line";

    warn "ERROR -- $msg\n";

    my $task = $gCfg{task};

    unless (defined $gErr{$task}) {
        # First problem reported for this task: remember the reporting order.
        $gErr{$task} = [];
        push @{ $gCfg{errortasks} }, $task;
    }

    push @{ $gErr{$task} }, $msg;

}  #  End ThrowWarning
1571 ###############################################################################
1572 # ThrowError
1573 ###############################################################################
# ThrowError(@msg)
#
# Records the message through ThrowWarning, attributing it to the caller of
# ThrowError, and then ends the conversion via StopConversion.
sub ThrowError {
    my $callsite = [caller()];

    ThrowWarning(@_, $callsite);
    StopConversion();
}  #  End ThrowError
1579 ###############################################################################
1580 # StopConversion
1581 ###############################################################################
# StopConversion
#
# Shuts the run down: disconnects the database, closes open files and exits
# the process with status 1.
sub StopConversion {
    DisconnectDatabase();
    CloseAllFiles();

    exit(1);
}  #  End StopConversion
1589 ###############################################################################
1590 # CloseAllFiles
1591 ###############################################################################
# CloseAllFiles
#
# Placeholder called by StopConversion; currently there is nothing to close.
sub CloseAllFiles {
    return;
}  #  End CloseAllFiles
1596 ###############################################################################
1597 # SetSystemTask
1598 ###############################################################################
# SetSystemTask($task, $leavestep)
#
# Stores $task in the SystemInfo table and in $gCfg{task}. The prepared
# UPDATE statement is cached in $gSth{SYSTEMTASK}. Unless $leavestep is true
# the step counter is reset to 0 through SetSystemStep.
sub SetSystemTask {
    my($task, $leavestep) = @_;

    print "\nSETTING TASK $task\n" if $gCfg{verbose};

    my $update = $gSth{'SYSTEMTASK'};

    unless (defined $update) {
        my $sql = <<"EOSQL";
UPDATE
SystemInfo
SET
task = ?
EOSQL

        $update = $gSth{'SYSTEMTASK'} = $gCfg{dbh}->prepare($sql);
    }

    $update->execute($task);

    $gCfg{task} = $task;

    SetSystemStep(0) unless $leavestep;

}  #  End SetSystemTask
1627 ###############################################################################
1628 # SetSystemStep
1629 ###############################################################################
# SetSystemStep($step)
#
# Stores $step in the SystemInfo table and in $gCfg{step}.
#
# The prepared UPDATE statement is cached in $gSth{SYSTEMSTEP}. The original
# code looked the handle up in %gSth but stored it in $gCfg{SYSTEMSTEP}, so
# the cache never hit (the statement was re-prepared on every call) and a
# stray key was added to the configuration hash.
sub SetSystemStep {
    my($step) = @_;

    print "\nSETTING STEP $step\n" if $gCfg{verbose};

    my($sql, $sth);

    $sth = $gSth{'SYSTEMSTEP'};

    if (!defined $sth) {
        $sql = <<"EOSQL";
UPDATE
    SystemInfo
SET
    step = ?
EOSQL

        $sth = $gSth{'SYSTEMSTEP'} = $gCfg{dbh}->prepare($sql);
    }

    $sth->execute($step);

    $gCfg{step} = $step;

}  #  End SetSystemStep
1656 ###############################################################################
1657 # ConnectDatabase
1658 ###############################################################################
# ConnectDatabase
#
# Opens the SQLite2 database $gCfg{sqlitedb} and stores the handle in
# $gCfg{dbh}. An existing database file is deleted first unless the run is
# being resumed from a task other than INIT.
sub ConnectDatabase {
    my $db = $gCfg{sqlitedb};

    # A fresh start is any run that is not resumed, or one restarting at INIT.
    my $starting_over = !$gCfg{resume}
        || (defined($gCfg{task}) && $gCfg{task} eq 'INIT');

    if (-e $db && $starting_over) {
        unlink $db
            or ThrowError("Could not delete existing database "
                        . $gCfg{sqlitedb});
    }

    print "Connecting to database $db\n\n";

    my %attr = (RaiseError => 1, AutoCommit => 1);

    $gCfg{dbh} = DBI->connect("dbi:SQLite2:dbname=$db", '', '', \%attr)
        or die "Couldn't connect database $db: $DBI::errstr";

}  #  End ConnectDatabase
1677 ###############################################################################
1678 # DisconnectDatabase
1679 ###############################################################################
# DisconnectDatabase
#
# Disconnects the database handle in $gCfg{dbh}, if one has been opened.
sub DisconnectDatabase {
    my $dbh = $gCfg{dbh};

    if (defined $dbh) {
        $dbh->disconnect;
    }
}  #  End DisconnectDatabase
1684 ###############################################################################
1685 # SetupGlobals
1686 ###############################################################################
# SetupGlobals
#
# Creates the system tables for a run starting at INIT, or reloads the saved
# settings otherwise; then validates the VSS directory, fills the action type
# table and passes the shared settings to the DataCache and SvnRevHandler
# classes.
sub SetupGlobals {
    my $fresh = defined($gCfg{task}) && $gCfg{task} eq 'INIT';

    if ($fresh) {
        InitSysTables();
    }
    else {
        ReloadSysTables();
    }

    $gCfg{ssphys} = 'ssphys' unless defined($gCfg{ssphys});
    $gCfg{vssdatadir} = "$gCfg{vssdir}/data";

    # A VSS database always has a "data" directory below its root.
    unless (-d "$gCfg{vssdatadir}") {
        ThrowError("$gCfg{vssdir} does not appear "
                 . "to be a valid VSS database");
    }

    SetupActionTypes();

    Vss2Svn::DataCache->SetCacheDir($gCfg{tempdir});
    Vss2Svn::DataCache->SetDbHandle($gCfg{dbh});
    Vss2Svn::DataCache->SetVerbose($gCfg{verbose});

    if (defined $gCfg{revtimerange}) {
        Vss2Svn::SvnRevHandler->SetRevTimeRange($gCfg{revtimerange});
    }

}  #  End SetupGlobals
1711 ###############################################################################
1712 # SetupActionTypes
1713 ###############################################################################
# SetupActionTypes
#
# Fills the global %gActionType, which maps each VSS action name to the item
# type it applies to (1 = project, 2 = file) and to the internal action used
# by the conversion.
sub SetupActionTypes {
    # RollBack is only seen in combination with a BranchFile activity, so
    # actually RollBack is the item view on the activity and BranchFile is the
    # parent side ==> map RollBack to BRANCH, so that we can join the two
    # actions in the MergeParentData step.
    #
    # RestoredProject seems to act like CreatedProject, except that the
    # project was recreated from an archive file, and its timestamp is
    # the time of restoration. Timestamps of the child files retain
    # their original values.
    #
    # NOTE(review): the original list contained RestoredProject twice, first
    # with action 'RESTOREDPROJECT' and later with 'RESTORE'. In a hash the
    # later entry silently wins, so the first one was dead. Only the effective
    # 'RESTORE' mapping is kept here -- confirm which action was intended.
    %gActionType = (
        CreatedProject           => {type => 1, action => 'ADD'},
        AddedProject             => {type => 1, action => 'ADD'},
        RenamedProject           => {type => 1, action => 'RENAME'},
        MovedProjectTo           => {type => 1, action => 'MOVE_TO'},
        MovedProjectFrom         => {type => 1, action => 'MOVE_FROM'},
        DeletedProject           => {type => 1, action => 'DELETE'},
        DestroyedProject         => {type => 1, action => 'DELETE'},
        RecoveredProject         => {type => 1, action => 'RECOVER'},
        ArchiveProject           => {type => 1, action => 'DELETE'},
        RestoredProject          => {type => 1, action => 'RESTORE'},
        CheckedIn                => {type => 2, action => 'COMMIT'},
        CreatedFile              => {type => 2, action => 'ADD'},
        AddedFile                => {type => 2, action => 'ADD'},
        RenamedFile              => {type => 2, action => 'RENAME'},
        DeletedFile              => {type => 2, action => 'DELETE'},
        DestroyedFile            => {type => 2, action => 'DELETE'},
        RecoveredFile            => {type => 2, action => 'RECOVER'},
        ArchiveVersionsofFile    => {type => 2, action => 'ADD'},
        ArchiveVersionsofProject => {type => 1, action => 'ADD'},
        ArchiveFile              => {type => 2, action => 'DELETE'},
        RestoredFile             => {type => 2, action => 'RESTORE'},
        SharedFile               => {type => 2, action => 'SHARE'},
        BranchFile               => {type => 2, action => 'BRANCH'},
        PinnedFile               => {type => 2, action => 'PIN'},
        RollBack                 => {type => 2, action => 'BRANCH'},
        UnpinnedFile             => {type => 2, action => 'PIN'},
        Labeled                  => {type => 2, action => 'LABEL'},
    );

}  #  End SetupActionTypes
1756 ###############################################################################
1757 # InitSysTables
1758 ###############################################################################
# InitSysTables
#
# Creates the working schema in the database ($gCfg{dbh}): the Physical,
# NameLookup, PhysicalAction, VssAction, SvnRevision, SvnRevisionVssAction,
# Label and SystemInfo tables plus their indexes, and inserts the single
# SystemInfo row holding the current configuration values.
sub InitSysTables {
    # Prepare and execute one statement; returns the statement handle.
    my $run_sql = sub {
        my ($statement, @bind) = @_;

        my $handle = $gCfg{dbh}->prepare($statement);
        $handle->execute(@bind);

        return $handle;
    };

    $run_sql->(<<"EOSQL");
CREATE TABLE
    Physical (
        physname VARCHAR
    )
EOSQL

    $run_sql->(<<"EOSQL");
CREATE TABLE
    NameLookup (
        offset INTEGER,
        name VARCHAR
    )
EOSQL

    $run_sql->(<<"EOSQL");
CREATE TABLE
    PhysicalAction (
        action_id INTEGER PRIMARY KEY,
        physname VARCHAR,
        version INTEGER,
        parentphys VARCHAR,
        actiontype VARCHAR,
        itemname VARCHAR,
        itemtype INTEGER,
        timestamp INTEGER,
        author VARCHAR,
        is_binary INTEGER,
        info VARCHAR,
        priority INTEGER,
        sortkey VARCHAR,
        parentdata INTEGER,
        label VARCHAR,
        comment TEXT
    )
EOSQL

    $run_sql->(<<"EOSQL");
CREATE INDEX
    PhysicalAction_IDX1 ON PhysicalAction (
        timestamp ASC,
        priority ASC,
        sortkey ASC
    )
EOSQL

    $run_sql->(<<"EOSQL");
CREATE INDEX
    PhysicalAction_IDX2 ON PhysicalAction (
        physname ASC,
        parentphys ASC,
        actiontype ASC,
        timestamp ASC,
        author ASC
    )
EOSQL

    $run_sql->(<<"EOSQL");
CREATE TABLE
    VssAction (
        action_id INTEGER PRIMARY KEY,
        parentphys VARCHAR,
        physname VARCHAR,
        version INTEGER,
        action VARCHAR,
        itempaths VARCHAR,
        itemtype INTEGER,
        is_binary INTEGER,
        info VARCHAR
    )
EOSQL

    $run_sql->(<<"EOSQL");
CREATE INDEX
    VssAction_IDX1 ON VssAction (
        action_id ASC
    )
EOSQL

    $run_sql->(<<"EOSQL");
CREATE TABLE
    SvnRevision (
        revision_id INTEGER PRIMARY KEY,
        timestamp INTEGER,
        author VARCHAR,
        comment TEXT
    )
EOSQL

    $run_sql->(<<"EOSQL");
CREATE TABLE
    SvnRevisionVssAction (
        revision_id INTEGER,
        action_id INTEGER
    )
EOSQL

    $run_sql->(<<"EOSQL");
CREATE INDEX
    SvnRevisionVssAction_IDX1 ON SvnRevisionVssAction (
        revision_id ASC,
        action_id ASC
    )
EOSQL

    $run_sql->(<<"EOSQL");
CREATE TABLE
    Label (
        physical VARCHAR,
        version INTEGER,
        label VARCHAR,
        imtempaths VARCHAR
    )
EOSQL

    # SystemInfo gets one VARCHAR column per persisted configuration item.
    my @cfgitems = qw(task step vssdir svnurl svnuser svnpwd ssphys tempdir
                      setsvndate starttime);

    my $fielddef = join(",\n ",
                        map {sprintf('%-12.12s VARCHAR', $_)} @cfgitems);

    $run_sql->(<<"EOSQL");
CREATE TABLE
    SystemInfo (
        $fielddef
    )
EOSQL

    my $fields = join(', ', @cfgitems);
    my $args   = join(', ', map {'?'} @cfgitems);

    my $insert = $run_sql->(<<"EOSQL", map {$gCfg{$_}} @cfgitems);
INSERT INTO
    SystemInfo ($fields)
VALUES
    ($args)
EOSQL

    $insert->finish();

}  #  End InitSysTables
1942 ###############################################################################
1943 # ReloadSysTables
1944 ###############################################################################
# ReloadSysTables
#
# Merges the settings saved in the SystemInfo row with the current
# configuration: a value already defined in %gCfg is written back to the
# table, any other saved value is copied into %gCfg. Finally the current task
# is re-announced through SetSystemTask.
sub ReloadSysTables {
    my $dbh = $gCfg{dbh};

    my $select = $dbh->prepare("SELECT * FROM SystemInfo");
    $select->execute();

    my $saved = $select->fetchrow_hashref();

    while (my ($field, $val) = each %$saved) {
        unless (defined($gCfg{$field})) {
            # Nothing given for this run: fall back to the saved value.
            $gCfg{$field} = $val;
            next;
        }

        # allow user to override saved vals
        my $update = $dbh->prepare("UPDATE SystemInfo SET $field = ?");
        $update->execute($gCfg{$field});
    }

    $select->finish();
    SetSystemTask($gCfg{task});

}  #  End ReloadSysTables
1971 ###############################################################################
1972 # Initialize
1973 ###############################################################################
# Initialize
#
# Parses the command line into %gCfg, fills in defaults, derives the working
# file locations beneath the temp directory and selects the XML parser.
# Returns 1 early for a resumed run; otherwise it also clears the vssdata
# directory and sets the task to INIT at step 0.
sub Initialize {
    $| = 1;

    my @options = (
        'vssdir=s', 'tempdir=s', 'dumpfile=s', 'resume', 'verbose',
        'debug', 'timing+', 'task=s', 'revtimerange=i', 'ssphys=s',
        'encoding=s', 'trunkdir=s', 'auto_props=s', 'label_mapper=s', 'md5',
    );
    GetOptions(\%gCfg, @options);

    GiveHelp("Must specify --vssdir") unless defined($gCfg{vssdir});

    $gCfg{tempdir}  = './_vss2svn'           unless defined($gCfg{tempdir});
    $gCfg{dumpfile} = 'vss2svn-dumpfile.dat' unless defined($gCfg{dumpfile});

    # Optional input files must be readable when they are given.
    foreach my $opt (qw(auto_props label_mapper)) {
        next unless defined($gCfg{$opt});
        next if -r $gCfg{$opt};

        die "$opt file '$gCfg{$opt}' is not readable";
    }

    my $tempdir = $gCfg{tempdir};

    $gCfg{sqlitedb} = "$tempdir/vss_data.db";

    # XML output from ssphysout placed here.
    $gCfg{ssphysout} = "$tempdir/ssphysout";
    $gCfg{encoding} = 'windows-1252' unless defined($gCfg{encoding});

    # Commit messages for SVN placed here.
    $gCfg{svncomment} = "$tempdir/svncomment.tmp.txt";
    mkdir $tempdir unless (-d $tempdir);

    # Directories for holding VSS revisions
    $gCfg{vssdata} = "$tempdir/vssdata";

    if ($gCfg{resume} && !-e $gCfg{sqlitedb}) {
        warn "WARNING: --resume set but no database exists; starting new "
           . "conversion...";
        $gCfg{resume} = 0;
    }

    # Debug output implies verbose output.
    $gCfg{verbose} = 1 if $gCfg{debug};

    $gCfg{timing} = 0 unless defined $gCfg{timing};
    $gCfg{md5}    = 0 unless defined $gCfg{md5};

    $gCfg{starttime} = scalar localtime($^T);

    # trunkdir should (must?) be without trailing slash
    $gCfg{trunkdir} = '' unless defined $gCfg{trunkdir};
    $gCfg{trunkdir} =~ s:\\:/:g;
    $gCfg{trunkdir} =~ s:/$::;

    $gCfg{junkdir} = '/lost+found';

    $gCfg{labeldir} = '/labels';

    $gCfg{errortasks} = [];

    {
        no warnings 'once';
        $gCfg{usingExe} = (defined($PerlApp::TOOL));
    }

    ConfigureXmlParser();

    ### Don't go past here if resuming a previous run ###
    return 1 if $gCfg{resume};

    rmtree($gCfg{vssdata}) if (-e $gCfg{vssdata});
    mkdir $gCfg{vssdata};

    $gCfg{ssphys} ||= 'ssphys';
    $gCfg{svn}    ||= 'SVN.exe';

    $gCfg{task} = 'INIT';
    $gCfg{step} = 0;
}  #  End Initialize
2053 ###############################################################################
2054 # ConfigureXmlParser
2055 ###############################################################################
# ConfigureXmlParser
#
# Chooses the XML parser and records its name in $gCfg{xmlParser}. The order
# of preference is: the parser named in $ENV{XML_SIMPLE_PREFERRED_PARSER},
# XML::SAX::Expat, whatever XML::SAX::ParserFactory offers by default, and
# finally XML::Simple's own parser. Always returns 1.
sub ConfigureXmlParser {

    my $preferred = $ENV{XML_SIMPLE_PREFERRED_PARSER};

    if (defined($preferred)) {
        # user has defined a preferred parser; don't mess with it
        $gCfg{xmlParser} = $preferred;
        return 1;
    }

    $gCfg{xmlParser} = 'XML::Simple';

    eval { require XML::SAX; };

    # no XML::SAX; let XML::Simple use its own parser
    return 1 if $@;

    if ($gCfg{usingExe}) {
        # Prevent the ParserDetails.ini error message when running from .exe
        XML::SAX->load_parsers($INC[1]);
    }

    $gCfg{xmlParser} = 'XML::SAX::Expat';
    $XML::SAX::ParserPackage = $gCfg{xmlParser};

    my $parser;

    eval { $parser = XML::SAX::ParserFactory->parser(); };

    unless ($@) {
        # XML::SAX::Expat installed; use it

        # for exe version, XML::Parser::Expat needs help finding its encmaps
        no warnings 'once';

        foreach my $dir (@INC) {
            my $encdir = "$dir/encodings";
            $encdir =~ s:\\:/:g;
            $encdir =~ s://:/:g;

            next unless -d $encdir;

            print "Adding '$encdir' to encodings file path\n";
            push(@XML::Parser::Expat::Encoding_Path, $encdir);
        }

        return 1;
    }

    # Expat is not usable; let the factory pick its default parser instead.
    undef $XML::SAX::ParserPackage;
    eval { $parser = XML::SAX::ParserFactory->parser(); };

    unless ($@) {
        $gCfg{xmlParser} = ref $parser;
        return 1;
    }

    # couldn't find a better package; go back to XML::Simple
    $gCfg{'xmlParser'} = 'XML::Simple';
    return 1;

}  #  End ConfigureXmlParser
2118 ###############################################################################
2119 # GiveHelp
2120 ###############################################################################
# GiveHelp($msg)
#
# Prints $msg (or 'Online Help' when no message is given) followed by the
# usage text, then exits the process with status 1.
sub GiveHelp {
    my($msg) = @_;

    $msg = 'Online Help' unless $msg;

    print <<"EOTXT";

$msg

USAGE: perl vss2svn.pl --vssdir <dir> [options]

REQUIRED PARAMETERS:
--vssdir <dir> : Directory where VSS database is located. This should be
the directory in which the "srcsafe.ini" file is located.

OPTIONAL PARAMETERS:
--ssphys <path> : Full path to ssphys.exe program; uses PATH otherwise
--tempdir <dir> : Temp directory to use during conversion;
default is ./_vss2svn
--dumpfile <file> : specify the subversion dumpfile to be created;
default is ./vss2svn-dumpfile.dat
--revtimerange <sec> : specify the difference between two ss actions
that are treated as one subversion revision;
default is 3600 seconds (== 1hour)

--resume : Resume a failed or aborted previous run
--task <task> : specify the task to resume; task is one of the following
INIT, LOADVSSNAMES, FINDDBFILES, GETPHYSHIST,
MERGEPARENTDATA, MERGEMOVEDATA, REMOVETMPCHECKIN,
MERGEUNPINPIN, BUILDACTIONHIST, IMPORTSVN

--verbose : Print more info about the items being processed
--debug : Print lots of debugging info.
--timing : Show timing information during various steps
--encoding : Specify the encoding used in VSS;
Default is windows-1252
--trunkdir : Specify where to map the VSS Project Root in the
converted repository (default = "/")
--auto_props : Specify an autoprops ini file to use, e.g.
--auto_props="c:/Dokumente und Einstellungen/user/Anwendungsdaten/Subversion/config"
--md5 : generate md5 checksums
--label_mapper : INI style file to map labels to different locataions
EOTXT

    exit(1);
}  #  End GiveHelp