From 3038a39341270ffb30311beae3836eabcf7d8d7a Mon Sep 17 00:00:00 2001 From: toby Date: Fri, 7 Apr 2006 05:04:50 +0000 Subject: [PATCH] vss2svn.pl: Improve error reporting and statistics Vss2Svn/ActionHandler.pm: Remove unused get_current_item_name function Vss2Svn/Dumpfile.pm: Add "sanity checking" during dumpfile creation to ensure files being created are logical and attempt to take corrective action otherwise git-svn-id: http://vss2svn.googlecode.com/svn/trunk@183 2cfd5912-9055-84bd-9a12-e3c18a4b6e42 --- script/Vss2Svn/ActionHandler.pm | 20 --- script/Vss2Svn/Dumpfile.pm | 374 +++++++++++++++++++++++++++++++++++++--- script/vss2svn.pl | 170 ++++++++++++++---- 3 files changed, 493 insertions(+), 71 deletions(-) diff --git a/script/Vss2Svn/ActionHandler.pm b/script/Vss2Svn/ActionHandler.pm index 7e4711d..63c6f1f 100644 --- a/script/Vss2Svn/ActionHandler.pm +++ b/script/Vss2Svn/ActionHandler.pm @@ -302,8 +302,6 @@ sub _delete_handler { # protect for delete/purge cycles: if the parentphys isn't in the shares # anymore, the file was already deleted from the parent and is now purged -# my %look = map {$_ => 1} @{ $physinfo->{sharedphys} }; -# return 0 unless defined($look{$physinfo->{parentphys}}); my $parentFound = defined $physinfo->{parentphys}; foreach my $parent (@{ $physinfo->{sharedphys} }) { $parentFound = 1 if ($physinfo->{parentphys} eq $parent); @@ -466,24 +464,6 @@ PARENT: } # End _get_item_paths -############################################################################### -# _get_current_item_name -############################################################################### -sub _get_current_item_name { - my($self) = @_; - - my $physname = $self->{row}->{physname}; - my $physinfo = $gPhysInfo{$physname}; - - if (!defined $physinfo) { - $self->{errmsg} .= "Could not determine real name for '$physname':\n" - . 
"$self->{physname_seen}\n"; - return undef; - } - - return $physinfo->{name}; -} # End _get_current_item_name - 1; diff --git a/script/Vss2Svn/Dumpfile.pm b/script/Vss2Svn/Dumpfile.pm index a817a82..feeaff7 100644 --- a/script/Vss2Svn/Dumpfile.pm +++ b/script/Vss2Svn/Dumpfile.pm @@ -37,18 +37,20 @@ sub new { errors => [], modified_cache => {}, deleted_cache => {}, + svn_items => {}, + junk_itempaths => {}, + need_junkdir => 0, + need_missing_dirs => [], }; - # prevent perl from doing line-ending conversions, but this means we'll - # need to explicitly output DOS-style line endings between info lines + # prevent perl from doing line-ending conversions binmode($fh); my $old = select($fh); $| = 1; select($old); - #TODO: take out UUID - print $fh "SVN-fs-dump-format-version: 2\n\nUUID: 2d233e98-0cb8-4f47-9081-4b0a55eb6c6b\n"; + print $fh "SVN-fs-dump-format-version: 2\n\n"; $self = bless($self, $class); return $self; @@ -82,11 +84,11 @@ sub begin_revision { $comment = '' if !defined($comment); $author = '' if !defined($author); - + # convert to utf8 from_to ($comment, "windows-1252", "utf8"); from_to ($author, "windows-1252", "utf8"); - + if ($revision > 0) { push @$props, ['svn:log', $comment]; push @$props, ['svn:author', $author]; @@ -106,7 +108,6 @@ sub do_action { my($self, $data, $expdir) = @_; my $action = $data->{action}; - my $handler = $gHandlers{$action}; my $nodes = []; @@ -119,7 +120,48 @@ sub do_action { $self->{modified_cache} = {}; $self->{deleted_cache} = {}; + my($handler, $this_action); + foreach my $itempath (split "\t", $data->{itempaths}) { + $this_action = $action; + + if(defined($itempath)) { + ($this_action, $itempath) = + $self->_action_path_sanity_check($this_action, $itempath, $data); + + return 0 unless defined($itempath); + + } else { + # if the item's path isn't defined, its real name was corrupted in + # vss, so we'll check it in to the junk drawer as an add + if (defined $main::gCfg{junkdir}) { + $itempath = 
$self->_get_junk_itempath($main::gCfg{junkdir},
+                    join('.', @$data{ qw(physname version revision_id) }));
+
+                $self->add_error("Using filename '$itempath' for item with "
+                    . "unrecoverable name at revision $data->{revision_id}");
+
+                $this_action = 'ADD';
+            } else {
+                return 0;
+            }
+        }
+
+        # if need_junkdir = 1, the first item is just about to be added to the
+        # junk drawer, so create the dumpfile node to add this directory
+        if ($self->{need_junkdir} == 1) {
+            $self->_add_svn_dir($nodes, $main::gCfg{junkdir});
+            $self->{need_junkdir} = -1;
+        }
+
+        foreach my $dir (@{ $self->{need_missing_dirs} }) {
+            $self->_add_svn_dir($nodes, $dir);
+            $self->add_error("Creating missing directory '$dir' for item "
+                . "'$itempath' at revision $data->{revision_id}");
+        }
+
+        $handler = $gHandlers{$this_action};
+
         $self->$handler($itempath, $nodes, $data, $expdir);
         $self->{is_primary} = 0;
     }
@@ -140,6 +182,283 @@ sub do_action {
 } # End do_action
 
 ###############################################################################
+# _get_junk_itempath
+#   Returns an unused "$dir/<basename of $base>" path for an item whose real
+#   name is unrecoverable; clashes are numbered as "$dir/$file.N$ext".
+###############################################################################
+sub _get_junk_itempath {
+    my($self, $dir, $base) = @_;
+
+    $base =~ s:.*/::;
+    my $itempath = "$dir/$base";
+    my $count = 1;
+
+    # 0 = junk dir never needed so far; 1 = do_action must create it next
+    if($self->{need_junkdir} == 0) {
+        $self->{need_junkdir} = 1;
+    }
+
+    my($file, $ext) = ($base =~ m/^(.*)\.(.*)/) ? ($1, ".$2") : ($base, '');
+
+    while(defined($self->{junk_itempaths}->{$itempath})) {
+        $itempath = "$dir/$file.$count$ext";
+        $count++;
+    }
+
+    # record every path handed out, numbered ones included; otherwise a later
+    # clash on the same base would be given "$file.1$ext" a second time
+    $self->{junk_itempaths}->{$itempath} = 1;
+
+    return $itempath;
+} # End _get_junk_itempath
+
+###############################################################################
+# _action_path_sanity_check
+#   Replays $action for $itempath against the svn_items tree. Returns
+#   ($action, $itempath); the action becomes 'COMMIT' when a file is re-added
+#   and the path is undef when the action is inconsistent and must be skipped.
+###############################################################################
+sub _action_path_sanity_check {
+    my($self, $action, $itempath, $data) = @_;
+
+    my($itemtype, $revision_id) = @{ $data }{qw(itemtype revision_id)};
+
+    # reset before the root shortcut below; otherwise do_action would create
+    # the directories queued for the previous item a second time
+    $self->{need_missing_dirs} = [];
+
+    return($action, $itempath) if ($itempath eq '' || $itempath eq '/');
+
+    my($newaction, $newpath) = ($action, $itempath);
+    my $success;
+
+    if($action eq 'ADD' || $action eq 'SHARE' || $action eq 'RECOVER') {
+        $success = $self->_add_svn_struct_item($itempath, $itemtype);
+
+        if(!defined($success)) {
+            $newpath = undef;
+            $self->add_error("Path consistency failure while trying to add "
+                . "item '$itempath' at revision $revision_id; skipping");
+
+        } elsif($success == 0) {
+            # trying to re-add existing item; if file, change it to a commit
+            if ($itemtype == 1) {
+                $newpath = undef;
+                $self->add_error("Attempt to re-add directory '$itempath' at "
+                    . "revision $revision_id; possibly missing delete");
+            } else {
+                $newaction = 'COMMIT';
+                $self->add_error("Attempt to re-add file '$itempath' at "
+                    . "revision $revision_id, changing to modify; possibly "
+                    . "missing delete");
+            }
+        }
+
+    } elsif ($action eq 'DELETE') {
+        $success = $self->_delete_svn_struct_item($itempath, $itemtype);
+
+        if(!$success) {
+            $newpath = undef;
+            $self->add_error("Attempt to delete non-existent item '$itempath' "
+                . "at revision $revision_id; skipping...");
+        }
+
+    } elsif ($action eq 'RENAME') {
+        $success = $self->_rename_svn_struct_item($itempath, $itemtype,
+            $data->{info});
+
+        if(!$success) {
+            $newpath = undef;
+            $self->add_error("Attempt to rename non-existent item '$itempath' "
+                . "at revision $revision_id; skipping...");
+        }
+    } elsif ($action eq 'MOVE') {
+        my ($ref, $item) = $self->_get_svn_struct_ref_for_move($itempath);
+
+        if(!$ref) {
+            $newpath = undef;
+            $self->add_error("Attempt to move non-existent directory '$itempath' "
+                . "at revision $revision_id; skipping...");
+        } elsif(!$self->_add_svn_struct_item($data->{info}, 1, $ref->{$item})) {
+            # target exists or sits under a file; keep the source where it is
+            $newpath = undef;
+            $self->add_error("Error while attempting to move directory '$itempath' "
+                . "at revision $revision_id; skipping...");
+        } else {
+            # the subtree now hangs off the new path; unlink the old entry
+            delete $ref->{$item};
+        }
+    }
+
+    return($newaction, $newpath);
+
+} # End _action_path_sanity_check
+
+###############################################################################
+# _add_svn_struct_item
+#   Adds $itempath to the svn_items tree: a hash for a directory (itemtype 1),
+#   1 for anything else, or $newref when given. Absent parents are created and
+#   queued in need_missing_dirs. Returns 1 if added, 0 if the item already
+#   exists, undef if a path component above it is not a directory.
+###############################################################################
+sub _add_svn_struct_item {
+    my($self, $itempath, $itemtype, $newref) = @_;
+
+    $itempath =~ s:^/::;
+    my @subdirs = split '/', $itempath;
+
+    my $item = pop(@subdirs);
+    my $ref = $self->{svn_items};
+
+    my $thispath = '';
+
+    foreach my $subdir (@subdirs) {
+        $thispath .= "$subdir/";
+
+        if(ref($ref) ne 'HASH') {
+            return undef;
+        }
+        if(!defined($ref->{$subdir})) {
+            # parent directory doesn't exist; add it to list of missing dirs
+            # to build up
+            push @{ $self->{need_missing_dirs} }, $thispath;
+
+            $ref->{$subdir} = {};
+        }
+
+        $ref = $ref->{$subdir};
+    }
+
+    if(ref($ref) ne 'HASH') {
+        # parent "directory" is actually a file
+        return undef;
+    }
+
+    if(defined($ref->{$item})) {
+        # item already exists; can't add it
+        return 0;
+    }
+
+    if(defined($newref)) {
+        $ref->{$item} = $newref;
+    } else {
+        $ref->{$item} = ($itemtype == 1)? {} : 1;
+    }
+
+    return 1;
+
+} # End _add_svn_struct_item
+
+###############################################################################
+# _delete_svn_struct_item
+#   Removes $itempath from the svn_items tree (no new name is passed on).
+###############################################################################
+sub _delete_svn_struct_item {
+    my($self, $itempath, $itemtype) = @_;
+
+    return $self->_delete_rename_svn_struct_item($itempath, $itemtype);
+} # End _delete_svn_struct_item
+
+###############################################################################
+# _rename_svn_struct_item
+#   Re-keys $itempath as $newname within the same parent of the svn_items tree.
+###############################################################################
+sub _rename_svn_struct_item {
+    my($self, $itempath, $itemtype, $newname) = @_;
+
+    return $self->_delete_rename_svn_struct_item($itempath, $itemtype, $newname);
+} # End _rename_svn_struct_item
+
+###############################################################################
+# _delete_rename_svn_struct_item
+#   Removes $itempath from the svn_items tree, first linking its entry under
+#   $newname in the same parent when $newname is defined. Returns 1 on
+#   success, 0 if the item is absent, undef if a parent directory is absent.
+###############################################################################
+sub _delete_rename_svn_struct_item {
+    my($self, $itempath, $itemtype, $newname, $movedref) = @_;
+
+    $itempath =~ s:^/::;
+    $newname =~ s:/$:: if defined($newname);
+    my @subdirs = split '/', $itempath;
+
+    my $item = pop(@subdirs);
+    my $ref = $self->{svn_items};
+
+    foreach my $subdir (@subdirs) {
+        if(!(ref($ref) eq 'HASH') || !defined($ref->{$subdir})) {
+            # can't get to item because a parent directory doesn't exist; give up
+            return undef;
+        }
+
+        $ref = $ref->{$subdir};
+    }
+
+    if((ref($ref) ne 'HASH') || !defined($ref->{$item})) {
+        # item doesn't exist; can't delete/rename it
+        return 0;
+    }
+
+    if(defined $newname) {
+        $ref->{$newname} = $ref->{$item};
+    }
+
+    delete $ref->{$item};
+
+    return 1;
+
+} # End _delete_rename_svn_struct_item
+
+###############################################################################
+# _get_svn_struct_ref_for_move
+#   Returns ($parent_hash, $item_key) for the directory at $itempath, or an
+#   empty list when the path is absent or does not name a directory.
+###############################################################################
+sub _get_svn_struct_ref_for_move {
+    my($self, $itempath) = @_;
+
+    $itempath =~ s:^/::;
+    my @subdirs = split '/', $itempath;
+
+    my $item = pop(@subdirs);
+    my $ref = $self->{svn_items};
+
+    foreach my $subdir (@subdirs) {
+        return if ((ref($ref) ne 'HASH') || !defined($ref->{$subdir}));
+
+        $ref = $ref->{$subdir};
+    }
+
+    # ref() must close before "ne": ref($x ne 'HASH') is ref() of a boolean,
+    # always '', which let file entries pass as if they were directories
+    if((ref($ref) ne 'HASH') || !defined($ref->{$item}) ||
+        (ref($ref->{$item}) ne 'HASH')) {
+        return;
+    }
+
+    return ($ref, $item);
+
+} # End _get_svn_struct_ref_for_move
+
+###############################################################################
+# _add_svn_dir
+#   Appends an 'add' directory node for $dir to @$nodes and records $dir in
+###############################################################################
+sub _add_svn_dir {
+    my($self, $nodes, $dir) = @_;
+
+    my $node = Vss2Svn::Dumpfile::Node->new();
+    my $data = { itemtype => 1, is_binary => 0 };
+
+    $node->set_initial_props($dir, $data);
+    $node->{action} = 'add';
+
+    push @$nodes, $node;
+    $self->_add_svn_struct_item($dir, 1);    # ...the svn_items tree
+
+} # End _add_svn_dir
+
+###############################################################################
 # _add_handler
 ###############################################################################
 sub _add_handler {
@@ -191,11 +510,7 @@ sub _rename_handler {
 
     my $newpath = $itempath;
 
-    if ($data->{itemtype} == 1) {
-        $newpath =~ s:(.*/)?.+$:$1$newname/:;
-    } else {
-        $newpath =~ s:(.*/)?.*:$1$newname:;
-    }
+    $newpath =~ s:(.*/)?.*:$1$newname:;
 
     my $node = Vss2Svn::Dumpfile::Node->new();
     $node->set_initial_props($newpath, $data);
@@ -253,7 +568,7 @@ sub _branch_handler {
     my($self, $itempath, $nodes, $data, $expdir) = @_;
 
     # branching is a no-op in SVN
-    
+
     # if the file is copied later, we need to track, the revision of this branch
     # see the shareBranchShareModify Test
     $self->track_modified($data->{physname}, $data->{revision_id}, $itempath);
@@ -326,9 +641,9 @@ sub _recover_handler {
     my($copyrev, $copypath) = $self->last_deleted_rev_path($data->{physname});
 
     if
(!defined $copyrev) { - push @{ $self->{errors} }, + $self->add_error( "Could not recover path $itempath at revision $data->{revision_id};" - . " unable to determine deleted revision"; + . " unable to determine deleted revision"); return 0; } @@ -404,16 +719,15 @@ sub get_export_contents { if (!defined($expdir)) { return 0; } elsif (!defined($data->{version})) { - push @{ $self->{errors} }, - "Attempt to retrieve file contents with unknown version number"; + $self->add_error( + "Attempt to retrieve file contents with unknown version number"); return 0; } my $file = "$expdir\\$data->{physname}.$data->{version}"; if (!open EXP, "$file") { - push @{ $self->{errors} }, - "Could not open export file '$file'"; + $self->add_error("Could not open export file '$file'"); return 0; } @@ -491,6 +805,16 @@ sub output_content { sub svn_timestamp { my($self, $vss_timestamp) = @_; + return &SvnTimestamp($vss_timestamp); + +} # End svn_timestamp + +############################################################################### +# SvnTimestamp +############################################################################### +sub SvnTimestamp { + my($vss_timestamp) = @_; + my($sec, $min, $hour, $day, $mon, $year) = gmtime($vss_timestamp); $year += 1900; @@ -499,7 +823,17 @@ sub svn_timestamp { return sprintf("%4.4i-%2.2i-%2.2iT%2.2i:%2.2i:%2.2i.%6.6iZ", $year, $mon, $day, $hour, $min, $sec, 0); -} # End svn_timestamp +} # End SvnTimestamp + +############################################################################### +# add_error +############################################################################### +sub add_error { + my($self, $msg) = @_; + + push @{ $self->{errors} }, $msg; +} # End add_error + 1; diff --git a/script/vss2svn.pl b/script/vss2svn.pl index 07227b9..c4add74 100755 --- a/script/vss2svn.pl +++ b/script/vss2svn.pl @@ -19,7 +19,7 @@ use Vss2Svn::DataCache; use Vss2Svn::SvnRevHandler; use Vss2Svn::Dumpfile; -our(%gCfg, %gSth, @gErr, %gFh, $gSysOut, %gActionType, 
%gNameLookup, %gId); +our(%gCfg, %gSth, %gErr, %gFh, $gSysOut, %gActionType, %gNameLookup, %gId); our $VERSION = '0.10'; @@ -31,8 +31,8 @@ our $VERSION = '0.10'; &RunConversion; -&DisconnectDatabase; &ShowSummary; +&DisconnectDatabase; ############################################################################### # RunConversion @@ -82,6 +82,7 @@ sub RunConversion { or die "FATAL ERROR: Unknown task '$gCfg{task}'\n"; print "TASK: $gCfg{task}\n"; + push @{ $gCfg{tasks} }, $gCfg{task}; if ($gCfg{prompt}) { print "Press ENTER to continue...\n"; @@ -229,9 +230,9 @@ sub FindPhysnameFile { sub GetVssPhysInfo { my($cache, $physdir, $physfolder, $physname, $xs) = @_; - my @filesegment = FindPhysnameFile($physdir, $physfolder, $physname); + my @filesegment = &FindPhysnameFile($physdir, $physfolder, $physname); - print "physdir: \"$filesegment[0]\", physfolder: \"$filesegment[1]\" physname: \"$filesegment[2]\"\n" if $gCfg{debug}; + print "physdir: \"$filesegment[0]\", physfolder: \"$filesegment[1]\" physname: \"$filesegment[2]\"\n" if $gCfg{debug}; if (!defined $filesegment[0] || !defined $filesegment[1] || !defined $filesegment[2]) { @@ -625,21 +626,31 @@ ROW: $itempaths = $handler->{itempaths}; + # In cases of a corrupted share source, the handler may change the + # action from 'SHARE' to 'ADD' + $row->{actiontype} = $handler->{action}; + if (!defined $itempaths) { + # Couldn't determine name of item &ThrowWarning($handler->{errmsg}) if $handler->{errmsg}; - next ROW; - } - # In cases of a corrupted share source, the handler may change the - # action from 'SHARE' to 'ADD' - $row->{actiontype} = $handler->{action}; + # If we were adding or modifying a file, commit it to lost+found; + # otherwise give up on it + if ($row->{itemtype} == 2 && ($row->{actiontype} eq 'ADD' || + $row->{actiontype} eq 'COMMIT')) { + + $itempaths = [undef]; + } else { + next ROW; + } + } # May contain add'l info for the action depending on type: # RENAME: the new name (without path) # SHARE: the 
source path which was shared # MOVE: the new path - # PIN: the version that was pinned + # PIN: the version that was pinned $row->{info} = $handler->{info}; $allitempaths = join("\t", @$itempaths); @@ -690,9 +701,9 @@ sub CreateSvnDumpfile { $sql = <<"EOSQL"; SELECT * FROM - VssAction -WHERE action_id IN - (SELECT action_id FROM SvnRevisionVssAction WHERE revision_id = ?) + VssAction +WHERE action_id IN + (SELECT action_id FROM SvnRevisionVssAction WHERE revision_id = ?) ORDER BY action_id EOSQL @@ -702,9 +713,9 @@ EOSQL REVISION: while(defined($row = $sth->fetchrow_hashref() )) { - + my $t0 = new Benchmark; - + $revision = $row->{revision_id}; $dumpfile->begin_revision($row); @@ -738,8 +749,7 @@ ACTION: my @err = @{ $dumpfile->{errors} }; if (scalar @err > 0) { - print "\nERRORS during dumpfile creation:\n "; - print join("\n ", @err); + map { &ThrowWarning($_) } @err; } $dumpfile->finish(); @@ -756,7 +766,7 @@ sub ExportVssPhysFile { $physname =~ m/^((.).)/; my $exportdir = "$gCfg{vssdata}/$1"; - my @filesegment = FindPhysnameFile("$gCfg{vssdir}/data", $2, $physname); + my @filesegment = &FindPhysnameFile("$gCfg{vssdir}/data", $2, $physname); if (!defined $filesegment[0] || !defined $filesegment[1] || !defined $filesegment[2]) { # physical file doesn't exist; it must have been destroyed later @@ -782,11 +792,11 @@ sub ExportVssPhysFile { # alpha 1 version behavoir. if (! defined $version) { &ThrowWarning("'$physname': no version specified for retrieval"); - + # fall through and try with version 1. $version = 1; } - + if (! 
-e "$exportdir/$physname.$version" ) { &DoSsCmd("get -b -v$version --force-overwrite \"$physpath\" $exportdir/$physname"); } @@ -825,6 +835,48 @@ EOTXT ############################################################################### sub ShowSummary { + if (keys(%gErr) || $gCfg{resume}) { + print <<"EOTXT"; +============================================================================= + ERROR SUMMARY + +EOTXT + + if($gCfg{resume}) { + print <<"EOTXT"; +**NOTICE** Because this run was resumed from a previous run, this may be only +a partial list; other errors may have been reported during previous run. + +EOTXT + } + + foreach my $task (@{ $gCfg{errortasks} }) { + print "\n$task:\n "; + print join("\n ", @{ $gErr{$task} }),"\n"; + } + } + + print <<"EOTXT"; +============================================================================= + END OF CONVERSION + +The VSS to SVN conversion is complete. You should now use the "svnadmin load" +command to load the generated dumpfile '$gCfg{dumpfile}'. The "svnadmin" +utility is provided as part of the Subversion command-line toolset; use a +command such as the following: + svnadmin load < "$gCfg{dumpfile}" + +You may need to precede this with "svnadmin create " if you have not +yet created a repository. Type "svnadmin help " for more information on +"create" and/or "load". + +If any errors occurred during the conversion, they are summarized above. 
+ +For more information on the vss2svn project, see: +http://www.pumacode.org/projects/vss2svn/ + +EOTXT + my $starttime = ctime($^T); chomp $starttime; my $endtime = ctime(time); @@ -844,16 +896,66 @@ sub ShowSummary { $elapsed = sprintf("%2.2i:%2.2i:%2.2i", $hours, $mins, $secs); } + my($actions, $revisions, $mintime, $maxtime) = &GetStats(); + print <<"EOTXT"; -Started at : $starttime -Ended at : $endtime -Elapsed time : $elapsed (H:M:S) +Started at : $starttime +Ended at : $endtime +Elapsed time : $elapsed (H:M:S) + +VSS Actions read : $actions +SVN Revisions converted : $revisions +Date range (YYYY/MM/DD) : $mintime to $maxtime EOTXT } # End ShowSummary ############################################################################### +# GetStats +############################################################################### +sub GetStats { + my($sql, $actions, $revisions, $mintime, $maxtime); + + $sql = <<"EOSQL"; +SELECT + COUNT(*) +FROM + VssAction +EOSQL + + ($actions) = $gCfg{dbh}->selectrow_array($sql); + + $sql = <<"EOSQL"; +SELECT + COUNT(*) +FROM + SvnRevision +EOSQL + + ($revisions) = $gCfg{dbh}->selectrow_array($sql); + + $sql = <<"EOSQL"; +SELECT + MIN(timestamp), MAX(timestamp) +FROM + PhysicalAction +EOSQL + + ($mintime, $maxtime) = $gCfg{dbh}->selectrow_array($sql); + + foreach($mintime, $maxtime) { + $_ = &Vss2Svn::Dumpfile::SvnTimestamp($_); + s:T.*::; + s:-:/:g; + } + + # initial creation of the repo wasn't considered an action or revision + return($actions - 1, $revisions - 1, $mintime, $maxtime); + +} # End GetStats + +############################################################################### # DoSsCmd ############################################################################### sub DoSsCmd { @@ -919,7 +1021,14 @@ sub ThrowWarning { warn "ERROR -- $msg\n"; - push @gErr, $msg; + my $task = $gCfg{task}; + + if(!defined $gErr{$task}) { + $gErr{$task} = []; + push @{ $gCfg{errortasks} }, $task; + } + + push @{ $gErr{$task} }, $msg; } # End 
ThrowWarning @@ -1304,6 +1413,10 @@ sub Initialize { $gCfg{starttime} = scalar localtime($^T); + $gCfg{junkdir} = '/lost+found'; + + $gCfg{errortasks} = []; + ### Don't go past here if resuming a previous run ### if ($gCfg{resume}) { return 1; @@ -1312,11 +1425,6 @@ sub Initialize { rmtree($gCfg{vssdata}) if (-e $gCfg{vssdata}); mkdir $gCfg{vssdata}; - #foreach my $check (qw(svnurl)) { - # &GiveHelp("ERROR: missing required parameter $check") - # unless defined $gCfg{$check}; - #} - $gCfg{ssphys} ||= 'SSPHYS.exe'; $gCfg{svn} ||= 'SVN.exe'; @@ -1351,7 +1459,7 @@ OPTIONAL PARAMETERS: --revtimerange : specify the difference between two ss actions that are treated as one subversion revision; default is 3600 seconds (== 1hour) - + --resume : Resume a failed or aborted previous run --task : specify the task to resume; task is one of the following INIT, LOADVSSNAMES, FINDDBFILES, GETPHYSHIST, @@ -1373,7 +1481,7 @@ EOTXT # RollBack is the item view on the activity and BranchFile is the parent side # ==> map RollBack to BRANCH, so that we can join the two actions in the # MergeParentData step - + __DATA__ CreatedProject 1 ADD AddedProject 1 ADD -- 2.11.4.GIT