Add patch from Richard Hughes to fix infinite recursion issue when a former
[vss2svn.git] / script / vss2svn.pl
blob3930e79e0fecd4b38dac39c9351c5318713fefd1
1 #!/usr/bin/perl
3 use warnings;
4 use strict;
6 use Getopt::Long;
7 use DBI;
8 use DBD::SQLite2;
9 use XML::Simple;
10 use File::Find;
11 use File::Path;
12 use Time::CTime;
13 use Data::Dumper;
14 use Benchmark ':hireswallclock';
16 use lib '.';
17 use Vss2Svn::ActionHandler;
18 use Vss2Svn::DataCache;
19 use Vss2Svn::SvnRevHandler;
20 use Vss2Svn::Dumpfile;
22 require Encode;
# Script-wide state shared by the subroutines in this file.
our(%gCfg, %gSth, %gErr, %gFh, $gSysOut, %gActionType, %gNameLookup, %gId);

# The Subversion keyword below is expanded on checkout; the substitution keeps
# only the revision digits found between the two '$' signs.
our $VERSION = '0.11.0-nightly.$LastChangedRevision$';
$VERSION =~ s/\$.*?(\d+).*\$/$1/;

# Main program: set up, run every conversion task, report, and clean up.
&Initialize();
&ConnectDatabase();

&SetupGlobals();
&ShowHeader();

&RunConversion();

&ShowSummary();
&DisconnectDatabase();
40 ###############################################################################
41 # RunConversion
42 ###############################################################################
sub RunConversion {

    # Table of conversion tasks, keyed by task name. Each entry names the
    # handler to run and the task that follows it. Because the current task
    # is kept in $gCfg{task}, a failed migration can be restarted at the
    # task where it stopped.
    my %joblist =
        (
            INIT            => {handler => sub{ 1; },
                                next    => 'LOADVSSNAMES'},

            # Load the "real" names associated with the stored "short" names
            LOADVSSNAMES    => {handler => \&LoadVssNames,
                                next    => 'FINDDBFILES'},

            # Add a stub entry into the Physical table for each physical
            # file in the VSS DB
            FINDDBFILES     => {handler => \&FindPhysDbFiles,
                                next    => 'GETPHYSHIST'},

            # Load the history of what happened to the physical files. This
            # only gets us halfway there because we don't know what the real
            # filenames are yet
            GETPHYSHIST     => {handler => \&GetPhysVssHistory,
                                next    => 'MERGEPARENTDATA'},

            # Merge data from parent records into child records where possible
            MERGEPARENTDATA => {handler => \&MergeParentData,
                                next    => 'MERGEMOVEDATA'},

            # Merge data from move actions
            MERGEMOVEDATA   => {handler => \&MergeMoveData,
                                next    => 'REMOVETMPCHECKIN'},

            # Remove temporary check ins
            REMOVETMPCHECKIN => {handler => \&RemoveTemporaryCheckIns,
                                 next    => 'MERGEUNPINPIN'},

            # Remove unnecessary Unpin/pin activities
            MERGEUNPINPIN   => {handler => \&MergeUnpinPinData,
                                next    => 'BUILDCOMMENTS'},

            # Rebuild possible missing comments
            BUILDCOMMENTS   => {handler => \&BuildComments,
                                next    => 'BUILDACTIONHIST'},

            # Take the history of physical actions and convert them to VSS
            # file actions
            BUILDACTIONHIST => {handler => \&BuildVssActionHistory,
                                next    => 'IMPORTSVN'},

            # Create a dumpfile or import to repository
            IMPORTSVN       => {handler => \&ImportToSvn,
                                next    => 'DONE'},
        );

    # Run tasks until the chain reaches the 'DONE' sentinel.
    while ($gCfg{task} ne 'DONE') {
        my $info = $joblist{ $gCfg{task} }
            or die "FATAL ERROR: Unknown task '$gCfg{task}'\n";

        print "TASK: $gCfg{task}\n";
        push @{ $gCfg{tasks} }, $gCfg{task};

        if ($gCfg{prompt}) {
            print "Press ENTER to continue...\n";
            my $temp = <STDIN>;

            # <STDIN> returns undef at end-of-input; only a typed "quit"
            # aborts the run.
            die if defined $temp && $temp =~ m/^quit/i;
        }

        # Called without parentheses, so the handler sees the current @_.
        &{ $info->{handler} };
        &SetSystemTask( $info->{next} );
    }

}  #  End RunConversion
118 ###############################################################################
119 # LoadVssNames
120 ###############################################################################
sub LoadVssNames {
    # Run the "info" command against names.dat, parse its XML output (left in
    # $gSysOut) and store the full item names in the 'NameLookup' cache,
    # keyed by the entry offset.
    &DoSsCmd("info -e$gCfg{encoding} \"$gCfg{vssdatadir}/names.dat\"");

    my $parser = XML::Simple->new(KeyAttr => [],
                                  ForceArray => [qw(NameCacheEntry Entry)],);

    my $tree = $parser->XMLin($gSysOut);

    # Nothing to load when the file holds no name cache entries
    my $entries = $tree->{NameCacheEntry};
    return 1 unless $entries;

    my $cache = Vss2Svn::DataCache->new('NameLookup')
        || &ThrowError("Could not create cache 'NameLookup'");

    foreach my $entry (@$entries) {
        my $offset = $entry->{offset};

        # The cache can contain 4 different entries:
        #   id=1:  abbreviated DOS 8.3 name for file items
        #   id=2:  full name for file items
        #   id=3:  abbreviated 27.3 name for file items
        #   id=10: full name for project items
        # Ids 1 and 3 only provide abbreviated names for different
        # scenarios, so only id=2 (files) and id=10 (projects) are kept.
        foreach my $name (@{$entry->{Entry}}) {
            next unless $name->{id} == 10 || $name->{id} == 2;

            $cache->add($offset, $name->{content});
        }
    }

    $cache->commit();
}  #  End LoadVssNames
160 ###############################################################################
161 # FindPhysDbFiles
162 ###############################################################################
sub FindPhysDbFiles {

    # Walk the VSS data directory and let FoundSsFile record every physical
    # file it recognizes in the 'Physical' cache.
    my $physcache = Vss2Svn::DataCache->new('Physical')
        || &ThrowError("Could not create cache 'Physical'");

    my $wanted = sub { &FoundSsFile($physcache) };
    find($wanted, $gCfg{vssdatadir});

    $physcache->commit();

}  #  End FindPhysDbFiles
174 ###############################################################################
175 # FoundSsFile
176 ###############################################################################
sub FoundSsFile {
    my($cache) = @_;

    # File::Find callback: $File::Find::name holds the path being visited.
    my $found = $File::Find::name;
    return if -d $found;

    my $root = quotemeta($gCfg{vssdatadir});

    # Physical files live one single-character folder below the data
    # directory and have a name of exactly eight letters; the name is stored
    # in upper case.
    if ($found =~ m{^$root/./([a-z]{8})$}i) {
        $cache->add(uc($1));
    }

}  #  End FoundSsFile
191 ###############################################################################
192 # GetPhysVssHistory
193 ###############################################################################
sub GetPhysVssHistory {
    # For every row of the Physical table, read the history of that physical
    # file and store it in the 'PhysicalAction' cache.

    &LoadNameLookup();
    my $cache = Vss2Svn::DataCache->new('PhysicalAction', 1)
        || &ThrowError("Could not create cache 'PhysicalAction'");

    my $sth = $gCfg{dbh}->prepare("SELECT * FROM Physical");
    $sth->execute();

    my $xs = XML::Simple->new(ForceArray => [qw(Version)]);

    # All physical files live below the same data directory
    my $physdir = "$gCfg{vssdir}/data";

    while (defined(my $row = $sth->fetchrow_hashref() )) {
        my $physname = $row->{physname};

        # The containing folder is named after the file's first character
        my $physfolder = substr($physname, 0, 1);

        &GetVssPhysInfo($cache, $physdir, $physfolder, $physname, $xs);
    }

    $cache->commit();

}  #  End GetPhysVssHistory
220 ###############################################################################
221 # FindPhysnameFile
222 ###############################################################################
sub FindPhysnameFile {
    my($physdir, $physfolder, $physname) = @_;

    # Locate the physical file on disk, trying several upper/lower case
    # spellings of the folder and file name. Returns the
    # (directory, folder, filename) triple of the first spelling that exists
    # as a plain file, or three undefs when none does.

    my $lcphysfolder = lc($physfolder);
    my $lcphysname   = lc($physname);

    # Candidates in the order they are tried
    my @candidates = (
        [$physfolder,   $physname],     # exactly as given
        [$lcphysfolder, $lcphysname],   # lowercase folder/filename
        [$lcphysfolder, $physname],     # lowercase folder/original filename
        [$physfolder,   $lcphysname],   # haven't seen this one, but try it...
    );

    foreach my $candidate (@candidates) {
        my($folder, $file) = @$candidate;

        return ($physdir, $folder, $file) if -f "$physdir/$folder/$file";
    }

    # no idea what to return...
    return (undef, undef, undef);
}
244 ###############################################################################
245 # GetVssPhysInfo
246 ###############################################################################
sub GetVssPhysInfo {
    my($cache, $physdir, $physfolder, $physname, $xs) = @_;

    # Read the "info" output for one physical file, work out which project
    # (if any) is its parent, and hand the parsed versions to
    # GetVssItemVersions. Emits a warning and returns early when the file
    # cannot be found or is neither a project nor a file.

    my @filesegment = &FindPhysnameFile($physdir, $physfolder, $physname);

    if (grep { !defined } @filesegment[0 .. 2]) {
        # physical file doesn't exist; it must have been destroyed later
        &ThrowWarning("Can't retrieve info from physical file "
                    . "'$physname'; it was either destroyed or corrupted");
        return;
    }

    # Only report the location once it is known to exist, so that no
    # undefined values get interpolated into the debug output.
    print "physdir: \"$filesegment[0]\", physfolder: \"$filesegment[1]\" physname: \"$filesegment[2]\"\n" if $gCfg{debug};

    &DoSsCmd("info -e$gCfg{encoding} \"$filesegment[0]/$filesegment[1]/$filesegment[2]\"");

    my $xml = $xs->XMLin($gSysOut);
    my $parentphys;

    my $iteminfo = $xml->{ItemInfo};

    # The item type must be present and must be a plain value
    if (!defined($iteminfo) || !defined($iteminfo->{Type}) ||
        ref($iteminfo->{Type})) {

        &ThrowWarning("Can't handle file '$physname'; not a project or file\n");
        return;
    }

    if ($iteminfo->{Type} == 1) {
        # Type 1 is treated as a project; 'AAAAAAAA' gets an empty parent
        $parentphys = (uc($physname) eq 'AAAAAAAA')?
            '' : &GetProjectParent($xml);
    } elsif ($iteminfo->{Type} == 2) {
        # Type 2 is treated as a file; no parent is recorded here
        $parentphys = undef;
    } else {
        &ThrowWarning("Can't handle file '$physname'; not a project or file\n");
        return;
    }

    &GetVssItemVersions($cache, $physname, $parentphys, $xml);

}  #  End GetVssPhysInfo
290 ###############################################################################
291 # GetProjectParent
292 ###############################################################################
sub GetProjectParent {
    my($xml) = @_;

    # Return the ParentPhys value stored under ItemInfo, or undef when it is
    # missing or false.
    no warnings 'uninitialized';

    my $parentphys = $xml->{ItemInfo}->{ParentPhys};

    return $parentphys ? $parentphys : undef;

}  #  End GetProjectParent
301 ###############################################################################
302 # GetVssItemVersions
303 ###############################################################################
sub GetVssItemVersions {
    my($cache, $physname, $parentphys, $xml) = @_;

    # Turn every <Version> element of one physical file into a row of the
    # given cache. $parentphys is the parent passed in by the caller; it is
    # overwritten while the versions are processed (see below) and the new
    # value carries over to later versions.
    #
    # Returns 0 when the XML contains no versions.

    return 0 unless defined $xml->{Version};

    my $last_timestamp = 0;

VERSION:
    foreach my $version (@{ $xml->{Version} }) {
        my $action    = $version->{Action};
        my $name      = $action->{SSName};
        my $tphysname = $action->{Physical} || $physname;
        my $user      = $version->{UserName};

        my $itemname = &GetItemName($name);

        my $actionid = $action->{ActionId};
        my $typeinfo = $gActionType{$actionid};

        if (!$typeinfo) {
            &ThrowWarning ("'$physname': Unknown action '$actionid'\n");
            next VERSION;
        }

        # check the linear order of timestamps. It could be done better, for
        # example checking the next version and calculate the middle time stamp
        # but regardless of what we do here, the result is erroneous, since it
        # will mess up the labeling.
        my $timestamp = $version->{Date};
        if ($timestamp < $last_timestamp) {
            $timestamp = $last_timestamp + 1;
            &ThrowWarning ("'$physname': wrong timestamp at version "
                           . "'$version->{VersionNumber}'; setting timestamp to "
                           . "'$timestamp'");
        }
        $last_timestamp = $timestamp;

        my $itemtype   = $typeinfo->{type};
        my $actiontype = $typeinfo->{action};

        next VERSION if $actiontype eq 'IGNORE';

        my $comment    = undef;
        my $is_binary  = 0;
        my $info       = undef;
        my $parentdata = 0;
        my $priority   = 5;
        my $label      = undef;

        if ($version->{Comment} && !ref($version->{Comment})) {
            $comment = $version->{Comment};
        }

        # In case of Label the itemtype is the type of the item currently
        # under investigation
        if ($actiontype eq 'LABEL') {
            my $iteminfo = $xml->{ItemInfo};
            $itemtype = $iteminfo->{Type};
        }

        # we can have label actions and labels attached to versions
        if (defined $action->{Label} && !ref($action->{Label})) {
            $label = $action->{Label};

            # append the label comment to a possible version comment
            if ($action->{LabelComment} && !ref($action->{LabelComment})) {
                if (defined $comment) {
                    print "Merging LabelComment and Comment for "
                        . "'$tphysname;$version->{VersionNumber}'\n"; # if $gCfg{verbose};
                    $comment .= "\n";
                }

                $comment .= $action->{LabelComment};
            }
        }

        # Trim leading and trailing whitespace from the comment
        if (defined($comment)) {
            $comment =~ s/^\s+//s;
            $comment =~ s/\s+$//s;
        }

        if ($itemtype == 1 && uc($physname) eq 'AAAAAAAA'
            && ref($tphysname)) {

            # $parentphys is deliberately left untouched in this branch
            $tphysname = $physname;
            $itemname = '';
        } elsif ($physname ne $tphysname) {
            # If version's physical name and file's physical name are different,
            # this is a project describing an action on a child item. Most of
            # the time, this very same data will be in the child's physical
            # file and with more detail (such as check-in comment).

            # However, in some cases (such as renames, or when the child's
            # physical file was later purged), this is the only place we'll
            # have the data; also, sometimes the child record doesn't even
            # have enough information about itself (such as which project it
            # was created in and which project(s) it's shared in).

            # So, for a parent record describing a child action, we'll set a
            # flag, then combine them in the next phase.

            $parentdata = 1;

            # OK, since we're describing an action in the child, the parent is
            # actually this (project) item

            $parentphys = $physname;
        } else {
            $parentphys = undef;
        }

        if ($itemtype == 1) {
            $itemname .= '/';
        } elsif (defined($xml->{ItemInfo}) &&
            defined($xml->{ItemInfo}->{Binary}) &&
            $xml->{ItemInfo}->{Binary}) {

            $is_binary = 1;
        }

        if ($actiontype eq 'RENAME') {
            # if a rename, we store the new name in the action's 'info' field

            $info = &GetItemName($action->{NewSSName});

            if ($itemtype == 1) {
                $info .= '/';
            }
        } elsif ($actiontype eq 'BRANCH') {
            $info = $action->{Parent};
        }

        my $vernum = ($parentdata)? undef : $version->{VersionNumber};

        # since there is no corresponding client action for PIN, we need to
        # enter the concrete version number here manually
        # In a share action the pinnedToVersion attribute can also be set
        $vernum = $action->{PinnedToVersion} if (defined $action->{PinnedToVersion});

        # for unpin actions also remember the unpinned version
        $info = $action->{UnpinnedFromVersion} if (defined $action->{UnpinnedFromVersion});

        $priority -= 4 if $actiontype eq 'ADD'; # Adds are always first
        $priority -= 3 if $actiontype eq 'SHARE';
        $priority -= 3 if $actiontype eq 'PIN';
        $priority -= 2 if $actiontype eq 'BRANCH';

        # store the reversed physname as a sortkey; a bit wasteful but makes
        # debugging easier for the time being...
        my $sortkey = reverse($tphysname);

        $cache->add($tphysname, $vernum, $parentphys, $actiontype, $itemname,
                    $itemtype, $timestamp, $user, $is_binary, $info, $priority,
                    $sortkey, $parentdata, $label, $comment);

        # Handle version labels as a secondary action for the same version
        # version labels and label action use the same location to store the
        # label. Therefore it is not possible to assign a version label to
        # version where the actiontype was LABEL. But ssphys will report the
        # same label twice. Therefore filter the Labeling versions here.
        if (defined $version->{Label} && !ref($version->{Label})
            && $actiontype ne 'LABEL') {
            my $labelComment;

            if ($version->{LabelComment} && !ref($version->{LabelComment})) {
                $labelComment = $version->{LabelComment};
            }
            else {
                # $vernum is undef for parent-data rows without a pinned
                # version; show it as an empty string instead of
                # interpolating undef.
                my $shown_version = defined $vernum ? $vernum : '';
                $labelComment = "assigned label '$version->{Label}' to version $shown_version of physical file '$tphysname'";
            }
            $cache->add($tphysname, $vernum, $parentphys, 'LABEL', $itemname,
                        $itemtype, $timestamp, $user, $is_binary, $info, 5,
                        $sortkey, $parentdata, $version->{Label}, $labelComment);
        }
    }

}  #  End GetVssItemVersions
491 ###############################################################################
492 # GetItemName
493 ###############################################################################
sub GetItemName {
    my($nameelem) = @_;

    # Return the name stored in the element's 'content', unless the element
    # has an 'offset' for which %gNameLookup holds a name; in that case the
    # looked-up name wins.
    my $itemname = $nameelem->{content};

    my $offset = $nameelem->{offset};
    return $itemname unless defined($offset);

    # see if we have a better name in the cache
    my $cachename = $gNameLookup{ $offset };
    return $itemname unless defined($cachename);

    print "Changing name of '$itemname' to '$cachename' from "
        . "name cache\n" if $gCfg{debug};

    return $cachename;

}  #  End GetItemName
514 ###############################################################################
515 # LoadNameLookup
516 ###############################################################################
sub LoadNameLookup {
    # Fill %gNameLookup (offset => UTF-8 decoded name) from the NameLookup
    # table.
    my $lookup = $gCfg{dbh}->prepare('SELECT offset, name FROM NameLookup');
    $lookup->execute();

    while (defined(my $rec = $lookup->fetchrow_hashref() )) {
        my $decoded = Encode::decode_utf8( $rec->{name} );
        $gNameLookup{ $rec->{offset} } = $decoded;
    }
}  #  End LoadNameLookup
528 ###############################################################################
529 # MergeParentData
530 ###############################################################################
sub MergeParentData {
    # VSS has a funny way of not placing enough information to rebuild history
    # in one data file; for example, renames are stored in the parent project
    # rather than in that item's data file. Also, it's sometimes impossible to
    # tell from a child record which was eventually shared to multiple folders,
    # which folder it was originally created in.

    # So, at this stage we look for any parent records which described child
    # actions, then update those records with data from the child objects. We
    # then delete the separate child objects to avoid duplication.

    my $sth = $gCfg{dbh}->prepare('SELECT * FROM PhysicalAction '
                                  . 'WHERE parentdata > 0');
    $sth->execute();

    # need to pull in all recs at once, since we'll be updating/deleting data
    my $parentrecs = $sth->fetchall_arrayref( {} );

    my @delchild = ();

    foreach my $parentrec (@$parentrecs) {
        my $childrecs = &GetChildRecs($parentrec);

        if (scalar @$childrecs > 1) {
            &ThrowWarning("Multiple child recs for parent rec "
                          . "'$parentrec->{action_id}'");
        }

        # Called for its effect: GetPathDepth stores the computed depth via
        # UpdateDepth. The returned value is not needed here.
        &GetPathDepth($parentrec);

        foreach my $childrec (@$childrecs) {
            &UpdateParentRec($parentrec, $childrec);
            push(@delchild, $childrec->{action_id});
        }
    }

    # Delete the merged child records only after all parents were handled
    foreach my $childid (@delchild) {
        &DeleteChildRec($childid);
    }

}  #  End MergeParentData
577 ###############################################################################
578 # GetPathDepth
579 ###############################################################################
sub GetPathDepth {
    my($row) = @_;

    # Work out the depth of a parent-data row: one more than the greatest
    # depth among the matching rows of its parent. The result is stored in
    # the row's 'parentdata' column (via UpdateDepth) and returned.

    # If we've already worked out the depth of this row, return it immediately
    return $row->{parentdata} if $row->{parentdata} > 1;

    # Get the row(s) corresponding to the parent(s) of this row
    my $sql = <<"EOSQL";
SELECT
    *
FROM
    PhysicalAction
WHERE
    parentdata > 0
    AND physname = ?
    AND actiontype = ?
    AND timestamp <= ?
EOSQL

    my $sth = $gCfg{dbh}->prepare($sql);
    $sth->execute( $row->{parentphys}, $row->{actiontype}, $row->{timestamp} );

    # Find the maximum depth among the parents (0 when there are none)
    my $maxParentDepth = 0;
    foreach my $parent (@{ $sth->fetchall_arrayref( {} ) }) {
        my $parentDepth = &GetPathDepth($parent);
        $maxParentDepth = $parentDepth if $parentDepth > $maxParentDepth;
    }

    # Depth of this path becomes one more than the maximum parent depth
    my $depth = $maxParentDepth + 1;

    # Update the row for this record
    &UpdateDepth($row, $depth);

    return $depth;
}  #  End GetPathDepth
624 ###############################################################################
625 # UpdateDepth
626 ###############################################################################
sub UpdateDepth {
    my($row, $depth) = @_;

    # Store $depth in the 'parentdata' column of the PhysicalAction row
    # identified by the given row's action_id.
    my $update = $gCfg{dbh}->prepare(<<"EOSQL");
UPDATE
    PhysicalAction
SET
    parentdata = ?
WHERE
    action_id = ?
EOSQL

    $update->execute( $depth, $row->{action_id} );

}  #  End UpdateDepth
644 ###############################################################################
645 # GetChildRecs
646 ###############################################################################
sub GetChildRecs {
    my($parentrec, $parentdata) = @_;

    # Here we need to find any child rows which give us additional info on the
    # parent rows. There's no definitive way to find matching rows, but joining
    # on physname, actiontype, timestamp, and author gets us close. The problem
    # is that the "two" actions may not have happened in the exact same second,
    # so we need to also look for any that are some time apart and hope
    # we don't get the wrong row.
    #
    # $parentdata selects the 'parentdata' value the rows must have; it
    # defaults to 0. The rows come back as an array ref of hash refs, ordered
    # by their distance in time from the parent record.

    $parentdata = 0 unless defined $parentdata;

    my $select = $gCfg{dbh}->prepare(<<"EOSQL");
SELECT
    *
FROM
    PhysicalAction
WHERE
    parentdata = ?
    AND physname = ?
    AND actiontype = ?
    AND author = ?
ORDER BY
    ABS(? - timestamp)
EOSQL

    $select->execute(
        $parentdata,
        $parentrec->{physname},
        $parentrec->{actiontype},
        $parentrec->{author},
        $parentrec->{timestamp},
    );

    return $select->fetchall_arrayref( {} );
}  #  End GetChildRecs
679 ###############################################################################
680 # UpdateParentRec
681 ###############################################################################
sub UpdateParentRec {
    my($row, $child) = @_;

    # The child record has the "correct" version number (relative to the child
    # and not the parent), as well as the comment info and whether the file is
    # binary

    # Parent comment first, child comment second; either may be undefined
    my $comment = do {
        no warnings 'uninitialized';
        join("\n", $row->{comment}, $child->{comment});
    };
    $comment =~ s/^\n+//;
    $comment =~ s/\n+$//;

    my $update = $gCfg{dbh}->prepare(<<"EOSQL");
UPDATE
    PhysicalAction
SET
    version = ?,
    is_binary = ?,
    comment = ?
WHERE
    action_id = ?
EOSQL

    $update->execute( $child->{version}, $child->{is_binary}, $comment,
                      $row->{action_id} );

}  #  End UpdateParentRec
715 ###############################################################################
716 # MergeMoveData
717 ###############################################################################
sub MergeMoveData {
    # Similar to MergeParentData, MergeMoveData combines the source and
    # target move actions into one move action. Since both items are parents,
    # MergeParentData cannot deal with this specific problem.

    my $dbh = $gCfg{dbh};

    my $sth = $dbh->prepare('SELECT * FROM PhysicalAction '
                            . "WHERE actiontype = 'MOVE_FROM'");
    $sth->execute();

    # need to pull in all recs at once, since we'll be updating/deleting data
    my $rows = $sth->fetchall_arrayref( {} );

    # Both loops below issue the same UPDATE, so prepare it only once
    my $update = $dbh->prepare(<<"EOSQL");
UPDATE
    PhysicalAction
SET
    actiontype = 'MOVE',
    parentphys = ?,
    info = ?
WHERE
    action_id = ?
EOSQL

    foreach my $row (@$rows) {
        # Look for the matching MOVE_TO record among the parent-data rows
        $row->{actiontype} = 'MOVE_TO';
        my $childrecs = &GetChildRecs($row, 1);

        my $source = undef;
        my $target = $row->{parentphys};

        if (scalar @$childrecs > 1) {
            &ThrowWarning("Multiple child recs for parent MOVE rec "
                          . "'$row->{action_id}'");
        }

        if (@$childrecs) {
            # GetChildRecs orders by distance in time, so the first record
            # is the closest one
            my $closest = $childrecs->[0];

            # only merge MOVE records that have the same timestamp
            if ($row->{timestamp} == $closest->{timestamp}) {
                $source = $closest->{parentphys};
                &DeleteChildRec($closest->{action_id});
            }
        }

        $update->execute( $target, $source, $row->{action_id} );
    }

    # change all remaining MOVE_TO records into MOVE records and swap the src and target
    $sth = $dbh->prepare('SELECT * FROM PhysicalAction '
                         . "WHERE actiontype = 'MOVE_TO'");
    $sth->execute();
    $rows = $sth->fetchall_arrayref( {} );

    foreach my $row (@$rows) {
        $update->execute( $row->{info}, $row->{parentphys}, $row->{action_id} );
    }

}  #  End MergeMoveData
791 ###############################################################################
792 # RemoveTemporaryCheckIns
793 # remove temporary checkins that were created to detect MS VSS capabilities
794 ###############################################################################
sub RemoveTemporaryCheckIns {
    # Find every file whose ADD action carries the marker comment below and
    # delete all PhysicalAction rows recorded for that physical file.

    my $dbh = $gCfg{dbh};

    my $marker_sql = join('',
        'SELECT * FROM PhysicalAction ',
        'WHERE comment = "Temporary file created by Visual Studio .NET to detect Microsoft Visual SourceSafe capabilities."',
        ' AND actiontype = "ADD"',
        ' AND itemtype = 2');        # only delete files, not projects

    my $sth = $dbh->prepare($marker_sql);
    $sth->execute();

    # need to pull in all recs at once, since we'll be updating/deleting data
    my $tempadds = $sth->fetchall_arrayref( {} );

    my $lookup = $dbh->prepare('SELECT * FROM PhysicalAction WHERE physname = ?');

    foreach my $tempadd (@$tempadds) {
        $lookup->execute( $tempadd->{physname} );

        # need to pull in all recs at once, since we'll be updating/deleting data
        my $recs = $lookup->fetchall_arrayref( {} );

        foreach my $rec (@$recs) {
            print "Remove action_id $rec->{action_id}, $rec->{physname}, $rec->{actiontype}, $rec->{itemname}\n";
            print " $rec->{comment}\n" if defined ($rec->{comment});
            &DeleteChildRec($rec->{action_id});
        }
    }
}
827 ###############################################################################
828 # MergeUnpinPinData
829 ###############################################################################
sub MergeUnpinPinData {
    # Remove UNPIN actions that are directly followed by a PIN of the same
    # item in the same parent. An UNPIN is stored as a 'PIN' row without a
    # version number. If the UNPIN recorded the version it was unpinned from
    # (column 'info'), that value is copied to the matching PIN row before
    # the UNPIN row is deleted.

    my $sql = 'SELECT * FROM PhysicalAction ORDER BY timestamp ASC, '
            . 'itemtype ASC, priority ASC, parentdata ASC, sortkey ASC, action_id ASC';
    my $sth = $gCfg{dbh}->prepare($sql);
    $sth->execute();

    # need to pull in all recs at once, since we'll be updating/deleting data
    my $rows = $sth->fetchall_arrayref( {} );

    # Nothing to merge unless there are at least two rows
    return if !ref($rows) || @$rows < 2;

    my @delchild = ();
    my $copy_info;                  # prepared on first use
    my $last = $#$rows;

    for my $r (0 .. $last - 1) {
        my $row = $rows->[$r];

        next unless $row->{actiontype} eq 'PIN' && !defined $row->{version};  # UNPIN

        # Search the rows that follow for a matching pin action. The search
        # runs up to and including the last row, so that a PIN stored there
        # is not missed.
        for my $u ($r + 1 .. $last) {
            my $next_row = $rows->[$u];

            # if the next action is anything else than a pin stop the search
            last if $next_row->{actiontype} ne 'PIN';

            # parentphys may be undefined on either side; two undefined
            # values compare as equal
            no warnings 'uninitialized';

            next unless defined $next_row->{version}    # PIN
                && $row->{physname} eq $next_row->{physname}
                && $row->{parentphys} eq $next_row->{parentphys};

            print "found UNPIN/PIN combination for $row->{parentphys}/$row->{physname}"
                . "($row->{itemname}) @ ID $row->{action_id}\n" if $gCfg{verbose};

            # if we have a unpinFromVersion number copy this one to the PIN handler
            if (defined $row->{info}) {
                $copy_info ||= $gCfg{dbh}->prepare(
                    "UPDATE PhysicalAction SET info = ? WHERE action_id = ?");
                $copy_info->execute($row->{info}, $next_row->{action_id});
            }

            push (@delchild, $row->{action_id});
        }
    }

    foreach my $id (@delchild) {
        &DeleteChildRec($id);
    }

}  #  End MergeUnpinPinData
891 ###############################################################################
892 # BuildComments
893 ###############################################################################
sub BuildComments {
    # Give PIN/UNPIN actions on files that have no comment of their own a
    # comment assembled from the check-in comments of the versions they
    # span.

    my $dbh = $gCfg{dbh};

    my $sth = $dbh->prepare("SELECT * FROM PhysicalAction WHERE actiontype='PIN' "
                            . 'AND itemtype=2 ORDER BY physname ASC');
    $sth->execute();

    # need to pull in all recs at once, since we'll be updating/deleting data
    my $rows = $sth->fetchall_arrayref( {} );

    # String values are passed as bound parameters, so no manual quote
    # escaping is needed.
    my $update = $dbh->prepare('UPDATE PhysicalAction SET comment = ? WHERE action_id = ?');

    foreach my $row (@$rows) {

        # technically we have the following situations:
        # PIN only: we come from the younger version and PIN to an older one: the
        #     younger version is the current version at the timestamp of the PIN action
        # UNPIN only: we unpin from an older version to the current version, the
        #     timestamp of the action will again define the younger version
        # UNPIN/PIN with known UNPIN version: we merge from UNPIN version to PIN version
        # UNPIN/PIN with unknown UNPIN version: we are lost in this case and we
        #     can not distinguish this case from the PIN only case.

        my $pinned   = $row->{version};     # PIN version number
        my $unpinned = $row->{info};        # UNPIN version number

        # Numeric bounds are inlined as numbers (forced with "0 +") rather
        # than bound, so they reach the database as numeric literals exactly
        # as before.
        my($range, $order, $prefix);

        if (defined $pinned && !defined $unpinned) {
            # PIN only
            $range  = ' AND version>=' . (0 + $pinned)
                    . ' AND timestamp<=' . (0 + $row->{timestamp});
            $order  = 'DESC';
            $prefix = "reverted changes for: \n";
        }
        elsif (!defined $pinned && defined $unpinned) {
            # UNPIN only
            $range = ' AND timestamp<=' . (0 + $row->{timestamp})
                   . ' AND version>' . (0 + $unpinned);
            $order = 'ASC';
        }
        elsif (defined $pinned && defined $unpinned) {
            # UNPIN/PIN
            $range = ' AND version>' . (0 + $unpinned)
                   . ' AND version<=' . (0 + $pinned);

            if ($unpinned > $pinned) {
                $order  = 'DESC';
                $prefix = "reverted changes for: \n";
            }
            else {
                $order = 'ASC';
            }
        }

        # Neither version number is known: nothing to build from
        next if !defined $range;

        my $sth2 = $dbh->prepare('SELECT * FROM PhysicalAction'
                                 . ' WHERE physname = ?'
                                 . ' AND parentphys ISNULL'
                                 . ' AND itemtype=2'
                                 . $range
                                 . ' ORDER BY version ' . $order);
        $sth2->execute($row->{physname});

        my $comments = $sth2->fetchall_arrayref( {} );

        print "merging comments for $row->{physname}" if $gCfg{verbose};
        print " from $row->{info}" if ($gCfg{verbose} && defined $row->{info});
        print " to $row->{version}" if ($gCfg{verbose} && defined $row->{version});
        print "\n" if $gCfg{verbose};

        if ($gCfg{verbose}) {
            no warnings 'uninitialized';
            print " $_->{version}: $_->{comment}\n" foreach @$comments;
        }

        # Join the non-empty comments with one newline between them. The
        # result stays undefined when no version matched at all.
        my $comment;
        if (@$comments) {
            $comment = join("\n",
                            grep { defined($_) && length($_) }
                            map  { $_->{comment} } @$comments);
            $comment =~ s/^\n+//;
            $comment =~ s/\n+$//;
        }

        # Never overwrite a comment the action already has
        if (defined $comment && !defined $row->{comment}) {
            $comment = $prefix . $comment if defined $prefix;
            $update->execute($comment, $row->{action_id});
        }
    }

}  #  End BuildComments
994 ###############################################################################
995 # DeleteChildRec
996 ###############################################################################
sub DeleteChildRec {
    my($id) = @_;

    # Delete the PhysicalAction row that has the given action_id.
    my $delete = $gCfg{dbh}->prepare("DELETE FROM PhysicalAction WHERE action_id = ?");

    $delete->execute($id);
}  #  End DeleteChildRec
1006 ###############################################################################
1007 # BuildVssActionHistory
1008 ###############################################################################
sub BuildVssActionHistory {
    # Reads every PhysicalAction row in chronological order, lets
    # Vss2Svn::ActionHandler resolve it to item paths, and records the result
    # in the VssAction, SvnRevisionVssAction and Label caches.
    my $action_cache = Vss2Svn::DataCache->new('VssAction', 1)
        || ThrowError("Could not create cache 'VssAction'");

    my $join_cache = Vss2Svn::DataCache->new('SvnRevisionVssAction')
        || ThrowError("Could not create cache 'SvnRevisionVssAction'");

    my $label_cache = Vss2Svn::DataCache->new('Label')
        || ThrowError("Could not create cache 'Label'");

    # This will keep track of the current SVN revision, and increment it when
    # the author or comment changes, the timestamps span more than an hour
    # (by default), or the same physical file is affected twice
    my $svnrevs = Vss2Svn::SvnRevHandler->new()
        || ThrowError("Could not create SVN revision handler");
    $svnrevs->{verbose} = $gCfg{verbose};

    my $query = 'SELECT * FROM PhysicalAction ORDER BY timestamp ASC, '
              . 'itemtype ASC, priority ASC, parentdata ASC, sortkey ASC, action_id ASC';

    my $sth = $gCfg{dbh}->prepare($query);
    $sth->execute();

ROW:
    while (defined(my $row = $sth->fetchrow_hashref())) {
        my $action = $row->{actiontype};

        my $handler = Vss2Svn::ActionHandler->new($row);
        $handler->{verbose}  = $gCfg{verbose};
        $handler->{trunkdir} = $gCfg{trunkdir};
        my $physinfo = $handler->physinfo();

        if (defined($physinfo) && $physinfo->{type} != $row->{itemtype}) {
            ThrowWarning("Inconsistent item type for '$row->{physname}'; "
                . "'$row->{itemtype}' unexpected");
            next ROW;
        }

        # The text columns are decoded from UTF-8 before further processing.
        foreach my $column (qw(itemname info comment author label)) {
            $row->{$column} = Encode::decode_utf8( $row->{$column} );
        }

        # The handler's job is to keep track of physical-to-real name mappings
        # and return the full item paths corresponding to the physical item. In
        # case of a rename, it will return the old name, so we then do another
        # lookup on the new name.

        # Commits and renames can apply to multiple items if that item is
        # shared; since SVN has no notion of such shares, we keep track of
        # those ourselves and replicate the functionality using multiple
        # actions.
        unless ($handler->handle($action)) {
            ThrowWarning($handler->{errmsg}) if $handler->{errmsg};
            next ROW;
        }

        my $itempaths = $handler->{itempaths};

        # In cases of a corrupted share source, the handler may change the
        # action from 'SHARE' to 'ADD'
        $row->{actiontype} = $handler->{action};

        if (!defined $itempaths) {
            # Couldn't determine name of item
            ThrowWarning($handler->{errmsg}) if $handler->{errmsg};

            # If we were adding or modifying a file, commit it to lost+found;
            # otherwise give up on it
            my $is_file_write = $row->{itemtype} == 2
                && (   $row->{actiontype} eq 'ADD'
                    || $row->{actiontype} eq 'COMMIT');

            next ROW unless $is_file_write;

            $itempaths = [undef];
        }

        # We need to check for the next rev number after all paths that can
        # prematurely skip to the next row. Otherwise, we get an empty
        # revision.
        $svnrevs->check($row);

        # May contain add'l info for the action depending on type:
        # RENAME: the new name (without path)
        # SHARE:  the source path which was shared
        # MOVE:   the old path
        # PIN:    the path of the version that was pinned
        # LABEL:  the name of the label
        $row->{info} = $handler->{info};

        # The version may have changed
        $row->{version} = $handler->{version} if defined $handler->{version};

        # All paths of the item are stored tab-separated in a single column.
        $row->{itempaths} = join("\t", @$itempaths);

        $action_cache->add(@$row{ qw(parentphys physname version actiontype itempaths
                                     itemtype is_binary info) });
        $join_cache->add( $svnrevs->{revnum}, $action_cache->{pkey} );

        if (defined $row->{label}) {
            $label_cache->add(@$row{ qw(physname version label itempaths) });
        }
    }

    $action_cache->commit();
    $svnrevs->commit();
    $join_cache->commit();
    $label_cache->commit();

}  # End BuildVssActionHistory
1130 ###############################################################################
1131 # ImportToSvn
1132 ###############################################################################
sub ImportToSvn {
    # Only dumpfile creation is supported for now; a direct import into SVN
    # is not implemented. That could perhaps be added by making the
    # CreateSvnDumpfile logic more generic and swapping the
    # Vss2Svn::Dumpfile object for one that performs imports.
    return CreateSvnDumpfile(@_);
}  # End ImportToSvn
1142 ###############################################################################
1143 # CreateSvnDumpfile
1144 ###############################################################################
sub CreateSvnDumpfile {
    # Writes the SVN dumpfile named by $gCfg{dumpfile}: for every row of
    # SvnRevision (ascending revision_id) a revision is started, and each
    # VssAction linked to it is handed to Vss2Svn::Dumpfile::do_action. File
    # contents are exported first via ExportVssPhysFile. Errors collected by
    # the dumpfile object are reported through ThrowWarning at the end.
    # Returns the result of closing the dumpfile.
    my $file = $gCfg{dumpfile};

    # Three-argument open with a lexical handle, so that characters in the
    # user-supplied file name can never be taken as part of the open mode.
    open my $fh, '>', $file
        or ThrowError("Could not create dumpfile '$file': $!");

    my %exported = ();

    my $sth = $gCfg{dbh}->prepare(
        'SELECT * FROM SvnRevision ORDER BY revision_id ASC');
    $sth->execute();

    my $sql = <<"EOSQL";
SELECT * FROM
    VssAction
WHERE action_id IN
    (SELECT action_id FROM SvnRevisionVssAction WHERE revision_id = ?)
ORDER BY action_id
EOSQL

    my $action_sth = $gCfg{dbh}->prepare($sql);

    # "my $x = ... if COND" has undefined behaviour in Perl; use an explicit
    # conditional expression so the optional helpers are undef when unset.
    my $autoprops = $gCfg{auto_props}
        ? Vss2Svn::Dumpfile::AutoProps->new($gCfg{auto_props})
        : undef;
    my $labelmapper = $gCfg{label_mapper}
        ? Vss2Svn::Dumpfile::LabelMapper->new($gCfg{label_mapper})
        : undef;

    my $dumpfile = Vss2Svn::Dumpfile->new($fh, $autoprops, $gCfg{md5}, $labelmapper);
    Vss2Svn::Dumpfile->SetTempDir($gCfg{tempdir});

REVISION:
    while (defined(my $row = $sth->fetchrow_hashref())) {

        my $t0 = Benchmark->new;

        my $revision = $row->{revision_id};
        $dumpfile->begin_revision($row);

        $action_sth->execute($revision);
        my $actions = $action_sth->fetchall_arrayref( {} );

ACTION:
        foreach my $action (@$actions) {
            my $physname = $action->{physname};
            my $itemtype = $action->{itemtype};

            my $version = $action->{version};
            if (   !defined $version
                && (   $action->{action} eq 'ADD'
                    || $action->{action} eq 'COMMIT')) {
                ThrowWarning("'$physname': no version specified for retrieval");

                # fall through and try with version 1.
                $version = 1;
            }

            # Only files (itemtype 2) with a known version are exported.
            if ($itemtype == 2 && defined $version) {
                $exported{$physname} = ExportVssPhysFile($physname, $version);
            } else {
                $exported{$physname} = undef;
            }

            # do_action needs to know the revision_id, so paste it on
            $action->{revision_id} = $revision;
            $dumpfile->do_action($action, $exported{$physname});
        }

        print "revision $revision: ", timestr(timediff(Benchmark->new, $t0)),"\n"
            if $gCfg{timing};
    }

    # Report everything the dumpfile writer collected along the way.
    foreach my $err (@{ $dumpfile->{errors} }) {
        ThrowWarning($err);
    }

    $dumpfile->finish();

    # Buffered write errors only surface at close time, so check it.
    my $closed = close $fh;
    ThrowWarning("Could not close dumpfile '$file': $!") unless $closed;

    return $closed;

}  # End CreateSvnDumpfile
1231 ###############################################################################
1232 # ExportVssPhysFile
1233 ###############################################################################
sub ExportVssPhysFile {
    # Exports one version of a physical VSS file into the vssdata area by
    # running "ssphys get" (via DoSsCmd), unless the target
    # "$exportdir/$physname.$version" already exists.
    #
    # Parameters:
    #   $physname - physical (database) name of the file
    #   $version  - version number to retrieve; 1 is used when undefined
    #
    # Returns the export directory, or undef (after a warning) when the
    # physical file cannot be located.
    my ($physname, $version) = @_;

    # The first two characters of the physical name select the export
    # sub-directory; the first character alone is passed to FindPhysnameFile.
    # Check the match so that stale capture variables are never used.
    my ($prefix, $initial);
    if (defined $physname && $physname =~ m/^((.).)/) {
        ($prefix, $initial) = ($1, $2);
    } else {
        ThrowWarning("Can't retrieve revisions: invalid physical file name '"
            . (defined $physname ? $physname : '') . "'");
        return undef;
    }

    my $exportdir = "$gCfg{vssdata}/$prefix";
    my @filesegment = FindPhysnameFile("$gCfg{vssdir}/data", $initial, $physname);

    my $physpath;
    if (defined $filesegment[0] && defined $filesegment[1] && defined $filesegment[2]) {
        $physpath = "$filesegment[0]/$filesegment[1]/$filesegment[2]";
    }

    if (!defined $physpath || ! -f $physpath) {
        # physical file doesn't exist; it must have been destroyed later
        ThrowWarning("Can't retrieve revisions from physical file "
            . "'$physname'; it was either destroyed or corrupted");
        return undef;
    }

    mkpath($exportdir) if ! -e $exportdir;

    # MergeParentData normally will merge two corresponding item and parent
    # actions. But if the actions are further apart than the maximum allowed
    # timespan, we will end up with an undefined version in an ADD action
    # here. As a hot fix, we define the version to be 1, which also reverts
    # to the alpha 1 version behavior.
    if (! defined $version) {
        ThrowWarning("'$physname': no version specified for retrieval");

        # fall through and try with version 1.
        $version = 1;
    }

    if (! -e "$exportdir/$physname.$version" ) {
        # Both paths are quoted so that directories containing spaces
        # survive the trip through the shell.
        DoSsCmd("get -b -v$version --force-overwrite -e$gCfg{encoding} "
            . "\"$physpath\" \"$exportdir/$physname\"");
    }

    return $exportdir;
}  # End ExportVssPhysFile
1278 ###############################################################################
1279 # ShowHeader
1280 ###############################################################################
sub ShowHeader {
    # Prints the start-up banner with the effective configuration and stops
    # the conversion when the ssphys version is below 0.22.
    my $info;
    if ($gCfg{task} eq 'INIT') {
        $info = 'BEGINNING CONVERSION...';
    } else {
        $info = "RESUMING CONVERSION FROM TASK '$gCfg{task}' AT STEP $gCfg{step}...";
    }

    my $starttime = ctime($^T);
    my $ssversion = GetSsVersion();

    # Optional settings are shown as empty strings when not given.
    my $auto_props   = defined $gCfg{auto_props}   ? $gCfg{auto_props}   : "";
    my $label_mapper = defined $gCfg{label_mapper} ? $gCfg{label_mapper} : "";

    print <<"EOTXT";
======== VSS2SVN ========
$info
Start Time : $starttime

VSS Dir : $gCfg{vssdir}
Temp Dir : $gCfg{tempdir}
Dumpfile : $gCfg{dumpfile}
VSS Encoding : $gCfg{encoding}
Auto Props : $auto_props
trunk dir : $gCfg{trunkdir}
md5 : $gCfg{md5}
label dir : $gCfg{labeldir}
label mapper : $label_mapper

VSS2SVN ver : $VERSION
SSPHYS exe : $gCfg{ssphys}
SSPHYS ver : $ssversion
XML Parser : $gCfg{xmlParser}

EOTXT

    # we need at least ssphys 0.22
    my ($major, $minor) = split '\.', $ssversion;
    if ($major == 0 && $minor < 22) {
        ThrowError("The conversion needs at least ssphys version 0.22");
    }

}  # End ShowHeader
1320 ###############################################################################
1321 # ShowSummary
1322 ###############################################################################
sub ShowSummary {
    # Prints the collected errors (grouped by task), the closing
    # instructions, and run statistics obtained from GetStats.
    my $resumed = $gCfg{resume};

    if (keys(%gErr) || $resumed) {
        print <<"EOTXT";
=============================================================================
ERROR SUMMARY

EOTXT

        if ($resumed) {
            print <<"EOTXT";
**NOTICE** Because this run was resumed from a previous run, this may be only
a partial list; other errors may have been reported during previous run.

EOTXT
        }

        # Tasks are listed in the order in which they first reported an error.
        foreach my $task (@{ $gCfg{errortasks} }) {
            print "\n$task:\n ";
            print join("\n ", @{ $gErr{$task} }),"\n";
        }
    }

    print <<"EOTXT";
=============================================================================
END OF CONVERSION

The VSS to SVN conversion is complete. You should now use the "svnadmin load"
command to load the generated dumpfile '$gCfg{dumpfile}'. The "svnadmin"
utility is provided as part of the Subversion command-line toolset; use a
command such as the following:
svnadmin load <repodir> < "$gCfg{dumpfile}"

You may need to precede this with "svnadmin create <repodir>" if you have not
yet created a repository. Type "svnadmin help <cmd>" for more information on
"create" and/or "load".

If any errors occurred during the conversion, they are summarized above.

For more information on the vss2svn project, see:
http://www.pumacode.org/projects/vss2svn/

EOTXT

    my $starttime = ctime($^T);
    chomp $starttime;
    my $endtime = ctime(time);
    chomp $endtime;

    my $elapsed;
    {
        # Integer arithmetic splits the run time into H:M:S.
        use integer;
        my $remaining = time - $^T;

        my $hours = $remaining / 3600;
        $remaining -= ($hours * 3600);

        my $mins = $remaining / 60;
        $remaining -= ($mins * 60);

        $elapsed = sprintf("%2.2i:%2.2i:%2.2i", $hours, $mins, $remaining);
    }

    my ($actions, $revisions, $mintime, $maxtime) = GetStats();

    print <<"EOTXT";
Started at : $starttime
Ended at : $endtime
Elapsed time : $elapsed (H:M:S)

VSS Actions read : $actions
SVN Revisions converted : $revisions
Date range (YYYY/MM/DD) : $mintime to $maxtime

EOTXT

}  # End ShowSummary
1401 ###############################################################################
1402 # GetStats
1403 ###############################################################################
sub GetStats {
    # Returns (actions, revisions, first date, last date) for the summary:
    # row counts of VssAction and SvnRevision, each reduced by one, and the
    # smallest/largest PhysicalAction timestamp formatted as YYYY/MM/DD.
    my $dbh = $gCfg{dbh};

    my ($actions) = $dbh->selectrow_array(<<"EOSQL");
SELECT
    COUNT(*)
FROM
    VssAction
EOSQL

    my ($revisions) = $dbh->selectrow_array(<<"EOSQL");
SELECT
    COUNT(*)
FROM
    SvnRevision
EOSQL

    my ($mintime, $maxtime) = $dbh->selectrow_array(<<"EOSQL");
SELECT
    MIN(timestamp), MAX(timestamp)
FROM
    PhysicalAction
EOSQL

    # The loop variable aliases $mintime/$maxtime, so both are rewritten in
    # place: drop everything from the "T" on and turn dashes into slashes.
    foreach my $stamp ($mintime, $maxtime) {
        $stamp = Vss2Svn::Dumpfile::SvnTimestamp($stamp);
        $stamp =~ s:T.*::;
        $stamp =~ s:-:/:g;
    }

    # initial creation of the repo wasn't considered an action or revision
    return($actions - 1, $revisions - 1, $mintime, $maxtime);

}  # End GetStats
1445 ###############################################################################
1446 # DoSsCmd
1447 ###############################################################################
sub DoSsCmd {
    # Runs the configured ssphys executable with the given argument string
    # (failures are tolerated) and sanitizes the captured output, which is
    # left in $gSysOut.
    #
    # Returns the success flag reported by DoSysCmd (1 on success, 0 on
    # failure). Previously the flag was computed but dropped, and the sub
    # returned the result of its last substitution.
    my ($cmd) = @_;

    my $ok = DoSysCmd("\"$gCfg{ssphys}\" $cmd", 1);

    # Backticks yield undef when the command could not be started; skip the
    # clean-up then instead of substituting on an undefined value.
    if (defined $gSysOut) {
        $gSysOut =~ s/\x00//g;      # remove null bytes
        $gSysOut =~ s/.\x08//g;     # yes, I've seen VSS store backspaces in names!
        # allow all characters in the windows-1252 codepage: see http://de.wikipedia.org/wiki/Windows-1252
        $gSysOut =~ s/[\x00-\x09\x11\x12\x14-\x1F\x81\x8D\x8F\x90\x9D]/_/g; # just to be sure
    }

    return $ok;

}  # End DoSsCmd
1460 ###############################################################################
1461 # DoSysCmd
1462 ###############################################################################
sub DoSysCmd {
    # Runs $cmd through backticks and stores the captured output in the
    # global $gSysOut. Returns 1 when the command exited with status 0 and
    # 0 otherwise; on failure a warning is recorded and, unless $allowfail
    # is true, the program dies.
    my ($cmd, $allowfail) = @_;

    print "$cmd\n" if $gCfg{verbose};
    $gSysOut = `$cmd`;

    print $gSysOut if $gCfg{debug};

    my $status = $?;

    return 1 if $status == 0;

    if ($status == -1) {
        # -1 means the command could not be executed at all
        ThrowWarning("FAILED to execute: $!");
    } else {
        ThrowWarning(sprintf "FAILED with non-zero exit status %d (cmd: %s)", $status >> 8, $cmd);
    }

    die unless $allowfail;

    return 0;

}  # End DoSysCmd
1489 ###############################################################################
1490 # GetSsVersion
1491 ###############################################################################
sub GetSsVersion {
    # Runs "<ssphys> --version" and returns the version as
    # "<version>.<build>", or the string 'unknown' when the output cannot be
    # parsed (for example because the executable could not be run).
    my $out = `\"$gCfg{ssphys}\" --version 2>&1`;

    # Build numbers look like:
    #  a.) ssphys 0.20.0, Build 123
    #  b.) ssphys 0.20.0, Build 123:150
    #  c.) ssphys 0.20.0, Build 123:150 (locally modified)
    #
    # which are turned into
    #  a.) 0.20.0.123
    #  b.) 0.20.0.123:150
    #  c.) 0.20.0.123:150
    #
    # The match result is tested explicitly: the former expression
    # "$1 . '.' . $2 || 'unknown'" could never yield 'unknown', because the
    # concatenation binds tighter than "||" and always contains the dot.
    if (defined $out && $out =~ m/^ssphys (.*?), Build (.*?)[ \n]/m) {
        return $1 . "." . $2;
    }

    return 'unknown';
}  # End GetSsVersion
1507 ###############################################################################
1508 # ThrowWarning
1509 ###############################################################################
sub ThrowWarning {
    # Emits "ERROR -- <msg>" on STDERR and files the message under the
    # current task in %gErr so that it can be listed in the summary.
    #
    # $callinfo is an optional array ref shaped like the return of caller();
    # the immediate caller is used when it is omitted.
    my ($msg, $callinfo) = @_;

    my ($file, $line) = @{ $callinfo || [caller()] }[1, 2];

    $msg .= "\nat $file line $line";

    warn "ERROR -- $msg\n";

    my $task = $gCfg{task};

    # First error for this task: open its list and remember the task order.
    unless (defined $gErr{$task}) {
        $gErr{$task} = [];
        push @{ $gCfg{errortasks} }, $task;
    }

    return push @{ $gErr{$task} }, $msg;

}  # End ThrowWarning
1530 ###############################################################################
1531 # ThrowError
1532 ###############################################################################
sub ThrowError {
    # Records the message like ThrowWarning, attributing it to the caller of
    # ThrowError, and then stops the conversion.
    my $callinfo = [caller()];

    ThrowWarning(@_, $callinfo);
    StopConversion(@_);
}  # End ThrowError
1538 ###############################################################################
1539 # StopConversion
1540 ###############################################################################
sub StopConversion {
    # Releases the database connection and open files, then exits with
    # status 1.
    DisconnectDatabase(@_);
    CloseAllFiles(@_);

    exit(1);
}  # End StopConversion
1548 ###############################################################################
1549 # CloseAllFiles
1550 ###############################################################################
sub CloseAllFiles {
    # Intentionally empty: nothing is closed here at present.
    return;
}  # End CloseAllFiles
1555 ###############################################################################
1556 # SetSystemTask
1557 ###############################################################################
sub SetSystemTask {
    # Stores $task in the SystemInfo table and in $gCfg{task}. The step is
    # reset to 0 through SetSystemStep unless $leavestep is true.
    my ($task, $leavestep) = @_;

    print "\nSETTING TASK $task\n" if $gCfg{verbose};

    # The UPDATE statement is prepared once and kept in %gSth.
    my $update = $gSth{'SYSTEMTASK'};

    unless (defined $update) {
        my $sql = <<"EOSQL";
UPDATE
    SystemInfo
SET
    task = ?
EOSQL

        $update = $gSth{'SYSTEMTASK'} = $gCfg{dbh}->prepare($sql);
    }

    $update->execute($task);

    $gCfg{task} = $task;

    SetSystemStep(0) unless $leavestep;

}  # End SetSystemTask
1586 ###############################################################################
1587 # SetSystemStep
1588 ###############################################################################
sub SetSystemStep {
    # Stores $step in the SystemInfo table and in $gCfg{step}.
    my ($step) = @_;

    print "\nSETTING STEP $step\n" if $gCfg{verbose};

    # The UPDATE statement is prepared once and cached in %gSth. The handle
    # used to be stored in %gCfg while being looked up in %gSth, so it was
    # prepared again on every call and left a stray 'SYSTEMSTEP' entry in
    # the configuration hash.
    my $sth = $gSth{'SYSTEMSTEP'};

    if (!defined $sth) {
        my $sql = <<"EOSQL";
UPDATE
    SystemInfo
SET
    step = ?
EOSQL

        $sth = $gSth{'SYSTEMSTEP'} = $gCfg{dbh}->prepare($sql);
    }

    $sth->execute($step);

    $gCfg{step} = $step;

}  # End SetSystemStep
1615 ###############################################################################
1616 # ConnectDatabase
1617 ###############################################################################
sub ConnectDatabase {
    # Opens the SQLite database named by $gCfg{sqlitedb} and stores the
    # handle in $gCfg{dbh}. An existing database file is deleted first when
    # this is not a resumed run, or when the task is 'INIT'.
    my $db = $gCfg{sqlitedb};

    my $start_fresh = !$gCfg{resume}
        || (defined($gCfg{task}) && $gCfg{task} eq 'INIT');

    if (-e $db && $start_fresh) {
        unlink $db
            or ThrowError("Could not delete existing database " . $gCfg{sqlitedb});
    }

    print "Connecting to database $db\n\n";

    my %attr = (RaiseError => 1, AutoCommit => 1);

    $gCfg{dbh} = DBI->connect("dbi:SQLite2:dbname=$db", '', '', \%attr)
        or die "Couldn't connect database $db: $DBI::errstr";

}  # End ConnectDatabase
1636 ###############################################################################
1637 # DisconnectDatabase
1638 ###############################################################################
sub DisconnectDatabase {
    # Disconnects the handle in $gCfg{dbh}; does nothing when no handle has
    # been stored yet.
    my $dbh = $gCfg{dbh};

    defined $dbh and $dbh->disconnect;
}  # End DisconnectDatabase
1643 ###############################################################################
1644 # SetupGlobals
1645 ###############################################################################
sub SetupGlobals {
    # Creates the system tables for a new run (task 'INIT') or reloads the
    # saved ones otherwise, checks that the VSS data directory exists, and
    # hands the shared settings to the helper classes.
    my $is_new_run = defined($gCfg{task}) && $gCfg{task} eq 'INIT';

    if ($is_new_run) {
        InitSysTables(@_);
    } else {
        ReloadSysTables(@_);
    }

    $gCfg{ssphys} = 'ssphys' unless defined($gCfg{ssphys});
    $gCfg{vssdatadir} = "$gCfg{vssdir}/data";

    unless (-d "$gCfg{vssdatadir}") {
        ThrowError("$gCfg{vssdir} does not appear "
            . "to be a valid VSS database");
    }

    SetupActionTypes(@_);

    Vss2Svn::DataCache->SetCacheDir($gCfg{tempdir});
    Vss2Svn::DataCache->SetDbHandle($gCfg{dbh});
    Vss2Svn::DataCache->SetVerbose($gCfg{verbose});

    if (defined $gCfg{revtimerange}) {
        Vss2Svn::SvnRevHandler->SetRevTimeRange($gCfg{revtimerange});
    }

}  # End SetupGlobals
1670 ###############################################################################
1671 # SetupActionTypes
1672 ###############################################################################
sub SetupActionTypes {
    # Fills the global %gActionType, which maps each VSS action name to the
    # item type it applies to (1 = project, 2 = file) and the internal
    # action it is converted to.
    #
    # RollBack is only seen in combination with a BranchFile activity, so
    # actually RollBack is the item view on the activity and BranchFile is
    # the parent side ==> map RollBack to BRANCH, so that we can join the
    # two actions in the MergeParentData step.
    #
    # NOTE(review): the original hash literal listed RestoredProject twice,
    # first as 'RESTOREDPROJECT' and later as 'RESTORE'. In a hash literal
    # the last pair wins, so 'RESTORE' has always been the effective value
    # and the first entry was dead. The effective mapping is kept here as a
    # single entry. The original comment described RestoredProject as acting
    # like CreatedProject (recreated from an archive file, timestamped at
    # restoration time, child files keeping their original timestamps), so
    # confirm whether 'RESTOREDPROJECT' was the intended action.
    %gActionType = (
        # projects
        CreatedProject           => {type => 1, action => 'ADD'},
        AddedProject             => {type => 1, action => 'ADD'},
        RenamedProject           => {type => 1, action => 'RENAME'},
        MovedProjectTo           => {type => 1, action => 'MOVE_TO'},
        MovedProjectFrom         => {type => 1, action => 'MOVE_FROM'},
        DeletedProject           => {type => 1, action => 'DELETE'},
        DestroyedProject         => {type => 1, action => 'DELETE'},
        RecoveredProject         => {type => 1, action => 'RECOVER'},
        ArchiveProject           => {type => 1, action => 'DELETE'},
        RestoredProject          => {type => 1, action => 'RESTORE'},
        ArchiveVersionsofProject => {type => 1, action => 'ADD'},

        # files
        CheckedIn                => {type => 2, action => 'COMMIT'},
        CreatedFile              => {type => 2, action => 'ADD'},
        AddedFile                => {type => 2, action => 'ADD'},
        RenamedFile              => {type => 2, action => 'RENAME'},
        DeletedFile              => {type => 2, action => 'DELETE'},
        DestroyedFile            => {type => 2, action => 'DELETE'},
        RecoveredFile            => {type => 2, action => 'RECOVER'},
        ArchiveVersionsofFile    => {type => 2, action => 'ADD'},
        ArchiveFile              => {type => 2, action => 'DELETE'},
        RestoredFile             => {type => 2, action => 'RESTORE'},
        SharedFile               => {type => 2, action => 'SHARE'},
        BranchFile               => {type => 2, action => 'BRANCH'},
        PinnedFile               => {type => 2, action => 'PIN'},
        RollBack                 => {type => 2, action => 'BRANCH'},
        UnpinnedFile             => {type => 2, action => 'PIN'},
        Labeled                  => {type => 2, action => 'LABEL'},
    );

}  # End SetupActionTypes
1715 ###############################################################################
1716 # InitSysTables
1717 ###############################################################################
sub InitSysTables {
    # Creates all tables and indexes of the working database and stores the
    # initial configuration values as the single row of SystemInfo.

    # Prepare and execute one statement, returning its handle.
    my $run = sub {
        my ($statement, @binds) = @_;
        my $handle = $gCfg{dbh}->prepare($statement);
        $handle->execute(@binds);
        return $handle;
    };

    # Schema statements, executed in exactly this order.
    my @schema = (
        <<"EOSQL",
CREATE TABLE
    Physical (
        physname VARCHAR
    )
EOSQL
        <<"EOSQL",
CREATE TABLE
    NameLookup (
        offset INTEGER,
        name VARCHAR
    )
EOSQL
        <<"EOSQL",
CREATE TABLE
    PhysicalAction (
        action_id INTEGER PRIMARY KEY,
        physname VARCHAR,
        version INTEGER,
        parentphys VARCHAR,
        actiontype VARCHAR,
        itemname VARCHAR,
        itemtype INTEGER,
        timestamp INTEGER,
        author VARCHAR,
        is_binary INTEGER,
        info VARCHAR,
        priority INTEGER,
        sortkey VARCHAR,
        parentdata INTEGER,
        label VARCHAR,
        comment TEXT
    )
EOSQL
        <<"EOSQL",
CREATE INDEX
    PhysicalAction_IDX1 ON PhysicalAction (
        timestamp ASC,
        priority ASC,
        sortkey ASC
    )
EOSQL
        <<"EOSQL",
CREATE INDEX
    PhysicalAction_IDX2 ON PhysicalAction (
        physname ASC,
        parentphys ASC,
        actiontype ASC,
        timestamp ASC,
        author ASC
    )
EOSQL
        <<"EOSQL",
CREATE TABLE
    VssAction (
        action_id INTEGER PRIMARY KEY,
        parentphys VARCHAR,
        physname VARCHAR,
        version INTEGER,
        action VARCHAR,
        itempaths VARCHAR,
        itemtype INTEGER,
        is_binary INTEGER,
        info VARCHAR
    )
EOSQL
        <<"EOSQL",
CREATE INDEX
    VssAction_IDX1 ON VssAction (
        action_id ASC
    )
EOSQL
        <<"EOSQL",
CREATE TABLE
    SvnRevision (
        revision_id INTEGER PRIMARY KEY,
        timestamp INTEGER,
        author VARCHAR,
        comment TEXT
    )
EOSQL
        <<"EOSQL",
CREATE TABLE
    SvnRevisionVssAction (
        revision_id INTEGER,
        action_id INTEGER
    )
EOSQL
        <<"EOSQL",
CREATE INDEX
    SvnRevisionVssAction_IDX1 ON SvnRevisionVssAction (
        revision_id ASC,
        action_id ASC
    )
EOSQL
        # NOTE(review): the column name "imtempaths" looks like a misspelling
        # of "itempaths"; it is kept as is because other code may refer to it.
        <<"EOSQL",
CREATE TABLE
    Label (
        physical VARCHAR,
        version INTEGER,
        label VARCHAR,
        imtempaths VARCHAR
    )
EOSQL
    );

    $run->($_) foreach @schema;

    # SystemInfo gets one VARCHAR column per saved configuration item.
    my @cfgitems = qw(task step vssdir svnurl svnuser svnpwd ssphys tempdir
                      setsvndate starttime);

    my $fielddef = join(",\n ",
                        map {sprintf('%-12.12s VARCHAR', $_)} @cfgitems);

    $run->(<<"EOSQL");
CREATE TABLE
    SystemInfo (
 $fielddef
    )
EOSQL

    my $fields = join(', ', @cfgitems);
    my $args = join(', ', map {'?'} @cfgitems);

    my $insert = $run->(<<"EOSQL", map {$gCfg{$_}} @cfgitems);
INSERT INTO
    SystemInfo ($fields)
VALUES
    ($args)
EOSQL

    $insert->finish();

}  # End InitSysTables
1901 ###############################################################################
1902 # ReloadSysTables
1903 ###############################################################################
sub ReloadSysTables {
    # Merges the saved SystemInfo row with the current configuration: a
    # value already present in %gCfg is written back to the table, any other
    # column is loaded into %gCfg. Finally the task is set again through
    # SetSystemTask.
    my $select = $gCfg{dbh}->prepare("SELECT * FROM SystemInfo");
    $select->execute();

    my $saved = $select->fetchrow_hashref();

FIELD:
    foreach my $field (keys %$saved) {
        unless (defined($gCfg{$field})) {
            $gCfg{$field} = $saved->{$field};
            next FIELD;
        }

        # allow user to override saved vals
        my $update = $gCfg{dbh}->prepare("UPDATE SystemInfo SET $field = ?");
        $update->execute($gCfg{$field});
    }

    $select->finish();
    SetSystemTask($gCfg{task});

}  # End ReloadSysTables
1930 ###############################################################################
1931 # Initialize
1932 ###############################################################################
sub Initialize {
    # Parses the command line into %gCfg, fills in defaults and derived
    # paths, configures the XML parser and, unless a previous run is being
    # resumed, resets the export directory and the task/step markers.
    $| = 1;

    GetOptions(\%gCfg,'vssdir=s','tempdir=s','dumpfile=s','resume','verbose',
               'debug','timing+','task=s','revtimerange=i','ssphys=s',
               'encoding=s','trunkdir=s','auto_props=s', 'label_mapper=s', 'md5');

    GiveHelp("Must specify --vssdir") unless defined($gCfg{vssdir});

    $gCfg{tempdir}  = './_vss2svn'           unless defined($gCfg{tempdir});
    $gCfg{dumpfile} = 'vss2svn-dumpfile.dat' unless defined($gCfg{dumpfile});

    # Optional ini files must be readable when they are given.
    foreach my $option (qw(auto_props label_mapper)) {
        if (defined($gCfg{$option}) && ! -r $gCfg{$option}) {
            die "$option file '$gCfg{$option}' is not readable";
        }
    }

    $gCfg{sqlitedb} = "$gCfg{tempdir}/vss_data.db";

    # XML output from ssphysout placed here.
    $gCfg{ssphysout} = "$gCfg{tempdir}/ssphysout";
    $gCfg{encoding} = 'windows-1252' unless defined($gCfg{encoding});

    # Commit messages for SVN placed here.
    $gCfg{svncomment} = "$gCfg{tempdir}/svncomment.tmp.txt";
    mkdir $gCfg{tempdir} if ! -d $gCfg{tempdir};

    # Directories for holding VSS revisions
    $gCfg{vssdata} = "$gCfg{tempdir}/vssdata";

    if ($gCfg{resume} && !-e $gCfg{sqlitedb}) {
        warn "WARNING: --resume set but no database exists; starting new "
            . "conversion...";
        $gCfg{resume} = 0;
    }

    # --debug implies --verbose
    $gCfg{verbose} = 1 if $gCfg{debug};

    foreach my $flag (qw(timing md5)) {
        $gCfg{$flag} = 0 unless defined $gCfg{$flag};
    }

    $gCfg{starttime} = scalar localtime($^T);

    # trunkdir should (must?) be without trailing slash
    $gCfg{trunkdir} = '' unless defined $gCfg{trunkdir};
    $gCfg{trunkdir} =~ s:\\:/:g;
    $gCfg{trunkdir} =~ s:/$::;

    $gCfg{junkdir} = '/lost+found';

    $gCfg{labeldir} = '/labels';

    $gCfg{errortasks} = [];

    {
        no warnings 'once';
        $gCfg{usingExe} = (defined($PerlApp::TOOL));
    }

    ConfigureXmlParser();

    ### Don't go past here if resuming a previous run ###
    return 1 if $gCfg{resume};

    rmtree($gCfg{vssdata}) if (-e $gCfg{vssdata});
    mkdir $gCfg{vssdata};

    $gCfg{ssphys} ||= 'ssphys';
    $gCfg{svn} ||= 'SVN.exe';

    $gCfg{task} = 'INIT';
    $gCfg{step} = 0;
}  # End Initialize
2012 ###############################################################################
2013 # ConfigureXmlParser
2014 ###############################################################################
sub ConfigureXmlParser {
    # Decides which XML parser is reported in $gCfg{xmlParser}: a parser
    # preferred through the environment, XML::SAX::Expat when it can be
    # created, whatever XML::SAX::ParserFactory offers otherwise, and
    # XML::Simple as the last resort. Always returns 1.

    my $preferred = $ENV{XML_SIMPLE_PREFERRED_PARSER};

    if (defined($preferred)) {
        # user has defined a preferred parser; don't mess with it
        $gCfg{xmlParser} = $preferred;
        return 1;
    }

    $gCfg{xmlParser} = 'XML::Simple';

    eval { require XML::SAX; };

    if ($@) {
        # no XML::SAX; let XML::Simple use its own parser
        return 1;
    }
    elsif ($gCfg{usingExe}) {
        # Prevent the ParserDetails.ini error message when running from .exe
        XML::SAX->load_parsers($INC[1]);
    }

    $gCfg{xmlParser} = 'XML::SAX::Expat';
    $XML::SAX::ParserPackage = $gCfg{xmlParser};

    my $parser;

    eval { $parser = XML::SAX::ParserFactory->parser(); };

    unless ($@) {
        # XML::SAX::Expat installed; use it

        # for exe version, XML::Parser::Expat needs help finding its encmaps
        no warnings 'once';

        foreach my $dir (@INC) {
            my $encdir = "$dir/encodings";
            $encdir =~ s:\\:/:g;
            $encdir =~ s://:/:g;

            next unless -d $encdir;

            print "Adding '$encdir' to encodings file path\n";
            push(@XML::Parser::Expat::Encoding_Path, $encdir);
        }

        return 1;
    }

    # Expat is not available; let the factory pick any parser it knows.
    undef $XML::SAX::ParserPackage;
    eval { $parser = XML::SAX::ParserFactory->parser(); };

    unless ($@) {
        $gCfg{xmlParser} = ref $parser;
        return 1;
    }

    # couldn't find a better package; go back to XML::Simple
    $gCfg{'xmlParser'} = 'XML::Simple';
    return 1;

}  # End ConfigureXmlParser
2077 ###############################################################################
2078 # GiveHelp
2079 ###############################################################################
sub GiveHelp {
    # Prints the given message (or 'Online Help') followed by the usage
    # text, then exits with status 1.
    my ($msg) = @_;

    $msg ||= 'Online Help';

    print <<"EOTXT";

$msg

USAGE: perl vss2svn.pl --vssdir <dir> [options]

REQUIRED PARAMETERS:
--vssdir <dir> : Directory where VSS database is located. This should be
the directory in which the "srcsafe.ini" file is located.

OPTIONAL PARAMETERS:
--ssphys <path> : Full path to ssphys.exe program; uses PATH otherwise
--tempdir <dir> : Temp directory to use during conversion;
default is ./_vss2svn
--dumpfile <file> : specify the subversion dumpfile to be created;
default is ./vss2svn-dumpfile.dat
--revtimerange <sec> : specify the difference between two ss actions
that are treated as one subversion revision;
default is 3600 seconds (== 1hour)

--resume : Resume a failed or aborted previous run
--task <task> : specify the task to resume; task is one of the following
INIT, LOADVSSNAMES, FINDDBFILES, GETPHYSHIST,
MERGEPARENTDATA, MERGEMOVEDATA, REMOVETMPCHECKIN,
MERGEUNPINPIN, BUILDACTIONHIST, IMPORTSVN

--verbose : Print more info about the items being processed
--debug : Print lots of debugging info.
--timing : Show timing information during various steps
--encoding : Specify the encoding used in VSS;
Default is windows-1252
--trunkdir : Specify where to map the VSS Project Root in the
converted repository (default = "/")
--auto_props : Specify an autoprops ini file to use, e.g.
--auto_props="c:/Dokumente und Einstellungen/user/Anwendungsdaten/Subversion/config"
--md5 : generate md5 checksums
--label_mapper : INI style file to map labels to different locations
EOTXT

    exit(1);
}  # End GiveHelp