Remove old tigris.org website; it will still be accessible at the "old" repo
[vss2svn.git] / script / vss2svn.pl
blob1aa51bc84059a8bcad9f3bf719d1101eae38403b
1 #!/usr/bin/perl
3 use warnings;
4 use strict;
6 use Getopt::Long;
7 use DBI;
8 use DBD::SQLite2;
9 use XML::Simple;
10 use File::Find;
11 use File::Path;
12 use Time::CTime;
13 use Data::Dumper;
14 use Benchmark ':hireswallclock';
16 use lib '.';
17 use Vss2Svn::ActionHandler;
18 use Vss2Svn::DataCache;
19 use Vss2Svn::SvnRevHandler;
20 use Vss2svn::Dumpfile;
# Package-level state shared by the routines in this script.
our (%gCfg, %gSth, @gErr, %gFh, $gSysOut, %gActionType, %gNameLookup, %gId);

our $VERSION = '0.10';

# Main sequence: read options, open the database, prepare shared state, run
# the conversion tasks, then close the database and print the timing summary.
Initialize();
ConnectDatabase();

SetupGlobals();
ShowHeader();

RunConversion();

DisconnectDatabase();
ShowSummary();
37 ###############################################################################
38 # RunConversion
39 ###############################################################################
sub RunConversion {
    # Drives the conversion as a chain of named tasks. Each entry gives the
    # routine that performs the task and the task that follows it. The current
    # task lives in $gCfg{task} and is stored through SetSystemTask after every
    # step, which allows restarting in case of a failed migration.
    my %joblist = (
        INIT => {
            handler => sub { 1; },
            next    => 'LOADVSSNAMES',
        },

        # Load the "real" names associated with the stored "short" names
        LOADVSSNAMES => {
            handler => \&LoadVssNames,
            next    => 'FINDDBFILES',
        },

        # Add a stub entry into the Physical table for each physical
        # file in the VSS DB
        FINDDBFILES => {
            handler => \&FindPhysDbFiles,
            next    => 'GETPHYSHIST',
        },

        # Load the history of what happened to the physical files. This
        # only gets us halfway there because we don't know what the real
        # filenames are yet
        GETPHYSHIST => {
            handler => \&GetPhysVssHistory,
            next    => 'MERGEPARENTDATA',
        },

        # Merge data from parent records into child records where possible
        MERGEPARENTDATA => {
            handler => \&MergeParentData,
            next    => 'BUILDACTIONHIST',
        },

        # Take the history of physical actions and convert them to VSS
        # file actions
        BUILDACTIONHIST => {
            handler => \&BuildVssActionHistory,
            next    => 'IMPORTSVN',
        },

        # Create a dumpfile or import to repository
        IMPORTSVN => {
            handler => \&ImportToSvn,
            next    => 'DONE',
        },
    );

    until ($gCfg{task} eq 'DONE') {
        my $job = $joblist{ $gCfg{task} }
            or die "FATAL ERROR: Unknown task '$gCfg{task}'\n";

        print "TASK: $gCfg{task}\n";

        # Interactive mode: wait for the operator before each task; typing
        # "quit" aborts the run.
        if ($gCfg{prompt}) {
            print "Press ENTER to continue...\n";
            my $answer = <STDIN>;
            die if $answer =~ m/^quit/i;
        }

        $job->{handler}->();
        SetSystemTask($job->{next});
    }

}  #  End RunConversion
98 ###############################################################################
99 # LoadVssNames
100 ###############################################################################
sub LoadVssNames {
    # Runs the ssphys "info" command on names.dat, parses the XML it prints
    # and stores one (offset, name) pair per NameCacheEntry in the
    # 'NameLookup' cache. Returns 1 early when the output holds no entries.
    DoSsCmd("info \"$gCfg{vssdatadir}\\names.dat\"");

    my $parser = XML::Simple->new(
        KeyAttr    => [],
        ForceArray => [qw(NameCacheEntry Entry)],
    );

    my $doc     = $parser->XMLin($gSysOut);
    my $entries = $doc->{NameCacheEntry};
    return 1 unless $entries;

    my $cache = Vss2Svn::DataCache->new('NameLookup')
        || ThrowError("Could not create cache 'NameLookup'");

    foreach my $entry (@$entries) {
        my $count = $entry->{NrOfEntries};

        # Which <Entry> carries the name to keep depends on how many entries
        # there are: the only one, the second of two, or the next-to-last.
        my $index = $count == 1 ? 0
                  : $count == 2 ? 1
                  :               $count - 2;

        $cache->add($entry->{offset}, $entry->{Entry}->[$index]->{content});
    }

    $cache->commit();

}  #  End LoadVssNames
138 ###############################################################################
139 # FindPhysDbFiles
140 ###############################################################################
sub FindPhysDbFiles {
    # Walks the VSS data directory and lets FoundSsFile record every physical
    # file name it recognizes in the 'Physical' cache, then commits the cache.
    my $cache = Vss2Svn::DataCache->new('Physical')
        || ThrowError("Could not create cache 'Physical'");

    my $wanted = sub { FoundSsFile($cache) };
    find($wanted, $gCfg{vssdatadir});

    $cache->commit();

}  #  End FindPhysDbFiles
152 ###############################################################################
153 # FoundSsFile
154 ###############################################################################
sub FoundSsFile {
    # File::Find callback body (see FindPhysDbFiles).
    #
    # $cache - data cache that receives the upper-cased physical file name.
    #
    # A file is recorded when its full path is
    # <vssdatadir>/<one character>/<eight letters>.
    my($cache) = @_;

    # File::Find has already chdir()'d into the directory being scanned, so
    # the directory test must use the bare entry name in $_. Testing
    # $File::Find::name would resolve a relative --vssdir against the wrong
    # working directory and never recognize directories.
    return if (-d $_);

    my $path = $File::Find::name;
    my $vssdatadir = quotemeta($gCfg{vssdatadir});

    if ($path =~ m:^$vssdatadir/./([a-z]{8})$:i) {
        $cache->add(uc($1));
    }

}  #  End FoundSsFile
169 ###############################################################################
170 # GetPhysVssHistory
171 ###############################################################################
sub GetPhysVssHistory {
    # For every row of the Physical table, asks GetVssPhysInfo to read the
    # history of that physical file into the 'PhysicalAction' cache.
    LoadNameLookup();

    my $cache = Vss2Svn::DataCache->new('PhysicalAction', 1)
        || ThrowError("Could not create cache 'PhysicalAction'");

    my $sth = $gCfg{dbh}->prepare("SELECT * FROM Physical");
    $sth->execute();

    # One parser is shared by all files; <Version> is always read as a list.
    my $xs = XML::Simple->new(ForceArray => [qw(Version)]);

    while (defined(my $row = $sth->fetchrow_hashref())) {
        my $physname = $row->{physname};

        # Physical files live in a subdirectory named after their first letter.
        my $physdir = "$gCfg{vssdir}\\data\\" . substr($physname, 0, 1);

        GetVssPhysInfo($cache, $physdir, $physname, $xs);
    }

    $cache->commit();

}  #  End GetPhysVssHistory
197 ###############################################################################
198 # GetVssPhysInfo
199 ###############################################################################
sub GetVssPhysInfo {
    # Runs ssphys "info" on one physical file, parses the XML and hands the
    # versions it contains to GetVssItemVersions.
    #
    # $cache    - cache passed through to GetVssItemVersions
    # $physdir  - directory holding the physical file
    # $physname - physical file name
    # $xs       - XML::Simple parser to use
    #
    # Files whose ItemInfo/Type is missing, not a plain value, or neither 1
    # nor 2 are reported with a warning and skipped.
    my($cache, $physdir, $physname, $xs) = @_;

    DoSsCmd("info \"$physdir\\$physname\"");

    my $xml      = $xs->XMLin($gSysOut);
    my $iteminfo = $xml->{ItemInfo};

    my $has_plain_type = defined($iteminfo)
                      && defined($iteminfo->{Type})
                      && !ref($iteminfo->{Type});

    unless ($has_plain_type) {
        ThrowWarning("Can't handle file '$physname'; not a project or file\n");
        return;
    }

    my $parentphys;

    if ($iteminfo->{Type} == 1) {
        # Type 1 is handled as a project; AAAAAAAA gets an empty parent.
        if ($physname eq 'AAAAAAAA') {
            $parentphys = '';
        }
        else {
            $parentphys = GetProjectParent($xml);
        }
    }
    elsif ($iteminfo->{Type} == 2) {
        # Type 2 is handled as a file; no parent is known at this point.
        $parentphys = undef;
    }
    else {
        ThrowWarning("Can't handle file '$physname'; not a project or file\n");
        return;
    }

    GetVssItemVersions($cache, $physname, $parentphys, $xml);

}  #  End GetVssPhysInfo
231 ###############################################################################
232 # GetProjectParent
233 ###############################################################################
sub GetProjectParent {
    # Returns the ItemInfo/ParentPhys value of a parsed "info" document, or
    # undef when that value is missing or false.
    my($xml) = @_;

    no warnings 'uninitialized';
    my $parent = $xml->{ItemInfo}->{ParentPhys};

    return $parent ? $parent : undef;

}  #  End GetProjectParent
242 ###############################################################################
243 # GetVssItemVersions
244 ###############################################################################
sub GetVssItemVersions {
    # Turns each <Version> of a parsed physical file into one row of the
    # physical-action cache.
    #
    # $cache      - cache that receives the rows
    # $physname   - physical name of the file being read
    # $parentphys - parent physical name as determined by the caller
    # $xml        - parsed "info" output for $physname
    #
    # Returns 0 immediately when the document holds no versions.
    my($cache, $physname, $parentphys, $xml) = @_;

    return 0 unless defined $xml->{Version};

    VERSION:
    foreach my $version (@{ $xml->{Version} }) {
        my $action    = $version->{Action};
        my $tphysname = $action->{Physical} || $physname;
        my $user      = $version->{UserName};
        my $timestamp = $version->{Date};
        my $itemname  = GetItemName($action->{SSName});

        my $actionid = $action->{ActionId};
        my $typeinfo = $gActionType{$actionid};

        if (!$typeinfo) {
            warn "\nWARNING: Unknown action '$actionid'\n";
            next VERSION;
        }

        my $itemtype   = $typeinfo->{type};
        my $actiontype = $typeinfo->{action};

        next VERSION if $actiontype eq 'IGNORE';

        # Only plain, non-empty comments are kept, trimmed at both ends.
        my $comment;
        if ($version->{Comment} && !ref($version->{Comment})) {
            $comment = $version->{Comment};
            $comment =~ s/^\s+//s;
            $comment =~ s/\s+$//s;
        }

        my $parentdata = 0;

        # NOTE: $parentphys is intentionally NOT reset per version. The first
        # branch below leaves it as the previous iteration (or the caller)
        # set it.
        if ($itemtype == 1 && $physname eq 'AAAAAAAA' && ref($tphysname)) {
            $tphysname = $physname;
            $itemname  = '';
        }
        elsif ($physname ne $tphysname) {
            # If version's physical name and file's physical name are
            # different, this is a project describing an action on a child
            # item. Most of the time, this very same data will be in the
            # child's physical file and with more detail (such as check-in
            # comment).
            #
            # However, in some cases (such as renames, or when the child's
            # physical file was later purged), this is the only place we'll
            # have the data; also, sometimes the child record doesn't even
            # have enough information about itself (such as which project it
            # was created in and which project(s) it's shared in).
            #
            # So, for a parent record describing a child action, we set a
            # flag, then combine them in the next phase.
            $parentdata = 1;

            # Since we're describing an action in the child, the parent is
            # actually this (project) item.
            $parentphys = $physname;
        }
        else {
            $parentphys = undef;
        }

        my $is_binary = 0;

        if ($itemtype == 1) {
            $itemname .= '/';
        }
        elsif (defined($xml->{ItemInfo}) && $xml->{ItemInfo}->{Binary}) {
            $is_binary = 1;
        }

        # Extra data stored with the action, depending on its type.
        my $info;

        if ($actiontype eq 'RENAME') {
            # For a rename, the new name goes into the action's 'info' field.
            $info = GetItemName($action->{NewSSName});
            $info .= '/' if $itemtype == 1;
        }
        elsif ($actiontype eq 'BRANCH') {
            $info = $action->{Parent};
        }
        elsif ($actiontype eq 'PIN') {
            $info = $action->{PinnedToVersion};
        }

        my $vernum = $parentdata ? undef : $version->{VersionNumber};

        # Lower numbers sort first: adds are always first, then shares, then
        # branches, then everything else.
        my $priority = $actiontype eq 'ADD'    ? 1
                     : $actiontype eq 'SHARE'  ? 2
                     : $actiontype eq 'BRANCH' ? 3
                     :                           5;

        # store the reversed physname as a sortkey; a bit wasteful but makes
        # debugging easier for the time being...
        my $sortkey = reverse($tphysname);

        $cache->add($tphysname, $vernum, $parentphys, $actiontype, $itemname,
                    $itemtype, $timestamp, $user, $is_binary, $info, $priority,
                    $sortkey, $parentdata, $comment);
    }

}  #  End GetVssItemVersions
365 ###############################################################################
366 # GetItemName
367 ###############################################################################
sub GetItemName {
    # Picks the name to use for an item.
    #
    # $nameelem - parsed name element; its 'content' is the name stored with
    #             the action and its optional 'offset' is the key into
    #             %gNameLookup.
    #
    # Returns the stored name unless the name cache offers a replacement that
    # is judged better (see below).
    my($nameelem) = @_;

    my $itemname = $nameelem->{content};
    return $itemname unless defined($nameelem->{offset});

    # Might have a "better" name in the name cache, but sometimes the
    # original name is best.
    my $cachename = $gNameLookup{ $nameelem->{offset} };
    return $itemname unless defined($cachename);

    # Take the cached name when there is no stored name at all, or when it is
    # at least as long as the stored one and would not introduce a '~' into a
    # name that has none.
    my $prefer_cache = !defined($itemname)
        || (length($cachename) >= length($itemname)
            && ($itemname =~ m/~/ || $cachename !~ m/~/));

    if ($prefer_cache) {
        print "Changing name of '$itemname' to '$cachename' from "
            . "name cache\n" if $gCfg{debug};

        return $cachename;
    }

    print "Found name '$cachename' in namecache, but kept original "
        . "'$itemname'\n" if $gCfg{debug};

    return $itemname;

}  #  End GetItemName
397 ###############################################################################
398 # LoadNameLookup
399 ###############################################################################
sub LoadNameLookup {
    # Copies every (offset, name) row of the NameLookup table into the
    # %gNameLookup hash, keyed by offset.
    my $sth = $gCfg{dbh}->prepare('SELECT offset, name FROM NameLookup');
    $sth->execute();

    while (my $row = $sth->fetchrow_hashref()) {
        $gNameLookup{ $row->{offset} } = $row->{name};
    }
}  #  End LoadNameLookup
411 ###############################################################################
412 # MergeParentData
413 ###############################################################################
sub MergeParentData {
    # VSS has a funny way of not placing enough information to rebuild history
    # in one data file; for example, renames are stored in the parent project
    # rather than in that item's data file. Also, it's sometimes impossible to
    # tell from a child record which was eventually shared to multiple
    # folders, which folder it was originally created in.
    #
    # So, at this stage we look for any parent records which described child
    # actions, then update those records with data from the child objects. We
    # then delete the separate child objects to avoid duplication.

    my $sth = $gCfg{dbh}->prepare('SELECT * FROM PhysicalAction '
                                  . 'WHERE parentdata = 1');
    $sth->execute();

    # need to pull in all recs at once, since we'll be updating/deleting data
    my $parents = $sth->fetchall_arrayref( {} );

    my @obsolete_ids;

    for my $parent (@$parents) {
        my $children = GetChildRecs($parent);

        if (scalar @$children > 1) {
            ThrowWarning("Multiple child recs for parent rec "
                         . "'$parent->{action_id}'");
        }

        for my $child (@$children) {
            UpdateParentRec($parent, $child);
            push @obsolete_ids, $child->{action_id};
        }
    }

    # Children are removed only after every parent has been processed.
    DeleteChildRec($_) for @obsolete_ids;

    return 1;

}  #  End MergeParentData
458 ###############################################################################
459 # GetChildRecs
460 ###############################################################################
sub GetChildRecs {
    # Finds the child rows that give additional detail for a parent row.
    #
    # $parentrec - hashref of one PhysicalAction row with parentdata = 1
    #
    # There's no definitive way to find matching rows, so the lookup joins on
    # physname, actiontype and author, and accepts any child row whose
    # timestamp is between 0 and 4 seconds before the parent's, since the
    # "two" actions may not have been recorded in the exact same second.
    #
    # Returns an arrayref of row hashrefs ordered by timestamp.
    my($parentrec) = @_;

    my $sql = <<'EOSQL';
SELECT
    *
FROM
    PhysicalAction
WHERE
    parentdata = 0
    AND physname = ?
    AND actiontype = ?
    AND (? - timestamp IN (0, 1, 2, 3, 4))
    AND author = ?
ORDER BY
    timestamp
EOSQL

    my @bind = @{ $parentrec }{qw(physname actiontype timestamp author)};

    my $sth = $gCfg{dbh}->prepare($sql);
    $sth->execute(@bind);

    return $sth->fetchall_arrayref( {} );
}  #  End GetChildRecs
492 ###############################################################################
493 # UpdateParentRec
494 ###############################################################################
sub UpdateParentRec {
    # Copies detail from a child record onto the parent record that describes
    # the same action.
    #
    # $row   - hashref of the parent PhysicalAction row (updated in the DB)
    # $child - hashref of the matching child PhysicalAction row
    #
    # The child record has the "correct" version number (relative to the child
    # and not the parent), as well as the comment info and whether the file is
    # binary.
    my($row, $child) = @_;

    my $comment;

    {
        # Either comment may be undef; treat it as empty.
        no warnings 'uninitialized';
        $comment = "$row->{comment}\n$child->{comment}";
    }

    # Drop the separator when one side is empty. Stripping only the trailing
    # newline would leave a stray leading newline whenever the parent has no
    # comment of its own.
    $comment =~ s/^\n+//;
    $comment =~ s/\n+$//;

    my $sql = <<"EOSQL";
UPDATE
    PhysicalAction
SET
    version = ?,
    is_binary = ?,
    comment = ?
WHERE
    action_id = ?
EOSQL

    my $sth = $gCfg{dbh}->prepare($sql);
    $sth->execute( $child->{version}, $child->{is_binary}, $comment,
                   $row->{action_id} );

}  #  End UpdateParentRec
527 ###############################################################################
528 # DeleteChildRec
529 ###############################################################################
sub DeleteChildRec {
    # Removes the PhysicalAction row with the given action_id.
    my($id) = @_;

    my $sth = $gCfg{dbh}->prepare(
        "DELETE FROM PhysicalAction WHERE action_id = ?");

    $sth->execute($id);
}  #  End DeleteChildRec
539 ###############################################################################
540 # BuildVssActionHistory
541 ###############################################################################
sub BuildVssActionHistory {
    # Replays every PhysicalAction row in (timestamp, priority, sortkey) order
    # through an action handler, storing the resulting VSS actions and their
    # association with SVN revision numbers.
    my $vsscache = Vss2Svn::DataCache->new('VssAction', 1)
        || ThrowError("Could not create cache 'VssAction'");

    my $joincache = Vss2Svn::DataCache->new('SvnRevisionVssAction')
        || ThrowError("Could not create cache 'SvnRevisionVssAction'");

    # This will keep track of the current SVN revision, and increment it when
    # the author or comment changes, the timestamps span more than an hour
    # (by default), or the same physical file is affected twice
    my $svnrevs = Vss2Svn::SvnRevHandler->new()
        || ThrowError("Could not create SVN revision handler");
    $svnrevs->{verbose} = $gCfg{verbose};

    my $sth = $gCfg{dbh}->prepare(
        'SELECT * FROM PhysicalAction ORDER BY timestamp ASC, '
        . 'priority ASC, sortkey ASC');
    $sth->execute();

    ROW:
    while (my $row = $sth->fetchrow_hashref()) {
        $svnrevs->check($row);
        my $action = $row->{actiontype};

        my $handler = Vss2Svn::ActionHandler->new($row);
        $handler->{verbose} = $gCfg{verbose};

        my $physinfo = $handler->physinfo();

        if (defined($physinfo) && $physinfo->{type} != $row->{itemtype}) {
            ThrowError("Inconsistent item type for '$row->{physname}'; "
                       . "'$row->{itemtype}' unexpected");
        }

        # The handler's job is to keep track of physical-to-real name mappings
        # and return the full item paths corresponding to the physical item.
        # In case of a rename, it will return the old name, so we then do
        # another lookup on the new name.
        #
        # Commits and renames can apply to multiple items if that item is
        # shared; since SVN has no notion of such shares, we keep track of
        # those ourself and replicate the functionality using multiple
        # actions.
        my $handled   = $handler->handle($action);
        my $itempaths = $handled ? $handler->{itempaths} : undef;

        # Skip the row when the handler refused it or produced no paths.
        unless ($handled && defined $itempaths) {
            ThrowWarning($handler->{errmsg}) if $handler->{errmsg};
            next ROW;
        }

        # In cases of a corrupted share source, the handler may change the
        # action from 'SHARE' to 'ADD'
        $row->{actiontype} = $handler->{action};

        # May contain add'l info for the action depending on type:
        # RENAME: the new name (without path)
        # SHARE: the source path which was shared
        # MOVE: the new path
        # PIN: the version that was pinned
        $row->{info} = $handler->{info};

        # All affected paths are stored in one tab-separated column.
        $row->{itempaths} = join("\t", @$itempaths);

        my @fields = qw(physname version actiontype itempaths
                        itemtype is_binary info);

        $vsscache->add(@$row{@fields});
        $joincache->add($svnrevs->{revnum}, $vsscache->{pkey});
    }

    $vsscache->commit();
    $svnrevs->commit();
    $joincache->commit();

}  #  End BuildVssActionHistory
628 ###############################################################################
629 # ImportToSvn
630 ###############################################################################
sub ImportToSvn {
    # For the time being, only creating a dumpfile is supported, not a direct
    # import to SVN. That could perhaps be added by making the
    # CreateSvnDumpfile logic more generic and swapping the Vss2Svn::Dumpfile
    # object for one that handles imports.
    return CreateSvnDumpfile();
}  #  End ImportToSvn
640 ###############################################################################
641 # CreateSvnDumpfile
642 ###############################################################################
sub CreateSvnDumpfile {
    # Writes the dumpfile named by $gCfg{dumpfile}: one revision per
    # SvnRevision row, each followed by the VssAction rows linked to it.
    # Errors collected by the Vss2Svn::Dumpfile object are printed at the end.
    my $file = $gCfg{dumpfile};

    # Three-argument open: the file name is never interpreted as a mode.
    open my $fh, '>', $file
        or ThrowError("Could not create dumpfile '$file': $!");

    my $sth = $gCfg{dbh}->prepare(
        'SELECT * FROM SvnRevision ORDER BY revision_id ASC');
    $sth->execute();

    my $sql = <<"EOSQL";
SELECT * FROM
    VssAction
WHERE action_id IN
    (SELECT action_id FROM SvnRevisionVssAction WHERE revision_id = ?)
ORDER BY action_id
EOSQL

    my $action_sth = $gCfg{dbh}->prepare($sql);

    my $dumpfile = Vss2Svn::Dumpfile->new($fh);

    # physname => directory holding the exported versions of that file
    # (undef for projects and for files that could not be exported)
    my %exported = ();

    REVISION:
    while (defined(my $row = $sth->fetchrow_hashref())) {

        my $t0 = Benchmark->new;

        my $revision = $row->{revision_id};
        $dumpfile->begin_revision($row);

        # Revision 0 is started but has no actions replayed into it.
        next REVISION if $revision == 0;

        $action_sth->execute($revision);
        my $actions = $action_sth->fetchall_arrayref( {} );

        ACTION:
        foreach my $action (@$actions) {
            my $physname = $action->{physname};
            my $itemtype = $action->{itemtype};

            # Export each physical file at most once.
            if (!exists $exported{$physname}) {
                if ($itemtype == 2) {
                    $exported{$physname}
                        = ExportVssPhysFile($physname, $action->{version});
                }
                else {
                    $exported{$physname} = undef;
                }
            }

            # do_action needs to know the revision_id, so paste it on
            $action->{revision_id} = $revision;

            $dumpfile->do_action($action, $exported{$physname});
        }

        print "revision $revision: ",
              timestr(timediff(Benchmark->new, $t0)), "\n"
            if $gCfg{timing};
    }

    my @err = @{ $dumpfile->{errors} };

    if (scalar @err > 0) {
        print "\nERRORS during dumpfile creation:\n ";
        print join("\n ", @err);
    }

    $dumpfile->finish();

    # Buffered write errors only surface at close time, so report them.
    close $fh
        or ThrowWarning("Could not close dumpfile '$file': $!");

}  #  End CreateSvnDumpfile
718 ###############################################################################
719 # ExportVssPhysFile
720 ###############################################################################
sub ExportVssPhysFile {
    # Extracts one version of a physical file with ssphys "get".
    #
    # $physname - physical file name
    # $version  - version number to retrieve (1 is used when undefined)
    #
    # Returns the export directory, or undef when the physical file is
    # missing or its name cannot be split into directory prefixes.
    my($physname, $version) = @_;

    # The export directory uses the first two characters of the name, the VSS
    # data directory the first one. Never read $1/$2 from a failed match.
    unless ($physname =~ m/^((.).)/) {
        ThrowWarning("Can't retrieve revisions from physical file "
                     . "'$physname'; name is too short");
        return undef;
    }

    my($exportprefix, $dataprefix) = ($1, $2);

    my $exportdir = "$gCfg{vssdata}\\$exportprefix";
    my $physpath = "$gCfg{vssdir}\\data\\$dataprefix\\$physname";

    if (! -e $physpath) {
        # physical file doesn't exist; it must have been destroyed later
        ThrowWarning("Can't retrieve revisions from physical file "
                     . "'$physname'; it was either destroyed or corrupted");
        return undef;
    }

    mkpath($exportdir);

    # MergeParentData normally will merge two corresponding item and parent
    # actions. But if the actions are further apart than the maximum allowed
    # timespan, we will end up with an undefined version in an ADD action
    # here. As a hot fix, we define the version to 1, which will also revert
    # to the alpha 1 version behavior.
    if (! defined $version) {
        ThrowWarning("'$physname': no version specified for retrieval");

        # fall through and try with version 1.
        $version = 1;
    }

    if (! -e "$exportdir\\$physname.$version" ) {
        # Quote both paths, as the other ssphys invocations do, so that
        # directories containing spaces survive the shell.
        DoSsCmd("get -b -v$version --force-overwrite "
                . "\"$physpath\" \"$exportdir\\$physname\"");
    }

    return $exportdir;
}  #  End ExportVssPhysFile
757 ###############################################################################
758 # ShowHeader
759 ###############################################################################
sub ShowHeader {
    # Prints the start-up banner: whether this is a new or resumed run, the
    # start time, the directories in use and the ssphys executable/version.
    my $info;

    if ($gCfg{task} eq 'INIT') {
        $info = 'BEGINNING CONVERSION...';
    }
    else {
        $info = "RESUMING CONVERSION FROM TASK '$gCfg{task}' AT STEP $gCfg{step}...";
    }

    my $starttime = ctime($^T);
    my $ssversion = GetSsVersion();

    print <<"EOTXT";
======== VSS2SVN ========
$info
Start Time : $starttime

VSS Dir : $gCfg{vssdir}
Temp Dir : $gCfg{tempdir}
Dumpfile : $gCfg{dumpfile}

SSPHYS exe : $gCfg{ssphys}
SSPHYS ver : $ssversion

EOTXT

}  #  End ShowHeader
783 ###############################################################################
784 # ShowSummary
785 ###############################################################################
sub ShowSummary {
    # Prints when the script started, when it ended, and the elapsed
    # wall-clock time as HH:MM:SS.

    my $starttime = ctime($^T);
    chomp $starttime;
    my $endtime = ctime(time);
    chomp $endtime;

    my $elapsed;

    {
        # Integer arithmetic keeps the hour/minute/second split exact.
        use integer;
        my $total = time - $^T;

        my $hours = $total / 3600;
        my $mins  = ($total % 3600) / 60;
        my $secs  = $total % 60;

        $elapsed = sprintf("%2.2i:%2.2i:%2.2i", $hours, $mins, $secs);
    }

    print <<"EOTXT";
Started at : $starttime
Ended at : $endtime
Elapsed time : $elapsed (H:M:S)

EOTXT

}  #  End ShowSummary
816 ###############################################################################
817 # DoSsCmd
818 ###############################################################################
sub DoSsCmd {
    # Runs the ssphys executable with the given argument string and cleans up
    # the captured output in $gSysOut.
    #
    # $cmd - arguments appended after the quoted ssphys path
    #
    # Returns the success flag reported by DoSysCmd (1 on success, 0 on
    # failure). Failures are tolerated here, so the caller decides what to do.
    my($cmd) = @_;

    my $ok = DoSysCmd("\"$gCfg{ssphys}\" $cmd", 1);

    # Backticks yield undef when the command could not be started; there is
    # nothing to clean up in that case.
    if (defined $gSysOut) {
        $gSysOut =~ s/\x00//g;   # remove null bytes
        $gSysOut =~ s/.\x08//g;  # yes, I've seen VSS store backspaces in names!
        # allow all characters in the windows-1252 codepage: see http://de.wikipedia.org/wiki/Windows-1252
        $gSysOut =~ s/[\x00-\x09\x11\x12\x14-\x1F\x81\x8D\x8F\x90\x9D]/_/g; # just to be sure
    }

    return $ok;

}  #  End DoSsCmd
831 ###############################################################################
832 # DoSysCmd
833 ###############################################################################
sub DoSysCmd {
    # Runs a shell command and captures its output in $gSysOut.
    #
    # $cmd       - complete command line
    # $allowfail - when true, a failure is only reported; otherwise it is fatal
    #
    # Returns 1 when the command ran and exited with status 0, else 0.
    my($cmd, $allowfail) = @_;

    print "$cmd\n" if $gCfg{verbose};
    $gSysOut = `$cmd`;

    print $gSysOut if $gCfg{debug};

    # $? is -1 when the command could not be run at all; otherwise the exit
    # status is in its high byte.
    my $failure = $? == -1 ? "FAILED to execute: $!"
                : $?       ? sprintf("FAILED with non-zero exit status %d", $? >> 8)
                :            undef;

    return 1 unless defined $failure;

    ThrowWarning($failure);
    die unless $allowfail;

    return 0;

}  #  End DoSysCmd
860 ###############################################################################
861 # GetSsVersion
862 ###############################################################################
sub GetSsVersion {
    # Asks the ssphys executable for its version banner.
    #
    # Returns the text from "ssphys v" up to the first colon or end of line,
    # or 'unknown' when the program produced no such line.
    my $out = `\"$gCfg{ssphys}\" -v 2>&1`;

    # No output at all (for example, the program could not be started).
    return 'unknown' unless defined $out;

    # Capture in list context: a failed match gives undef here rather than
    # whatever an earlier successful match left behind in $1.
    my($version) = $out =~ m/^(ssphys v.*?)[:\n]/m;

    return $version || 'unknown';
}  #  End GetSsVersion
870 ###############################################################################
871 # ThrowWarning
872 ###############################################################################
sub ThrowWarning {
    # Reports a problem on STDERR and records it in @gErr.
    #
    # $msg      - message text
    # $callinfo - optional arrayref as returned by caller(); defaults to the
    #             immediate caller of this routine
    my($msg, $callinfo) = @_;

    $callinfo = [caller()] unless $callinfo;

    # caller() yields (package, filename, line); only the last two are shown.
    my(undef, $file, $line) = @$callinfo;

    $msg .= "\nat $file line $line";

    warn "ERROR -- $msg\n";

    push @gErr, $msg;

}  #  End ThrowWarning
886 ###############################################################################
887 # ThrowError
888 ###############################################################################
sub ThrowError {
    # Reports a fatal problem, attributing it to the caller of ThrowError,
    # then stops the conversion.
    my @origin = caller();

    ThrowWarning(@_, \@origin);
    StopConversion();
}  #  End ThrowError
894 ###############################################################################
895 # StopConversion
896 ###############################################################################
sub StopConversion {
    # Releases the database connection and open files, then exits with
    # status 1.
    DisconnectDatabase();
    CloseAllFiles();

    exit(1);
}  #  End StopConversion
904 ###############################################################################
905 # CloseAllFiles
906 ###############################################################################
sub CloseAllFiles {
    # Currently a stub: it performs no work. StopConversion calls it during
    # shutdown.
    return;
}  #  End CloseAllFiles
911 ###############################################################################
912 # SetSystemTask
913 ###############################################################################
sub SetSystemTask {
    # Records the current task in the SystemInfo table and in $gCfg{task}.
    #
    # $task      - name of the task to store
    # $leavestep - when true, the stored step is left alone; otherwise it is
    #              reset to 0
    my($task, $leavestep) = @_;

    print "\nSETTING TASK $task\n" if $gCfg{verbose};

    # The statement is prepared once and reused from %gSth afterwards.
    my $sth = $gSth{'SYSTEMTASK'} ||= $gCfg{dbh}->prepare(<<'EOSQL');
UPDATE
    SystemInfo
SET
    task = ?
EOSQL

    $sth->execute($task);

    $gCfg{task} = $task;

    SetSystemStep(0) unless $leavestep;

}  #  End SetSystemTask
942 ###############################################################################
943 # SetSystemStep
944 ###############################################################################
sub SetSystemStep {
    # Records the current step in the SystemInfo table and in $gCfg{step}.
    #
    # $step - step number to store
    my($step) = @_;

    print "\nSETTING STEP $step\n" if $gCfg{verbose};

    my($sql, $sth);

    $sth = $gSth{'SYSTEMSTEP'};

    if (!defined $sth) {
        $sql = <<"EOSQL";
UPDATE
    SystemInfo
SET
    step = ?
EOSQL

        # Cache the handle in %gSth, the same hash it is looked up in (and
        # the one SetSystemTask uses). Storing it in %gCfg meant the lookup
        # above never hit and the statement was re-prepared on every call.
        $sth = $gSth{'SYSTEMSTEP'} = $gCfg{dbh}->prepare($sql);
    }

    $sth->execute($step);

    $gCfg{step} = $step;

}  #  End SetSystemStep
971 ###############################################################################
972 # ConnectDatabase
973 ###############################################################################
sub ConnectDatabase {
    # Opens the SQLite database named by $gCfg{sqlitedb} and stores the handle
    # in $gCfg{dbh}. An existing database file is deleted first unless this
    # run resumes an earlier one past the INIT task.
    my $db = $gCfg{sqlitedb};

    my $starting_over = !$gCfg{resume}
        || (defined($gCfg{task}) && $gCfg{task} eq 'INIT');

    if (-e $db && $starting_over) {
        unlink $db or ThrowError("Could not delete existing database "
                                 . $gCfg{sqlitedb});
    }

    print "Connecting to database $db\n\n";

    my %attr = (RaiseError => 1, AutoCommit => 1);

    $gCfg{dbh} = DBI->connect("dbi:SQLite2:dbname=$db", '', '', \%attr)
        or die "Couldn't connect database $db: $DBI::errstr";

}  #  End ConnectDatabase
992 ###############################################################################
993 # DisconnectDatabase
994 ###############################################################################
sub DisconnectDatabase {
    # Closes the database connection if one was opened.
    my $dbh = $gCfg{dbh};

    $dbh->disconnect if defined $dbh;
}  #  End DisconnectDatabase
999 ###############################################################################
1000 # SetupGlobals
1001 ###############################################################################
sub SetupGlobals {
    # Prepares the shared state the conversion tasks rely on: the system
    # tables (created for a new run, reloaded otherwise), default paths, the
    # action-type table read from the DATA section, and the class-level
    # settings of the helper modules.
    if (defined($gCfg{task}) && $gCfg{task} eq 'INIT') {
        InitSysTables();
    } else {
        ReloadSysTables();
    }

    $gCfg{ssphys} = 'SSPHYS.exe' if !defined($gCfg{ssphys});
    $gCfg{vssdatadir} = "$gCfg{vssdir}\\data";

    (-d "$gCfg{vssdatadir}") or ThrowError("$gCfg{vssdir} does not appear "
                                           . "to be a valid VSS database");

    # Each DATA line holds tab-separated fields: action id, item type, action.
    # A lexical line variable keeps the global $_ untouched for callers, and
    # lines without an id (such as blank lines) are skipped instead of
    # creating an entry under the empty key.
    while (my $line = <DATA>) {
        chomp $line;
        my($id, $type, $action) = split "\t", $line;
        next unless defined($id) && length($id);

        $gActionType{$id} = {type => $type, action => $action};
    }

    Vss2Svn::DataCache->SetCacheDir($gCfg{tempdir});
    Vss2Svn::DataCache->SetDbHandle($gCfg{dbh});
    Vss2Svn::DataCache->SetVerbose($gCfg{verbose});

    Vss2Svn::SvnRevHandler->SetRevTimeRange($gCfg{revtimerange})
        if defined $gCfg{revtimerange};

}  #  End SetupGlobals
1031 ###############################################################################
1032 # InitSysTables
1033 ###############################################################################
sub InitSysTables {
    # Creates every table and index the conversion uses, then stores the
    # current configuration as the single row of SystemInfo.

    # Fixed schema, executed in the order listed.
    my @schema = (
        <<'EOSQL',
CREATE TABLE
    Physical (
        physname    VARCHAR
    )
EOSQL
        <<'EOSQL',
CREATE TABLE
    NameLookup (
        offset      INTEGER,
        name        VARCHAR
    )
EOSQL
        <<'EOSQL',
CREATE TABLE
    PhysicalAction (
        action_id   INTEGER PRIMARY KEY,
        physname    VARCHAR,
        version     INTEGER,
        parentphys  VARCHAR,
        actiontype  VARCHAR,
        itemname    VARCHAR,
        itemtype    INTEGER,
        timestamp   INTEGER,
        author      VARCHAR,
        is_binary   INTEGER,
        info        VARCHAR,
        priority    INTEGER,
        sortkey     VARCHAR,
        parentdata  INTEGER,
        comment     TEXT
    )
EOSQL
        <<'EOSQL',
CREATE INDEX
    PhysicalAction_IDX1 ON PhysicalAction (
        timestamp   ASC,
        priority    ASC,
        sortkey     ASC
    )
EOSQL
        <<'EOSQL',
CREATE INDEX
    PhysicalAction_IDX2 ON PhysicalAction (
        physname    ASC,
        parentphys  ASC,
        actiontype  ASC,
        timestamp   ASC,
        author      ASC
    )
EOSQL
        <<'EOSQL',
CREATE TABLE
    VssAction (
        action_id   INTEGER PRIMARY KEY,
        physname    VARCHAR,
        version     INTEGER,
        action      VARCHAR,
        itempaths   VARCHAR,
        itemtype    INTEGER,
        is_binary   INTEGER,
        info        VARCHAR
    )
EOSQL
        <<'EOSQL',
CREATE INDEX
    VssAction_IDX1 ON VssAction (
        action_id   ASC
    )
EOSQL
        <<'EOSQL',
CREATE TABLE
    SvnRevision (
        revision_id INTEGER PRIMARY KEY,
        timestamp   INTEGER,
        author      VARCHAR,
        comment     TEXT
    )
EOSQL
        <<'EOSQL',
CREATE TABLE
    SvnRevisionVssAction (
        revision_id INTEGER,
        action_id   INTEGER
    )
EOSQL
        <<'EOSQL',
CREATE INDEX
    SvnRevisionVssAction_IDX1 ON SvnRevisionVssAction (
        revision_id ASC,
        action_id   ASC
    )
EOSQL
    );

    foreach my $statement (@schema) {
        my $sth = $gCfg{dbh}->prepare($statement);
        $sth->execute;
    }

    # SystemInfo has one VARCHAR column per saved configuration item.
    my @cfgitems = qw(task step vssdir svnurl svnuser svnpwd ssphys tempdir
                      setsvndate starttime);

    my $fielddef = join(",\n ",
                        map {sprintf('%-12.12s VARCHAR', $_)} @cfgitems);

    my $create = <<"EOSQL";
CREATE TABLE
    SystemInfo (
 $fielddef
    )
EOSQL

    my $sth = $gCfg{dbh}->prepare($create);
    $sth->execute;

    # Store the current value of every configuration item.
    my $fields = join(', ', @cfgitems);
    my $args = join(', ', map {'?'} @cfgitems);

    my $insert = <<"EOSQL";
INSERT INTO
    SystemInfo ($fields)
VALUES
    ($args)
EOSQL

    $sth = $gCfg{dbh}->prepare($insert);
    $sth->execute(map {$gCfg{$_}} @cfgitems);
    $sth->finish();

}  #  End InitSysTables
1202 ###############################################################################
1203 # ReloadSysTables
1204 ###############################################################################
# Reload the configuration saved in the SystemInfo table when resuming a run.
#
# The first row of SystemInfo is read and walked column by column:
#   * if %gCfg already holds a defined value for that column, the %gCfg value
#     wins and is written back to SystemInfo;
#   * otherwise the saved value is copied into %gCfg.
# Afterwards the (possibly restored) task is passed to SetSystemTask.
#
# Dies if SystemInfo contains no row, since there is then nothing to resume
# from.
sub ReloadSysTables {
    my $dbh = $gCfg{dbh};

    my $sth = $dbh->prepare("SELECT * FROM SystemInfo");
    $sth->execute();

    my $row = $sth->fetchrow_hashref();

    # Only this one row is used and it has already been copied out, so the
    # SELECT handle is not needed while the UPDATEs below run.
    $sth->finish();

    die "ERROR: SystemInfo table is empty; cannot resume previous run\n"
        unless defined $row;

    foreach my $field (sort keys %$row) {
        if (defined($gCfg{$field})) {   # allow user to override saved vals
            # $field is a column name read back from the table itself; only
            # the value is bound as a parameter.
            $dbh->do("UPDATE SystemInfo SET $field = ?", undef,
                     $gCfg{$field});
        } else {
            $gCfg{$field} = $row->{$field};
        }
    }

    &SetSystemTask($gCfg{task});

}  # End ReloadSysTables
1231 ###############################################################################
1232 # Initialize
1233 ###############################################################################
# Parse the command line into %gCfg, apply defaults, derive the file and
# directory names used inside the temp directory, and prepare the working
# directories.
#
# --vssdir is mandatory; without it GiveHelp is called with an error message.
# When --resume is in effect the sub returns 1 once the shared settings are in
# place, leaving the existing vssdata directory and the saved task/step alone.
# Otherwise the vssdata directory is recreated from scratch and the task/step
# bookkeeping is reset to the beginning ('INIT', step 0).
#
# Dies if the temp directory or the vssdata directory cannot be created.
sub Initialize {
    # --ssphys must be listed here: the usage text advertises it, and the
    # default assigned further down only applies when it was not given.
    GetOptions(\%gCfg,'vssdir=s','tempdir=s','dumpfile=s','resume','verbose',
               'debug','timing+','task=s','revtimerange=i','ssphys=s');

    &GiveHelp("Must specify --vssdir") if !defined($gCfg{vssdir});
    $gCfg{tempdir} = '.\\_vss2svn' if !defined($gCfg{tempdir});
    $gCfg{dumpfile} = 'vss2svn-dumpfile.txt' if !defined($gCfg{dumpfile});

    $gCfg{sqlitedb} = "$gCfg{tempdir}\\vss_data.db";

    # XML output from ssphysout placed here.
    $gCfg{ssphysout} = "$gCfg{tempdir}\\ssphysout";

    # Commit messages for SVN placed here.
    $gCfg{svncomment} = "$gCfg{tempdir}\\svncomment.tmp.txt";

    # Everything else lives below the temp directory, so fail right away with
    # the OS error if it cannot be created.
    unless (-d $gCfg{tempdir}) {
        mkdir $gCfg{tempdir}
            or die "ERROR: cannot create temp directory "
                 . "'$gCfg{tempdir}': $!\n";
    }

    # Directories for holding VSS revisions
    $gCfg{vssdata} = "$gCfg{tempdir}\\vssdata";

    if ($gCfg{resume} && !-e $gCfg{sqlitedb}) {
        warn "WARNING: --resume set but no database exists; starting new "
            . "conversion...";
        $gCfg{resume} = 0;
    }

    # --debug implies --verbose
    if ($gCfg{debug}) {
        $gCfg{verbose} = 1;
    }
    $gCfg{timing} = 0 unless defined $gCfg{timing};

    $gCfg{starttime} = scalar localtime($^T);

    ### Don't go past here if resuming a previous run ###
    if ($gCfg{resume}) {
        return 1;
    }

    # Fresh run: start with an empty vssdata directory. A directory that is
    # still present after a failed rmtree is tolerated; a missing one is not.
    rmtree($gCfg{vssdata}) if (-e $gCfg{vssdata});
    unless (mkdir($gCfg{vssdata}) || -d $gCfg{vssdata}) {
        die "ERROR: cannot create data directory '$gCfg{vssdata}': $!\n";
    }

    #foreach my $check (qw(svnurl)) {
    #    &GiveHelp("ERROR: missing required parameter $check")
    #        unless defined $gCfg{$check};
    #}

    $gCfg{ssphys} ||= 'SSPHYS.exe';
    $gCfg{svn} ||= 'SVN.exe';

    $gCfg{task} = 'INIT';
    $gCfg{step} = 0;
}  # End Initialize
1287 ###############################################################################
1288 # GiveHelp
1289 ###############################################################################
# Print the usage screen to STDOUT, headed by an optional message, and end the
# program.
#
#   $msg - text shown above the usage summary; 'Online Help' is substituted
#          when no (or a false) message is passed.
#
# Does not return: the script exits with status 1 after printing.
sub GiveHelp {
    my $msg = shift;
    $msg = 'Online Help' unless $msg;

    # Interpolating heredoc: $msg is expanded and each "\\" prints as a single
    # backslash.
    my $usage = <<"EOTXT";

$msg

USAGE: perl vss2svn.pl --vssdir <dir> [options]

REQUIRED PARAMETERS:
--vssdir <dir> : Directory where VSS database is located. This should be
the directory in which the "srcsafe.ini" file is located.

OPTIONAL PARAMETERS:
--ssphys <path> : Full path to ssphys.exe program; uses PATH otherwise
--tempdir <dir> : Temp directory to use during conversion;
default is .\\_vss2svn
--dumpfile <file> : specify the subversion dumpfile to be created;
default is .\\vss2svn-dumpfile.txt
--revtimerange <sec> : specify the difference between two ss actions
that are treated as one subversion revision;
default is 3600 seconds (== 1hour)

--resume : Resume a failed or aborted previous run
--task <task> : specify the task to resume; task is one of the following
INIT, LOADVSSNAMES, FINDDBFILES, GETPHYSHIST,
MERGEPARENTDATA, BUILDACTIONHIST, IMPORTSVN

--verbose : Print more info about the items being processed
--debug : Print lots of debugging info.
--timing : Show timing information during various steps
EOTXT

    print $usage;
    exit(1);
}  # End GiveHelp
1328 # Following is the data for %gActionType. First field is the node type from
1329 # ssphys; second field is item type (1=project, 2=file); third field is the
1330 # generic action it should be mapped to (loosely mapped to SVN actions)
1332 # RollBack is only seen in combination with a BranchFile activity, so actually
1333 # RollBack is the item view on the activity and BranchFile is the parent side
1334 # ==> map RollBack to BRANCH, so that we can join the two actions in the
1335 # MergeParentData step
1337 __DATA__
1338 CreatedProject 1 ADD
1339 AddedProject 1 ADD
1340 RenamedProject 1 RENAME
1341 MovedProjectTo 1 IGNORE
1342 MovedProjectFrom 1 MOVE
1343 DeletedProject 1 DELETE
1344 DestroyedProject 1 DELETE
1345 RecoveredProject 1 RECOVER
1346 CheckedIn 2 COMMIT
1347 CreatedFile 2 ADD
1348 AddedFile 2 ADD
1349 RenamedFile 2 RENAME
1350 DeletedFile 2 DELETE
1351 DestroyedFile 2 DELETE
1352 RecoveredFile 2 RECOVER
1353 SharedFile 2 SHARE
1354 BranchFile 2 BRANCH
1355 PinnedFile 2 IGNORE
1356 RollBack 2 BRANCH
1357 UnpinnedFile 2 IGNORE
1358 Labeled 2 IGNORE