2 ########################################################################
4 # Copyright (c) 2000, 2001 by Donald Sharp <sharpd@cisco.com>
7 # Some portions Copyright (c) 2002, 2003 by
8 # Derek R. Price <mailto:derek@ximbiot.com>
9 # & Ximbiot <http://ximbiot.com>.
10 # All rights reserved.
12 # Permission is granted to copy and/or distribute this file, with or
13 # without modifications, provided this notice is preserved.
15 # This program is free software; you can redistribute it and/or modify
16 # it under the terms of the GNU General Public License as published by
17 # the Free Software Foundation; either version 2, or (at your option)
20 # This program is distributed in the hope that it will be useful,
21 # but WITHOUT ANY WARRANTY; without even the implied warranty of
22 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 # GNU General Public License for more details.
25 ########################################################################
27 =head1 validate_repo.pl
29 Script to check the integrity of the Repository.
33 perldoc validate_repo.pl
34 validate_repo.pl --help [--verbose!]
35 validate_repo.pl [--verbose!] [--cvsroot=CVSROOT] [--exec=SCRIPT]...
36 [--all-revisions!] [module]...
40 This script will search through a repository and determine if any of the
41 files in it are corrupted.
43 This is normally accomplished by checking out all I<important> revisions, where
44 I<important> revisions are defined as the smallest set which, when checked out,
45 will cause each and every revision's integrity to be verified. This resolves
46 to the most recent revision on each branch and the first and last revisions on
49 Please do not run this script inside of the repository itself. This will cause
58 Print this very help text (or, with C<--verbose>, act like
59 C<perldoc validate_repo.pl>).
61 =item C<-a> or C<--all-revisions>
63 Check out each and every revision rather than just the I<important> ones.
64 This flag is useful with C<--exec> to execute the C<SCRIPT> (from C<--exec>
65 below) on a checked out copy of each and every revision.
67 =item C<-d> or C<--cvsroot=CVSROOT>
69 Use repository specified by C<CVSROOT>. Defaults to the contents of the
70 F<./CVS/Root> file when it exists and is readable, then to the contents of the
71 C<$CVSROOT> environment variable when it is set and non-empty.
73 =item C<-e> or C<--exec=SCRIPT>
75 Execute (as from command prompt) C<SCRIPT> if it exists as a file, is readable,
76 and is executable, or evaluate (as a perl script) C<SCRIPT> for a checked out
77 copy of each I<important> revision of each RCS archive in CVSROOT. Executed
78 scripts are passed C<CVSROOT FILE REVISION FNO>, where C<CVSROOT> is what
79 you'd think, C<FILE> is the path to the file relative to C<CVSROOT> and
80 suitable for use as an argument to C<cvs co>, C<cvs rlog>, and so on,
81 C<REVISION> is the revision of the checked out file, and C<FNO> is the file
82 number of the open, read-only file descriptor containing the checked out
83 contents of revision C<REVISION> of C<FILE>. An evaluated C<SCRIPT> will find
84 the same four arguments in the same order in C<@_>, except that C<FNO> will be
87 With C<--all-revisions>, execute or evaluate C<SCRIPT> for a checked out
88 version of each revision in the RCS archive.
90 =item C<-v> or C<--verbose>
92 Print verbose debugging information (or, when specified with C<--help>, act
93 like C<perldoc validate_repo.pl>).
101 The module in the repository to examine. Defaults to the contents of the
102 F<./CVS/Repository> file when it exists and is readable, then to F<.>
107 setenv CVSROOT /release/111/cvs
111 validate_repo.pl -d /another/cvsroot --verbose --exec '
112 system "grep \"This string means Im a bad, bad file!\" <&"
115 or die "Revision $_[2] of $_[0]/$_[1],v is bad, bad, bad!"'
123 ######################################################################
125 ######################################################################
128 use Fcntl qw( F_GETFD F_SETFD );
137 ######################################################################
139 ######################################################################
148 @list_of_broken_files
151 $total_interesting_revisions
158 ######################################################################
160 ######################################################################
162 ######################################################################
168 # To search the repository for broken files
174 # $cvsroot - The CVS repository to search through.
175 # $ENV{ CVSROOT } - The default CVS repository to search through.
176 # @list_of_broken_files - The list of files that need to
178 # $verbose - is verbose mode on?
179 # @scripts - scripts to run on checked out files.
180 # $total_revisions - The number of revisions considered
181 # $total_interesting_revisions - The number of revisions used
182 # $total_files - The total number of files looked at.
185 # A list of broken files
188 # Do not run this script inside the repository. Choose
189 # a nice safe spot( like /tmp ) outside of the repository.
191 ######################################################################
# Reset the revision counters that are reported in the final summary.
198 $total_interesting_revisions = 0;
199 $total_revisions = 0;
# Let single-letter options be bundled on the command line (e.g. -av).
201 Getopt::Long::Configure( "bundling" );
# Option table.  Each long option has a one-letter alias; "!" marks a
# negatable boolean and "=s" a mandatory string value.  --exec is bound to
# an array, so it may be given several times and accumulates in @scripts.
203 'all-revisions|a!' => \$all_revisions,
204 'cvsroot|d=s' => \$cvsroot,
205 'exec|e=s' => \@scripts,
206 'help|h|?!' => \$help,
207 'verbose|v!' => \$verbose
# Print the POD usage on STDOUT and exit with status 2; --verbose raises
# the pod2usage verbosity from 1 to 2.
# NOTE(review): the condition guarding this call is not visible here --
# presumably it is `if $help'; confirm.
215 pod2usage( -exitval => 2,
216 -verbose => $verbose ? 2 : 1,
217 -output => \*STDOUT )
220 verbose( "Verbose Mode Turned On\n" );
# No --cvsroot given: fall back to the first line of ./CVS/Root when that
# file exists and is readable.
# NOTE(review): the result of `new IO::File' is not checked before use.
222 if( !$cvsroot && -f "CVS/Root" && -r "CVS/Root" )
224 my $file = new IO::File "< CVS/Root";
225 $cvsroot = $file->getline;
# Next fallback is the CVSROOT environment variable; with no root at all
# there is nothing to validate, so bail out with a usage error.
228 $cvsroot = $ENV{'CVSROOT'} unless $cvsroot;
229 pod2usage( "error: Must set CVSROOT" ) unless $cvsroot;
# A root carrying an access-method prefix (":word:") other than ":local:"
# is reported as an error.
# NOTE(review): the rest of this condition and what happens after the
# message are on lines not visible here.
231 if( $cvsroot =~ /^:\w+:/ && $cvsroot !~ /^:local:/
234 print STDERR "CVSROOT must be :local:\n";
# Turn the path of an executable script into an absolute one.  Entries that
# contain a newline or are not executable are left untouched.
# NOTE(review): presumably $_ iterates over @scripts here and the absolute
# path is needed because File::Find changes directory -- confirm.
240 $_ = File::Spec->rel2abs( $_ ) unless /\n/ || !-x $_;
# No module on the command line: take the first line of ./CVS/Repository
# when that file exists and is readable.
# NOTE(review): how $module is used afterwards is not visible here;
# presumably it is chomped and pushed onto @ARGV -- confirm.
244 if( !scalar( @ARGV ) && -f "CVS/Repository" && -r "CVS/Repository" )
246 my $file = new IO::File "< CVS/Repository";
247 my $module = $file->getline;
# Still nothing to examine: default to the top of the repository.
252 push @ARGV, "." unless( scalar @ARGV );
# Examine every requested module in turn.  Each entry of @ARGV is a path
# relative to $cvsroot and is rewritten in place to the directory that is
# actually searched.
foreach my $directory_to_look_at ( @ARGV )
{
    $directory_to_look_at = File::Spec->catfile( $cvsroot,
                                                 $directory_to_look_at );

    # Follow symlinks, but give up after a few hops so that a link cycle
    # cannot hang the script.
    # NOTE(review): the counter declaration and increment were restored
    # from the `$sym_count > 5' test -- confirm against the full file.
    my $sym_count = 0;
    while( -l $directory_to_look_at )
    {
        $directory_to_look_at = readlink( $directory_to_look_at );
        $sym_count++;
        die( "Encountered too many symlinks for CVSROOT ($cvsroot)\n" )
            if( $sym_count > 5 );
    }

    # Remove indirections, i.e. any run of trailing "/." components.  The
    # dot has to be escaped: as a wildcard it would also strip a final
    # one-character path component ("/cvs/m" would become "/cvs").
    $directory_to_look_at =~ s#(/+\.)*$##;

    verbose( "Processing: $directory_to_look_at\n" );
    @ignore_files = get_ignore_files_from_cvsroot( $directory_to_look_at );
    find( \&process_file, $directory_to_look_at );
}
# Final report.  Each list heading is printed only when its list is
# non-empty; the two summary lines at the end are always printed.
276 print "List of corrupted files\n" if @list_of_broken_files;
277 foreach my $broken ( @list_of_broken_files )
279 print( "**** File: $broken\n" );
# Revisions whose number did not have the expected form; each entry is a
# hash with the keys 'rev' and 'file'.
282 print "List of Files containing invalid revisions:\n"
284 foreach ( @invalid_revs )
286 print( "**** File: ($_->{'rev'}) $_->{'file'}\n" );
# Files that are neither RCS archives nor matched by an ignore pattern.
289 print "List of Files That Don't belong in Repository:\n"
291 foreach my $extra ( @extra_files )
293 print( "**** File: $extra\n" );
# One-line totals: files seen, sizes of the three lists above, and the
# number of ignored files.
# NOTE(review): the labels printed before the second and third counts are
# on lines not visible here.
295 print( "Total Files: $total_files Corrupted files: "
296 . scalar( @list_of_broken_files )
298 . scalar( @invalid_revs )
300 . scalar( @extra_files )
301 . " Ignored Files: $ignored_files\n" );
302 print( "Total Revisions: $total_revisions Interesting Revisions: $total_interesting_revisions\n" );
# Write the given message fragments to STDERR, but only when the global
# $verbose flag is set; otherwise do nothing.
sub verbose
{
    my @messages = @_;

    if( $verbose )
    {
        print STDERR @messages;
    }
}
314 ######################################################################
320 # This function is called by the find function, its purpose
321 # is to decide if it is important to look at a file or not. When
322 # a file is important, we log it or call &look_at_cvs_file on it.
325 # 1) If the file is an archive file, we call &look_at_cvs_file on
327 # 2) Else, if the file is not in the ignore list, we store its name
334 # $cvsroot - The CVS repository to search through
335 # @ignore_files - File patterns we can afford to ignore.
336 # $File::Find::name - The absolute path of the file being examined.
344 ######################################################################
# File::Find callback: classify the entry named by $File::Find::name.
#   - directories are skipped;
#   - RCS archives ("*,v") are handed to look_at_cvs_file();
#   - files matching one of @ignore_files are counted as ignored;
#   - anything else is recorded in @extra_files.
# Reads $cvsroot and @ignore_files; updates @extra_files and the counters
# $total_files and $ignored_files.
sub process_file
{
    return if -d $File::Find::name;

    # Work with the path relative to $cvsroot.  The root is quoted with
    # \Q...\E so that regex metacharacters in it (".", "+", ...) are matched
    # literally rather than breaking or mis-anchoring the pattern.
    my $path = $File::Find::name;
    $path =~ s#^\Q$cvsroot\E/(\./)*##;

    # NOTE(review): the two counter increments in this sub were restored
    # from the summary report's use of $total_files and $ignored_files --
    # confirm against the full file.
    $total_files++;

    verbose( "Examining `$path'\n" );

    if( $path =~ s/,v$// )
    {
        # An RCS archive; $path now names the file without the ",v" suffix.
        look_at_cvs_file( $path );
    }
    elsif( !grep { $path =~ $_ } @ignore_files )
    {
        push @extra_files, $path;
        verbose( "Adding unrecognized file `$path' to corrupted list.\n" );
    }
    else
    {
        $ignored_files++;
        verbose( "Ignoring `$path'\n" );
    }
}
372 ######################################################################
378 # To decide if a file is broken or not. The algorithm is:
379 # a) Get the revision history for the file.
380 # - If that fails the file is broken, save the fact
381 # and continue processing other files.
382 # - If that succeeds we have a list of revisions.
383 # b) For each revision call &check_revision on the file.
384 # - If that fails the file is broken, save the fact
385 # and continue processing other files.
389 # $file - The path of the file to look at, relative to $cvsroot and
390 # suitable for use as an argument to `cvs co', `cvs rlog', and
391 # the rest of CVS's r* commands.
400 # We have to handle Attic files in a special manner.
401 # Basically remove the Attic from the string if it
402 # exists at the end of the $path variable.
404 ######################################################################
# Split $file into its base name and directory part.
408 my( $name, $path ) = fileparse( $file );
# An archive stored in an Attic directory is addressed without the "Attic/"
# path element, so drop it when it ends the directory part.
410 $file = $path . $name if $path =~ s#Attic/$##;
# $finfo describes the file as a whole; $rinfo is keyed by revision number
# and is undef when get_history() found no revisions.
412 my( $finfo, $rinfo ) = get_history( $file );
# No revisions at all: record the file as broken.
# NOTE(review): the statement that leaves the sub after this push is not
# visible here.
414 unless( defined $rinfo )
416 verbose( "\t`$file' is corrupted. It was determined to contain no\n"
417 . "\trevisions via a cvs rlog command\n" );
418 push( @list_of_broken_files, $file );
# With --all-revisions every known revision is checked; otherwise only the
# subset chosen by find_interesting_revisions().
423 $all_revisions ? keys %$rinfo
424 : find_interesting_revisions( keys %$rinfo );
# Check the selected revisions; a revision that cannot be checked out marks
# the whole file as broken.
# NOTE(review): presumably the loop stops at the first failure so a file is
# listed only once -- the statement doing so is not visible here.
426 foreach my $revision ( @int_revisions )
428 verbose( "\t\tLooking at Revision: $revision\n" );
429 if( !check_revision( $file, $revision, $finfo, $rinfo ) )
431 verbose( "\t$file is corrupted in revision: $revision\n" );
432 push( @list_of_broken_files, $file );
438 ######################################################################
444 # To retrieve an array of revision numbers.
447 # $file - The file to retrieve the revision numbers for
450 # $cvsroot - the CVSROOT we are examining
453 # On Success - A hash of revision info, indexed by revision numbers.
454 # On Failure - undef.
457 # The $_ is saved off because the File::Find functionality
458 # expects the $_ to not have been changed.
459 # The -N option for the rlog command suppresses the list of
460 # tags and branch names in its output.
462 ######################################################################
# $file is placed inside a double-quoted shell word below, so backslash-
# escape the characters the shell still interprets there: " $ ` and \.
466 $file =~ s/(["\$`\\])/\\$1/g;
467 my %finfo; # Info about the file.
468 my %rinfo; # Info about revisions in the file.
# Read the output of `cvs rlog -N' through a pipe.  Unless --verbose is on,
# the command's stderr is merged into the same stream.
# NOTE(review): $cvsroot is interpolated into the shell command unquoted.
471 my $fh = new IO::File( "cvs -d $cvsroot rlog -N \"$file\""
472 . ($verbose ? "" : " 2>&1") . " |" )
473 or die( "unable to run `cvs rlog', help" );
476 while( my $line = $fh->getline )
# A "revision N" line, optionally followed by a tab and lock information.
480 if( ( $revision ) = $line =~ /^revision (.*?)(\tlocked by: \S+;)?$/ )
# A well-formed number is two dot-separated integers followed by any number
# of further pairs; anything else is recorded in @invalid_revs.
482 unless($revision =~ m/^\d+\.\d+(?:\.\d+\.\d+)*$/)
484 push @invalid_revs, { 'file' => $file, 'rev' => $revision };
485 verbose( "Adding invalid revision `$revision' of file `$file' to invalid revs list.\n" );
492 # We require ---- before a ^revision tag, not a revision
# The line following a revision header carries its date, author and state;
# failing to parse it is fatal.
498 if( my ( $date, $author, $state ) =
499 $line =~ /^date: (\S+ \S+); author: ([^;]+); state: (\S+);/ )
510 die "Couldn't read date/author/state for revision $revision\n"
511 . "of $file from `cvs rlog'.\n"
519 # Until we find the first ---- below, we can read general file info
# The keyword substitution mode is kept in $finfo{'kwmode'}.
521 $line =~ /^keyword substitution: (\S+)$/ )
523 $finfo{'kwmode'} = $kwmode;
527 # rlog outputs a "----" line before the actual revision
528 # without this we'll pick up peoples comments if they
529 # happen to start with revision
530 if( $line =~ /^----------------------------$/ )
532 # Catch this case when $ignore == -1 or 0
# Verbose dump of everything collected for a revision.
541 verbose( "Revision $_: " );
542 verbose( join( ", ", %{$rinfo{$_}} ) );
# close() on a pipe fails both for a system error and for a non-zero exit
# status of the child; only the former ($! set) is fatal here.
# NOTE(review): the message names `cvs co' although this pipe was opened on
# `cvs rlog' -- looks like a copy/paste slip.
547 die "Syserr closing pipe from `cvs co': $!"
548 if !$fh->close && $!;
# Always return the file info; the revision info is returned as undef when
# no revision was recorded.
551 return( \%finfo, %rinfo ? \%rinfo : undef );
554 ######################################################################
560 # Given a file and a revision number ensure that we can check out that
563 # If the user has specified any scripts (passed in as arguments to --exec
564 # and stored in @scripts), run them on the checked out revision. If
565 # executable scripts exit with a non-zero status or evaluated scripts set
566 # $@ (die), print $status or $@ as a warning.
569 # $file - The file to look at.
570 # $revision - The revision to look at.
571 # $rinfo - A reference to a hash containing information about the
572 # revisions in $file.
573 # For instance, $rinfo->{$revision}->{'date'} contains the
574 # date revision $revision was committed.
580 # If we can get the File - 1
581 # If we can not get the File - 0
584 # cvs command line options are as follows:
585 # -n - Do not run any checkout program as specified by the -o
586 # option in the modules file
587 # -p - Put all output to standard out.
588 # -r - The revision of the file that we would like to look at.
589 # -ko - Get the revision exactly as checked in - do not allow
590 # RCS keyword substitution.
591 # Please note that cvs will return 0 for being able to successfully
592 # read the file and 1 for failure to read the file.
594 ######################################################################
597 my( $file, $revision, $finfo, $rinfo ) = @_;
# $file is placed inside a double-quoted shell word below, so backslash-
# escape the characters the shell still interprets there: " $ ` and \.
598 $file =~ s/(["\$`\\])/\\$1/g;
600 # Allow binaries to be checked out as such. Otherwise, use -ko to avoid
601 # replacing keywords in the files.
# NOTE(review): `eq' on $finfo->{'kwmode'} warns when no keyword
# substitution mode was recorded for the file.
602 my $kwmode = $finfo->{'kwmode'} eq 'b' ? '' : ' -ko';
603 my $command = "cvs -d $cvsroot co$kwmode -npr $revision \"$file\"";
605 verbose( "Executing `$command'.\n" );
# Read the checked out contents through a pipe so the scripts can use them.
# Unless --verbose is on, the command's stderr is merged into the stream.
# NOTE(review): presumably this branch is taken only when @scripts is
# non-empty; the guarding condition is not visible here.  The result of
# `new IO::File' is not checked before it is handed to fcntl.
608 my $fh = new IO::File $command . ($verbose ? "" : " 2>&1") . " |";
# Setting the descriptor flags to 0 clears close-on-exec, so the pipe stays
# open in the external scripts started below.
609 fcntl( $fh, F_SETFD, 0 )
610 or die "Can't clear close-on-exec flag on filehandle: $!";
612 foreach my $script (@scripts)
# A script without a newline that names an executable file is run as an
# external command; anything else is handled on the other branch.
615 if( $script !~ /\n/ && -x $script )
617 # exec external script
# List-form system: the arguments reach the script without going through a
# shell.  A further argument follows on a line not visible here.
618 my $status = system $script, $cvsroot, $file, $revision,
620 warn "`$script $cvsroot $file $revision "
622 . "' exited with code $status"
# Evaluated scripts find their arguments in @_, the last one being the
# file handle itself.
628 @_ = ($cvsroot, $file, $revision, $fh);
630 warn "script $count ($cvsroot, $file, $revision, $fh) exited abnormally: $@"
634 # Read any data left so the close will work even if our called script
635 # didn't finish reading the data.
636 () = $fh->getlines; # force list context
# close() on a pipe fails both for a system error and for a non-zero exit
# status of the child; only the former ($! set) is fatal here.
637 die "Syserr closing pipe from `cvs co': $!"
638 if !$fh->close && $!;
# Run the checkout with all output discarded and keep the low 16 bits of
# system's return value.
# NOTE(review): presumably this is the branch taken when no scripts were
# given; the branch structure and the return statement are not visible here.
643 $ret_code = 0xffff & system "$command >/dev/null 2>&1";
649 ######################################################################
652 # find_interesting_revisions
655 # CVS stores information in a logical manner. We only really
656 # need to look at some interesting revisions. These are:
658 # And the last version on every branch.
659 # This is because cvs stores changes descending from
660 # main line. ie suppose the last version on mainline is 1.6
661 # version 1.6 of the file is stored in toto. version 1.5
662 # is stored as a diff between 1.5 and 1.6. 1.4 is stored
663 # as a diff between 1.5 and 1.4.
664 # branches are stored a little differently. They are
665 # stored in ascending order. Suppose there is a branch
666 # on 1.4 of the file. The first branches revision number
667 # would be 1.4.1.1. This is stored as a diff between
668 # version 1.4 and 1.4.1.1. The 1.4.1.2 version is stored
669 # as a diff between 1.4.1.1 and 1.4.1.2. Therefore
670 # we are only interested in the earliest revision number
671 # and the highest revision number on a branch.
674 # @revisions - The list of revisions to find interesting ones
680 # @new_revisions - The list of revisions that we find interesting
684 ######################################################################
# Reduce @revisions to the revisions worth checking out: the highest
# revision on every branch plus the earliest revision on the trunk.
#
# The earliest trunk revision is taken from the list itself instead of
# being assumed to be "1.1", so a file whose trunk starts later (or under
# another major number) still has a revision that exists selected.
#
# Adds the sizes of the input and of the result to $total_revisions and
# $total_interesting_revisions.
#
# Returns the list of selected revision numbers, in no particular order.
sub find_interesting_revisions
{
    my( @revisions ) = @_;

    my %max_branch_revision;    # branch number => highest revision on it
    my @first_trunk;            # ( branch, revision ) of earliest trunk rev

    foreach my $revision ( @revisions )
    {
        my( $branch_number, $branch_rev ) = branch_split( $revision );

        $max_branch_revision{$branch_number} = $branch_rev
            if( !exists $max_branch_revision{$branch_number}
                || $max_branch_revision{$branch_number} < $branch_rev );

        # Trunk revisions have exactly two fields (e.g. 1.6), so their
        # branch part contains no dot.
        next if !defined $branch_rev || $branch_number =~ /\./;

        @first_trunk = ( $branch_number, $branch_rev )
            if( !@first_trunk
                || $branch_number < $first_trunk[0]
                || ( $branch_number == $first_trunk[0]
                     && $branch_rev < $first_trunk[1] ) );
    }

    my @new_revisions;

    # The earliest trunk revision is added separately unless it is also the
    # highest one on its branch, in which case the loop below supplies it.
    push( @new_revisions, join( ".", @first_trunk ) )
        if( @first_trunk
            && $max_branch_revision{ $first_trunk[0] } != $first_trunk[1] );

    foreach my $branch ( keys %max_branch_revision )
    {
        push( @new_revisions, $branch . "." . $max_branch_revision{$branch} );
    }

    my $rc  = @revisions;
    my $nrc = @new_revisions;

    $total_revisions += $rc;
    $total_interesting_revisions += $nrc;

    verbose( "\t\tTotal Revisions: $rc Interesting Revisions: $nrc\n" );

    return( @new_revisions );
}
726 ######################################################################
732 # To split up a revision number up into the branch part and
733 # the number part. For Instance:
734 # 1.1.1.1 - is split 1.1.1 and 1
735 # 2.1 - is split 2 and 1
736 # 1.3.4.5.7.8 - is split 1.3.4.5.7 and 8
739 # $revision - The revision to look at.
745 # ( $branch, $revision ) -
746 # $branch - The branch part of the revision number
747 # $revision - The revision part of the revision number
752 ######################################################################
# Split a dotted revision number into its branch part (everything before
# the last dot) and its final field, e.g. "1.3.4.5" -> ( "1.3.4", "5" ).
# A number without any dot yields ( NUMBER, undef ).
sub branch_split
{
    my( $revision ) = @_;

    my @fields = split /\./, $revision;

    # The last field is the revision on the branch, but only when something
    # is left over to form the branch number.
    my $tail   = @fields > 1 ? pop( @fields ) : undef;
    my $branch = @fields ? join( ".", @fields ) : undef;

    return( $branch, $tail );
}
774 ######################################################################
777 # get_ignore_files_from_cvsroot
780 # Retrieve the list of files from the CVSROOT/ directory
781 # that should be ignored.
782 # These are the regular files (e.g., commitinfo, loginfo)
783 # and those specified in the checkoutlist file.
792 # @ignore - the list of files to ignore
797 ######################################################################
# Build the list of patterns for files under CVSROOT/ that are not RCS
# archives but still belong in a repository: the standard administrative
# files, CVS's ".#*" files, and every file named in CVSROOT/checkoutlist.
#
# File names are matched literally: the names read from checkoutlist are
# quoted with \Q...\E and the dots of the optional "./" prefix and of ".#"
# are escaped, so that e.g. "my.file" no longer also ignores "myxfile".
#
# Takes the directory in which to look for CVSROOT/checkoutlist.
# NOTE(review): the parameter line is not visible in this excerpt; it was
# restored from the call site, which passes one directory -- confirm.
#
# Returns the list of compiled patterns, to be matched against paths
# relative to the repository root.  Dies when checkoutlist exists but
# cannot be opened or closed.
sub get_ignore_files_from_cvsroot
{
    my( $cvsroot ) = @_;

    my @admin_files = qw(
        checkoutlist commitinfo config cvsignore cvswrappers editinfo
        history loginfo modules notify passwd postadmin postproxy posttag
        postwatch preproxy rcsinfo readers taginfo val-tags verifymsg
        writers
    );

    my @ignore = (
        qr{^(\./)?CVSROOT/\.#[^/]*$},
        map { qr{^(\./)?CVSROOT/\Q$_\E$} } @admin_files
    );

    my $checkoutlist_file = "$cvsroot/CVSROOT/checkoutlist";
    if( -f $checkoutlist_file && -r $checkoutlist_file )
    {
        my $fh = IO::File->new( $checkoutlist_file, "r" )
            or die "Unable to read checkoutlist file ($checkoutlist_file): $!\n";

        my @list = $fh->getlines;

        $fh->close or die( "Unable to close checkoutlist file: $!\n" );

        foreach my $line ( @list )
        {
            next if( $line =~ /^#/ || $line =~ /^\s*$/ );

            # The file name is the first word of the line; the rest, if
            # any, is not part of the name.
            my( $name ) = $line =~ /^\s*(\S+)/;
            push @ignore, qr{^(\./)?CVSROOT/\Q$name\E$};
        }
    }

    return @ignore;
}
856 # vim:tabstop=4:shiftwidth=4