crypto/dist/ipsec-tools/misc/cvs2cl.pl

   1 #!/bin/sh
   2 exec perl -w -x "$0" ${1+"$@"} # -*- mode: perl; perl-indent-level: 2; -*-
   3 #!perl -w
   4
   5
   6 ##############################################################
   7 ###                                                        ###
   8 ### cvs2cl.pl: produce ChangeLog(s) from `cvs log` output. ###
   9 ###                                                        ###
  10 ##############################################################
  11
  12 ## $Revision: 1.1 $
  13 ## $Date: 2009/01/20 14:36:08 $
  14 ## $Author: tteras $
  15 ##
  16
  17 use strict;
  18
  19 use File::Basename qw( fileparse );
  20 use Getopt::Long   qw( GetOptions );
  21 use Text::Wrap     qw( );
  22 use Time::Local    qw( timegm );
  23 use User::pwent    qw( getpwnam );
  24
  25 # The Plan:
  26 #
  27 # Read in the logs for multiple files, spit out a nice ChangeLog that
  28 # mirrors the information entered during `cvs commit'.
  29 #
  30 # The problem presents some challenges. In an ideal world, we could
  31 # detect files with the same author, log message, and checkin time --
  32 # each <filelist, author, time, logmessage> would be a changelog entry.
  33 # We'd sort them; and spit them out.  Unfortunately, CVS is *not atomic*
  34 # so checkins can span a range of times.  Also, the directory structure
  35 # could be hierarchical.
  36 #
  37 # Another question is whether we really want to have the ChangeLog
  38 # exactly reflect commits. An author could issue two related commits,
  39 # with different log entries, reflecting a single logical change to the
  40 # source. GNU style ChangeLogs group these under a single author/date.
  41 # We try to do the same.
  42 #
  43 # So, we parse the output of `cvs log', storing log messages in a
  44 # multilevel hash that stores the mapping:
  45 #   directory => author => time => message => filelist
  46 # As we go, we notice "nearby" commit times and store them together
  47 # (i.e., under the same timestamp), so they appear in the same log
  48 # entry.
  49 #
  50 # When we've read all the logs, we twist this mapping into
  51 # a time => author => message => filelist mapping for each directory.
  52 #
  53 # If we're not using the `--distributed' flag, the directory is always
  54 # considered to be `./', even as descend into subdirectories.
  55
  56 # Call Tree
  57
  58 # name                         number of lines (10.xii.03)
  59 # parse_options                         192
  60 # derive_changelog                       13
  61 # +-maybe_grab_accumulation_date         38
  62 # +-read_changelog                      277
  63 #   +-maybe_read_user_map_file           94
  64 #     +-run_ext                           9
  65 #   +-read_file_path                     29
  66 #   +-read_symbolic_name                 43
  67 #   +-read_revision                      49
  68 #   +-read_date_author_and_state         25
  69 #     +-parse_date_author_and_state      20
  70 #   +-read_branches                      36
  71 # +-output_changelog                    424
  72 #   +-pretty_file_list                  290
  73 #     +-common_path_prefix               35
  74 #   +-preprocess_msg_text                30
  75 #     +-min                               1
  76 #   +-mywrap                             16
  77 #   +-last_line_len                       5
  78 #   +-wrap_log_entry                    177
  79 #
  80 # Utilities
  81 #
  82 # xml_escape                              6
  83 # slurp_file                             11
  84 # debug                                   5
  85 # version                                 2
  86 # usage                                 142
  87
  88 # -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*-
  89 #
  90 # Note about a bug-slash-opportunity:
  91 # -----------------------------------
  92 #
  93 # There's a bug in Text::Wrap, which affects cvs2cl.  This script
  94 # reveals it:
  95 #
  96 #   #!/usr/bin/perl -w
  97 #
  98 #   use Text::Wrap;
  99 #
 100 #   my $test_text =
 101 #   "This script demonstrates a bug in Text::Wrap.  The very long line
 102 #   following this paragraph will be relocated relative to the surrounding
 103 #   text:
 104 #
 105 #   ====================================================================
 106 #
 107 #   See?  When the bug happens, we'll get the line of equal signs below
 108 #   this paragraph, even though it should be above.";
 109 #
 110 #
 111 #   # Print out the test text with no wrapping:
 112 #   print "$test_text";
 113 #   print "\n";
 114 #   print "\n";
 115 #
 116 #   # Now print it out wrapped, and see the bug:
 117 #   print wrap ("\t", "        ", "$test_text");
 118 #   print "\n";
 119 #   print "\n";
 120 #
 121 # If the line of equal signs were one shorter, then the bug doesn't
 122 # happen.  Interesting.
 123 #
 124 # Anyway, rather than fix this in Text::Wrap, we might as well write a
 125 # new wrap() which has the following much-needed features:
 126 #
 127 # * initial indentation, like current Text::Wrap()
 128 # * subsequent line indentation, like current Text::Wrap()
 129 # * user chooses among: force-break long words, leave them alone, or die()?
 130 # * preserve existing indentation: chopped chunks from an indented line
 131 #   are indented by same (like this line, not counting the asterisk!)
 132 # * optional list of things to preserve on line starts, default ">"
 133 #
 134 # Note that the last two are essentially the same concept, so unify in
 135 # implementation and give a good interface to controlling them.
 136 #
 137 # And how about:
 138 #
 139 # Optionally, when encounter a line pre-indented by same as previous
 140 # line, then strip the newline and refill, but indent by the same.
 141 # Yeah...
 142
 143 # Globals --------------------------------------------------------------------
 144
 145 # In case we have to print it out:
 146 my $VERSION = '$Revision: 1.1 $';
 147 $VERSION =~ s/\S+\s+(\S+)\s+\S+/$1/;
 148
 149 ## Vars set by options:
 150
 151 # Print debugging messages?
 152 my $Debug = 0;
 153
 154 # Just show version and exit?
 155 my $Print_Version = 0;
 156
 157 # Just print usage message and exit?
 158 my $Print_Usage = 0;
 159
 160 # What file should we generate (defaults to "ChangeLog")?
 161 my $Log_File_Name = "ChangeLog";
 162
 163 # Grab most recent entry date from existing ChangeLog file, just add
 164 # to that ChangeLog.
 165 my $Cumulative = 0;
 166
 167 # `cvs log -d`, this will repeat the last entry in the old log.  This is OK,
 168 # as it guarantees at least one entry in the update changelog, which means
 169 # that there will always be a date to extract for the next update.  The repeat
 170 # entry can be removed in postprocessing, if necessary.
 171
 172 # MJP 2003-08-02
 173 # I don't think this actually does anything useful
 174 my $Update = 0;
 175
 176 # Expand usernames to email addresses based on a map file?
 177 my $User_Map_File = '';
 178 my $User_Passwd_File;
 179 my $Mail_Domain;
 180
 181 # Output log in chronological order? [default is reverse chronological order]
 182 my $Chronological_Order = 0;
 183
 184 # Grab user details via gecos
 185 my $Gecos = 0;
 186
 187 # User domain for gecos email addresses
 188 my $Domain;
 189
 190 # Output to a file or to stdout?
 191 my $Output_To_Stdout = 0;
 192
 193 # Eliminate empty log messages?
 194 my $Prune_Empty_Msgs = 0;
 195
 196 # Tags of which not to output
 197 my %ignore_tags;
 198
 199 # Show only revisions with Tags
 200 my %show_tags;
 201
 202 # Don't call Text::Wrap on the body of the message
 203 my $No_Wrap = 0;
 204
 205 # Indentation of log messages
 206 my $Indent = "\t";
 207
 208 # Don't do any pretty print processing
 209 my $Summary = 0;
 210
 211 # Separates header from log message.  Code assumes it is either " " or
 212 # "\n\n", so if there's ever an option to set it to something else,
 213 # make sure to go through all conditionals that use this var.
 214 my $After_Header = " ";
 215
 216 # XML Encoding
 217 my $XML_Encoding = '';
 218
 219 # Format more for programs than for humans.
 220 my $XML_Output = 0;
 221 my $No_XML_Namespace = 0;
 222 my $No_XML_ISO_Date = 0;
 223
 224 # Do some special tweaks for log data that was written in FSF
 225 # ChangeLog style.
 226 my $FSF_Style = 0;
 227
 228 # Set iff output should be like an FSF-style ChangeLog.
 229 my $FSF_Output = 0;
 230
 231 # Show times in UTC instead of local time
 232 my $UTC_Times = 0;
 233
 234 # Show times in output?
 235 my $Show_Times = 1;
 236
 237 # Show day of week in output?
 238 my $Show_Day_Of_Week = 0;
 239
 240 # Show revision numbers in output?
 241 my $Show_Revisions = 0;
 242
 243 # Show dead files in output?
 244 my $Show_Dead = 0;
 245
 246 # Hide dead trunk files which were created as a result of additions on a
 247 # branch?
 248 my $Hide_Branch_Additions = 1;
 249
 250 # Show tags (symbolic names) in output?
 251 my $Show_Tags = 0;
 252
 253 # Show tags separately in output?
 254 my $Show_Tag_Dates = 0;
 255
 256 # Show branches by symbolic name in output?
 257 my $Show_Branches = 0;
 258
 259 # Show only revisions on these branches or their ancestors.
 260 my @Follow_Branches;
 261 # Show only revisions on these branches or their ancestors; ignore descendent
 262 # branches.
 263 my @Follow_Only;
 264
 265 # Don't bother with files matching this regexp.
 266 my @Ignore_Files;
 267
 268 # How exactly we match entries.  We definitely want "o",
 269 # and user might add "i" by using --case-insensitive option.
 270 my $Case_Insensitive = 0;
 271
 272 # Maybe only show log messages matching a certain regular expression.
 273 my $Regexp_Gate = '';
 274
 275 # Show tags only matching certain regular expression.
 276 my $Regexp_Tag = '';
 277
 278 # Pass this global option string along to cvs, to the left of `log':
 279 my $Global_Opts = '';
 280
 281 # Pass this option string along to the cvs log subcommand:
 282 my $Command_Opts = '';
 283
 284 # Read log output from stdin instead of invoking cvs log?
 285 my $Input_From_Stdin = 0;
 286
 287 # Don't show filenames in output.
 288 my $Hide_Filenames = 0;
 289
 290 # Don't shorten directory names from filenames.
 291 my $Common_Dir = 1;
 292
 293 # Max checkin duration. CVS checkin is not atomic, so we may have checkin
 294 # times that span a range of time. We assume that checkins will last no
 295 # longer than $Max_Checkin_Duration seconds, and that similarly, no
 296 # checkins will happen from the same users with the same message less
 297 # than $Max_Checkin_Duration seconds apart.
 298 my $Max_Checkin_Duration = 180;
 299
 300 # What to put at the front of [each] ChangeLog.
 301 my $ChangeLog_Header = '';
 302
 303 # Whether to enable 'delta' mode, and for what start/end tags.
 304 my $Delta_Mode = 0;
 305 my $Delta_From = '';
 306 my $Delta_To = '';
 307
 308 my $TestCode;
 309
 310 # Whether to parse filenames from the RCS filename, and if so what
 311 # prefix to strip.
 312 my $RCS_Root;
 313
 314 # Whether to output information on the # of lines added and removed
 315 # by each file modification.
 316 my $Show_Lines_Modified = 0;
 317
 318 ## end vars set by options.
 319
 320 # latest observed times for the start/end tags in delta mode
 321 my $Delta_StartTime = 0;
 322 my $Delta_EndTime = 0;
 323
 324 my $No_Ancestors = 0;
 325
 326 my $No_Extra_Indent = 0;
 327
 328 my $GroupByDate = 0;
 329 my $GroupByAuthor = 0;
 330
 331 # ----------------------------------------------------------------------------
 332
 333 package CVS::Utils::ChangeLog::EntrySet;
 334
 335 sub new {
 336   my $class = shift;
 337   my %self;
 338   bless \%self, $class;
 339 }
 340
 341 # -------------------------------------
 342
 343 sub output_changelog {
 344   my $output_type = $XML_Output ? 'XML' : 'Text';
 345   my $output_class = "CVS::Utils::ChangeLog::EntrySet::Output::${output_type}";
 346   my $output = $output_class->new(follow_branches => \@Follow_Branches,
 347                                   follow_only     => \@Follow_Only,
 348                                   ignore_tags     => \%ignore_tags,
 349                                   show_tags       => \%show_tags,
 350                                  );
 351   $output->output_changelog(@_);
 352 }
 353
 354 # -------------------------------------
 355
 356 sub add_fileentry {
 357   my ($self, $file_full_path, $time, $revision, $state, $lines,
 358       $branch_names, $branch_roots, $branch_numbers,
 359       $symbolic_names, $author, $msg_txt) = @_;
 360
 361       my $qunk =
 362         CVS::Utils::ChangeLog::FileEntry->new($file_full_path, $time, $revision,
 363                                               $state, $lines,
 364                                               $branch_names, $branch_roots,
 365                                               $branch_numbers,
 366                                               $symbolic_names);
 367
 368       # We might be including revision numbers and/or tags and/or
 369       # branch names in the output.  Most of the code from here to
 370       # loop-end deals with organizing these in qunk.
 371
 372       unless ( $Hide_Branch_Additions
 373                and
 374                $msg_txt =~ /file .+ was initially added on branch \S+./ ) {
 375         # Add this file to the list
 376         # (We use many spoonfuls of autovivication magic. Hashes and arrays
 377         # will spring into existence if they aren't there already.)
 378
 379         &main::debug ("(pushing log msg for ". $qunk->dir_key . $qunk->filename . ")\n");
 380
 381         # Store with the files in this commit.  Later we'll loop through
 382         # again, making sure that revisions with the same log message
 383         # and nearby commit times are grouped together as one commit.
 384         $self->{$qunk->dir_key}{$author}{$time}{$msg_txt} =
 385           CVS::Utils::ChangeLog::Message->new($msg_txt)
 386               unless exists $self->{$qunk->dir_key}{$author}{$time}{$msg_txt};
 387         $self->{$qunk->dir_key}{$author}{$time}{$msg_txt}->add_fileentry($qunk);
 388       }
 389
 390 }
 391
 392 # ----------------------------------------------------------------------------
 393
 394 package CVS::Utils::ChangeLog::EntrySet::Output::Text;
 395
 396 use base qw( CVS::Utils::ChangeLog::EntrySet::Output );
 397
 398 use File::Basename qw( fileparse );
 399
 400 sub new {
 401   my $class = shift;
 402   my $self = $class->SUPER::new(@_);
 403 }
 404
 405 # -------------------------------------
 406
 407 sub wday {
 408   my $self = shift; my $class = ref $self;
 409   my ($wday) = @_;
 410
 411   return $Show_Day_Of_Week ? ' ' . $class->weekday_en($wday) : '';
 412 }
 413
 414 # -------------------------------------
 415
 416 sub header_line {
 417   my $self = shift;
 418   my ($time, $author, $lastdate, $lastauthor) = @_;
 419
 420   my $header_line = '';
 421
 422   my (undef,$min,$hour,$mday,$mon,$year,$wday)
 423     = $UTC_Times ? gmtime($time) : localtime($time);
 424
 425   my $date = $self->fdatetime($time);
 426
 427   if ($Show_Times) {
 428     $header_line = sprintf "%s  %s\n\n", $date, $author;
 429   } else {
 430     if ( $GroupByDate and ($date eq $lastdate) and
 431          ((!$GroupByAuthor) or ($author eq $lastauthor)) ) {
 432       $header_line = '';
 433     } else {
 434       if ( $GroupByDate and ! $GroupByAuthor ) {
 435         $header_line = "$date\n\n";
 436       } else {
 437         $header_line = "$date  $author\n\n";
 438       }
 439     }
 440   }
 441 }
 442
 443 # -------------------------------------
 444
 445 sub preprocess_msg_text {
 446   my $self = shift;
 447   my ($text) = @_;
 448
 449   $text = $self->SUPER::preprocess_msg_text($text);
 450
 451   unless ( $No_Wrap ) {
 452     # Strip off lone newlines, but only for lines that don't begin with
 453     # whitespace or a mail-quoting character, since we want to preserve
 454     # that kind of formatting.  Also don't strip newlines that follow a
 455     # period; we handle those specially next.  And don't strip
 456     # newlines that precede an open paren.
 457     1 while $text =~ s/(^|\n)([^>\s].*[^.\n])\n([^>\n])/$1$2 $3/g;
 458
 459     # If a newline follows a period, make sure that when we bring up the
 460     # bottom sentence, it begins with two spaces.
 461     1 while $text =~ s/(^|\n)([^>\s].*)\n([^>\n])/$1$2  $3/g;
 462   }
 463
 464   return $text;
 465 }
 466
 467 # -------------------------------------
 468
 469 # Here we take a bunch of qunks and convert them into printed
 470 # summary that will include all the information the user asked for.
 471 sub pretty_file_list {
 472   my $self = shift;
 473
 474   return ''
 475     if $Hide_Filenames;
 476
 477   my $qunksref = shift;
 478
 479   my @filenames;
 480   my $beauty = '';          # The accumulating header string for this entry.
 481   my %non_unanimous_tags;   # Tags found in a proper subset of qunks
 482   my %unanimous_tags;       # Tags found in all qunks
 483   my %all_branches;         # Branches found in any qunk
 484   my $fbegun = 0;           # Did we begin printing filenames yet?
 485
 486   my ($common_dir, $qunkrefs) =
 487     $self->_pretty_file_list(\(%unanimous_tags, %non_unanimous_tags, %all_branches), $qunksref);
 488
 489   my @qunkrefs = @$qunkrefs;
 490
 491   # Not XML output, so complexly compactify for chordate consumption.  At this
 492   # point we have enough global information about all the qunks to organize
 493   # them non-redundantly for output.
 494
 495   if ($common_dir) {
 496     # Note that $common_dir still has its trailing slash
 497     $beauty .= "$common_dir: ";
 498   }
 499
 500   if ($Show_Branches)
 501   {
 502     # For trailing revision numbers.
 503     my @brevisions;
 504
 505     foreach my $branch (keys (%all_branches))
 506     {
 507       foreach my $qunkref (@qunkrefs)
 508       {
 509         if ((defined ($qunkref->branch))
 510             and ($qunkref->branch eq $branch))
 511         {
 512           if ($fbegun) {
 513             # kff todo: comma-delimited in XML too?  Sure.
 514             $beauty .= ", ";
 515           }
 516           else {
 517             $fbegun = 1;
 518           }
 519           my $fname = substr ($qunkref->filename, length ($common_dir));
 520           $beauty .= $fname;
 521           $qunkref->{'printed'} = 1;  # Just setting a mark bit, basically
 522
 523           if ( $Show_Tags and defined $qunkref->tags ) {
 524             my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags});
 525
 526             if (@tags) {
 527               $beauty .= " (tags: ";
 528               $beauty .= join (', ', @tags);
 529               $beauty .= ")";
 530             }
 531           }
 532
 533           if ($Show_Revisions) {
 534             # Collect the revision numbers' last components, but don't
 535             # print them -- they'll get printed with the branch name
 536             # later.
 537             $qunkref->revision =~ /.+\.([\d]+)$/;
 538             push (@brevisions, $1);
 539
 540             # todo: we're still collecting branch roots, but we're not
 541             # showing them anywhere.  If we do show them, it would be
 542             # nifty to just call them revision "0" on a the branch.
 543             # Yeah, that's the ticket.
 544           }
 545         }
 546       }
 547       $beauty .= " ($branch";
 548       if (@brevisions) {
 549         if ((scalar (@brevisions)) > 1) {
 550           $beauty .= ".[";
 551           $beauty .= (join (',', @brevisions));
 552           $beauty .= "]";
 553         }
 554         else {
 555           # Square brackets are spurious here, since there's no range to
 556           # encapsulate
 557           $beauty .= ".$brevisions[0]";
 558         }
 559       }
 560       $beauty .= ")";
 561     }
 562   }
 563
 564   # Okay; any qunks that were done according to branch are taken care
 565   # of, and marked as printed.  Now print everyone else.
 566
 567   my %fileinfo_printed;
 568   foreach my $qunkref (@qunkrefs)
 569   {
 570     next if (defined ($qunkref->{'printed'}));   # skip if already printed
 571
 572     my $b = substr ($qunkref->filename, length ($common_dir));
 573     # todo: Shlomo's change was this:
 574     # $beauty .= substr ($qunkref->filename,
 575     #              (($common_dir eq "./") ? '' : length ($common_dir)));
 576     $qunkref->{'printed'} = 1;  # Set a mark bit.
 577
 578     if ($Show_Revisions || $Show_Tags || $Show_Dead)
 579     {
 580       my $started_addendum = 0;
 581
 582       if ($Show_Revisions) {
 583         $started_addendum = 1;
 584         $b .= " (";
 585         $b .= $qunkref->revision;
 586       }
 587       if ($Show_Dead && $qunkref->state =~ /dead/)
 588       {
 589         # Deliberately not using $started_addendum. Keeping it simple.
 590         $b .= "[DEAD]";
 591       }
 592       if ($Show_Tags && (defined $qunkref->tags)) {
 593         my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags});
 594         if ((scalar (@tags)) > 0) {
 595           if ($started_addendum) {
 596             $b .= ", ";
 597           }
 598           else {
 599             $b .= " (tags: ";
 600           }
 601           $b .= join (', ', @tags);
 602           $started_addendum = 1;
 603         }
 604       }
 605       if ($started_addendum) {
 606         $b .= ")";
 607       }
 608     }
 609
 610     unless ( exists $fileinfo_printed{$b} ) {
 611       if ($fbegun) {
 612         $beauty .= ", ";
 613       } else {
 614         $fbegun = 1;
 615       }
 616       $beauty .= $b, $fileinfo_printed{$b} = 1;
 617     }
 618   }
 619
 620   # Unanimous tags always come last.
 621   if ($Show_Tags && %unanimous_tags)
 622   {
 623     $beauty .= " (utags: ";
 624     $beauty .= join (', ', sort keys (%unanimous_tags));
 625     $beauty .= ")";
 626   }
 627
 628   # todo: still have to take care of branch_roots?
 629
 630   $beauty = "$beauty:";
 631
 632   return $beauty;
 633 }
 634
 635 # -------------------------------------
 636
 637 sub output_tagdate {
 638   my $self = shift;
 639   my ($fh, $time, $tag) = @_;
 640
 641   my $fdatetime = $self->fdatetime($time);
 642   print $fh "$fdatetime  tag $tag\n\n";
 643   return;
 644 }
 645
 646 # -------------------------------------
 647
 648 sub format_body {
 649   my $self = shift;
 650   my ($msg, $files, $qunklist) = @_;
 651
 652   my $body;
 653
 654   if ( $No_Wrap and ! $Summary ) {
 655     $msg = $self->preprocess_msg_text($msg);
 656     $files = $self->mywrap("\t", "\t  ", "* $files");
 657     $msg =~ s/\n(.+)/\n$Indent$1/g;
 658     unless ($After_Header eq " ") {
 659       $msg =~ s/^(.+)/$Indent$1/g;
 660     }
 661     if ( $Hide_Filenames ) {
 662       $body = $After_Header . $msg;
 663     } else {
 664       $body = $files . $After_Header . $msg;
 665     }
 666   } elsif ( $Summary ) {
 667     my ($filelist, $qunk);
 668     my (@DeletedQunks, @AddedQunks, @ChangedQunks);
 669
 670     $msg = $self->preprocess_msg_text($msg);
 671     #
 672     #     Sort the files (qunks) according to the operation that was
 673     # performed.  Files which were added have no line change
 674     # indicator, whereas deleted files have state dead.
 675     #
 676     foreach $qunk ( @$qunklist ) {
 677       if ( "dead" eq $qunk->state) {
 678         push @DeletedQunks, $qunk;
 679       } elsif ( ! defined $qunk->lines ) {
 680         push @AddedQunks, $qunk;
 681       } else {
 682         push @ChangedQunks, $qunk;
 683       }
 684     }
 685     #
 686     #     The qunks list was  originally in tree search order.  Let's
 687     # get that back.  The lists, if they exist, will be reversed upon
 688     # processing.
 689     #
 690
 691     #
 692     #     Now write the three sections onto $filelist
 693     #
 694     if ( @DeletedQunks ) {
 695       $filelist .= "\tDeleted:\n";
 696       foreach $qunk ( @DeletedQunks ) {
 697         $filelist .= "\t\t" . $qunk->filename;
 698         $filelist .= " (" . $qunk->revision . ")";
 699         $filelist .= "\n";
 700       }
 701       undef @DeletedQunks;
 702     }
 703
 704     if ( @AddedQunks ) {
 705       $filelist .= "\tAdded:\n";
 706       foreach $qunk (@AddedQunks) {
 707         $filelist .= "\t\t" . $qunk->filename;
 708         $filelist .= " (" . $qunk->revision . ")";
 709         $filelist .= "\n";
 710       }
 711       undef @AddedQunks ;
 712     }
 713
 714     if ( @ChangedQunks ) {
 715       $filelist .= "\tChanged:\n";
 716       foreach $qunk (@ChangedQunks) {
 717         $filelist .= "\t\t" . $qunk->filename;
 718         $filelist .= " (" . $qunk->revision . ")";
 719         $filelist .= ", \"" . $qunk->state . "\"";
 720         $filelist .= ", lines: " . $qunk->lines;
 721         $filelist .= "\n";
 722       }
 723       undef @ChangedQunks;
 724     }
 725
 726     chomp $filelist;
 727
 728     if ( $Hide_Filenames ) {
 729       $filelist = '';
 730     }
 731
 732     $msg =~ s/\n(.*)/\n$Indent$1/g;
 733     unless ( $After_Header eq " " or $FSF_Style ) {
 734       $msg =~ s/^(.*)/$Indent$1/g;
 735     }
 736
 737     unless ( $No_Wrap ) {
 738       if ( $FSF_Style ) {
 739         $msg = $self->wrap_log_entry($msg, '', 69, 69);
 740         chomp($msg);
 741         chomp($msg);
 742       } else {
 743         $msg = $self->mywrap('', $Indent, "$msg");
 744         $msg =~ s/[ \t]+\n/\n/g;
 745       }
 746     }
 747
 748     $body = $filelist . $After_Header . $msg;
 749   } else {  # do wrapping, either FSF-style or regular
 750     my $latter_wrap = $No_Extra_Indent ? $Indent : "$Indent  ";
 751
 752     if ( $FSF_Style ) {
 753       $files = $self->mywrap($Indent, $latter_wrap, "* $files");
 754
 755       my $files_last_line_len = 0;
 756       if ( $After_Header eq " " ) {
 757         $files_last_line_len = $self->last_line_len($files);
 758         $files_last_line_len += 1;  # for $After_Header
 759       }
 760
 761       $msg = $self->wrap_log_entry($msg, $latter_wrap, 69-$files_last_line_len, 69);
 762       $body = $files . $After_Header . $msg;
 763     } else {  # not FSF-style
 764       $msg = $self->preprocess_msg_text($msg);
 765       $body = $files . $After_Header . $msg;
 766       $body = $self->mywrap($Indent, $latter_wrap, "* $body");
 767       $body =~ s/[ \t]+\n/\n/g;
 768     }
 769   }
 770
 771   return $body;
 772 }
 773
 774 # ----------------------------------------------------------------------------
 775
 776 package CVS::Utils::ChangeLog::EntrySet::Output::XML;
 777
 778 use base qw( CVS::Utils::ChangeLog::EntrySet::Output );
 779
 780 use File::Basename qw( fileparse );
 781
 782 sub new {
 783   my $class = shift;
 784   my $self = $class->SUPER::new(@_);
 785 }
 786
 787 # -------------------------------------
 788
 789 sub header_line {
 790   my $self = shift;
 791   my ($time, $author, $lastdate) = @_;
 792
 793   my $header_line = '';
 794
 795   my $isoDate;
 796
 797   my ($y, $m, $d, $H, $M, $S) = (gmtime($time))[5,4,3,2,1,0];
 798
 799   # Ideally, this would honor $UTC_Times and use +HH:MM syntax
 800   $isoDate = sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",
 801                      $y + 1900, $m + 1, $d, $H, $M, $S);
 802
 803   my (undef,$min,$hour,$mday,$mon,$year,$wday)
 804     = $UTC_Times ? gmtime($time) : localtime($time);
 805
 806   my $date = $self->fdatetime($time);
 807   $wday = $self->wday($wday);
 808
 809   $header_line =
 810     sprintf ("<date>%4u-%02u-%02u</date>\n${wday}<time>%02u:%02u</time>\n",
 811              $year+1900, $mon+1, $mday, $hour, $min);
 812   $header_line .= "<isoDate>$isoDate</isoDate>\n"
 813     unless $No_XML_ISO_Date;
 814   $header_line .= sprintf("<author>%s</author>\n" , $author);
 815 }
 816
 817 # -------------------------------------
 818
 819 sub wday {
 820   my $self = shift; my $class = ref $self;
 821   my ($wday) = @_;
 822
 823   return '<weekday>' . $class->weekday_en($wday) . "</weekday>\n";
 824 }
 825
 826 # -------------------------------------
 827
 828 sub escape {
 829   my $self = shift;
 830
 831   my $txt = shift;
 832   $txt =~ s/&/&amp;/g;
 833   $txt =~ s/</&lt;/g;
 834   $txt =~ s/>/&gt;/g;
 835   return $txt;
 836 }
 837
 838 # -------------------------------------
 839
 840 sub output_header {
 841   my $self = shift;
 842   my ($fh) = @_;
 843
 844   my $encoding    =
 845     length $XML_Encoding ? qq'encoding="$XML_Encoding"' : '';
 846   my $version     = 'version="1.0"';
 847   my $declaration =
 848     sprintf '<?xml %s?>', join ' ', grep length, $version, $encoding;
 849   my $root        =
 850     $No_XML_Namespace ?
 851       '<changelog>'     :
 852         '<changelog xmlns="http://www.red-bean.com/xmlns/cvs2cl/">';
 853   print $fh "$declaration\n\n$root\n\n";
 854 }
 855
 856 # -------------------------------------
 857
 858 sub output_footer {
 859   my $self = shift;
 860   my ($fh) = @_;
 861
 862   print $fh "</changelog>\n";
 863 }
 864
 865 # -------------------------------------
 866
 867 sub preprocess_msg_text {
 868   my $self = shift;
 869   my ($text) = @_;
 870
 871   $text = $self->SUPER::preprocess_msg_text($text);
 872
 873   $text = $self->escape($text);
 874   chomp $text;
 875   $text = "<msg>${text}</msg>\n";
 876
 877   return $text;
 878 }
 879
 880 # -------------------------------------
 881
 882 # Here we take a bunch of qunks and convert them into a printed
 883 # summary that will include all the information the user asked for.
 884 sub pretty_file_list {
 885   my $self = shift;
 886   my ($qunksref) = @_;
 887
 888   my $beauty = '';          # The accumulating header string for this entry.
 889   my %non_unanimous_tags;   # Tags found in a proper subset of qunks
 890   my %unanimous_tags;       # Tags found in all qunks
 891   my %all_branches;         # Branches found in any qunk
 892   my $fbegun = 0;           # Did we begin printing filenames yet?
 893
 894   my ($common_dir, $qunkrefs) =
 895     $self->_pretty_file_list(\(%unanimous_tags, %non_unanimous_tags, %all_branches),
 896       $qunksref);
 897
 898   my @qunkrefs = @$qunkrefs;
 899
 900   # If outputting XML, then our task is pretty simple, because we
 901   # don't have to detect common dir, common tags, branch prefixing,
 902   # etc.  We just output exactly what we have, and don't worry about
 903   # redundancy or readability.
 904
 905   foreach my $qunkref (@qunkrefs)
 906   {
 907     my $filename    = $qunkref->filename;
 908     my $state       = $qunkref->state;
 909     my $revision    = $qunkref->revision;
 910     my $tags        = $qunkref->tags;
 911     my $branch      = $qunkref->branch;
 912     my $branchroots = $qunkref->roots;
 913     my $lines       = $qunkref->lines;
 914
 915     $filename = $self->escape($filename);   # probably paranoia
 916     $revision = $self->escape($revision);   # definitely paranoia
 917
 918     $beauty .= "<file>\n";
 919     $beauty .= "<name>${filename}</name>\n";
 920     $beauty .= "<cvsstate>${state}</cvsstate>\n";
 921     $beauty .= "<revision>${revision}</revision>\n";
 922
 923     if ($Show_Lines_Modified
 924         && $lines && $lines =~ m/\+(\d+)\s+-(\d+)/) {
 925         $beauty .= "<linesadded>$1</linesadded>\n";
 926         $beauty .= "<linesremoved>$2</linesremoved>\n";
 927     }
 928
 929     if ($branch) {
 930       $branch   = $self->escape($branch);     # more paranoia
 931       $beauty .= "<branch>${branch}</branch>\n";
 932     }
 933     foreach my $tag (@$tags) {
 934       $tag = $self->escape($tag);  # by now you're used to the paranoia
 935       $beauty .= "<tag>${tag}</tag>\n";
 936     }
 937     foreach my $root (@$branchroots) {
 938       $root = $self->escape($root);  # which is good, because it will continue
 939       $beauty .= "<branchroot>${root}</branchroot>\n";
 940     }
 941     $beauty .= "</file>\n";
 942   }
 943
 944   # Theoretically, we could go home now.  But as long as we're here,
 945   # let's print out the common_dir and utags, as a convenience to
 946   # the receiver (after all, earlier code calculated that stuff
 947   # anyway, so we might as well take advantage of it).
 948
 949   if ((scalar (keys (%unanimous_tags))) > 1) {
 950     foreach my $utag ((keys (%unanimous_tags))) {
 951       $utag = $self->escape($utag);   # the usual paranoia
 952       $beauty .= "<utag>${utag}</utag>\n";
 953     }
 954   }
 955   if ($common_dir) {
 956     $common_dir = $self->escape($common_dir);
 957     $beauty .= "<commondir>${common_dir}</commondir>\n";
 958   }
 959
 960   # That's enough for XML, time to go home:
 961   return $beauty;
 962 }
 963
 964 # -------------------------------------
 965
 966 sub output_tagdate {
 967   my $self = shift;
 968   my ($fh, $time, $tag) = @_;
 969
 970   my ($y, $m, $d, $H, $M, $S) = (gmtime($time))[5,4,3,2,1,0];
 971
 972   # Ideally, this would honor $UTC_Times and use +HH:MM syntax
 973   my $isoDate = sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",
 974                        $y + 1900, $m + 1, $d, $H, $M, $S);
 975
 976   print $fh "<tagdate>\n";
 977   print $fh "<tagisodate>$isoDate</tagisodate>\n";
 978   print $fh "<tagdatetag>$tag</tagdatetag>\n";
 979   print $fh "</tagdate>\n\n";
 980   return;
 981 }
 982
 983 # -------------------------------------
 984
 985 sub output_entry {
 986   my $self = shift;
 987   my ($fh, $entry) = @_;
 988   print $fh "<entry>\n$entry</entry>\n\n";
 989 }
 990
 991 # -------------------------------------
 992
 993 sub format_body {
 994   my $self = shift;
 995   my ($msg, $files, $qunklist) = @_;
 996
 997   $msg = $self->preprocess_msg_text($msg);
 998   return $files . $msg;
 999 }
1000
1001 # ----------------------------------------------------------------------------
1002
1003 package CVS::Utils::ChangeLog::EntrySet::Output;
1004
1005 use Carp           qw( croak );
1006 use File::Basename qw( fileparse );
1007
1008 # Class Utility Functions -------------
1009
1010 { # form closure
1011
1012 my @weekdays = (qw(Sunday Monday Tuesday Wednesday Thursday Friday Saturday));
1013 sub weekday_en {
1014   my $class = shift;
1015   return $weekdays[$_[0]];
1016 }
1017
1018 }
1019
1020 # -------------------------------------
1021
1022 sub new {
1023   my ($proto, %args) = @_;
1024   my $class = ref $proto || $proto;
1025
1026   my $follow_branches = delete $args{follow_branches};
1027   my $follow_only     = delete $args{follow_only};
1028   my $ignore_tags     = delete $args{ignore_tags};
1029   my $show_tags       = delete $args{show_tags};
1030   die "Unrecognized arg to EntrySet::Output::new: '$_'\n"
1031     for keys %args;
1032
1033   bless +{follow_branches => $follow_branches,
1034           follow_only     => $follow_only,
1035           show_tags       => $show_tags,
1036           ignore_tags     => $ignore_tags,
1037          }, $class;
1038 }
1039
1040 # Abstract Subrs ----------------------
1041
1042 sub wday               { croak "Whoops.  Abtract method call (wday).\n" }
1043 sub pretty_file_list   { croak "Whoops.  Abtract method call (pretty_file_list).\n" }
1044 sub output_tagdate     { croak "Whoops.  Abtract method call (output_tagdate).\n" }
1045 sub header_line        { croak "Whoops.  Abtract method call (header_line).\n" }
1046
1047 # Instance Subrs ----------------------
1048
1049 sub output_header { }
1050
1051 # -------------------------------------
1052
1053 sub output_entry {
1054   my $self = shift;
1055   my ($fh, $entry) = @_;
1056   print $fh "$entry\n";
1057 }
1058
1059 # -------------------------------------
1060
1061 sub output_footer { }
1062
1063 # -------------------------------------
1064
1065 sub escape { return $_[1] }
1066
1067 # -------------------------------------
1068
1069 sub _revision_is_wanted {
1070   my ($self, $qunk) = @_;
1071
1072   my ($revision, $branch_numbers) = @{$qunk}{qw( revision branch_numbers )};
1073   my $follow_branches = $self->{follow_branches};
1074   my $follow_only     = $self->{follow_only};
1075
1076   for my $ignore_tag (keys %{$self->{ignore_tags}}) {
1077     return
1078       if defined $qunk->{tags} and grep $_ eq $ignore_tag, @{$qunk->{tags}};
1079   }
1080
1081   if ( keys %{$self->{show_tags}} ) {
1082     for my $show_tag (keys %{$self->{show_tags}}) {
1083       return
1084         if ! defined $qunk->{tags} or ! grep $_ eq $show_tag, @{$qunk->{tags}};
1085     }
1086   }
1087
1088   return 1
1089     unless @$follow_branches + @$follow_only; # no follow is follow all
1090
1091   for my $x (map([$_, 1], @$follow_branches),
1092              map([$_, 0], @$follow_only    )) {
1093     my ($branch, $followsub) = @$x;
1094
1095     # Special case for following trunk revisions
1096     return 1
1097       if $branch =~ /^trunk$/i and $revision =~ /^[0-9]+\.[0-9]+$/;
1098
1099     if ( my $branch_number = $branch_numbers->{$branch} ) {
1100       # Are we on one of the follow branches or an ancestor of same?
1101
1102       # If this revision is a prefix of the branch number, or possibly is less
1103       # in the minormost number, OR if this branch number is a prefix of the
1104       # revision, then yes.  Otherwise, no.
1105
1106       # So below, we determine if any of those conditions are met.
1107
1108       # Trivial case: is this revision on the branch?  (Compare this way to
1109       # avoid regexps that screw up Emacs indentation, argh.)
1110       if ( substr($revision, 0, (length($branch_number) + 1))
1111            eq
1112            ($branch_number . ".") ) {
1113         if ( $followsub ) {
1114           return 1;
1115 #        } elsif ( length($revision) == length($branch_number)+2 ) {
1116         } elsif ( substr($revision, length($branch_number)+1) =~ /^\d+$/ ) {
1117           return 1;
1118         }
1119       } elsif ( length($branch_number) > length($revision)
1120                 and
1121                 ! $No_Ancestors ) {
1122         # Non-trivial case: check if rev is ancestral to branch
1123
1124         # r_left still has the trailing "."
1125         my ($r_left, $r_end) = ($revision =~ /^((?:\d+\.)+)(\d+)$/);
1126
1127         # b_left still has trailing "."
1128         # b_mid has no trailing "."
1129         my ($b_left, $b_mid) = ($branch_number =~ /^((?:\d+\.)+)(\d+)\.\d+$/);
1130         return 1
1131           if $r_left eq $b_left and $r_end <= $b_mid;
1132       }
1133     }
1134   }
1135
1136   return;
1137 }
1138
1139 # -------------------------------------
1140
1141 sub output_changelog {
1142 my $self = shift; my $class = ref $self;
1143   my ($grand_poobah) = @_;
1144   ### Process each ChangeLog
1145
1146   while (my ($dir,$authorhash) = each %$grand_poobah)
1147   {
1148     &main::debug ("DOING DIR: $dir\n");
1149
1150     # Here we twist our hash around, from being
1151     #   author => time => message => filelist
1152     # in %$authorhash to
1153     #   time => author => message => filelist
1154     # in %changelog.
1155     #
1156     # This is also where we merge entries.  The algorithm proceeds
1157     # through the timeline of the changelog with a sliding window of
1158     # $Max_Checkin_Duration seconds; within that window, entries that
1159     # have the same log message are merged.
1160     #
1161     # (To save space, we zap %$authorhash after we've copied
1162     # everything out of it.)
1163
1164     my %changelog;
1165     while (my ($author,$timehash) = each %$authorhash)
1166     {
1167       my %stamptime;
1168       foreach my $time (sort {$a <=> $b} (keys %$timehash))
1169       {
1170         my $msghash = $timehash->{$time};
1171         while (my ($msg,$qunklist) = each %$msghash)
1172         {
1173           my $stamptime = $stamptime{$msg};
1174           if ((defined $stamptime)
1175               and (($time - $stamptime) < $Max_Checkin_Duration)
1176               and (defined $changelog{$stamptime}{$author}{$msg}))
1177           {
1178             push(@{$changelog{$stamptime}{$author}{$msg}}, $qunklist->files);
1179           }
1180           else {
1181             $changelog{$time}{$author}{$msg} = $qunklist->files;
1182             $stamptime{$msg} = $time;
1183           }
1184         }
1185       }
1186     }
1187     undef (%$authorhash);
1188
1189     ### Now we can write out the ChangeLog!
1190
1191     my ($logfile_here, $logfile_bak, $tmpfile);
1192     my $lastdate = "";
1193     my $lastauthor = "";
1194
1195     if (! $Output_To_Stdout) {
1196       $logfile_here =  $dir . $Log_File_Name;
1197       if (!$^O =~ /Win32/i) {
1198         $logfile_here =~ s/^\.\/\//\//;   # fix any leading ".//" problem
1199       }
1200       else {
1201         $logfile_here =~ s/^\.\/+//;      # remove any leading "./"
1202       }
1203       $tmpfile      = "${logfile_here}.cvs2cl$$.tmp";
1204       $logfile_bak  = "${logfile_here}.bak";
1205
1206       open (LOG_OUT, ">$tmpfile") or die "Unable to open \"$tmpfile\"";
1207     }
1208     else {
1209       open (LOG_OUT, ">-") or die "Unable to open stdout for writing";
1210     }
1211
1212     print LOG_OUT $ChangeLog_Header;
1213
1214     my %tag_date_printed;
1215
1216     $self->output_header(\*LOG_OUT);
1217
1218     my @key_list = ();
1219     if($Chronological_Order) {
1220         @key_list = sort {$a <=> $b} (keys %changelog);
1221     } else {
1222         @key_list = sort {$b <=> $a} (keys %changelog);
1223     }
1224
1225     if ( $Show_Tag_Dates || $XML_Output ) {
1226       foreach my $time (@key_list) {
1227         my $authorhash = $changelog{$time};
1228         while (my ($author,$mesghash) = each %$authorhash) {
1229           while (my ($msg,$qunk) = each %$mesghash) {
1230             my $qunklist = $mesghash->{$msg};
1231             for my $qunkref2 (@$qunklist) {
1232               if (!$self->_revision_is_wanted($qunkref2)) {
1233                 if (defined ($qunkref2->tags)) {
1234                   for my $tag (@{$qunkref2->tags}) {
1235                     $tag_date_printed{$tag} = 1;
1236                   }
1237                 }
1238               }
1239             }
1240           }
1241         }
1242       }
1243     }
1244
1245     foreach my $time (@key_list)
1246     {
1247       next if ($Delta_Mode &&
1248                (($time <= $Delta_StartTime) ||
1249                 ($time > $Delta_EndTime && $Delta_EndTime)));
1250
1251       # Set up the date/author line.
1252       # kff todo: do some more XML munging here, on the header
1253       # part of the entry:
1254       my (undef,$min,$hour,$mday,$mon,$year,$wday)
1255           = $UTC_Times ? gmtime($time) : localtime($time);
1256
1257       $wday = $self->wday($wday);
1258       # XML output includes everything else, we might as well make
1259       # it always include Day Of Week too, for consistency.
1260       my $authorhash = $changelog{$time};
1261       if ( $Show_Tag_Dates || $XML_Output ) {
1262         my %tags;
1263         while (my ($author,$mesghash) = each %$authorhash) {
1264           while (my ($msg,$qunk) = each %$mesghash) {
1265             for my $qunkref2 (@$qunk) {
1266               if (defined ($qunkref2->tags)) {
1267                 for my $tag (@{$qunkref2->tags}) {
1268                   $tags{$tag} = 1;
1269                 }
1270               }
1271             }
1272           }
1273         }
1274         # Sort here for determinism to ease testing
1275         foreach my $tag (sort keys %tags) {
1276           if ( ! defined $tag_date_printed{$tag} ) {
1277             $tag_date_printed{$tag} = $time;
1278             if ( (! defined $Regexp_Tag) or ( $tag =~ /$Regexp_Tag/ ) ) {
1279               $self->output_tagdate(\*LOG_OUT, $time, $tag);
1280               $lastauthor = ""
1281             }
1282           }
1283         }
1284       }
1285       while (my ($author,$mesghash) = each %$authorhash)
1286       {
1287         # If XML, escape in outer loop to avoid compound quoting:
1288         $author = $self->escape($author);
1289
1290       FOOBIE:
1291         # We sort here to enable predictable ordering for the testing porpoises
1292         for my $msg (sort keys %$mesghash)
1293         {
1294           my $qunklist = $mesghash->{$msg};
1295
1296           my @qunklist =
1297             grep $self->_revision_is_wanted($_), @$qunklist;
1298
1299           next FOOBIE unless @qunklist;
1300
1301           my $files               = $self->pretty_file_list(\@qunklist);
1302           my $header_line;          # date and author
1303           my $wholething;           # $header_line + $body
1304
1305           my $date = $self->fdatetime($time);
1306           $header_line = $self->header_line($time, $author, $lastdate, $lastauthor);
1307           $lastdate = $date;
1308           $lastauthor = $author;
1309
1310           $Text::Wrap::huge = 'overflow'
1311             if $Text::Wrap::VERSION >= 2001.0130;
1312           # Reshape the body according to user preferences.
1313           my $body = $self->format_body($msg, $files, \@qunklist);
1314
1315           $body =~ s/[ \t]+\n/\n/g;
1316           $wholething = $header_line . $body;
1317
1318           # One last check: make sure it passes the regexp test, if the
1319           # user asked for that.  We have to do it here, so that the
1320           # test can match against information in the header as well
1321           # as in the text of the log message.
1322
1323           # How annoying to duplicate so much code just because I
1324           # can't figure out a way to evaluate scalars on the trailing
1325           # operator portion of a regular expression.  Grrr.
1326           if ($Case_Insensitive) {
1327             unless ( $Regexp_Gate and ( $wholething !~ /$Regexp_Gate/oi ) ) {
1328               $self->output_entry(\*LOG_OUT, $wholething);
1329             }
1330           }
1331           else {
1332             unless ( $Regexp_Gate and ( $wholething !~ /$Regexp_Gate/o ) ) {
1333               $self->output_entry(\*LOG_OUT, $wholething);
1334             }
1335           }
1336         }
1337       }
1338     }
1339
1340     $self->output_footer(\*LOG_OUT);
1341
1342     close (LOG_OUT);
1343
1344     if ( ! $Output_To_Stdout ) {
1345       # If accumulating, append old data to new before renaming.  But
1346       # don't append the most recent entry, since it's already in the
1347       # new log due to CVS's idiosyncratic interpretation of "log -d".
1348       if ($Cumulative && -f $logfile_here) {
1349         open NEW_LOG, ">>$tmpfile"
1350           or die "trouble appending to $tmpfile ($!)";
1351
1352         open OLD_LOG, "<$logfile_here"
1353           or die "trouble reading from $logfile_here ($!)";
1354
1355         my $started_first_entry = 0;
1356         my $passed_first_entry = 0;
1357         while (<OLD_LOG>) {
1358           if ( ! $passed_first_entry ) {
1359             if ( ( ! $started_first_entry )
1360                 and /^(\d\d\d\d-\d\d-\d\d\s+(\w+\s+)?\d\d:\d\d)/ ) {
1361               $started_first_entry = 1;
1362             } elsif ( /^(\d\d\d\d-\d\d-\d\d\s+(\w+\s+)?\d\d:\d\d)/ ) {
1363               $passed_first_entry = 1;
1364               print NEW_LOG $_;
1365             }
1366           } else {
1367             print NEW_LOG $_;
1368           }
1369         }
1370
1371         close NEW_LOG;
1372         close OLD_LOG;
1373       }
1374
1375       if ( -f $logfile_here ) {
1376         rename $logfile_here, $logfile_bak;
1377       }
1378       rename $tmpfile, $logfile_here;
1379     }
1380   }
1381 }
1382
1383 # -------------------------------------
1384
1385 # Don't call this wrap, because with 5.5.3, that clashes with the
1386 # (unconditional :-( ) export of wrap() from Text::Wrap
1387 sub mywrap {
1388   my $self = shift;
1389   my ($indent1, $indent2, @text) = @_;
1390   # If incoming text looks preformatted, don't get clever
1391   my $text = Text::Wrap::wrap($indent1, $indent2, @text);
1392   if ( grep /^\s+/m, @text ) {
1393     return $text;
1394   }
1395   my @lines = split /\n/, $text;
1396   $indent2 =~ s!^((?: {8})+)!"\t" x (length($1)/8)!e;
1397   $lines[0] =~ s/^$indent1\s+/$indent1/;
1398   s/^$indent2\s+/$indent2/
1399     for @lines[1..$#lines];
1400   my $newtext = join "\n", @lines;
1401   $newtext .= "\n"
1402     if substr($text, -1) eq "\n";
1403   return $newtext;
1404 }
1405
1406 # -------------------------------------
1407
1408 sub preprocess_msg_text {
1409   my $self = shift;
1410   my ($text) = @_;
1411
1412   # Strip out carriage returns (as they probably result from DOSsy editors).
1413   $text =~ s/\r\n/\n/g;
1414   # If it *looks* like two newlines, make it *be* two newlines:
1415   $text =~ s/\n\s*\n/\n\n/g;
1416
1417   return $text;
1418 }
1419
1420 # -------------------------------------
1421
1422 sub last_line_len {
1423   my $self = shift;
1424
1425   my $files_list = shift;
1426   my @lines = split (/\n/, $files_list);
1427   my $last_line = pop (@lines);
1428   return length ($last_line);
1429 }
1430
1431 # -------------------------------------
1432
1433 # A custom wrap function, sensitive to some common constructs used in
1434 # log entries.
1435 sub wrap_log_entry {
1436   my $self = shift;
1437
1438   my $text = shift;                  # The text to wrap.
1439   my $left_pad_str = shift;          # String to pad with on the left.
1440
1441   # These do NOT take left_pad_str into account:
1442   my $length_remaining = shift;      # Amount left on current line.
1443   my $max_line_length  = shift;      # Amount left for a blank line.
1444
1445   my $wrapped_text = '';             # The accumulating wrapped entry.
1446   my $user_indent = '';              # Inherited user_indent from prev line.
1447
1448   my $first_time = 1;                # First iteration of the loop?
1449   my $suppress_line_start_match = 0; # Set to disable line start checks.
1450
1451   my @lines = split (/\n/, $text);
1452   while (@lines)   # Don't use `foreach' here, it won't work.
1453   {
1454     my $this_line = shift (@lines);
1455     chomp $this_line;
1456
1457     if ($this_line =~ /^(\s+)/) {
1458       $user_indent = $1;
1459     }
1460     else {
1461       $user_indent = '';
1462     }
1463
1464     # If it matches any of the line-start regexps, print a newline now...
1465     if ($suppress_line_start_match)
1466     {
1467       $suppress_line_start_match = 0;
1468     }
1469     elsif (($this_line =~ /^(\s*)\*\s+[a-zA-Z0-9]/)
1470            || ($this_line =~ /^(\s*)\* [a-zA-Z0-9_\.\/\+-]+/)
1471            || ($this_line =~ /^(\s*)\([a-zA-Z0-9_\.\/\+-]+(\)|,\s*)/)
1472            || ($this_line =~ /^(\s+)(\S+)/)
1473            || ($this_line =~ /^(\s*)- +/)
1474            || ($this_line =~ /^()\s*$/)
1475            || ($this_line =~ /^(\s*)\*\) +/)
1476            || ($this_line =~ /^(\s*)[a-zA-Z0-9](\)|\.|\:) +/))
1477     {
1478       # Make a line break immediately, unless header separator is set
1479       # and this line is the first line in the entry, in which case
1480       # we're getting the blank line for free already and shouldn't
1481       # add an extra one.
1482       unless (($After_Header ne " ") and ($first_time))
1483       {
1484         if ($this_line =~ /^()\s*$/) {
1485           $suppress_line_start_match = 1;
1486           $wrapped_text .= "\n${left_pad_str}";
1487         }
1488
1489         $wrapped_text .= "\n${left_pad_str}";
1490       }
1491
1492       $length_remaining = $max_line_length - (length ($user_indent));
1493     }
1494
1495     # Now that any user_indent has been preserved, strip off leading
1496     # whitespace, so up-folding has no ugly side-effects.
1497     $this_line =~ s/^\s*//;
1498
1499     # Accumulate the line, and adjust parameters for next line.
1500     my $this_len = length ($this_line);
1501     if ($this_len == 0)
1502     {
1503       # Blank lines should cancel any user_indent level.
1504       $user_indent = '';
1505       $length_remaining = $max_line_length;
1506     }
1507     elsif ($this_len >= $length_remaining) # Line too long, try breaking it.
1508     {
1509       # Walk backwards from the end.  At first acceptable spot, break
1510       # a new line.
1511       my $idx = $length_remaining - 1;
1512       if ($idx < 0) { $idx = 0 };
1513       while ($idx > 0)
1514       {
1515         if (substr ($this_line, $idx, 1) =~ /\s/)
1516         {
1517           my $line_now = substr ($this_line, 0, $idx);
1518           my $next_line = substr ($this_line, $idx);
1519           $this_line = $line_now;
1520
1521           # Clean whitespace off the end.
1522           chomp $this_line;
1523
1524           # The current line is ready to be printed.
1525           $this_line .= "\n${left_pad_str}";
1526
1527           # Make sure the next line is allowed full room.
1528           $length_remaining = $max_line_length - (length ($user_indent));
1529
1530           # Strip next_line, but then preserve any user_indent.
1531           $next_line =~ s/^\s*//;
1532
1533           # Sneak a peek at the user_indent of the upcoming line, so
1534           # $next_line (which will now precede it) can inherit that
1535           # indent level.  Otherwise, use whatever user_indent level
1536           # we currently have, which might be none.
1537           my $next_next_line = shift (@lines);
1538           if ((defined ($next_next_line)) && ($next_next_line =~ /^(\s+)/)) {
1539             $next_line = $1 . $next_line if (defined ($1));
1540             # $length_remaining = $max_line_length - (length ($1));
1541             $next_next_line =~ s/^\s*//;
1542           }
1543           else {
1544             $next_line = $user_indent . $next_line;
1545           }
1546           if (defined ($next_next_line)) {
1547             unshift (@lines, $next_next_line);
1548           }
1549           unshift (@lines, $next_line);
1550
1551           # Our new next line might, coincidentally, begin with one of
1552           # the line-start regexps, so we temporarily turn off
1553           # sensitivity to that until we're past the line.
1554           $suppress_line_start_match = 1;
1555
1556           last;
1557         }
1558         else
1559         {
1560           $idx--;
1561         }
1562       }
1563
1564       if ($idx == 0)
1565       {
1566         # We bottomed out because the line is longer than the
1567         # available space.  But that could be because the space is
1568         # small, or because the line is longer than even the maximum
1569         # possible space.  Handle both cases below.
1570
1571         if ($length_remaining == ($max_line_length - (length ($user_indent))))
1572         {
1573           # The line is simply too long -- there is no hope of ever
1574           # breaking it nicely, so just insert it verbatim, with
1575           # appropriate padding.
1576           $this_line = "\n${left_pad_str}${this_line}";
1577         }
1578         else
1579         {
1580           # Can't break it here, but may be able to on the next round...
1581           unshift (@lines, $this_line);
1582           $length_remaining = $max_line_length - (length ($user_indent));
1583           $this_line = "\n${left_pad_str}";
1584         }
1585       }
1586     }
1587     else  # $this_len < $length_remaining, so tack on what we can.
1588     {
1589       # Leave a note for the next iteration.
1590       $length_remaining = $length_remaining - $this_len;
1591
1592       if ($this_line =~ /\.$/)
1593       {
1594         $this_line .= "  ";
1595         $length_remaining -= 2;
1596       }
1597       else  # not a sentence end
1598       {
1599         $this_line .= " ";
1600         $length_remaining -= 1;
1601       }
1602     }
1603
1604     # Unconditionally indicate that loop has run at least once.
1605     $first_time = 0;
1606
1607     $wrapped_text .= "${user_indent}${this_line}";
1608   }
1609
1610   # One last bit of padding.
1611   $wrapped_text .= "\n";
1612
1613   return $wrapped_text;
1614 }
1615
1616 # -------------------------------------
1617
1618 sub _pretty_file_list {
1619   my $self = shift;
1620
1621   my ($unanimous_tags, $non_unanimous_tags, $all_branches, $qunksref) = @_;
1622
1623   my @qunkrefs =
1624     grep +( ( ! $_->tags_exists
1625               or
1626               ! grep exists $ignore_tags{$_}, @{$_->tags})
1627             and
1628             ( ! keys %show_tags
1629               or
1630               ( $_->tags_exists
1631                 and
1632                 grep exists $show_tags{$_}, @{$_->tags} )
1633             )
1634           ),
1635     @$qunksref;
1636
1637   my $common_dir;           # Dir prefix common to all files ('' if none)
1638
1639   # First, loop over the qunks gathering all the tag/branch names.
1640   # We'll put them all in non_unanimous_tags, and take out the
1641   # unanimous ones later.
1642  QUNKREF:
1643   foreach my $qunkref (@qunkrefs)
1644   {
1645     # Keep track of whether all the files in this commit were in the
1646     # same directory, and memorize it if so.  We can make the output a
1647     # little more compact by mentioning the directory only once.
1648     if ($Common_Dir && (scalar (@qunkrefs)) > 1)
1649     {
1650       if (! (defined ($common_dir)))
1651       {
1652         my ($base, $dir);
1653         ($base, $dir, undef) = fileparse ($qunkref->filename);
1654
1655         if ((! (defined ($dir)))  # this first case is sheer paranoia
1656             or ($dir eq '')
1657             or ($dir eq "./")
1658             or ($dir eq ".\\"))
1659         {
1660           $common_dir = '';
1661         }
1662         else
1663         {
1664           $common_dir = $dir;
1665         }
1666       }
1667       elsif ($common_dir ne '')
1668       {
1669         # Already have a common dir prefix, so how much of it can we preserve?
1670         $common_dir = &main::common_path_prefix ($qunkref->filename, $common_dir);
1671       }
1672     }
1673     else  # only one file in this entry anyway, so common dir not an issue
1674     {
1675       $common_dir = '';
1676     }
1677
1678     if (defined ($qunkref->branch)) {
1679       $all_branches->{$qunkref->branch} = 1;
1680     }
1681     if (defined ($qunkref->tags)) {
1682       foreach my $tag (@{$qunkref->tags}) {
1683         $non_unanimous_tags->{$tag} = 1;
1684       }
1685     }
1686   }
1687
1688   # Any tag held by all qunks will be printed specially... but only if
1689   # there are multiple qunks in the first place!
1690   if ((scalar (@qunkrefs)) > 1) {
1691     foreach my $tag (keys (%$non_unanimous_tags)) {
1692       my $everyone_has_this_tag = 1;
1693       foreach my $qunkref (@qunkrefs) {
1694         if ((! (defined ($qunkref->tags)))
1695             or (! (grep ($_ eq $tag, @{$qunkref->tags})))) {
1696           $everyone_has_this_tag = 0;
1697         }
1698       }
1699       if ($everyone_has_this_tag) {
1700         $unanimous_tags->{$tag} = 1;
1701         delete $non_unanimous_tags->{$tag};
1702       }
1703     }
1704   }
1705
1706   return $common_dir, \@qunkrefs;
1707 }
1708
1709 # -------------------------------------
1710
1711 sub fdatetime {
1712   my $self = shift;
1713
1714   my ($year, $mday, $mon, $wday, $hour, $min);
1715
1716   if ( @_ > 1 ) {
1717     ($year, $mday, $mon, $wday, $hour, $min) = @_;
1718   } else {
1719     my ($time) = @_;
1720     (undef, $min, $hour, $mday, $mon, $year, $wday) =
1721       $UTC_Times ? gmtime($time) : localtime($time);
1722
1723     $year += 1900;
1724     $mon  += 1;
1725     $wday  = $self->wday($wday);
1726   }
1727
1728   my $fdate = $self->fdate($year, $mon, $mday, $wday);
1729
1730   if ($Show_Times) {
1731     my $ftime = $self->ftime($hour, $min);
1732     return "$fdate $ftime";
1733   } else {
1734     return $fdate;
1735   }
1736 }
1737
1738 # -------------------------------------
1739
1740 sub fdate {
1741   my $self = shift;
1742
1743   my ($year, $mday, $mon, $wday);
1744
1745   if ( @_ > 1 ) {
1746     ($year, $mon, $mday, $wday) = @_;
1747   } else {
1748     my ($time) = @_;
1749     (undef, undef, undef, $mday, $mon, $year, $wday) =
1750       $UTC_Times ? gmtime($time) : localtime($time);
1751
1752     $year += 1900;
1753     $mon  += 1;
1754     $wday  = $self->wday($wday);
1755   }
1756
1757   return sprintf '%4u-%02u-%02u%s', $year, $mon, $mday, $wday;
1758 }
1759
1760 # -------------------------------------
1761
1762 sub ftime {
1763   my $self = shift;
1764
1765   my ($hour, $min);
1766
1767   if ( @_ > 1 ) {
1768     ($hour, $min) = @_;
1769   } else {
1770     my ($time) = @_;
1771     (undef, $min, $hour) = $UTC_Times ? gmtime($time) : localtime($time);
1772   }
1773
1774   return sprintf '%02u:%02u', $hour, $min;
1775 }
1776
1777 # ----------------------------------------------------------------------------
1778
1779 package CVS::Utils::ChangeLog::Message;
1780
1781 sub new {
1782   my $class = shift;
1783   my ($msg) = @_;
1784
1785   my %self = (msg => $msg, files => []);
1786
1787   bless \%self, $class;
1788 }
1789
1790 sub add_fileentry {
1791   my $self = shift;
1792   my ($fileentry) = @_;
1793
1794   die "Not a fileentry: $fileentry"
1795     unless $fileentry->isa('CVS::Utils::ChangeLog::FileEntry');
1796
1797   push @{$self->{files}}, $fileentry;
1798 }
1799
1800 sub files { wantarray ? @{$_[0]->{files}} : $_[0]->{files} }
1801
1802 # ----------------------------------------------------------------------------
1803
1804 package CVS::Utils::ChangeLog::FileEntry;
1805
1806 use File::Basename qw( fileparse );
1807
1808 # Each revision of a file has a little data structure (a `qunk')
1809 # associated with it.  That data structure holds not only the
1810 # file's name, but any additional information about the file
1811 # that might be needed in the output, such as the revision
1812 # number, tags, branches, etc.  The reason to have these things
1813 # arranged in a data structure, instead of just appending them
1814 # textually to the file's name, is that we may want to do a
1815 # little rearranging later as we write the output.  For example,
1816 # all the files on a given tag/branch will go together, followed
1817 # by the tag in parentheses (so trunk or otherwise non-tagged
1818 # files would go at the end of the file list for a given log
1819 # message).  This rearrangement is a lot easier to do if we
1820 # don't have to reparse the text.
1821 #
1822 # A qunk looks like this:
1823 #
1824 #   {
1825 #     filename    =>    "hello.c",
1826 #     revision    =>    "1.4.3.2",
1827 #     time        =>    a timegm() return value (moment of commit)
1828 #     tags        =>    [ "tag1", "tag2", ... ],
1829 #     branch      =>    "branchname" # There should be only one, right?
1830 #     roots       =>    [ "branchtag1", "branchtag2", ... ]
1831 #     lines       =>    "+x -y" # or undefined; x and y are integers
1832 #   }
1833
1834 # Single top-level ChangeLog, or one per subdirectory?
1835 my $distributed;
1836 sub distributed { $#_ ? ($distributed = $_[1]) : $distributed; }
1837
1838 sub new {
1839   my $class = shift;
1840   my ($path, $time, $revision, $state, $lines,
1841       $branch_names, $branch_roots, $branch_numbers, $symbolic_names) = @_;
1842
1843   my %self = (time     => $time,
1844               revision => $revision,
1845               state    => $state,
1846               lines    => $lines,
1847               branch_numbers => $branch_numbers,
1848              );
1849
1850   if ( $distributed ) {
1851     @self{qw(filename dir_key)} = fileparse($path);
1852   } else {
1853     @self{qw(filename dir_key)} = ($path, './');
1854   }
1855
1856   { # Scope for $branch_prefix
1857     (my ($branch_prefix) = ($revision =~ /((?:\d+\.)+)\d+/));
1858     $branch_prefix =~ s/\.$//;
1859     if ( $branch_names->{$branch_prefix} ) {
1860       my $branch_name = $branch_names->{$branch_prefix};
1861       $self{branch}   = $branch_name;
1862       $self{branches} = [$branch_name];
1863     }
1864     while ( $branch_prefix =~ s/^(\d+(?:\.\d+\.\d+)+)\.\d+\.\d+$/$1/ ) {
1865       push @{$self{branches}}, $branch_names->{$branch_prefix}
1866         if exists $branch_names->{$branch_prefix};
1867     }
1868   }
1869
1870   # If there's anything in the @branch_roots array, then this
1871   # revision is the root of at least one branch.  We'll display
1872   # them as branch names instead of revision numbers, the
1873   # substitution for which is done directly in the array:
1874   $self{'roots'} = [ map { $branch_names->{$_} } @$branch_roots ]
1875     if @$branch_roots;
1876
1877   if ( exists $symbolic_names->{$revision} ) {
1878     $self{tags} = delete $symbolic_names->{$revision};
1879     &main::delta_check($time, $self{tags});
1880   }
1881
1882   bless \%self, $class;
1883 }
1884
1885 sub filename       { $_[0]->{filename}       }
1886 sub dir_key        { $_[0]->{dir_key}        }
1887 sub revision       { $_[0]->{revision}       }
1888 sub branch         { $_[0]->{branch}         }
1889 sub state          { $_[0]->{state}          }
1890 sub lines          { $_[0]->{lines}          }
1891 sub roots          { $_[0]->{roots}          }
1892 sub branch_numbers { $_[0]->{branch_numbers} }
1893
1894 sub tags        { $_[0]->{tags}     }
1895 sub tags_exists {
1896   exists $_[0]->{tags};
1897 }
1898
1899 # This may someday be used in a more sophisticated calculation of what other
1900 # files are involved in this commit.  For now, we don't use it much except for
1901 # delta mode, because the common-commit-detection algorithm is hypothesized to
1902 # be "good enough" as it stands.
1903 sub time     { $_[0]->{time}     }
1904
1905 # ----------------------------------------------------------------------------
1906
1907 package CVS::Utils::ChangeLog::EntrySetBuilder;
1908
1909 use File::Basename qw( fileparse );
1910 use Time::Local    qw( timegm );
1911
1912 use constant MAILNAME => "/etc/mailname";
1913
1914 # In 'cvs log' output, one long unbroken line of equal signs separates files:
1915 use constant FILE_SEPARATOR => '=' x 77;# . "\n";
1916 # In 'cvs log' output, a shorter line of dashes separates log messages within
1917 # a file:
1918 use constant REV_SEPARATOR  => '-' x 28;# . "\n";
1919
1920 use constant EMPTY_LOG_MESSAGE => '*** empty log message ***';
1921
1922 # -------------------------------------
1923
1924 sub new {
1925   my ($proto) = @_;
1926   my $class = ref $proto || $proto;
1927
1928   my $poobah  = CVS::Utils::ChangeLog::EntrySet->new;
1929   my $self = bless +{ grand_poobah => $poobah }, $class;
1930
1931   $self->clear_file;
1932   $self->maybe_read_user_map_file;
1933   return $self;
1934 }
1935
1936 # -------------------------------------
1937
1938 sub clear_msg {
1939   my ($self) = @_;
1940
1941   # Make way for the next message
1942   undef $self->{rev_msg};
1943   undef $self->{rev_time};
1944   undef $self->{rev_revision};
1945   undef $self->{rev_author};
1946   undef $self->{rev_state};
1947   undef $self->{lines};
1948   $self->{rev_branch_roots} = [];       # For showing which files are branch
1949                                         # ancestors.
1950   $self->{collecting_symbolic_names} = 0;
1951 }
1952
1953 # -------------------------------------
1954
1955 sub clear_file {
1956   my ($self) = @_;
1957   $self->clear_msg;
1958
1959   undef $self->{filename};
1960   $self->{branch_names}   = +{};        # We'll grab branch names while we're
1961                                         # at it.
1962   $self->{branch_numbers} = +{};        # Save some revisions for
1963                                         # @Follow_Branches
1964   $self->{symbolic_names} = +{};        # Where tag names get stored.
1965 }
1966
1967 # -------------------------------------
1968
1969 sub grand_poobah { $_[0]->{grand_poobah} }
1970
1971 # -------------------------------------
1972
1973 sub read_changelog {
1974   my ($self, $command) = @_;
1975
1976   local (*READER);
1977   my $pid;
1978   if (! $Input_From_Stdin) {
1979     if ($^O =~ /Win32/i) {
1980       open (READER, "@$command |")
1981         or die "unable to run \"@$command\"";
1982     }
1983     else {
1984       local (*WRITER);
1985       pipe(READER, WRITER)
1986         or die "Couldn't form pipe: $!\n";
1987       $pid = fork;
1988       if (! defined $pid) {
1989         die "Couldn't fork: $!\n";
1990       }
1991       if ( ! $pid ) { # child
1992         open STDOUT, '>&=' . fileno WRITER
1993           or die "Couldn't dup stderr to ", fileno WRITER, "\n";
1994         # strangely, some perls give spurious warnings about STDIN being opened
1995         # for output only these close calls precede the STDOUT reopen above.
1996         # I think they must be reusing fd 1.
1997         close READER;
1998         close STDIN;
1999
2000         exec @$command;
2001       }
2002
2003       close WRITER;
2004     }
2005
2006     &main::debug ("(run \"@$command\")\n");
2007   }
2008   else {
2009     open READER, '-' or die "unable to open stdin for reading";
2010   }
2011
2012   binmode READER;
2013
2014  XX_Log_Source:
2015   while (<READER>) {
2016     chomp;
2017     s!\r$!!;
2018
2019     # If on a new file and don't see filename, skip until we find it, and
2020     # when we find it, grab it.
2021     if ( ! defined $self->{filename} ) {
2022       $self->read_file_path($_);
2023     } elsif ( /^symbolic names:$/ ) {
2024       $self->{collecting_symbolic_names} = 1;
2025     } elsif ( $self->{collecting_symbolic_names} ) {
2026       $self->read_symbolic_name($_);
2027     } elsif ( $_ eq FILE_SEPARATOR and ! defined $self->{rev_revision} ) {
2028       $self->clear_file;
2029     } elsif ( ! defined $self->{rev_revision} ) {
2030         # If have file name, but not revision, and see revision, then grab
2031         # it.  (We collect unconditionally, even though we may or may not
2032         # ever use it.)
2033       $self->read_revision($_);
2034     } elsif ( ! defined $self->{rev_time} ) { # and /^date: /) {
2035       $self->read_date_author_and_state($_);
2036     } elsif ( /^branches:\s+(.*);$/ ) {
2037       $self->read_branches($1);
2038     } elsif ( ! ( $_ eq FILE_SEPARATOR or $_ eq REV_SEPARATOR ) ) {
2039       # If have file name, time, and author, then we're just grabbing
2040       # log message texts:
2041       $self->{rev_msg} .= $_ . "\n";   # Normally, just accumulate the message...
2042     } else {
2043       my $noadd = 0;
2044       if ( ! $self->{rev_msg}
2045            or $self->{rev_msg} =~ /^\s*(\.\s*)?$/
2046            or index($self->{rev_msg}, EMPTY_LOG_MESSAGE) > -1 ) {
2047         # ... until a msg separator is encountered:
2048         # Ensure the message contains something:
2049         $self->clear_msg, $noadd = 1
2050           if $Prune_Empty_Msgs;
2051         $self->{rev_msg} = "[no log message]\n";
2052       }
2053
2054       $self->add_file_entry
2055         unless $noadd;
2056
2057       if ( $_ eq FILE_SEPARATOR ) {
2058         $self->clear_file;
2059       } else {
2060         $self->clear_msg;
2061       }
2062     }
2063   }
2064
2065   close READER
2066     or die "Couldn't close pipe reader: $!\n";
2067   if ( defined $pid ) {
2068     my $rv;
2069     waitpid $pid, 0;
2070     0 == $?
2071       or $!=1, die sprintf("Problem reading log input (pid/exit/signal/core: %d/%d/%d/%d)\n",
2072                            $pid, $? >> 8, $? & 127, $? & 128);
2073   }
2074   return;
2075 }
2076
2077 # -------------------------------------
2078
2079 sub add_file_entry {
2080   $_[0]->grand_poobah->add_fileentry(@{$_[0]}{qw(filename rev_time rev_revision
2081                                                  rev_state lines branch_names
2082                                                  rev_branch_roots
2083                                                  branch_numbers
2084                                                  symbolic_names
2085                                                  rev_author rev_msg)});
2086 }
2087
2088 # -------------------------------------
2089
2090 sub maybe_read_user_map_file {
2091   my ($self) = @_;
2092
2093   my %expansions;
2094   my $User_Map_Input;
2095
2096   if (defined $User_Passwd_File)
2097   {
2098     if ( ! defined $Domain ) {
2099       if ( -e MAILNAME ) {
2100         chomp($Domain = slurp_file(MAILNAME));
2101       } else {
2102       MAILDOMAIN_CMD:
2103         for ([qw(hostname -d)], 'dnsdomainname', 'domainname') {
2104           my ($text, $exit, $sig, $core) = run_ext($_);
2105           if ( $exit == 0 && $sig == 0 && $core == 0 ) {
2106             chomp $text;
2107             if ( length $text ) {
2108               $Domain = $text;
2109               last MAILDOMAIN_CMD;
2110             }
2111           }
2112         }
2113       }
2114     }
2115
2116     die "No mail domain found\n"
2117       unless defined $Domain;
2118
2119     open (MAPFILE, "<$User_Passwd_File")
2120         or die ("Unable to open $User_Passwd_File ($!)");
2121     while (<MAPFILE>)
2122     {
2123       # all lines are valid
2124       my ($username, $pw, $uid, $gid, $gecos, $homedir, $shell) = split ':';
2125       my $expansion = '';
2126       ($expansion) = split (',', $gecos)
2127         if defined $gecos && length $gecos;
2128
2129       my $mailname = $Domain eq '' ? $username : "$username\@$Domain";
2130       $expansions{$username} = "$expansion <$mailname>";
2131     }
2132     close (MAPFILE);
2133   }
2134
2135   if ($User_Map_File)
2136   {
2137     if ( $User_Map_File =~ m{^([-\w\@+=.,\/]+):([-\w\@+=.,\/:]+)} and
2138          !-f $User_Map_File )
2139     {
2140       my $rsh = (exists $ENV{'CVS_RSH'} ? $ENV{'CVS_RSH'} : 'ssh');
2141       $User_Map_Input = "$rsh $1 'cat $2' |";
2142       &main::debug ("(run \"${User_Map_Input}\")\n");
2143     }
2144     else
2145     {
2146       $User_Map_Input = "<$User_Map_File";
2147     }
2148
2149     open (MAPFILE, $User_Map_Input)
2150         or die ("Unable to open $User_Map_File ($!)");
2151
2152     while (<MAPFILE>)
2153     {
2154       next if /^\s*#/;  # Skip comment lines.
2155       next if not /:/;  # Skip lines without colons.
2156
2157       # It is now safe to split on ':'.
2158       my ($username, $expansion) = split ':';
2159       chomp $expansion;
2160       $expansion =~ s/^'(.*)'$/$1/;
2161       $expansion =~ s/^"(.*)"$/$1/;
2162
2163       # If it looks like the expansion has a real name already, then
2164       # we toss the username we got from CVS log.  Otherwise, keep
2165       # it to use in combination with the email address.
2166
2167       if ($expansion =~ /^\s*<{0,1}\S+@.*/) {
2168         # Also, add angle brackets if none present
2169         if (! ($expansion =~ /<\S+@\S+>/)) {
2170           $expansions{$username} = "$username <$expansion>";
2171         }
2172         else {
2173           $expansions{$username} = "$username $expansion";
2174         }
2175       }
2176       else {
2177         $expansions{$username} = $expansion;
2178       }
2179     } # fi ($User_Map_File)
2180
2181     close (MAPFILE);
2182   }
2183
2184  $self->{usermap} = \%expansions;
2185 }
2186
2187 # -------------------------------------
2188
2189 sub read_file_path {
2190   my ($self, $line) = @_;
2191
2192   my $path;
2193
2194   if ( $line =~ /^Working file: (.*)/ ) {
2195     $path = $1;
2196   } elsif ( defined $RCS_Root
2197             and
2198             $line =~ m|^RCS file: $RCS_Root[/\\](.*),v$| ) {
2199     $path = $1;
2200     $path =~ s!Attic/!!;
2201   } else {
2202     return;
2203   }
2204
2205   if ( @Ignore_Files ) {
2206     my $base;
2207     ($base, undef, undef) = fileparse($path);
2208
2209     my $xpath = $Case_Insensitive ? lc($path) : $path;
2210     return
2211       if grep $path =~ /$_/, @Ignore_Files;
2212   }
2213
2214   $self->{filename} = $path;
2215   return;
2216 }
2217
2218 # -------------------------------------
2219
2220 sub read_symbolic_name {
2221   my ($self, $line) = @_;
2222
2223   # All tag names are listed with whitespace in front in cvs log
2224   # output; so if see non-whitespace, then we're done collecting.
2225   if ( /^\S/ ) {
2226     $self->{collecting_symbolic_names} = 0;
2227     return;
2228   } else {
2229     # we're looking at a tag name, so parse & store it
2230
2231     # According to the Cederqvist manual, in node "Tags", tag names must start
2232     # with an uppercase or lowercase letter and can contain uppercase and
2233     # lowercase letters, digits, `-', and `_'.  However, it's not our place to
2234     # enforce that, so we'll allow anything CVS hands us to be a tag:
2235     my ($tag_name, $tag_rev) = ($line =~ /^\s+([^:]+): ([\d.]+)$/);
2236
2237     # A branch number either has an odd number of digit sections
2238     # (and hence an even number of dots), or has ".0." as the
2239     # second-to-last digit section.  Test for these conditions.
2240     my $real_branch_rev = '';
2241     if ( $tag_rev =~ /^(\d+\.\d+\.)+\d+$/             # Even number of dots...
2242          and
2243          $tag_rev !~ /^(1\.)+1$/ ) {                  # ...but not "1.[1.]1"
2244       $real_branch_rev = $tag_rev;
2245     } elsif ($tag_rev =~ /(\d+\.(\d+\.)+)0.(\d+)/) {  # Has ".0."
2246       $real_branch_rev = $1 . $3;
2247     }
2248
2249     # If we got a branch, record its number.
2250     if ( $real_branch_rev ) {
2251       $self->{branch_names}->{$real_branch_rev} = $tag_name;
2252       $self->{branch_numbers}->{$tag_name} = $real_branch_rev;
2253     } else {
2254       # Else it's just a regular (non-branch) tag.
2255       push @{$self->{symbolic_names}->{$tag_rev}}, $tag_name;
2256     }
2257   }
2258
2259   $self->{collecting_symbolic_names} = 1;
2260   return;
2261 }
2262
2263 # -------------------------------------
2264
2265 sub read_revision {
2266   my ($self, $line) = @_;
2267
2268   my ($revision) = ( $line =~ /^revision (\d+\.[\d.]+)/ );
2269
2270   return
2271     unless $revision;
2272
2273   $self->{rev_revision} = $revision;
2274   return;
2275 }
2276
2277 # -------------------------------------
2278
2279 { # Closure over %gecos_warned
2280 my %gecos_warned;
2281 sub read_date_author_and_state {
2282   my ($self, $line) = @_;
2283
2284   my ($time, $author, $state) = $self->parse_date_author_and_state($line);
2285
2286   if ( defined($self->{usermap}->{$author}) and $self->{usermap}->{$author} ) {
2287     $author = $self->{usermap}->{$author};
2288   } elsif ( defined $Domain or $Gecos == 1 ) {
2289     my $email = $author;
2290     $email = $author."@".$Domain
2291       if defined $Domain && $Domain ne '';
2292
2293     my $pw = getpwnam($author);
2294     my ($fullname, $office, $workphone, $homephone, $gcos);
2295     if ( defined $pw ) {
2296       $gcos = (getpwnam($author))[6];
2297       ($fullname, $office, $workphone, $homephone) =
2298         split /\s*,\s*/, $gcos;
2299     } else {
2300       warn "Couldn't find gecos info for author '$author'\n"
2301         unless $gecos_warned{$author}++;
2302       $fullname = '';
2303     }
2304     for (grep defined, $fullname, $office, $workphone, $homephone) {
2305       s/&/ucfirst(lc($pw))/ge;
2306     }
2307     $author = $fullname . "  <" . $email . ">"
2308       if defined $fullname && $fullname ne '';
2309   }
2310
2311   $self->{rev_state}  = $state;
2312   $self->{rev_time}   = $time;
2313   $self->{rev_author} = $author;
2314   return;
2315 }
2316 }
2317
2318 # -------------------------------------
2319
2320 sub read_branches {
2321   # A "branches: ..." line here indicates that one or more branches
2322   # are rooted at this revision.  If we're showing branches, then we
2323   # want to show that fact as well, so we collect all the branches
2324   # that this is the latest ancestor of and store them in
2325   # $self->[rev_branch_roots}.  Just for reference, the format of the
2326   # line we're seeing at this point is:
2327   #
2328   #    branches:  1.5.2;  1.5.4;  ...;
2329   #
2330   # Okay, here goes:
2331   my ($self, $line) = @_;
2332
2333   # Ugh.  This really bothers me.  Suppose we see a log entry
2334   # like this:
2335   #
2336   #    ----------------------------
2337   #    revision 1.1
2338   #    date: 1999/10/17 03:07:38;  author: jrandom;  state: Exp;
2339   #    branches:  1.1.2;
2340   #    Intended first line of log message begins here.
2341   #    ----------------------------
2342   #
2343   # The question is, how we can tell the difference between that
2344   # log message and a *two*-line log message whose first line is
2345   #
2346   #    "branches:  1.1.2;"
2347   #
2348   # See the problem?  The output of "cvs log" is inherently
2349   # ambiguous.
2350   #
2351   # For now, we punt: we liberally assume that people don't
2352   # write log messages like that, and just toss a "branches:"
2353   # line if we see it but are not showing branches.  I hope no
2354   # one ever loses real log data because of this.
2355   if ( $Show_Branches ) {
2356     $line =~ s/(1\.)+1;|(1\.)+1$//;  # ignore the trivial branch 1.1.1
2357     $self->{rev_branch_roots} = [split /;\s+/, $line]
2358       if length $line;
2359   }
2360 }
2361
2362 # -------------------------------------
2363
2364 sub parse_date_author_and_state {
2365   my ($self, $line) = @_;
2366   # Parses the date/time and author out of a line like:
2367   #
2368   # date: 1999/02/19 23:29:05;  author: apharris;  state: Exp;
2369   #
2370   # or, in CVS 1.12.9:
2371   #
2372   # date: 2004-06-05 16:10:32 +0000; author: somebody; state: Exp;
2373
2374   my ($year, $mon, $mday, $hours, $min, $secs, $utcOffset, $author, $state, $rest) =
2375     $line =~
2376       m!(\d+)[-/](\d+)[-/](\d+)\s+(\d+):(\d+):(\d+)(\s+[+-]\d{4})?;\s+
2377         author:\s+([^;]+);\s+state:\s+([^;]+);(.*)!x
2378     or  die "Couldn't parse date ``$line''";
2379   die "Bad date or Y2K issues"
2380     unless $year > 1969 and $year < 2258;
2381   # Kinda arbitrary, but useful as a sanity check
2382   my $time = timegm($secs, $min, $hours, $mday, $mon-1, $year-1900);
2383   if ( defined $utcOffset ) {
2384     my ($plusminus, $hour, $minute) = ($utcOffset =~ m/([+-])(\d\d)(\d\d)/);
2385     my $offset = (($hour * 60) + $minute) * 60 * ($plusminus eq '+' ? -1 : 1);
2386     $time += $offset;
2387   }
2388   if ( $rest =~ m!\s+lines:\s+(.*)! ) {
2389     $self->{lines} = $1;
2390   }
2391
2392   return $time, $author, $state;
2393 }
2394
2395 # Subrs ----------------------------------------------------------------------
2396
2397 package main;
2398
2399 sub delta_check {
2400   my ($time, $tags) = @_;
2401
2402   # If we're in 'delta' mode, update the latest observed times for the
2403   # beginning and ending tags, and when we get around to printing output, we
2404   # will simply restrict ourselves to that timeframe...
2405   return
2406     unless $Delta_Mode;
2407
2408   $Delta_StartTime = $time
2409     if $time > $Delta_StartTime and $Delta_From and grep { $_ eq $Delta_From } @$tags;
2410
2411   $Delta_EndTime = $time
2412     if $time > $Delta_EndTime and $Delta_To and grep { $_ eq $Delta_To } @$tags;
2413 }
2414
2415 sub run_ext {
2416   my ($cmd) = @_;
2417   $cmd = [$cmd]
2418     unless ref $cmd;
2419   local $" = ' ';
2420   my $out = qx"@$cmd 2>&1";
2421   my $rv  = $?;
2422   my ($sig, $core, $exit) = ($? & 127, $? & 128, $? >> 8);
2423   return $out, $exit, $sig, $core;
2424 }
2425
2426 # -------------------------------------
2427
2428 # If accumulating, grab the boundary date from pre-existing ChangeLog.
2429 sub maybe_grab_accumulation_date {
2430   if (! $Cumulative || $Update) {
2431     return '';
2432   }
2433
2434   # else
2435
2436   open (LOG, "$Log_File_Name")
2437       or die ("trouble opening $Log_File_Name for reading ($!)");
2438
2439   my $boundary_date;
2440   while (<LOG>)
2441   {
2442     if (/^(\d\d\d\d-\d\d-\d\d\s+(\w+\s+)?\d\d:\d\d)/)
2443     {
2444       $boundary_date = "$1";
2445       last;
2446     }
2447   }
2448
2449   close (LOG);
2450
2451   # convert time from utc to local timezone if the ChangeLog has
2452   # dates/times in utc
2453   if ($UTC_Times && $boundary_date)
2454   {
2455     # convert the utc time to a time value
2456     my ($year,$mon,$mday,$hour,$min) = $boundary_date =~
2457       m#(\d+)-(\d+)-(\d+)\s+(\d+):(\d+)#;
2458     my $time = timegm(0,$min,$hour,$mday,$mon-1,$year-1900);
2459     # print the timevalue in the local timezone
2460     my ($ignore,$wday);
2461     ($ignore,$min,$hour,$mday,$mon,$year,$wday) = localtime($time);
2462     $boundary_date=sprintf ("%4u-%02u-%02u %02u:%02u",
2463                             $year+1900,$mon+1,$mday,$hour,$min);
2464   }
2465
2466   return $boundary_date;
2467 }
2468
2469 # -------------------------------------
2470
2471 # Fills up a ChangeLog structure in the current directory.
2472 sub derive_changelog {
2473   my ($command) = @_;
2474
2475   # See "The Plan" above for a full explanation.
2476
2477   # Might be adding to an existing ChangeLog
2478   my $accumulation_date = maybe_grab_accumulation_date;
2479   if ($accumulation_date) {
2480     # Insert -d immediately after 'cvs log'
2481     my $Log_Date_Command = "-d>${accumulation_date}";
2482
2483     my ($log_index) = grep $command->[$_] eq 'log', 0..$#$command;
2484     splice @$command, $log_index+1, 0, $Log_Date_Command;
2485     &debug ("(adding log msg starting from $accumulation_date)\n");
2486   }
2487
2488 #  output_changelog(read_changelog($command));
2489   my $builder = CVS::Utils::ChangeLog::EntrySetBuilder->new;
2490   $builder->read_changelog($command);
2491   $builder->grand_poobah->output_changelog;
2492 }
2493
2494 # -------------------------------------
2495
2496 sub min { $_[0] < $_[1] ? $_[0] : $_[1] }
2497
2498 # -------------------------------------
2499
2500 sub common_path_prefix {
2501   my ($path1, $path2) = @_;
2502
2503   # For compatibility (with older versions of cvs2cl.pl), we think in UN*X
2504   # terms, and mould windoze filenames to match.  Is this really appropriate?
2505   # If a file is checked in under UN*X, and cvs log run on windoze, which way
2506   # do the path separators slope?  Can we use fileparse as per the local
2507   # conventions?  If so, we should probably have a user option to specify an
2508   # OS to emulate to handle stdin-fed logs.  If we did this, we could avoid
2509   # the nasty \-/ transmogrification below.
2510
2511   my ($dir1, $dir2) = map +(fileparse($_))[1], $path1, $path2;
2512
2513   # Transmogrify Windows filenames to look like Unix.
2514   # (It is far more likely that someone is running cvs2cl.pl under
2515   # Windows than that they would genuinely have backslashes in their
2516   # filenames.)
2517   tr!\\!/!
2518     for $dir1, $dir2;
2519
2520   my ($accum1, $accum2, $last_common_prefix) = ('') x 3;
2521
2522   my @path1 = grep length($_), split qr!/!, $dir1;
2523   my @path2 = grep length($_), split qr!/!, $dir2;
2524
2525   my @common_path;
2526   for (0..min($#path1,$#path2)) {
2527     if ( $path1[$_] eq $path2[$_]) {
2528       push @common_path, $path1[$_];
2529     } else {
2530       last;
2531     }
2532   }
2533
2534   return join '', map "$_/", @common_path;
2535 }
2536
2537 # -------------------------------------
2538
2539 sub parse_options {
2540   # Check this internally before setting the global variable.
2541   my $output_file;
2542
2543   # If this gets set, we encountered unknown options and will exit at
2544   # the end of this subroutine.
2545   my $exit_with_admonishment = 0;
2546
2547   # command to generate the log
2548   my @log_source_command = qw( cvs log );
2549
2550   my (@Global_Opts, @Local_Opts);
2551
2552   Getopt::Long::Configure(qw( bundling permute no_getopt_compat
2553                               pass_through no_ignore_case ));
2554   GetOptions('help|usage|h'   => \$Print_Usage,
2555              'debug'          => \$Debug,        # unadvertised option, heh
2556              'version'        => \$Print_Version,
2557
2558              'file|f=s'       => \$output_file,
2559              'accum'          => \$Cumulative,
2560              'update'         => \$Update,
2561              'fsf'            => \$FSF_Style,
2562              'rcs=s'          => \$RCS_Root,
2563              'usermap|U=s'    => \$User_Map_File,
2564              'gecos'          => \$Gecos,
2565              'domain=s'       => \$Domain,
2566              'passwd=s'       => \$User_Passwd_File,
2567              'window|W=i'     => \$Max_Checkin_Duration,
2568              'chrono'         => \$Chronological_Order,
2569              'ignore|I=s'     => \@Ignore_Files,
2570              'case-insensitive|C' => \$Case_Insensitive,
2571              'regexp|R=s'     => \$Regexp_Gate,
2572              'stdin'          => \$Input_From_Stdin,
2573              'stdout'         => \$Output_To_Stdout,
2574              'distributed|d'  => sub { CVS::Utils::ChangeLog::FileEntry->distributed(1) },
2575              'prune|P'        => \$Prune_Empty_Msgs,
2576              'no-wrap'        => \$No_Wrap,
2577              'gmt|utc'        => \$UTC_Times,
2578              'day-of-week|w'  => \$Show_Day_Of_Week,
2579              'revisions|r'    => \$Show_Revisions,
2580              'show-dead'      => \$Show_Dead,
2581              'tags|t'         => \$Show_Tags,
2582              'tag-regexp=s'   => \$Regexp_Tag,
2583              'tagdates|T'     => \$Show_Tag_Dates,
2584              'branches|b'     => \$Show_Branches,
2585              'follow|F=s'     => \@Follow_Branches,
2586              'follow-only=s'  => \@Follow_Only,
2587              'xml-encoding=s' => \$XML_Encoding,
2588              'xml'            => \$XML_Output,
2589              'noxmlns'        => \$No_XML_Namespace,
2590              'no-xml-iso-date' => \$No_XML_ISO_Date,
2591              'no-ancestors'   => \$No_Ancestors,
2592              'lines-modified' => \$Show_Lines_Modified,
2593
2594              'no-indent'    => sub {
2595                $Indent = '';
2596              },
2597
2598              'summary'      => sub {
2599                $Summary = 1;
2600                $After_Header = "\n\n"; # Summary implies --separate-header
2601              },
2602
2603              'no-times'     => sub {
2604                $Show_Times = 0;
2605              },
2606
2607              'no-hide-branch-additions' => sub {
2608                $Hide_Branch_Additions = 0;
2609              },
2610
2611              'no-common-dir'  => sub {
2612                $Common_Dir = 0;
2613              },
2614
2615              'ignore-tag=s'   => sub {
2616                $ignore_tags{$_[1]} = 1;
2617              },
2618
2619              'show-tag=s'     => sub {
2620                $show_tags{$_[1]} = 1;
2621              },
2622
2623              # Deliberately undocumented.  This is not a public interface, and
2624              # may change/disappear at any time.
2625              'test-code=s'    => \$TestCode,
2626
2627              'delta=s'        => sub {
2628                my $arg = $_[1];
2629                if ( $arg =~
2630                     /^([A-Za-z][A-Za-z0-9_\-\]\[\.]*)?:([A-Za-z][A-Za-z0-9_\-\]\[\.]*)?$/ )
2631                {
2632                  $Delta_From = $1;
2633                  $Delta_To = $2;
2634                  $Delta_Mode = 1;
2635                } else {
2636                  die "--delta FROM_TAG:TO_TAG is what you meant to say.\n";
2637                }
2638              },
2639
2640              'FSF'             => sub {
2641                $FSF_Output = 1;
2642                $Show_Times = 0;
2643                $Common_Dir = 0;
2644                $No_Extra_Indent = 1;
2645                $Indent = "\t";
2646              },
2647
2648              'header=s'        => sub {
2649                my $narg = $_[1];
2650                $ChangeLog_Header = &slurp_file ($narg);
2651                if (! defined ($ChangeLog_Header)) {
2652                  $ChangeLog_Header = '';
2653                }
2654              },
2655
2656              'global-opts|g=s' => sub {
2657                my $narg = $_[1];
2658                push @Global_Opts, $narg;
2659                splice @log_source_command, 1, 0, $narg;
2660              },
2661
2662              'log-opts|l=s' => sub {
2663                my $narg = $_[1];
2664                push @Local_Opts, $narg;
2665                push @log_source_command, $narg;
2666              },
2667
2668              'mailname=s'   => sub {
2669                my $narg = $_[1];
2670                warn "--mailname is deprecated; please use --domain instead\n";
2671                $Domain = $narg;
2672              },
2673
2674              'separate-header|S' => sub {
2675                $After_Header = "\n\n";
2676                $No_Extra_Indent = 1;
2677              },
2678
2679              'group-by-date' => sub {
2680                $GroupByDate = 1;
2681                $Show_Times = 0;
2682              },
2683
2684              'group-by-author' => sub {
2685                $GroupByDate = 1;
2686                $GroupByAuthor = 1;
2687                $Show_Times = 0;
2688              },
2689
2690              'hide-filenames' => sub {
2691                $Hide_Filenames = 1;
2692                $After_Header = '';
2693              },
2694             )
2695     or die "options parsing failed\n";
2696
2697   push @log_source_command, map "$_", @ARGV;
2698
2699   ## Check for contradictions...
2700
2701   if ($Output_To_Stdout && CVS::Utils::ChangeLog::FileEntry->distributed) {
2702     print STDERR "cannot pass both --stdout and --distributed\n";
2703     $exit_with_admonishment = 1;
2704   }
2705
2706   if ($Output_To_Stdout && $output_file) {
2707     print STDERR "cannot pass both --stdout and --file\n";
2708     $exit_with_admonishment = 1;
2709   }
2710
2711   if ($Input_From_Stdin && @Global_Opts) {
2712     print STDERR "cannot pass both --stdin and -g\n";
2713     $exit_with_admonishment = 1;
2714   }
2715
2716   if ($Input_From_Stdin && @Local_Opts) {
2717     print STDERR "cannot pass both --stdin and -l\n";
2718     $exit_with_admonishment = 1;
2719   }
2720
2721   if ($XML_Output && $Cumulative) {
2722     print STDERR "cannot pass both --xml and --accum\n";
2723     $exit_with_admonishment = 1;
2724   }
2725
2726   if ($FSF_Output && $Cumulative) {
2727     print STDERR "cannot pass both --FSF and --accum\n";
2728     $exit_with_admonishment = 1;
2729   }
2730
2731   # Other consistency checks and option-driven logic
2732
2733   # Bleargh.  Compensate for a deficiency of custom wrapping.
2734   if ( ($After_Header ne " ") and $FSF_Style ) {
2735     $After_Header .= "\t";
2736   }
2737
2738   @Ignore_Files = map lc, @Ignore_Files
2739     if $Case_Insensitive;
2740
2741   # Or if any other error message has already been printed out, we
2742   # just leave now:
2743   if ($exit_with_admonishment) {
2744     &usage ();
2745     exit (1);
2746   }
2747   elsif ($Print_Usage) {
2748     &usage ();
2749     exit (0);
2750   }
2751   elsif ($Print_Version) {
2752     &version ();
2753     exit (0);
2754   }
2755
2756   ## Else no problems, so proceed.
2757
2758   if ($output_file) {
2759     $Log_File_Name = $output_file;
2760   }
2761
2762   return \@log_source_command;
2763 }
2764
2765 # -------------------------------------
2766
2767 sub slurp_file {
2768   my $filename = shift || die ("no filename passed to slurp_file()");
2769   my $retstr;
2770
2771   open (SLURPEE, "<${filename}") or die ("unable to open $filename ($!)");
2772   local $/ = undef;
2773   $retstr = <SLURPEE>;
2774   close (SLURPEE);
2775   return $retstr;
2776 }
2777
2778 # -------------------------------------
2779
2780 sub debug {
2781   if ($Debug) {
2782     my $msg = shift;
2783     print STDERR $msg;
2784   }
2785 }
2786
2787 # -------------------------------------
2788
2789 sub version {
2790   print "cvs2cl.pl version ${VERSION}; distributed under the GNU GPL.\n";
2791 }
2792
2793 # -------------------------------------
2794
2795 sub usage {
2796   &version ();
2797
2798   eval "use Pod::Usage qw( pod2usage )";
2799
2800    if ( $@ ) {
2801     print <<'END';
2802
2803 * Pod::Usage was not found.  The formatting may be suboptimal.  Consider
2804   upgrading your Perl --- Pod::Usage is standard from 5.6 onwards, and
2805   versions of perl prior to 5.6 are getting rather rusty, now.  Alternatively,
2806   install Pod::Usage direct from CPAN.
2807 END
2808
2809     local $/ = undef;
2810     my $message = <DATA>;
2811     $message =~ s/^=(head1|item) //gm;
2812     $message =~ s/^=(over|back).*\n//gm;
2813     $message =~ s/\n{3,}/\n\n/g;
2814     print $message;
2815   } else {
2816     print "\n";
2817     pod2usage( -exitval => 'NOEXIT',
2818                -verbose => 1,
2819                -output  => \*STDOUT,
2820              );
2821   }
2822
2823   return;
2824 }
2825
2826 # Main -----------------------------------------------------------------------
2827
2828 my $log_source_command = parse_options;
2829 if ( defined $TestCode ) {
2830   eval $TestCode;
2831   die "Eval failed: '$@'\n"
2832     if $@;
2833 } else {
2834   derive_changelog($log_source_command);
2835 }
2836
2837 __DATA__
2838
2839 =head1 NAME
2840
2841 cvs2cl.pl - convert cvs log messages to changelogs
2842
2843 =head1 SYNOPSIS
2844
2845 B<cvs2cl> [I<options>] [I<FILE1> [I<FILE2> ...]]
2846
2847 =head1 DESCRIPTION
2848
2849 cvs2cl produces a GNU-style ChangeLog for CVS-controlled sources by
2850 running "cvs log" and parsing the output. Duplicate log messages get
2851 unified in the Right Way.
2852
2853 The default output of cvs2cl is designed to be compact, formally unambiguous,
2854 but still easy for humans to read.  It should be largely self-explanatory; the
2855 one abbreviation that might not be obvious is "utags".  That stands for
2856 "universal tags" -- a universal tag is one held by all the files in a given
2857 change entry.
2858
2859 If you need output that's easy for a program to parse, use the B<--xml> option.
2860 Note that with XML output, just about all available information is included
2861 with each change entry, whether you asked for it or not, on the theory that
2862 your parser can ignore anything it's not looking for.
2863
2864 If filenames are given as arguments cvs2cl only shows log information for the
2865 named files.
2866
2867 =head1 OPTIONS
2868
2869 =over 4
2870
2871 =item B<-h>, B<-help>, B<--help>, B<-?>
2872
2873 Show a short help and exit.
2874
2875 =item B<--version>
2876
2877 Show version and exit.
2878
2879 =item B<-r>, B<--revisions>
2880
2881 Show revision numbers in output.
2882
2883 =item B<-b>, B<--branches>
2884
2885 Show branch names in revisions when possible.
2886
2887 =item B<-t>, B<--tags>
2888
2889 Show tags (symbolic names) in output.
2890
2891 =item B<-T>, B<--tagdates>
2892
2893 Show tags in output on their first occurance.
2894
2895 =item B<--show-dead>
2896
2897 Show dead files.
2898
2899 =item B<--stdin>
2900
2901 Read from stdin, don't run cvs log.
2902
2903 =item B<--stdout>
2904
2905 Output to stdout not to ChangeLog.
2906
2907 =item B<-d>, B<--distributed>
2908
2909 Put ChangeLogs in subdirs.
2910
2911 =item B<-f> I<FILE>, B<--file> I<FILE>
2912
2913 Write to I<FILE> instead of ChangeLog.
2914
2915 =item B<--fsf>
2916
2917 Use this if log data is in FSF ChangeLog style.
2918
2919 =item B<--FSF>
2920
2921 Attempt strict FSF-standard compatible output (incompatible with B<--accum>).
2922
2923 =item B<-W> I<SECS>, B<--window> I<SECS>
2924
2925 Window of time within which log entries unify.
2926
2927 =item -B<U> I<UFILE>, B<--usermap> I<UFILE>
2928
2929 Expand usernames to email addresses from I<UFILE>.
2930
2931 =item B<--passwd> I<PASSWORDFILE>
2932
2933 Use system passwd file for user name expansion.  If no mail domain is provided
2934 (via B<--domain>), it tries to read one from B</etc/mailname>, output of B<hostname
2935 -d>, B<dnsdomainname>, or B<domain-name>.  cvs2cl exits with an error if none of
2936 those options is successful. Use a domain of '' to prevent the addition of a
2937 mail domain.
2938
2939 =item B<--domain> I<DOMAIN>
2940
2941 Domain to build email addresses from.
2942
2943 =item B<--gecos>
2944
2945 Get user information from GECOS data.
2946
2947 =item B<-R> I<REGEXP>, B<--regexp> I<REGEXP>
2948
2949 Include only entries that match I<REGEXP>.  This option may be used multiple
2950 times.
2951
2952 =item B<-I> I<REGEXP>, B<--ignore> I<REGEXP>
2953
2954 Ignore files whose names match I<REGEXP>.  This option may be used multiple
2955 times.  The regexp is a perl regular expression.  It is matched as is; you may
2956 want to prefix with a ^ or suffix with a $ to anchor the match.
2957
2958 =item B<-C>, B<--case-insensitive>
2959
2960 Any regexp matching is done case-insensitively.
2961
2962 =item B<-F> I<BRANCH>, B<--follow> I<BRANCH>
2963
2964 Show only revisions on or ancestral to I<BRANCH>.
2965
2966 =item B<--follow-only> I<BRANCH>
2967
2968 Like --follow, but sub-branches are not followed.
2969
2970 =item B<--no-ancestors>
2971
2972 When using B<-F>, only track changes since the I<BRANCH> started.
2973
2974 =item B<--no-hide-branch-additions>
2975
2976 By default, entries generated by cvs for a file added on a branch (a dead 1.1
2977 entry) are not shown.  This flag reverses that action.
2978
2979 =item B<-S>, B<--separate-header>
2980
2981 Blank line between each header and log message.
2982
2983 =item B<--group-by-date>
2984
2985 Group ChangeLog entries on the same date together, instead of having a
2986 separate entry for each commit on that date.
2987
2988 =item B<--group-by-author>
2989
2990 Group consecutive ChangeLog entries from same author during same date,
2991 instead of having separate entry for each commit.
2992
2993 =item B<--summary>
2994
2995 Add CVS change summary information.
2996
2997 =item B<--no-wrap>
2998
2999 Don't auto-wrap log message (recommend B<-S> also).
3000
3001 =item B<--no-indent>
3002
3003 Don't indent log message
3004
3005 =item B<--gmt>, B<--utc>
3006
3007 Show times in GMT/UTC instead of local time.
3008
3009 =item B<--accum>
3010
3011 Add to an existing ChangeLog (incompatible with B<--xml> and B<--FSF>).
3012
3013 =item B<-w>, B<--day-of-week>
3014
3015 Show day of week.
3016
3017 =item B<--no-times>
3018
3019 Don't show times in output.
3020
3021 =item B<--chrono>
3022
3023 Output log in chronological order (default is reverse chronological order).
3024
3025 =item B<--header> I<FILE>
3026
3027 Get ChangeLog header from I<FILE> ("B<->" means stdin).
3028
3029 =item B<--xml>
3030
3031 Output XML instead of ChangeLog format (incompatible with B<--accum>).
3032
3033 =item B<--xml-encoding> I<ENCODING.>
3034
3035 Insert encoding clause in XML header.
3036
3037 =item B<--noxmlns>
3038
3039 Don't include xmlns= attribute in root element.
3040
3041 =item B<--hide-filenames>
3042
3043 Don't show filenames (ignored for XML output).
3044
3045 =item B<--no-common-dir>
3046
3047 Don't shorten directory names from filenames.
3048
3049 =item B<--rcs> I<CVSROOT>
3050
3051 Handle filenames from raw RCS, for instance those produced by "cvs rlog"
3052 output, stripping the prefix I<CVSROOT>.
3053
3054 =item B<-P>, B<--prune>
3055
3056 Don't show empty log messages.
3057
3058 =item B<--lines-modified>
3059
3060 Output the number of lines added and the number of lines removed for
3061 each checkin (if applicable). At the moment, this only affects the
3062 XML output mode.
3063
3064 =item B<--ignore-tag> I<TAG>
3065
3066 Ignore individual changes that are associated with a given tag.
3067 May be repeated, if so, changes that are associated with any of
3068 the given tags are ignored.
3069
3070 =item B<--show-tag> I<TAG>
3071
3072 Log only individual changes that are associated with a given
3073 tag.  May be repeated, if so, changes that are associated with
3074 any of the given tags are logged.
3075
3076 =item B<--delta> I<FROM_TAG>B<:>I<TO_TAG>
3077
3078 Attempt a delta between two tags (since I<FROM_TAG> up to and
3079 including I<TO_TAG>).  The algorithm is a simple date-based one
3080 (this is a hard problem) so results are imperfect.
3081
3082 =item B<-g> I<OPTS>, B<--global-opts> I<OPTS>
3083
3084 Pass I<OPTS> to cvs like in "cvs I<OPTS> log ...".
3085
3086 =item B<-l> I<OPTS>, B<--log-opts> I<OPTS>
3087
3088 Pass I<OPTS> to cvs log like in "cvs ... log I<OPTS>".
3089
3090 =back
3091
3092 Notes about the options and arguments:
3093
3094 =over 4
3095
3096 =item *
3097
3098 The B<-I> and B<-F> options may appear multiple times.
3099
3100 =item *
3101
3102 To follow trunk revisions, use "B<-F trunk>" ("B<-F TRUNK>" also works).  This is
3103 okay because no would ever, ever be crazy enough to name a branch "trunk",
3104 right?  Right.
3105
3106 =item *
3107
3108 For the B<-U> option, the I<UFILE> should be formatted like CVSROOT/users. That is,
3109 each line of I<UFILE> looks like this:
3110
3111        jrandom:jrandom@red-bean.com
3112
3113 or maybe even like this
3114
3115        jrandom:'Jesse Q. Random <jrandom@red-bean.com>'
3116
3117 Don't forget to quote the portion after the colon if necessary.
3118
3119 =item *
3120
3121 Many people want to filter by date.  To do so, invoke cvs2cl.pl like this:
3122
3123        cvs2cl.pl -l "-d'DATESPEC'"
3124
3125 where DATESPEC is any date specification valid for "cvs log -d".  (Note that
3126 CVS 1.10.7 and below requires there be no space between -d and its argument).
3127
3128 =item *
3129
3130 Dates/times are interpreted in the local time zone.
3131
3132 =item *
3133
3134 Remember to quote the argument to `B<-l>' so that your shell doesn't interpret
3135 spaces as argument separators.
3136
3137 =item *
3138
3139 See the 'Common Options' section of the cvs manual ('info cvs' on UNIX-like
3140 systems) for more information.
3141
3142 =item *
3143
3144 Note that the rules for quoting under windows shells are different.
3145
3146 =item *
3147
3148 To run in an automated environment such as CGI or PHP, suidperl may be needed
3149 in order to execute as the correct user to enable /cvsroot read lock files to
3150 be written for the 'cvs log' command.  This is likely just a case of changing
3151 the /usr/bin/perl command to /usr/bin/suidperl, and explicitly declaring the
3152 PATH variable.
3153
3154 =back
3155
3156 =head1 EXAMPLES
3157
3158 Some examples (working on UNIX shells):
3159
3160       # logs after 6th March, 2003 (inclusive)
3161       cvs2cl.pl -l "-d'>2003-03-06'"
3162       # logs after 4:34PM 6th March, 2003 (inclusive)
3163       cvs2cl.pl -l "-d'>2003-03-06 16:34'"
3164       # logs between 4:46PM 6th March, 2003 (exclusive) and
3165       # 4:34PM 6th March, 2003 (inclusive)
3166       cvs2cl.pl -l "-d'2003-03-06 16:46>2003-03-06 16:34'"
3167
3168 Some examples (on non-UNIX shells):
3169
3170       # Reported to work on windows xp/2000
3171       cvs2cl.pl -l  "-d"">2003-10-18;today<"""
3172
3173 =head1 AUTHORS
3174
3175 =over 4
3176
3177 =item Karl Fogel
3178
3179 =item Melissa O'Neill
3180
3181 =item Martyn J. Pearce
3182
3183 =back
3184
3185 Contributions from
3186
3187 =over 4
3188
3189 =item Mike Ayers
3190
3191 =item Tim Bradshaw
3192
3193 =item Richard Broberg
3194
3195 =item Nathan Bryant
3196
3197 =item Oswald Buddenhagen
3198
3199 =item Neil Conway
3200
3201 =item Arthur de Jong
3202
3203 =item Mark W. Eichin
3204
3205 =item Dave Elcock
3206
3207 =item Reid Ellis
3208
3209 =item Simon Josefsson
3210
3211 =item Robin Hugh Johnson
3212
3213 =item Terry Kane
3214
3215 =item Pete Kempf
3216
3217 =item Akos Kiss
3218
3219 =item Claus Klein
3220
3221 =item Eddie Kohler
3222
3223 =item Richard Laager
3224
3225 =item Kevin Lilly
3226
3227 =item Karl-Heinz Marbaise
3228
3229 =item Mitsuaki Masuhara
3230
3231 =item Henrik Nordstrom
3232
3233 =item Joe Orton
3234
3235 =item Peter Palfrader
3236
3237 =item Thomas Parmelan
3238
3239 =item Jordan Russell
3240
3241 =item Jacek Sliwerski
3242
3243 =item Johannes Stezenbach
3244
3245 =item Joseph Walton
3246
3247 =item Ernie Zapata
3248
3249 =back
3250
3251 =head1 BUGS
3252
3253 Please report bugs to C<cvs2cl-reports {_AT_} red-bean.com>.
3254
3255 =head1 PREREQUISITES
3256
3257 This script requires C<Text::Wrap>, C<Time::Local>, and C<File::Basename>.  It
3258 also seems to require C<Perl 5.004_04> or higher.
3259
3260 =head1 OPERATING SYSTEM COMPATIBILITY
3261
3262 Should work on any OS.
3263
3264 =head1 SCRIPT CATEGORIES
3265
3266 Version_Control/CVS
3267
3268 =head1 COPYRIGHT
3269
3270 (C) 2001,2002,2003,2004 Martyn J. Pearce, under the GNU GPL.
3271
3272 (C) 1999 Karl Fogel, under the GNU GPL.
3273
3274 cvs2cl.pl is free software; you can redistribute it and/or modify
3275 it under the terms of the GNU General Public License as published by
3276 the Free Software Foundation; either version 2, or (at your option)
3277 any later version.
3278
3279 cvs2cl.pl is distributed in the hope that it will be useful,
3280 but WITHOUT ANY WARRANTY; without even the implied warranty of
3281 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
3282 GNU General Public License for more details.
3283
3284 You may have received a copy of the GNU General Public License
3285 along with cvs2cl.pl; see the file COPYING.  If not, write to the
3286 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
3287 Boston, MA 02111-1307, USA.
3288
3289 =head1 SEE ALSO
3290
3291 cvs(1)
3292