external/bsd/flex/dist/tools/cvs2cl.pl

   1 #!/bin/sh
   2 exec perl -w -x $0 ${1+"$@"} # -*- mode: perl; perl-indent-level: 2; -*-
   3 #!perl -w
   4
   5
   6 ##############################################################
   7 ###                                                        ###
   8 ### cvs2cl.pl: produce ChangeLog(s) from `cvs log` output. ###
   9 ###                                                        ###
  10 ##############################################################
  11
  12 ## Revision: 1.1
  13 ## $Date: 2009/10/26 00:29:59 $
  14 ## $Author: christos $
  15 ##
  16 ##   (C) 2001,2002,2003 Martyn J. Pearce <fluffy@cpan.org>, under the GNU GPL.
  17 ##   (C) 1999 Karl Fogel <kfogel@red-bean.com>, under the GNU GPL.
  18 ##
  19 ##   (Extensively hacked on by Melissa O'Neill <oneill@cs.sfu.ca>.)
  20 ##
  21 ## cvs2cl.pl is free software; you can redistribute it and/or modify
  22 ## it under the terms of the GNU General Public License as published by
  23 ## the Free Software Foundation; either version 2, or (at your option)
  24 ## any later version.
  25 ##
  26 ## cvs2cl.pl is distributed in the hope that it will be useful,
  27 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
  28 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  29 ## GNU General Public License for more details.
  30 ##
  31 ## You may have received a copy of the GNU General Public License
  32 ## along with cvs2cl.pl; see the file COPYING.  If not, write to the
  33 ## Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  34 ## Boston, MA 02111-1307, USA.
  35
  36 \f
  37 use strict;
  38 use Text::Wrap;
  39 use Time::Local;
  40 use File::Basename;
  41
  42 \f
  43 # The Plan:
  44 #
  45 # Read in the logs for multiple files, spit out a nice ChangeLog that
  46 # mirrors the information entered during `cvs commit'.
  47 #
  48 # The problem presents some challenges. In an ideal world, we could
  49 # detect files with the same author, log message, and checkin time --
  50 # each <filelist, author, time, logmessage> would be a changelog entry.
  51 # We'd sort them; and spit them out.  Unfortunately, CVS is *not atomic*
  52 # so checkins can span a range of times.  Also, the directory structure
  53 # could be hierarchical.
  54 #
  55 # Another question is whether we really want to have the ChangeLog
  56 # exactly reflect commits. An author could issue two related commits,
  57 # with different log entries, reflecting a single logical change to the
  58 # source. GNU style ChangeLogs group these under a single author/date.
  59 # We try to do the same.
  60 #
  61 # So, we parse the output of `cvs log', storing log messages in a
  62 # multilevel hash that stores the mapping:
  63 #   directory => author => time => message => filelist
  64 # As we go, we notice "nearby" commit times and store them together
  65 # (i.e., under the same timestamp), so they appear in the same log
  66 # entry.
  67 #
  68 # When we've read all the logs, we twist this mapping into
  69 # a time => author => message => filelist mapping for each directory.
  70 #
  71 # If we're not using the `--distributed' flag, the directory is always
  72 # considered to be `./', even as descend into subdirectories.
  73
  74 \f
  75 ############### Globals ################
  76
  77 # What we run to generate it:
  78 my $Log_Source_Command = "cvs log";
  79
  80 # In case we have to print it out:
  81 my $VERSION = 'Revision: 1.1';
  82 $VERSION =~ s/\S+\s+(\S+)\s+\S+/$1/;
  83
  84 ## Vars set by options:
  85
  86 # Print debugging messages?
  87 my $Debug = 0;
  88
  89 # Just show version and exit?
  90 my $Print_Version = 0;
  91
  92 # Just print usage message and exit?
  93 my $Print_Usage = 0;
  94
  95 # Single top-level ChangeLog, or one per subdirectory?
  96 my $Distributed = 0;
  97
  98 # What file should we generate (defaults to "ChangeLog")?
  99 my $Log_File_Name = "ChangeLog";
 100
 101 # Grab most recent entry date from existing ChangeLog file, just add
 102 # to that ChangeLog.
 103 my $Cumulative = 0;
 104
 105 # Expand usernames to email addresses based on a map file?
 106 my $User_Map_File = "";
 107
 108 # Output to a file or to stdout?
 109 my $Output_To_Stdout = 0;
 110
 111 # Eliminate empty log messages?
 112 my $Prune_Empty_Msgs = 0;
 113
 114 # Tags of which not to output
 115 my @ignore_tags;
 116
 117 # Don't call Text::Wrap on the body of the message
 118 my $No_Wrap = 0;
 119
 120 # Separates header from log message.  Code assumes it is either " " or
 121 # "\n\n", so if there's ever an option to set it to something else,
 122 # make sure to go through all conditionals that use this var.
 123 my $After_Header = " ";
 124
 125 # XML Encoding
 126 my $XML_Encoding = '';
 127
 128 # Format more for programs than for humans.
 129 my $XML_Output = 0;
 130
 131 # Do some special tweaks for log data that was written in FSF
 132 # ChangeLog style.
 133 my $FSF_Style = 0;
 134
 135 # Show times in UTC instead of local time
 136 my $UTC_Times = 0;
 137
 138 # Show day of week in output?
 139 my $Show_Day_Of_Week = 0;
 140
 141 # Show revision numbers in output?
 142 my $Show_Revisions = 0;
 143
 144 # Show tags (symbolic names) in output?
 145 my $Show_Tags = 0;
 146
 147 # Show tags separately in output?
 148 my $Show_Tag_Dates = 0;
 149
 150 # Show branches by symbolic name in output?
 151 my $Show_Branches = 0;
 152
 153 # Show only revisions on these branches or their ancestors.
 154 my @Follow_Branches;
 155
 156 # Don't bother with files matching this regexp.
 157 my @Ignore_Files;
 158
 159 # How exactly we match entries.  We definitely want "o",
 160 # and user might add "i" by using --case-insensitive option.
 161 my $Case_Insensitive = 0;
 162
 163 # Maybe only show log messages matching a certain regular expression.
 164 my $Regexp_Gate = "";
 165
 166 # Pass this global option string along to cvs, to the left of `log':
 167 my $Global_Opts = "";
 168
 169 # Pass this option string along to the cvs log subcommand:
 170 my $Command_Opts = "";
 171
 172 # Read log output from stdin instead of invoking cvs log?
 173 my $Input_From_Stdin = 0;
 174
 175 # Don't show filenames in output.
 176 my $Hide_Filenames = 0;
 177
 178 # Max checkin duration. CVS checkin is not atomic, so we may have checkin
 179 # times that span a range of time. We assume that checkins will last no
 180 # longer than $Max_Checkin_Duration seconds, and that similarly, no
 181 # checkins will happen from the same users with the same message less
 182 # than $Max_Checkin_Duration seconds apart.
 183 my $Max_Checkin_Duration = 180;
 184
 185 # What to put at the front of [each] ChangeLog.
 186 my $ChangeLog_Header = "";
 187
 188 # Whether to enable 'delta' mode, and for what start/end tags.
 189 my $Delta_Mode = 0;
 190 my $Delta_From = "";
 191 my $Delta_To = "";
 192
 193 ## end vars set by options.
 194
 195 # latest observed times for the start/end tags in delta mode
 196 my $Delta_StartTime = 0;
 197 my $Delta_EndTime = 0;
 198
 199 # In 'cvs log' output, one long unbroken line of equal signs separates
 200 # files:
 201 my $file_separator = "======================================="
 202                    . "======================================";
 203
 204 # In 'cvs log' output, a shorter line of dashes separates log messages
 205 # within a file:
 206 my $logmsg_separator = "----------------------------";
 207
 208 ############### End globals ############
 209
 210 \f
 211 &parse_options ();
 212 &derive_change_log ();
 213
 214 \f
 215 ### Everything below is subroutine definitions. ###
 216
 217 # If accumulating, grab the boundary date from pre-existing ChangeLog.
 218 sub maybe_grab_accumulation_date ()
 219 {
 220   if (! $Cumulative) {
 221     return "";
 222   }
 223
 224   # else
 225
 226   open (LOG, "$Log_File_Name")
 227       or die ("trouble opening $Log_File_Name for reading ($!)");
 228
 229   my $boundary_date;
 230   while (<LOG>)
 231   {
 232     if (/^(\d\d\d\d-\d\d-\d\d\s+\d\d:\d\d)/)
 233     {
 234       $boundary_date = "$1";
 235       last;
 236     }
 237   }
 238
 239   close (LOG);
 240   return $boundary_date;
 241 }
 242
 243 # Fills up a ChangeLog structure in the current directory.
 244 sub derive_change_log ()
 245 {
 246   # See "The Plan" above for a full explanation.
 247
 248   my %grand_poobah;
 249
 250   my $file_full_path;
 251   my $time;
 252   my $revision;
 253   my $author;
 254   my $msg_txt;
 255   my $detected_file_separator;
 256
 257   my %tag_date_printed;
 258
 259   # Might be adding to an existing ChangeLog
 260   my $accumulation_date = &maybe_grab_accumulation_date ();
 261   if ($accumulation_date) {
 262     # Insert -d immediately after 'cvs log'
 263     my $Log_Date_Command = "-d\'>${accumulation_date}\'";
 264     $Log_Source_Command =~ s/(^.*log\S*)/$1 $Log_Date_Command/;
 265     &debug ("(adding log msg starting from $accumulation_date)\n");
 266   }
 267
 268   # We might be expanding usernames
 269   my %usermap;
 270
 271   # In general, it's probably not very maintainable to use state
 272   # variables like this to tell the loop what it's doing at any given
 273   # moment, but this is only the first one, and if we never have more
 274   # than a few of these, it's okay.
 275   my $collecting_symbolic_names = 0;
 276   my %symbolic_names;    # Where tag names get stored.
 277   my %branch_names;      # We'll grab branch names while we're at it.
 278   my %branch_numbers;    # Save some revisions for @Follow_Branches
 279   my @branch_roots;      # For showing which files are branch ancestors.
 280
 281   # Bleargh.  Compensate for a deficiency of custom wrapping.
 282   if (($After_Header ne " ") and $FSF_Style)
 283   {
 284     $After_Header .= "\t";
 285   }
 286
 287   if (! $Input_From_Stdin) {
 288     &debug ("(run \"${Log_Source_Command}\")\n");
 289     open (LOG_SOURCE, "$Log_Source_Command |")
 290         or die "unable to run \"${Log_Source_Command}\"";
 291   }
 292   else {
 293     open (LOG_SOURCE, "-") or die "unable to open stdin for reading";
 294   }
 295
 296   binmode LOG_SOURCE;
 297
 298   %usermap = &maybe_read_user_map_file ();
 299
 300   while (<LOG_SOURCE>)
 301   {
 302     # Canonicalize line endings
 303     s/\r$//;
 304     # If on a new file and don't see filename, skip until we find it, and
 305     # when we find it, grab it.
 306     if ((! (defined $file_full_path)) and /^Working file: (.*)/)
 307     {
 308       $file_full_path = $1;
 309       if (@Ignore_Files)
 310       {
 311         my $base;
 312         ($base, undef, undef) = fileparse ($file_full_path);
 313         # Ouch, I wish trailing operators in regexps could be
 314         # evaluated on the fly!
 315         if ($Case_Insensitive) {
 316           if (grep ($file_full_path =~ m|$_|i, @Ignore_Files)) {
 317             undef $file_full_path;
 318           }
 319         }
 320         elsif (grep ($file_full_path =~ m|$_|, @Ignore_Files)) {
 321           undef $file_full_path;
 322         }
 323       }
 324       next;
 325     }
 326
 327     # Just spin wheels if no file defined yet.
 328     next if (! $file_full_path);
 329
 330     # Collect tag names in case we're asked to print them in the output.
 331     if (/^symbolic names:$/) {
 332       $collecting_symbolic_names = 1;
 333       next;  # There's no more info on this line, so skip to next
 334     }
 335     if ($collecting_symbolic_names)
 336     {
 337       # All tag names are listed with whitespace in front in cvs log
 338       # output; so if see non-whitespace, then we're done collecting.
 339       if (/^\S/) {
 340         $collecting_symbolic_names = 0;
 341       }
 342       else    # we're looking at a tag name, so parse & store it
 343       {
 344         # According to the Cederqvist manual, in node "Tags", tag
 345         # names must start with an uppercase or lowercase letter and
 346         # can contain uppercase and lowercase letters, digits, `-',
 347         # and `_'.  However, it's not our place to enforce that, so
 348         # we'll allow anything CVS hands us to be a tag:
 349         /^\s+([^:]+): ([\d.]+)$/;
 350         my $tag_name = $1;
 351         my $tag_rev  = $2;
 352
 353         # A branch number either has an odd number of digit sections
 354         # (and hence an even number of dots), or has ".0." as the
 355         # second-to-last digit section.  Test for these conditions.
 356         my $real_branch_rev = "";
 357         if (($tag_rev =~ /^(\d+\.\d+\.)+\d+$/)   # Even number of dots...
 358             and (! ($tag_rev =~ /^(1\.)+1$/)))   # ...but not "1.[1.]1"
 359         {
 360           $real_branch_rev = $tag_rev;
 361         }
 362         elsif ($tag_rev =~ /(\d+\.(\d+\.)+)0.(\d+)/)  # Has ".0."
 363         {
 364           $real_branch_rev = $1 . $3;
 365         }
 366         # If we got a branch, record its number.
 367         if ($real_branch_rev)
 368         {
 369           $branch_names{$real_branch_rev} = $tag_name;
 370           if (@Follow_Branches) {
 371             if (grep ($_ eq $tag_name, @Follow_Branches)) {
 372               $branch_numbers{$tag_name} = $real_branch_rev;
 373             }
 374           }
 375         }
 376         else {
 377           # Else it's just a regular (non-branch) tag.
 378           push (@{$symbolic_names{$tag_rev}}, $tag_name);
 379         }
 380       }
 381     }
 382     # End of code for collecting tag names.
 383
 384     # If have file name, but not revision, and see revision, then grab
 385     # it.  (We collect unconditionally, even though we may or may not
 386     # ever use it.)
 387     if ((! (defined $revision)) and (/^revision (\d+\.[\d.]+)/))
 388     {
 389       $revision = $1;
 390
 391       if (@Follow_Branches)
 392       {
 393         foreach my $branch (@Follow_Branches)
 394         {
 395           # Special case for following trunk revisions
 396           if (($branch =~ /^trunk$/i) and ($revision =~ /^[0-9]+\.[0-9]+$/))
 397           {
 398             goto dengo;
 399           }
 400
 401           my $branch_number = $branch_numbers{$branch};
 402           if ($branch_number)
 403           {
 404             # Are we on one of the follow branches or an ancestor of
 405             # same?
 406             #
 407             # If this revision is a prefix of the branch number, or
 408             # possibly is less in the minormost number, OR if this
 409             # branch number is a prefix of the revision, then yes.
 410             # Otherwise, no.
 411             #
 412             # So below, we determine if any of those conditions are
 413             # met.
 414
 415             # Trivial case: is this revision on the branch?
 416             # (Compare this way to avoid regexps that screw up Emacs
 417             # indentation, argh.)
 418             if ((substr ($revision, 0, ((length ($branch_number)) + 1)))
 419                 eq ($branch_number . "."))
 420             {
 421               goto dengo;
 422             }
 423             # Non-trivial case: check if rev is ancestral to branch
 424             elsif ((length ($branch_number)) > (length ($revision)))
 425             {
 426               $revision =~ /^((?:\d+\.)+)(\d+)$/;
 427               my $r_left = $1;          # still has the trailing "."
 428               my $r_end = $2;
 429
 430               $branch_number =~ /^((?:\d+\.)+)(\d+)\.\d+$/;
 431               my $b_left = $1;  # still has trailing "."
 432               my $b_mid  = $2;   # has no trailing "."
 433
 434               if (($r_left eq $b_left)
 435                   && ($r_end <= $b_mid))
 436               {
 437                 goto dengo;
 438               }
 439             }
 440           }
 441         }
 442       }
 443       else    # (! @Follow_Branches)
 444       {
 445         next;
 446       }
 447
 448       # Else we are following branches, but this revision isn't on the
 449       # path.  So skip it.
 450       undef $revision;
 451     dengo:
 452       next;
 453     }
 454
 455     # If we don't have a revision right now, we couldn't possibly
 456     # be looking at anything useful.
 457     if (! (defined ($revision))) {
 458       $detected_file_separator = /^$file_separator$/o;
 459       if ($detected_file_separator) {
 460         # No revisions for this file; can happen, e.g. "cvs log -d DATE"
 461         goto CLEAR;
 462       }
 463       else {
 464         next;
 465       }
 466     }
 467
 468     # If have file name but not date and author, and see date or
 469     # author, then grab them:
 470     unless (defined $time)
 471     {
 472       if (/^date: .*/)
 473       {
 474         ($time, $author) = &parse_date_and_author ($_);
 475         if (defined ($usermap{$author}) and $usermap{$author}) {
 476           $author = $usermap{$author};
 477         }
 478       }
 479       else {
 480         $detected_file_separator = /^$file_separator$/o;
 481         if ($detected_file_separator) {
 482           # No revisions for this file; can happen, e.g. "cvs log -d DATE"
 483           goto CLEAR;
 484         }
 485       }
 486       # If the date/time/author hasn't been found yet, we couldn't
 487       # possibly care about anything we see.  So skip:
 488       next;
 489     }
 490
 491     # A "branches: ..." line here indicates that one or more branches
 492     # are rooted at this revision.  If we're showing branches, then we
 493     # want to show that fact as well, so we collect all the branches
 494     # that this is the latest ancestor of and store them in
 495     # @branch_roots.  Just for reference, the format of the line we're
 496     # seeing at this point is:
 497     #
 498     #    branches:  1.5.2;  1.5.4;  ...;
 499     #
 500     # Okay, here goes:
 501
 502     if (/^branches:\s+(.*);$/)
 503     {
 504       if ($Show_Branches)
 505       {
 506         my $lst = $1;
 507         $lst =~ s/(1\.)+1;|(1\.)+1$//;  # ignore the trivial branch 1.1.1
 508         if ($lst) {
 509           @branch_roots = split (/;\s+/, $lst);
 510         }
 511         else {
 512           undef @branch_roots;
 513         }
 514         next;
 515       }
 516       else
 517       {
 518         # Ugh.  This really bothers me.  Suppose we see a log entry
 519         # like this:
 520         #
 521         #    ----------------------------
 522         #    revision 1.1
 523         #    date: 1999/10/17 03:07:38;  author: jrandom;  state: Exp;
 524         #    branches:  1.1.2;
 525         #    Intended first line of log message begins here.
 526         #    ----------------------------
 527         #
 528         # The question is, how we can tell the difference between that
 529         # log message and a *two*-line log message whose first line is
 530         #
 531         #    "branches:  1.1.2;"
 532         #
 533         # See the problem?  The output of "cvs log" is inherently
 534         # ambiguous.
 535         #
 536         # For now, we punt: we liberally assume that people don't
 537         # write log messages like that, and just toss a "branches:"
 538         # line if we see it but are not showing branches.  I hope no
 539         # one ever loses real log data because of this.
 540         next;
 541       }
 542     }
 543
 544     # If have file name, time, and author, then we're just grabbing
 545     # log message texts:
 546     $detected_file_separator = /^$file_separator$/o;
 547     if ($detected_file_separator && ! (defined $revision)) {
 548       # No revisions for this file; can happen, e.g. "cvs log -d DATE"
 549       goto CLEAR;
 550     }
 551     unless ($detected_file_separator || /^$logmsg_separator$/o)
 552     {
 553       $msg_txt .= $_;   # Normally, just accumulate the message...
 554       next;
 555     }
 556     # ... until a msg separator is encountered:
 557     # Ensure the message contains something:
 558     if ((! $msg_txt)
 559         || ($msg_txt =~ /^\s*\.\s*$|^\s*$/)
 560         || ($msg_txt =~ /\*\*\* empty log message \*\*\*/))
 561     {
 562       if ($Prune_Empty_Msgs) {
 563         goto CLEAR;
 564       }
 565       # else
 566       $msg_txt = "[no log message]\n";
 567     }
 568
 569     ### Store it all in the Grand Poobah:
 570     {
 571       my $dir_key;        # key into %grand_poobah
 572       my %qunk;           # complicated little jobbie, see below
 573
 574       # Each revision of a file has a little data structure (a `qunk')
 575       # associated with it.  That data structure holds not only the
 576       # file's name, but any additional information about the file
 577       # that might be needed in the output, such as the revision
 578       # number, tags, branches, etc.  The reason to have these things
 579       # arranged in a data structure, instead of just appending them
 580       # textually to the file's name, is that we may want to do a
 581       # little rearranging later as we write the output.  For example,
 582       # all the files on a given tag/branch will go together, followed
 583       # by the tag in parentheses (so trunk or otherwise non-tagged
 584       # files would go at the end of the file list for a given log
 585       # message).  This rearrangement is a lot easier to do if we
 586       # don't have to reparse the text.
 587       #
 588       # A qunk looks like this:
 589       #
 590       #   {
 591       #     filename    =>    "hello.c",
 592       #     revision    =>    "1.4.3.2",
 593       #     time        =>    a timegm() return value (moment of commit)
 594       #     tags        =>    [ "tag1", "tag2", ... ],
 595       #     branch      =>    "branchname" # There should be only one, right?
 596       #     branchroots =>    [ "branchtag1", "branchtag2", ... ]
 597       #   }
 598
 599       if ($Distributed) {
 600         # Just the basename, don't include the path.
 601         ($qunk{'filename'}, $dir_key, undef) = fileparse ($file_full_path);
 602       }
 603       else {
 604         $dir_key = "./";
 605         $qunk{'filename'} = $file_full_path;
 606       }
 607
 608       # This may someday be used in a more sophisticated calculation
 609       # of what other files are involved in this commit.  For now, we
 610       # don't use it much except for delta mode, because the
 611       # common-commit-detection algorithm is hypothesized to be
 612       # "good enough" as it stands.
 613       $qunk{'time'} = $time;
 614
 615       # We might be including revision numbers and/or tags and/or
 616       # branch names in the output.  Most of the code from here to
 617       # loop-end deals with organizing these in qunk.
 618
 619       $qunk{'revision'} = $revision;
 620
 621       # Grab the branch, even though we may or may not need it:
 622       $qunk{'revision'} =~ /((?:\d+\.)+)\d+/;
 623       my $branch_prefix = $1;
 624       $branch_prefix =~ s/\.$//;  # strip off final dot
 625       if ($branch_names{$branch_prefix}) {
 626         $qunk{'branch'} = $branch_names{$branch_prefix};
 627       }
 628
 629       # If there's anything in the @branch_roots array, then this
 630       # revision is the root of at least one branch.  We'll display
 631       # them as branch names instead of revision numbers, the
 632       # substitution for which is done directly in the array:
 633       if (@branch_roots) {
 634         my @roots = map { $branch_names{$_} } @branch_roots;
 635         $qunk{'branchroots'} = \@roots;
 636       }
 637
 638       # Save tags too.
 639       if (defined ($symbolic_names{$revision})) {
 640         $qunk{'tags'} = $symbolic_names{$revision};
 641         delete $symbolic_names{$revision};
 642
 643         # If we're in 'delta' mode, update the latest observed
 644         # times for the beginning and ending tags, and
 645         # when we get around to printing output, we will simply restrict
 646         # ourselves to that timeframe...
 647
 648         if ($Delta_Mode) {
 649           if (($time > $Delta_StartTime) &&
 650               (grep { $_ eq $Delta_From } @{$qunk{'tags'}}))
 651           {
 652             $Delta_StartTime = $time;
 653           }
 654
 655           if (($time > $Delta_EndTime) &&
 656               (grep { $_ eq $Delta_To } @{$qunk{'tags'}}))
 657           {
 658             $Delta_EndTime = $time;
 659           }
 660         }
 661       }
 662
 663       # Add this file to the list
 664       # (We use many spoonfuls of autovivication magic. Hashes and arrays
 665       # will spring into existence if they aren't there already.)
 666
 667       &debug ("(pushing log msg for ${dir_key}$qunk{'filename'})\n");
 668
 669       # Store with the files in this commit.  Later we'll loop through
 670       # again, making sure that revisions with the same log message
 671       # and nearby commit times are grouped together as one commit.
 672       push (@{$grand_poobah{$dir_key}{$author}{$time}{$msg_txt}}, \%qunk);
 673     }
 674
 675   CLEAR:
 676     # Make way for the next message
 677     undef $msg_txt;
 678     undef $time;
 679     undef $revision;
 680     undef $author;
 681     undef @branch_roots;
 682
 683     # Maybe even make way for the next file:
 684     if ($detected_file_separator) {
 685       undef $file_full_path;
 686       undef %branch_names;
 687       undef %branch_numbers;
 688       undef %symbolic_names;
 689     }
 690   }
 691
 692   close (LOG_SOURCE);
 693
 694   ### Process each ChangeLog
 695
 696   while (my ($dir,$authorhash) = each %grand_poobah)
 697   {
 698     &debug ("DOING DIR: $dir\n");
 699
 700     # Here we twist our hash around, from being
 701     #   author => time => message => filelist
 702     # in %$authorhash to
 703     #   time => author => message => filelist
 704     # in %changelog.
 705     #
 706     # This is also where we merge entries.  The algorithm proceeds
 707     # through the timeline of the changelog with a sliding window of
 708     # $Max_Checkin_Duration seconds; within that window, entries that
 709     # have the same log message are merged.
 710     #
 711     # (To save space, we zap %$authorhash after we've copied
 712     # everything out of it.)
 713
 714     my %changelog;
 715     while (my ($author,$timehash) = each %$authorhash)
 716     {
 717       my $lasttime;
 718       my %stamptime;
 719       foreach my $time (sort {$main::a <=> $main::b} (keys %$timehash))
 720       {
 721         my $msghash = $timehash->{$time};
 722         while (my ($msg,$qunklist) = each %$msghash)
 723         {
 724           my $stamptime = $stamptime{$msg};
 725           if ((defined $stamptime)
 726               and (($time - $stamptime) < $Max_Checkin_Duration)
 727               and (defined $changelog{$stamptime}{$author}{$msg}))
 728           {
 729             push(@{$changelog{$stamptime}{$author}{$msg}}, @$qunklist);
 730           }
 731           else {
 732             $changelog{$time}{$author}{$msg} = $qunklist;
 733             $stamptime{$msg} = $time;
 734           }
 735         }
 736       }
 737     }
 738     undef (%$authorhash);
 739
 740     ### Now we can write out the ChangeLog!
 741
 742     my ($logfile_here, $logfile_bak, $tmpfile);
 743
 744     if (! $Output_To_Stdout) {
 745       $logfile_here =  $dir . $Log_File_Name;
 746       $logfile_here =~ s/^\.\/\//\//;   # fix any leading ".//" problem
 747       $tmpfile      = "${logfile_here}.cvs2cl$$.tmp";
 748       $logfile_bak  = "${logfile_here}.bak";
 749
 750       open (LOG_OUT, ">$tmpfile") or die "Unable to open \"$tmpfile\"";
 751     }
 752     else {
 753       open (LOG_OUT, ">-") or die "Unable to open stdout for writing";
 754     }
 755
 756     print LOG_OUT $ChangeLog_Header;
 757
 758     if ($XML_Output) {
 759       my $encoding    =
 760         length $XML_Encoding ? qq'encoding="$XML_Encoding"' : '';
 761       my $version     = 'version="1.0"';
 762       my $declaration =
 763         sprintf '<?xml %s?>', join ' ', grep length, $version, $encoding;
 764       my $root        =
 765         '<changelog xmlns="http://www.red-bean.com/xmlns/cvs2cl/">';
 766       print LOG_OUT "$declaration\n\n$root\n\n";
 767     }
 768
 769     foreach my $time (sort {$main::b <=> $main::a} (keys %changelog))
 770     {
 771       next if ($Delta_Mode &&
 772                (($time <= $Delta_StartTime) ||
 773                 ($time > $Delta_EndTime && $Delta_EndTime)));
 774
 775       # Set up the date/author line.
 776       # kff todo: do some more XML munging here, on the header
 777       # part of the entry:
 778       my ($ignore,$min,$hour,$mday,$mon,$year,$wday)
 779           = $UTC_Times ? gmtime($time) : localtime($time);
 780
 781       # XML output includes everything else, we might as well make
 782       # it always include Day Of Week too, for consistency.
 783       if ($Show_Day_Of_Week or $XML_Output) {
 784         $wday = ("Sunday", "Monday", "Tuesday", "Wednesday",
 785                  "Thursday", "Friday", "Saturday")[$wday];
 786         $wday = ($XML_Output) ? "<weekday>${wday}</weekday>\n" : " $wday";
 787       }
 788       else {
 789         $wday = "";
 790       }
 791
 792       my $authorhash = $changelog{$time};
 793       if ($Show_Tag_Dates) {
 794         my %tags;
 795         while (my ($author,$mesghash) = each %$authorhash) {
 796           while (my ($msg,$qunk) = each %$mesghash) {
 797             foreach my $qunkref2 (@$qunk) {
 798               if (defined ($$qunkref2{'tags'})) {
 799                 foreach my $tag (@{$$qunkref2{'tags'}}) {
 800                   $tags{$tag} = 1;
 801                 }
 802               }
 803             }
 804           }
 805         }
 806         foreach my $tag (keys %tags) {
 807           if (!defined $tag_date_printed{$tag}) {
 808             $tag_date_printed{$tag} = $time;
 809             if ($XML_Output) {
 810               # NOT YET DONE
 811             }
 812             else {
 813               printf LOG_OUT ("%4u-%02u-%02u${wday} %02u:%02u  tag %s\n\n",
 814                               $year+1900, $mon+1, $mday, $hour, $min, $tag);
 815             }
 816           }
 817         }
 818       }
 819       while (my ($author,$mesghash) = each %$authorhash)
 820       {
 821         # If XML, escape in outer loop to avoid compound quoting:
 822         if ($XML_Output) {
 823           $author = &xml_escape ($author);
 824         }
 825
 826       FOOBIE:
 827         while (my ($msg,$qunklist) = each %$mesghash)
 828         {
 829           ## MJP: 19.xii.01 : Exclude @ignore_tags
 830           for my $ignore_tag (@ignore_tags) {
 831             next FOOBIE
 832               if grep $_ eq $ignore_tag, map(@{$_->{tags}},
 833                                              grep(defined $_->{tags},
 834                                                   @$qunklist));
 835           }
 836           ## MJP: 19.xii.01 : End exclude @ignore_tags
 837
 838           my $files               = &pretty_file_list ($qunklist);
 839           my $header_line;          # date and author
 840           my $body;                 # see below
 841           my $wholething;           # $header_line + $body
 842
 843           if ($XML_Output) {
 844             $header_line =
 845                 sprintf ("<date>%4u-%02u-%02u</date>\n"
 846                          . "${wday}"
 847                          . "<time>%02u:%02u</time>\n"
 848                          . "<author>%s</author>\n",
 849                          $year+1900, $mon+1, $mday, $hour, $min, $author);
 850           }
 851           else {
 852             $header_line =
 853                 sprintf ("%4u-%02u-%02u${wday} %02u:%02u  %s\n\n",
 854                          $year+1900, $mon+1, $mday, $hour, $min, $author);
 855           }
 856
 857           $Text::Wrap::huge = 'overflow'
 858             if $Text::Wrap::VERSION >= 2001.0130;
 859           # Reshape the body according to user preferences.
 860           if ($XML_Output)
 861           {
 862             $msg = &preprocess_msg_text ($msg);
 863             $body = $files . $msg;
 864           }
 865           elsif ($No_Wrap)
 866           {
 867             $msg = &preprocess_msg_text ($msg);
 868             $files = wrap ("\t", "      ", "$files");
 869             $msg =~ s/\n(.*)/\n\t$1/g;
 870             unless ($After_Header eq " ") {
 871               $msg =~ s/^(.*)/\t$1/g;
 872             }
 873             $body = $files . $After_Header . $msg;
 874           }
 875           else  # do wrapping, either FSF-style or regular
 876           {
 877             if ($FSF_Style)
 878             {
 879               $files = wrap ("\t", "        ", "$files");
 880
 881               my $files_last_line_len = 0;
 882               if ($After_Header eq " ")
 883               {
 884                 $files_last_line_len = &last_line_len ($files);
 885                 $files_last_line_len += 1;  # for $After_Header
 886               }
 887
 888               $msg = &wrap_log_entry
 889                   ($msg, "\t", 69 - $files_last_line_len, 69);
 890               $body = $files . $After_Header . $msg;
 891             }
 892             else  # not FSF-style
 893             {
 894               $msg = &preprocess_msg_text ($msg);
 895               $body = $files . $After_Header . $msg;
 896               $body = wrap ("\t", "        ", "$body");
 897             }
 898           }
 899
 900           $wholething = $header_line . $body;
 901
 902           if ($XML_Output) {
 903             $wholething = "<entry>\n${wholething}</entry>\n";
 904           }
 905
 906           # One last check: make sure it passes the regexp test, if the
 907           # user asked for that.  We have to do it here, so that the
 908           # test can match against information in the header as well
 909           # as in the text of the log message.
 910
 911           # How annoying to duplicate so much code just because I
 912           # can't figure out a way to evaluate scalars on the trailing
 913           # operator portion of a regular expression.  Grrr.
 914           if ($Case_Insensitive) {
 915             unless ($Regexp_Gate && ($wholething !~ /$Regexp_Gate/oi)) {
 916               print LOG_OUT "${wholething}\n";
 917             }
 918           }
 919           else {
 920             unless ($Regexp_Gate && ($wholething !~ /$Regexp_Gate/o)) {
 921               print LOG_OUT "${wholething}\n";
 922             }
 923           }
 924         }
 925       }
 926     }
 927
 928     if ($XML_Output) {
 929       print LOG_OUT "</changelog>\n";
 930     }
 931
 932     close (LOG_OUT);
 933
 934     if (! $Output_To_Stdout)
 935     {
 936       # If accumulating, append old data to new before renaming.  But
 937       # don't append the most recent entry, since it's already in the
 938       # new log due to CVS's idiosyncratic interpretation of "log -d".
 939       if ($Cumulative && -f $logfile_here)
 940       {
 941         open (NEW_LOG, ">>$tmpfile")
 942             or die "trouble appending to $tmpfile ($!)";
 943
 944         open (OLD_LOG, "<$logfile_here")
 945             or die "trouble reading from $logfile_here ($!)";
 946
 947         my $started_first_entry = 0;
 948         my $passed_first_entry = 0;
 949         while (<OLD_LOG>)
 950         {
 951           if (! $passed_first_entry)
 952           {
 953             if ((! $started_first_entry)
 954                 && /^(\d\d\d\d-\d\d-\d\d\s+\d\d:\d\d)/) {
 955               $started_first_entry = 1;
 956             }
 957             elsif (/^(\d\d\d\d-\d\d-\d\d\s+\d\d:\d\d)/) {
 958               $passed_first_entry = 1;
 959               print NEW_LOG $_;
 960             }
 961           }
 962           else {
 963             print NEW_LOG $_;
 964           }
 965         }
 966
 967         close (NEW_LOG);
 968         close (OLD_LOG);
 969       }
 970
 971       if (-f $logfile_here) {
 972         rename ($logfile_here, $logfile_bak);
 973       }
 974       rename ($tmpfile, $logfile_here);
 975     }
 976   }
 977 }
 978
 979 sub parse_date_and_author ()
 980 {
 981   # Parses the date/time and author out of a line like:
 982   #
 983   # date: 1999/02/19 23:29:05;  author: apharris;  state: Exp;
 984
 985   my $line = shift;
 986
 987   my ($year, $mon, $mday, $hours, $min, $secs, $author) = $line =~
 988       m#(\d+)/(\d+)/(\d+)\s+(\d+):(\d+):(\d+);\s+author:\s+([^;]+);#
 989           or  die "Couldn't parse date ``$line''";
 990   die "Bad date or Y2K issues" unless ($year > 1969 and $year < 2258);
 991   # Kinda arbitrary, but useful as a sanity check
 992   my $time = timegm($secs,$min,$hours,$mday,$mon-1,$year-1900);
 993
 994   return ($time, $author);
 995 }
 996
 997 # Here we take a bunch of qunks and convert them into printed
 998 # summary that will include all the information the user asked for.
 999 sub pretty_file_list ()
1000 {
1001   if ($Hide_Filenames and (! $XML_Output)) {
1002     return "";
1003   }
1004
1005   my $qunksref = shift;
1006   my @qunkrefs = @$qunksref;
1007   my @filenames;
1008   my $beauty = "";          # The accumulating header string for this entry.
1009   my %non_unanimous_tags;   # Tags found in a proper subset of qunks
1010   my %unanimous_tags;       # Tags found in all qunks
1011   my %all_branches;         # Branches found in any qunk
1012   my $common_dir = undef;   # Dir prefix common to all files ("" if none)
1013   my $fbegun = 0;           # Did we begin printing filenames yet?
1014
1015   # First, loop over the qunks gathering all the tag/branch names.
1016   # We'll put them all in non_unanimous_tags, and take out the
1017   # unanimous ones later.
1018  QUNKREF:
1019   foreach my $qunkref (@qunkrefs)
1020   {
1021     ## MJP: 19.xii.01 : Exclude @ignore_tags
1022     for my $ignore_tag (@ignore_tags) {
1023       next QUNKREF
1024         if grep $_ eq $ignore_tag, @{$$qunkref{'tags'}};
1025     }
1026     ## MJP: 19.xii.01 : End exclude @ignore_tags
1027
1028     # Keep track of whether all the files in this commit were in the
1029     # same directory, and memorize it if so.  We can make the output a
1030     # little more compact by mentioning the directory only once.
1031     if ((scalar (@qunkrefs)) > 1)
1032     {
1033       if (! (defined ($common_dir)))
1034       {
1035         my ($base, $dir);
1036         ($base, $dir, undef) = fileparse ($$qunkref{'filename'});
1037
1038         if ((! (defined ($dir)))  # this first case is sheer paranoia
1039             or ($dir eq "")
1040             or ($dir eq "./")
1041             or ($dir eq ".\\"))
1042         {
1043           $common_dir = "";
1044         }
1045         else
1046         {
1047           $common_dir = $dir;
1048         }
1049       }
1050       elsif ($common_dir ne "")
1051       {
1052         # Already have a common dir prefix, so how much of it can we preserve?
1053         $common_dir = &common_path_prefix ($$qunkref{'filename'}, $common_dir);
1054       }
1055     }
1056     else  # only one file in this entry anyway, so common dir not an issue
1057     {
1058       $common_dir = "";
1059     }
1060
1061     if (defined ($$qunkref{'branch'})) {
1062       $all_branches{$$qunkref{'branch'}} = 1;
1063     }
1064     if (defined ($$qunkref{'tags'})) {
1065       foreach my $tag (@{$$qunkref{'tags'}}) {
1066         $non_unanimous_tags{$tag} = 1;
1067       }
1068     }
1069   }
1070
1071   # Any tag held by all qunks will be printed specially... but only if
1072   # there are multiple qunks in the first place!
1073   if ((scalar (@qunkrefs)) > 1) {
1074     foreach my $tag (keys (%non_unanimous_tags)) {
1075       my $everyone_has_this_tag = 1;
1076       foreach my $qunkref (@qunkrefs) {
1077         if ((! (defined ($$qunkref{'tags'})))
1078             or (! (grep ($_ eq $tag, @{$$qunkref{'tags'}})))) {
1079           $everyone_has_this_tag = 0;
1080         }
1081       }
1082       if ($everyone_has_this_tag) {
1083         $unanimous_tags{$tag} = 1;
1084         delete $non_unanimous_tags{$tag};
1085       }
1086     }
1087   }
1088
1089   if ($XML_Output)
1090   {
1091     # If outputting XML, then our task is pretty simple, because we
1092     # don't have to detect common dir, common tags, branch prefixing,
1093     # etc.  We just output exactly what we have, and don't worry about
1094     # redundancy or readability.
1095
1096     foreach my $qunkref (@qunkrefs)
1097     {
1098       my $filename    = $$qunkref{'filename'};
1099       my $revision    = $$qunkref{'revision'};
1100       my $tags        = $$qunkref{'tags'};
1101       my $branch      = $$qunkref{'branch'};
1102       my $branchroots = $$qunkref{'branchroots'};
1103
1104       $filename = &xml_escape ($filename);   # probably paranoia
1105       $revision = &xml_escape ($revision);   # definitely paranoia
1106
1107       $beauty .= "<file>\n";
1108       $beauty .= "<name>${filename}</name>\n";
1109       $beauty .= "<revision>${revision}</revision>\n";
1110       if ($branch) {
1111         $branch   = &xml_escape ($branch);     # more paranoia
1112         $beauty .= "<branch>${branch}</branch>\n";
1113       }
1114       foreach my $tag (@$tags) {
1115         $tag = &xml_escape ($tag);  # by now you're used to the paranoia
1116         $beauty .= "<tag>${tag}</tag>\n";
1117       }
1118       foreach my $root (@$branchroots) {
1119         $root = &xml_escape ($root);  # which is good, because it will continue
1120         $beauty .= "<branchroot>${root}</branchroot>\n";
1121       }
1122       $beauty .= "</file>\n";
1123     }
1124
1125     # Theoretically, we could go home now.  But as long as we're here,
1126     # let's print out the common_dir and utags, as a convenience to
1127     # the receiver (after all, earlier code calculated that stuff
1128     # anyway, so we might as well take advantage of it).
1129
1130     if ((scalar (keys (%unanimous_tags))) > 1) {
1131       foreach my $utag ((keys (%unanimous_tags))) {
1132         $utag = &xml_escape ($utag);   # the usual paranoia
1133         $beauty .= "<utag>${utag}</utag>\n";
1134       }
1135     }
1136     if ($common_dir) {
1137       $common_dir = &xml_escape ($common_dir);
1138       $beauty .= "<commondir>${common_dir}</commondir>\n";
1139     }
1140
1141     # That's enough for XML, time to go home:
1142     return $beauty;
1143   }
1144
1145   # Else not XML output, so complexly compactify for chordate
1146   # consumption.  At this point we have enough global information
1147   # about all the qunks to organize them non-redundantly for output.
1148
1149   if ($common_dir) {
1150     # Note that $common_dir still has its trailing slash
1151     $beauty .= "$common_dir: ";
1152   }
1153
1154   if ($Show_Branches)
1155   {
1156     # For trailing revision numbers.
1157     my @brevisions;
1158
1159     foreach my $branch (keys (%all_branches))
1160     {
1161       foreach my $qunkref (@qunkrefs)
1162       {
1163         if ((defined ($$qunkref{'branch'}))
1164             and ($$qunkref{'branch'} eq $branch))
1165         {
1166           if ($fbegun) {
1167             # kff todo: comma-delimited in XML too?  Sure.
1168             $beauty .= ", ";
1169           }
1170           else {
1171             $fbegun = 1;
1172           }
1173           my $fname = substr ($$qunkref{'filename'}, length ($common_dir));
1174           $beauty .= $fname;
1175           $$qunkref{'printed'} = 1;  # Just setting a mark bit, basically
1176
1177           if ($Show_Tags && (defined @{$$qunkref{'tags'}})) {
1178             my @tags = grep ($non_unanimous_tags{$_}, @{$$qunkref{'tags'}});
1179
1180             if (@tags) {
1181               $beauty .= " (tags: ";
1182               $beauty .= join (', ', @tags);
1183               $beauty .= ")";
1184             }
1185           }
1186
1187           if ($Show_Revisions) {
1188             # Collect the revision numbers' last components, but don't
1189             # print them -- they'll get printed with the branch name
1190             # later.
1191             $$qunkref{'revision'} =~ /.+\.([\d]+)$/;
1192             push (@brevisions, $1);
1193
1194             # todo: we're still collecting branch roots, but we're not
1195             # showing them anywhere.  If we do show them, it would be
1196             # nifty to just call them revision "0" on a the branch.
1197             # Yeah, that's the ticket.
1198           }
1199         }
1200       }
1201       $beauty .= " ($branch";
1202       if (@brevisions) {
1203         if ((scalar (@brevisions)) > 1) {
1204           $beauty .= ".[";
1205           $beauty .= (join (',', @brevisions));
1206           $beauty .= "]";
1207         }
1208         else {
1209           # Square brackets are spurious here, since there's no range to
1210           # encapsulate
1211           $beauty .= ".$brevisions[0]";
1212         }
1213       }
1214       $beauty .= ")";
1215     }
1216   }
1217
1218   # Okay; any qunks that were done according to branch are taken care
1219   # of, and marked as printed.  Now print everyone else.
1220
1221   foreach my $qunkref (@qunkrefs)
1222   {
1223     next if (defined ($$qunkref{'printed'}));   # skip if already printed
1224
1225     if ($fbegun) {
1226       $beauty .= ", ";
1227     }
1228     else {
1229       $fbegun = 1;
1230     }
1231     $beauty .= substr ($$qunkref{'filename'}, length ($common_dir));
1232     # todo: Shlomo's change was this:
1233     # $beauty .= substr ($$qunkref{'filename'},
1234     #              (($common_dir eq "./") ? "" : length ($common_dir)));
1235     $$qunkref{'printed'} = 1;  # Set a mark bit.
1236
1237     if ($Show_Revisions || $Show_Tags)
1238     {
1239       my $started_addendum = 0;
1240
1241       if ($Show_Revisions) {
1242         $started_addendum = 1;
1243         $beauty .= " (";
1244         $beauty .= "$$qunkref{'revision'}";
1245       }
1246       if ($Show_Tags && (defined $$qunkref{'tags'})) {
1247         my @tags = grep ($non_unanimous_tags{$_}, @{$$qunkref{'tags'}});
1248         if ((scalar (@tags)) > 0) {
1249           if ($started_addendum) {
1250             $beauty .= ", ";
1251           }
1252           else {
1253             $beauty .= " (tags: ";
1254           }
1255           $beauty .= join (', ', @tags);
1256           $started_addendum = 1;
1257         }
1258       }
1259       if ($started_addendum) {
1260         $beauty .= ")";
1261       }
1262     }
1263   }
1264
1265   # Unanimous tags always come last.
1266   if ($Show_Tags && %unanimous_tags)
1267   {
1268     $beauty .= " (utags: ";
1269     $beauty .= join (', ', sort keys (%unanimous_tags));
1270     $beauty .= ")";
1271   }
1272
1273   # todo: still have to take care of branch_roots?
1274
1275   $beauty = "* $beauty:";
1276
1277   return $beauty;
1278 }
1279
1280 sub common_path_prefix ()
1281 {
1282   my $path1 = shift;
1283   my $path2 = shift;
1284
1285   my ($dir1, $dir2);
1286   (undef, $dir1, undef) = fileparse ($path1);
1287   (undef, $dir2, undef) = fileparse ($path2);
1288
1289   # Transmogrify Windows filenames to look like Unix.
1290   # (It is far more likely that someone is running cvs2cl.pl under
1291   # Windows than that they would genuinely have backslashes in their
1292   # filenames.)
1293   $dir1 =~ tr#\\#/#;
1294   $dir2 =~ tr#\\#/#;
1295
1296   my $accum1 = "";
1297   my $accum2 = "";
1298   my $last_common_prefix = "";
1299
1300   while ($accum1 eq $accum2)
1301   {
1302     $last_common_prefix = $accum1;
1303     last if ($accum1 eq $dir1);
1304     my ($tmp1) = split (/\//, (substr ($dir1, length ($accum1))));
1305     my ($tmp2) = split (/\//, (substr ($dir2, length ($accum2))));
1306     $accum1 .= "$tmp1/" if (defined $tmp1 and $tmp1 ne '');
1307     $accum2 .= "$tmp2/" if (defined $tmp2 and $tmp2 ne '');
1308   }
1309
1310   return $last_common_prefix;
1311 }
1312
1313 sub preprocess_msg_text ()
1314 {
1315   my $text = shift;
1316
1317   # Strip out carriage returns (as they probably result from DOSsy editors).
1318   $text =~ s/\r\n/\n/g;
1319
1320   # If it *looks* like two newlines, make it *be* two newlines:
1321   $text =~ s/\n\s*\n/\n\n/g;
1322
1323   if ($XML_Output)
1324   {
1325     $text = &xml_escape ($text);
1326     $text = "<msg>${text}</msg>\n";
1327   }
1328   elsif (! $No_Wrap)
1329   {
1330     # Strip off lone newlines, but only for lines that don't begin with
1331     # whitespace or a mail-quoting character, since we want to preserve
1332     # that kind of formatting.  Also don't strip newlines that follow a
1333     # period; we handle those specially next.  And don't strip
1334     # newlines that precede an open paren.
1335     1 while ($text =~ s/(^|\n)([^>\s].*[^.\n])\n([^>\n])/$1$2 $3/g);
1336
1337     # If a newline follows a period, make sure that when we bring up the
1338     # bottom sentence, it begins with two spaces.
1339     1 while ($text =~ s/(^|\n)([^>\s].*)\n([^>\n])/$1$2  $3/g);
1340   }
1341
1342   return $text;
1343 }
1344
1345 sub last_line_len ()
1346 {
1347   my $files_list = shift;
1348   my @lines = split (/\n/, $files_list);
1349   my $last_line = pop (@lines);
1350   return length ($last_line);
1351 }
1352
1353 # A custom wrap function, sensitive to some common constructs used in
1354 # log entries.
1355 sub wrap_log_entry ()
1356 {
1357   my $text = shift;                  # The text to wrap.
1358   my $left_pad_str = shift;          # String to pad with on the left.
1359
1360   # These do NOT take left_pad_str into account:
1361   my $length_remaining = shift;      # Amount left on current line.
1362   my $max_line_length  = shift;      # Amount left for a blank line.
1363
1364   my $wrapped_text = "";             # The accumulating wrapped entry.
1365   my $user_indent = "";              # Inherited user_indent from prev line.
1366
1367   my $first_time = 1;                # First iteration of the loop?
1368   my $suppress_line_start_match = 0; # Set to disable line start checks.
1369
1370   my @lines = split (/\n/, $text);
1371   while (@lines)   # Don't use `foreach' here, it won't work.
1372   {
1373     my $this_line = shift (@lines);
1374     chomp $this_line;
1375
1376     if ($this_line =~ /^(\s+)/) {
1377       $user_indent = $1;
1378     }
1379     else {
1380       $user_indent = "";
1381     }
1382
1383     # If it matches any of the line-start regexps, print a newline now...
1384     if ($suppress_line_start_match)
1385     {
1386       $suppress_line_start_match = 0;
1387     }
1388     elsif (($this_line =~ /^(\s*)\*\s+[a-zA-Z0-9]/)
1389            || ($this_line =~ /^(\s*)\* [a-zA-Z0-9_\.\/\+-]+/)
1390            || ($this_line =~ /^(\s*)\([a-zA-Z0-9_\.\/\+-]+(\)|,\s*)/)
1391            || ($this_line =~ /^(\s+)(\S+)/)
1392            || ($this_line =~ /^(\s*)- +/)
1393            || ($this_line =~ /^()\s*$/)
1394            || ($this_line =~ /^(\s*)\*\) +/)
1395            || ($this_line =~ /^(\s*)[a-zA-Z0-9](\)|\.|\:) +/))
1396     {
1397       # Make a line break immediately, unless header separator is set
1398       # and this line is the first line in the entry, in which case
1399       # we're getting the blank line for free already and shouldn't
1400       # add an extra one.
1401       unless (($After_Header ne " ") and ($first_time))
1402       {
1403         if ($this_line =~ /^()\s*$/) {
1404           $suppress_line_start_match = 1;
1405           $wrapped_text .= "\n${left_pad_str}";
1406         }
1407
1408         $wrapped_text .= "\n${left_pad_str}";
1409       }
1410
1411       $length_remaining = $max_line_length - (length ($user_indent));
1412     }
1413
1414     # Now that any user_indent has been preserved, strip off leading
1415     # whitespace, so up-folding has no ugly side-effects.
1416     $this_line =~ s/^\s*//;
1417
1418     # Accumulate the line, and adjust parameters for next line.
1419     my $this_len = length ($this_line);
1420     if ($this_len == 0)
1421     {
1422       # Blank lines should cancel any user_indent level.
1423       $user_indent = "";
1424       $length_remaining = $max_line_length;
1425     }
1426     elsif ($this_len >= $length_remaining) # Line too long, try breaking it.
1427     {
1428       # Walk backwards from the end.  At first acceptable spot, break
1429       # a new line.
1430       my $idx = $length_remaining - 1;
1431       if ($idx < 0) { $idx = 0 };
1432       while ($idx > 0)
1433       {
1434         if (substr ($this_line, $idx, 1) =~ /\s/)
1435         {
1436           my $line_now = substr ($this_line, 0, $idx);
1437           my $next_line = substr ($this_line, $idx);
1438           $this_line = $line_now;
1439
1440           # Clean whitespace off the end.
1441           chomp $this_line;
1442
1443           # The current line is ready to be printed.
1444           $this_line .= "\n${left_pad_str}";
1445
1446           # Make sure the next line is allowed full room.
1447           $length_remaining = $max_line_length - (length ($user_indent));
1448
1449           # Strip next_line, but then preserve any user_indent.
1450           $next_line =~ s/^\s*//;
1451
1452           # Sneak a peek at the user_indent of the upcoming line, so
1453           # $next_line (which will now precede it) can inherit that
1454           # indent level.  Otherwise, use whatever user_indent level
1455           # we currently have, which might be none.
1456           my $next_next_line = shift (@lines);
1457           if ((defined ($next_next_line)) && ($next_next_line =~ /^(\s+)/)) {
1458             $next_line = $1 . $next_line if (defined ($1));
1459             # $length_remaining = $max_line_length - (length ($1));
1460             $next_next_line =~ s/^\s*//;
1461           }
1462           else {
1463             $next_line = $user_indent . $next_line;
1464           }
1465           if (defined ($next_next_line)) {
1466             unshift (@lines, $next_next_line);
1467           }
1468           unshift (@lines, $next_line);
1469
1470           # Our new next line might, coincidentally, begin with one of
1471           # the line-start regexps, so we temporarily turn off
1472           # sensitivity to that until we're past the line.
1473           $suppress_line_start_match = 1;
1474
1475           last;
1476         }
1477         else
1478         {
1479           $idx--;
1480         }
1481       }
1482
1483       if ($idx == 0)
1484       {
1485         # We bottomed out because the line is longer than the
1486         # available space.  But that could be because the space is
1487         # small, or because the line is longer than even the maximum
1488         # possible space.  Handle both cases below.
1489
1490         if ($length_remaining == ($max_line_length - (length ($user_indent))))
1491         {
1492           # The line is simply too long -- there is no hope of ever
1493           # breaking it nicely, so just insert it verbatim, with
1494           # appropriate padding.
1495           $this_line = "\n${left_pad_str}${this_line}";
1496         }
1497         else
1498         {
1499           # Can't break it here, but may be able to on the next round...
1500           unshift (@lines, $this_line);
1501           $length_remaining = $max_line_length - (length ($user_indent));
1502           $this_line = "\n${left_pad_str}";
1503         }
1504       }
1505     }
1506     else  # $this_len < $length_remaining, so tack on what we can.
1507     {
1508       # Leave a note for the next iteration.
1509       $length_remaining = $length_remaining - $this_len;
1510
1511       if ($this_line =~ /\.$/)
1512       {
1513         $this_line .= "  ";
1514         $length_remaining -= 2;
1515       }
1516       else  # not a sentence end
1517       {
1518         $this_line .= " ";
1519         $length_remaining -= 1;
1520       }
1521     }
1522
1523     # Unconditionally indicate that loop has run at least once.
1524     $first_time = 0;
1525
1526     $wrapped_text .= "${user_indent}${this_line}";
1527   }
1528
1529   # One last bit of padding.
1530   $wrapped_text .= "\n";
1531
1532   return $wrapped_text;
1533 }
1534
1535 sub xml_escape ()
1536 {
1537   my $txt = shift;
1538   $txt =~ s/&/&amp;/g;
1539   $txt =~ s/</&lt;/g;
1540   $txt =~ s/>/&gt;/g;
1541   return $txt;
1542 }
1543
1544 sub maybe_read_user_map_file ()
1545 {
1546   my %expansions;
1547
1548   if ($User_Map_File)
1549   {
1550     open (MAPFILE, "<$User_Map_File")
1551         or die ("Unable to open $User_Map_File ($!)");
1552
1553     while (<MAPFILE>)
1554     {
1555       next if /^\s*#/;  # Skip comment lines.
1556       next if not /:/;  # Skip lines without colons.
1557
1558       # It is now safe to split on ':'.
1559       my ($username, $expansion) = split ':';
1560       chomp $expansion;
1561       $expansion =~ s/^'(.*)'$/$1/;
1562       $expansion =~ s/^"(.*)"$/$1/;
1563
1564       # If it looks like the expansion has a real name already, then
1565       # we toss the username we got from CVS log.  Otherwise, keep
1566       # it to use in combination with the email address.
1567
1568       if ($expansion =~ /^\s*<{0,1}\S+@.*/) {
1569         # Also, add angle brackets if none present
1570         if (! ($expansion =~ /<\S+@\S+>/)) {
1571           $expansions{$username} = "$username <$expansion>";
1572         }
1573         else {
1574           $expansions{$username} = "$username $expansion";
1575         }
1576       }
1577       else {
1578         $expansions{$username} = $expansion;
1579       }
1580     }
1581
1582     close (MAPFILE);
1583   }
1584
1585   return %expansions;
1586 }
1587
1588 sub parse_options ()
1589 {
1590   # Check this internally before setting the global variable.
1591   my $output_file;
1592
1593   # If this gets set, we encountered unknown options and will exit at
1594   # the end of this subroutine.
1595   my $exit_with_admonishment = 0;
1596
1597   while (my $arg = shift (@ARGV))
1598   {
1599     if ($arg =~ /^-h$|^-help$|^--help$|^--usage$|^-?$/) {
1600       $Print_Usage = 1;
1601     }
1602     elsif ($arg =~ /^--delta$/) {
1603       my $narg = shift(@ARGV) || die "$arg needs argument.\n";
1604       if ($narg =~ /^([A-Za-z][A-Za-z0-9_\-]*):([A-Za-z][A-Za-z0-9_\-]*)$/) {
1605         $Delta_From = $1;
1606         $Delta_To = $2;
1607         $Delta_Mode = 1;
1608       } else {
1609         die "--delta FROM_TAG:TO_TAG is what you meant to say.\n";
1610       }
1611     }
1612     elsif ($arg =~ /^--debug$/) {        # unadvertised option, heh
1613       $Debug = 1;
1614     }
1615     elsif ($arg =~ /^--version$/) {
1616       $Print_Version = 1;
1617     }
1618     elsif ($arg =~ /^-g$|^--global-opts$/) {
1619       my $narg = shift (@ARGV) || die "$arg needs argument.\n";
1620       # Don't assume CVS is called "cvs" on the user's system:
1621       $Log_Source_Command =~ s/(^\S*)/$1 $narg/;
1622     }
1623     elsif ($arg =~ /^-l$|^--log-opts$/) {
1624       my $narg = shift (@ARGV) || die "$arg needs argument.\n";
1625       $Log_Source_Command .= " $narg";
1626     }
1627     elsif ($arg =~ /^-f$|^--file$/) {
1628       my $narg = shift (@ARGV) || die "$arg needs argument.\n";
1629       $output_file = $narg;
1630     }
1631     elsif ($arg =~ /^--accum$/) {
1632       $Cumulative = 1;
1633     }
1634     elsif ($arg =~ /^--fsf$/) {
1635       $FSF_Style = 1;
1636     }
1637     elsif ($arg =~ /^-U$|^--usermap$/) {
1638       my $narg = shift (@ARGV) || die "$arg needs argument.\n";
1639       $User_Map_File = $narg;
1640     }
1641     elsif ($arg =~ /^-W$|^--window$/) {
1642       defined(my $narg = shift (@ARGV)) || die "$arg needs argument.\n";
1643       $Max_Checkin_Duration = $narg;
1644     }
1645     elsif ($arg =~ /^-I$|^--ignore$/) {
1646       my $narg = shift (@ARGV) || die "$arg needs argument.\n";
1647       push (@Ignore_Files, $narg);
1648     }
1649     elsif ($arg =~ /^-C$|^--case-insensitive$/) {
1650       $Case_Insensitive = 1;
1651     }
1652     elsif ($arg =~ /^-R$|^--regexp$/) {
1653       my $narg = shift (@ARGV) || die "$arg needs argument.\n";
1654       $Regexp_Gate = $narg;
1655     }
1656     elsif ($arg =~ /^--stdout$/) {
1657       $Output_To_Stdout = 1;
1658     }
1659     elsif ($arg =~ /^--version$/) {
1660       $Print_Version = 1;
1661     }
1662     elsif ($arg =~ /^-d$|^--distributed$/) {
1663       $Distributed = 1;
1664     }
1665     elsif ($arg =~ /^-P$|^--prune$/) {
1666       $Prune_Empty_Msgs = 1;
1667     }
1668     elsif ($arg =~ /^-S$|^--separate-header$/) {
1669       $After_Header = "\n\n";
1670     }
1671     elsif ($arg =~ /^--no-wrap$/) {
1672       $No_Wrap = 1;
1673     }
1674     elsif ($arg =~ /^--gmt$|^--utc$/) {
1675       $UTC_Times = 1;
1676     }
1677     elsif ($arg =~ /^-w$|^--day-of-week$/) {
1678       $Show_Day_Of_Week = 1;
1679     }
1680     elsif ($arg =~ /^-r$|^--revisions$/) {
1681       $Show_Revisions = 1;
1682     }
1683     elsif ($arg =~ /^-t$|^--tags$/) {
1684       $Show_Tags = 1;
1685     }
1686     elsif ($arg =~ /^-T$|^--tagdates$/) {
1687       $Show_Tag_Dates = 1;
1688     }
1689     elsif ($arg =~ /^-b$|^--branches$/) {
1690       $Show_Branches = 1;
1691     }
1692     elsif ($arg =~ /^-F$|^--follow$/) {
1693       my $narg = shift (@ARGV) || die "$arg needs argument.\n";
1694       push (@Follow_Branches, $narg);
1695     }
1696     elsif ($arg =~ /^--stdin$/) {
1697       $Input_From_Stdin = 1;
1698     }
1699     elsif ($arg =~ /^--header$/) {
1700       my $narg = shift (@ARGV) || die "$arg needs argument.\n";
1701       $ChangeLog_Header = &slurp_file ($narg);
1702       if (! defined ($ChangeLog_Header)) {
1703         $ChangeLog_Header = "";
1704       }
1705     }
1706     elsif ($arg =~ /^--xml-encoding$/) {
1707       my $narg = shift (@ARGV) || die "$arg needs argument.\n";
1708       $XML_Encoding = $narg ;
1709     }
1710     elsif ($arg =~ /^--xml$/) {
1711       $XML_Output = 1;
1712     }
1713     elsif ($arg =~ /^--hide-filenames$/) {
1714       $Hide_Filenames = 1;
1715       $After_Header = "";
1716     }
1717     elsif ($arg =~ /^--ignore-tag$/ ) {
1718       die "$arg needs argument.\n"
1719         unless @ARGV;
1720       push @ignore_tags, shift @ARGV;
1721     }
1722     else {
1723       # Just add a filename as argument to the log command
1724       $Log_Source_Command .= " '$arg'";
1725     }
1726   }
1727
1728   ## Check for contradictions...
1729
1730   if ($Output_To_Stdout && $Distributed) {
1731     print STDERR "cannot pass both --stdout and --distributed\n";
1732     $exit_with_admonishment = 1;
1733   }
1734
1735   if ($Output_To_Stdout && $output_file) {
1736     print STDERR "cannot pass both --stdout and --file\n";
1737     $exit_with_admonishment = 1;
1738   }
1739
1740   if ($XML_Output && $Cumulative) {
1741     print STDERR "cannot pass both --xml and --accum\n";
1742     $exit_with_admonishment = 1;
1743   }
1744
1745   # Or if any other error message has already been printed out, we
1746   # just leave now:
1747   if ($exit_with_admonishment) {
1748     &usage ();
1749     exit (1);
1750   }
1751   elsif ($Print_Usage) {
1752     &usage ();
1753     exit (0);
1754   }
1755   elsif ($Print_Version) {
1756     &version ();
1757     exit (0);
1758   }
1759
1760   ## Else no problems, so proceed.
1761
1762   if ($output_file) {
1763     $Log_File_Name = $output_file;
1764   }
1765 }
1766
1767 sub slurp_file ()
1768 {
1769   my $filename = shift || die ("no filename passed to slurp_file()");
1770   my $retstr;
1771
1772   open (SLURPEE, "<${filename}") or die ("unable to open $filename ($!)");
1773   my $saved_sep = $/;
1774   undef $/;
1775   $retstr = <SLURPEE>;
1776   $/ = $saved_sep;
1777   close (SLURPEE);
1778   return $retstr;
1779 }
1780
1781 sub debug ()
1782 {
1783   if ($Debug) {
1784     my $msg = shift;
1785     print STDERR $msg;
1786   }
1787 }
1788
1789 sub version ()
1790 {
1791   print "cvs2cl.pl version ${VERSION}; distributed under the GNU GPL.\n";
1792 }
1793
1794 sub usage ()
1795 {
1796   &version ();
1797   print <<'END_OF_INFO';
1798 Generate GNU-style ChangeLogs in CVS working copies.
1799
1800 Notes about the output format(s):
1801
1802    The default output of cvs2cl.pl is designed to be compact, formally
1803    unambiguous, but still easy for humans to read.  It is largely
1804    self-explanatory, I hope; the one abbreviation that might not be
1805    obvious is "utags".  That stands for "universal tags" -- a
1806    universal tag is one held by all the files in a given change entry.
1807
1808    If you need output that's easy for a program to parse, use the
1809    --xml option.  Note that with XML output, just about all available
1810    information is included with each change entry, whether you asked
1811    for it or not, on the theory that your parser can ignore anything
1812    it's not looking for.
1813
1814 Notes about the options and arguments (the actual options are listed
1815 last in this usage message):
1816
1817   * The -I and -F options may appear multiple times.
1818
1819   * To follow trunk revisions, use "-F trunk" ("-F TRUNK" also works).
1820     This is okay because no would ever, ever be crazy enough to name a
1821     branch "trunk", right?  Right.
1822
1823   * For the -U option, the UFILE should be formatted like
1824     CVSROOT/users. That is, each line of UFILE looks like this
1825        jrandom:jrandom@red-bean.com
1826     or maybe even like this
1827        jrandom:'Jesse Q. Random <jrandom@red-bean.com>'
1828     Don't forget to quote the portion after the colon if necessary.
1829
1830   * Many people want to filter by date.  To do so, invoke cvs2cl.pl
1831     like this:
1832        cvs2cl.pl -l "-d'DATESPEC'"
1833     where DATESPEC is any date specification valid for "cvs log -d".
1834     (Note that CVS 1.10.7 and below requires there be no space between
1835     -d and its argument).
1836
1837 Options/Arguments:
1838
1839   -h, -help, --help, or -?     Show this usage and exit
1840   --version                    Show version and exit
1841   -r, --revisions              Show revision numbers in output
1842   -b, --branches               Show branch names in revisions when possible
1843   -t, --tags                   Show tags (symbolic names) in output
1844   -T, --tagdates               Show tags in output on their first occurance
1845   --stdin                      Read from stdin, don't run cvs log
1846   --stdout                     Output to stdout not to ChangeLog
1847   -d, --distributed            Put ChangeLogs in subdirs
1848   -f FILE, --file FILE         Write to FILE instead of "ChangeLog"
1849   --fsf                        Use this if log data is in FSF ChangeLog style
1850   -W SECS, --window SECS       Window of time within which log entries unify
1851   -U UFILE, --usermap UFILE    Expand usernames to email addresses from UFILE
1852   -R REGEXP, --regexp REGEXP   Include only entries that match REGEXP
1853   -I REGEXP, --ignore REGEXP   Ignore files whose names match REGEXP
1854   -C, --case-insensitive       Any regexp matching is done case-insensitively
1855   -F BRANCH, --follow BRANCH   Show only revisions on or ancestral to BRANCH
1856   -S, --separate-header        Blank line between each header and log message
1857   --no-wrap                    Don't auto-wrap log message (recommend -S also)
1858   --gmt, --utc                 Show times in GMT/UTC instead of local time
1859   --accum                      Add to an existing ChangeLog (incompat w/ --xml)
1860   -w, --day-of-week            Show day of week
1861   --header FILE                Get ChangeLog header from FILE ("-" means stdin)
1862   --xml                        Output XML instead of ChangeLog format
1863   --xml-encoding ENCODING      Insert encoding clause in XML header
1864   --hide-filenames             Don't show filenames (ignored for XML output)
1865   -P, --prune                  Don't show empty log messages
1866   -g OPTS, --global-opts OPTS  Invoke like this "cvs OPTS log ..."
1867   -l OPTS, --log-opts OPTS     Invoke like this "cvs ... log OPTS"
1868   FILE1 [FILE2 ...]            Show only log information for the named FILE(s)
1869
1870 See http://www.red-bean.com/cvs2cl for maintenance and bug info.
1871 END_OF_INFO
1872 }
1873
1874 __END__
1875
1876 =head1 NAME
1877
1878 cvs2cl.pl - produces GNU-style ChangeLogs in CVS working copies, by
1879     running "cvs log" and parsing the output.  Shared log entries are
1880     unified in an intuitive way.
1881
1882 =head1 DESCRIPTION
1883
1884 This script generates GNU-style ChangeLog files from CVS log
1885 information.  Basic usage: just run it inside a working copy and a
1886 ChangeLog will appear.  It requires repository access (i.e., 'cvs log'
1887 must work).  Run "cvs2cl.pl --help" to see more advanced options.
1888
1889 See http://www.red-bean.com/cvs2cl for updates, and for instructions
1890 on getting anonymous CVS access to this script.
1891
1892 Maintainer: Karl Fogel <kfogel@red-bean.com>
1893 Please report bugs to <bug-cvs2cl@red-bean.com>.
1894
1895 =head1 README
1896
1897 This script generates GNU-style ChangeLog files from CVS log
1898 information.  Basic usage: just run it inside a working copy and a
1899 ChangeLog will appear.  It requires repository access (i.e., 'cvs log'
1900 must work).  Run "cvs2cl.pl --help" to see more advanced options.
1901
1902 See http://www.red-bean.com/cvs2cl for updates, and for instructions
1903 on getting anonymous CVS access to this script.
1904
1905 Maintainer: Karl Fogel <kfogel@red-bean.com>
1906 Please report bugs to <bug-cvs2cl@red-bean.com>.
1907
1908 =head1 PREREQUISITES
1909
1910 This script requires C<Text::Wrap>, C<Time::Local>, and
1911 C<File::Basename>.
1912 It also seems to require C<Perl 5.004_04> or higher.
1913
1914 =pod OSNAMES
1915
1916 any
1917
1918 =pod SCRIPT CATEGORIES
1919
1920 Version_Control/CVS
1921
1922 =cut
1923
1924 -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*-
1925
1926 Note about a bug-slash-opportunity:
1927 -----------------------------------
1928
1929 There's a bug in Text::Wrap, which affects cvs2cl.  This script
1930 reveals it:
1931
1932   #!/usr/bin/perl -w
1933
1934   use Text::Wrap;
1935
1936   my $test_text =
1937   "This script demonstrates a bug in Text::Wrap.  The very long line
1938   following this paragraph will be relocated relative to the surrounding
1939   text:
1940
1941   ====================================================================
1942
1943   See?  When the bug happens, we'll get the line of equal signs below
1944   this paragraph, even though it should be above.";
1945
1946   # Print out the test text with no wrapping:
1947   print "$test_text";
1948   print "\n";
1949   print "\n";
1950
1951   # Now print it out wrapped, and see the bug:
1952   print wrap ("\t", "        ", "$test_text");
1953   print "\n";
1954   print "\n";
1955
1956 If the line of equal signs were one shorter, then the bug doesn't
1957 happen.  Interesting.
1958
1959 Anyway, rather than fix this in Text::Wrap, we might as well write a
1960 new wrap() which has the following much-needed features:
1961
1962 * initial indentation, like current Text::Wrap()
1963 * subsequent line indentation, like current Text::Wrap()
1964 * user chooses among: force-break long words, leave them alone, or die()?
1965 * preserve existing indentation: chopped chunks from an indented line
1966   are indented by same (like this line, not counting the asterisk!)
1967 * optional list of things to preserve on line starts, default ">"
1968
1969 Note that the last two are essentially the same concept, so unify in
1970 implementation and give a good interface to controlling them.
1971
1972 And how about:
1973
1974 Optionally, when encounter a line pre-indented by same as previous
1975 line, then strip the newline and refill, but indent by the same.
1976 Yeah...
1977