Fixed binary search: no more infinite loops when vendor is unknown.
[tangerine.git] / scripts / nightly / cvs2cl.pl
blob6c005c6f896071872b6bf04971a2d3528faa79a4
1 #!/bin/sh
2 exec perl -w -x $0 ${1+"$@"} # -*- mode: perl; perl-indent-level: 2; -*-
3 #!perl -w
6 ##############################################################
7 ### ###
8 ### cvs2cl.pl: produce ChangeLog(s) from `cvs log` output. ###
9 ### ###
10 ##############################################################
12 ## $Revision$
13 ## $Date$
14 ## $Author$
17 use strict;
19 use File::Basename qw( fileparse );
20 use Getopt::Long qw( GetOptions );
21 use Text::Wrap qw( );
22 use Time::Local qw( timegm );
23 use User::pwent qw( getpwnam );
25 # The Plan:
27 # Read in the logs for multiple files, spit out a nice ChangeLog that
28 # mirrors the information entered during `cvs commit'.
30 # The problem presents some challenges. In an ideal world, we could
31 # detect files with the same author, log message, and checkin time --
32 # each <filelist, author, time, logmessage> would be a changelog entry.
33 # We'd sort them; and spit them out. Unfortunately, CVS is *not atomic*
34 # so checkins can span a range of times. Also, the directory structure
35 # could be hierarchical.
37 # Another question is whether we really want to have the ChangeLog
38 # exactly reflect commits. An author could issue two related commits,
39 # with different log entries, reflecting a single logical change to the
40 # source. GNU style ChangeLogs group these under a single author/date.
41 # We try to do the same.
43 # So, we parse the output of `cvs log', storing log messages in a
44 # multilevel hash that stores the mapping:
45 # directory => author => time => message => filelist
46 # As we go, we notice "nearby" commit times and store them together
47 # (i.e., under the same timestamp), so they appear in the same log
48 # entry.
50 # When we've read all the logs, we twist this mapping into
51 # a time => author => message => filelist mapping for each directory.
53 # If we're not using the `--distributed' flag, the directory is always
54 # considered to be `./', even as descend into subdirectories.
56 # Call Tree
58 # name number of lines (10.xii.03)
59 # parse_options 192
60 # derive_changelog 13
61 # +-maybe_grab_accumulation_date 38
62 # +-read_changelog 277
63 # +-maybe_read_user_map_file 94
64 # +-run_ext 9
65 # +-read_file_path 29
66 # +-read_symbolic_name 43
67 # +-read_revision 49
68 # +-read_date_author_and_state 25
69 # +-parse_date_author_and_state 20
70 # +-read_branches 36
71 # +-output_changelog 424
72 # +-pretty_file_list 290
73 # +-common_path_prefix 35
74 # +-preprocess_msg_text 30
75 # +-min 1
76 # +-mywrap 16
77 # +-last_line_len 5
78 # +-wrap_log_entry 177
80 # Utilities
82 # xml_escape 6
83 # slurp_file 11
84 # debug 5
85 # version 2
86 # usage 142
88 # -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*-
90 # Note about a bug-slash-opportunity:
91 # -----------------------------------
93 # There's a bug in Text::Wrap, which affects cvs2cl. This script
94 # reveals it:
96 # #!/usr/bin/perl -w
98 # use Text::Wrap;
100 # my $test_text =
101 # "This script demonstrates a bug in Text::Wrap. The very long line
102 # following this paragraph will be relocated relative to the surrounding
103 # text:
105 # ====================================================================
107 # See? When the bug happens, we'll get the line of equal signs below
108 # this paragraph, even though it should be above.";
111 # # Print out the test text with no wrapping:
112 # print "$test_text";
113 # print "\n";
114 # print "\n";
116 # # Now print it out wrapped, and see the bug:
117 # print wrap ("\t", " ", "$test_text");
118 # print "\n";
119 # print "\n";
121 # If the line of equal signs were one shorter, then the bug doesn't
122 # happen. Interesting.
124 # Anyway, rather than fix this in Text::Wrap, we might as well write a
125 # new wrap() which has the following much-needed features:
127 # * initial indentation, like current Text::Wrap()
128 # * subsequent line indentation, like current Text::Wrap()
129 # * user chooses among: force-break long words, leave them alone, or die()?
130 # * preserve existing indentation: chopped chunks from an indented line
131 # are indented by same (like this line, not counting the asterisk!)
132 # * optional list of things to preserve on line starts, default ">"
134 # Note that the last two are essentially the same concept, so unify in
135 # implementation and give a good interface to controlling them.
137 # And how about:
139 # Optionally, when encounter a line pre-indented by same as previous
140 # line, then strip the newline and refill, but indent by the same.
141 # Yeah...
143 # Globals --------------------------------------------------------------------
145 use constant MAILNAME => "/etc/mailname";
147 # In case we have to print it out:
148 my $VERSION = '$Revision$';
149 $VERSION =~ s/\S+\s+(\S+)\s+\S+/$1/;
151 ## Vars set by options:
153 # Print debugging messages?
154 my $Debug = 0;
156 # Just show version and exit?
157 my $Print_Version = 0;
159 # Just print usage message and exit?
160 my $Print_Usage = 0;
162 # What file should we generate (defaults to "ChangeLog")?
163 my $Log_File_Name = "ChangeLog";
165 # Grab most recent entry date from existing ChangeLog file, just add
166 # to that ChangeLog.
167 my $Cumulative = 0;
169 # `cvs log -d`, this will repeat the last entry in the old log. This is OK,
170 # as it guarantees at least one entry in the update changelog, which means
171 # that there will always be a date to extract for the next update. The repeat
172 # entry can be removed in postprocessing, if necessary.
174 # MJP 2003-08-02
175 # I don't think this actually does anything useful
176 my $Update = 0;
178 # Expand usernames to email addresses based on a map file?
179 my $User_Map_File = '';
180 my $User_Passwd_File;
181 my $Mail_Domain;
183 # Output log in chronological order? [default is reverse chronological order]
184 my $Chronological_Order = 0;
186 # Grab user details via gecos
187 my $Gecos = 0;
189 # User domain for gecos email addresses
190 my $Domain;
192 # Output to a file or to stdout?
193 my $Output_To_Stdout = 0;
195 # Eliminate empty log messages?
196 my $Prune_Empty_Msgs = 0;
198 # Tags of which not to output
199 my %ignore_tags;
201 # Show only revisions with Tags
202 my %show_tags;
204 # Don't call Text::Wrap on the body of the message
205 my $No_Wrap = 0;
207 # Indentation of log messages
208 my $Indent = "\t";
210 # Don't do any pretty print processing
211 my $Summary = 0;
213 # Separates header from log message. Code assumes it is either " " or
214 # "\n\n", so if there's ever an option to set it to something else,
215 # make sure to go through all conditionals that use this var.
216 my $After_Header = " ";
218 # XML Encoding
219 my $XML_Encoding = '';
221 # Format more for programs than for humans.
222 my $XML_Output = 0;
223 my $No_XML_Namespace = 0;
224 my $No_XML_ISO_Date = 0;
226 # Do some special tweaks for log data that was written in FSF
227 # ChangeLog style.
228 my $FSF_Style = 0;
230 # Show times in UTC instead of local time
231 my $UTC_Times = 0;
233 # Show times in output?
234 my $Show_Times = 1;
236 # Show day of week in output?
237 my $Show_Day_Of_Week = 0;
239 # Show revision numbers in output?
240 my $Show_Revisions = 0;
242 # Show dead files in output?
243 my $Show_Dead = 0;
245 # Hide dead trunk files which were created as a result of additions on a
246 # branch?
247 my $Hide_Branch_Additions = 1;
249 # Show tags (symbolic names) in output?
250 my $Show_Tags = 0;
252 # Show tags separately in output?
253 my $Show_Tag_Dates = 0;
255 # Show branches by symbolic name in output?
256 my $Show_Branches = 0;
258 # Show only revisions on these branches or their ancestors.
259 my @Follow_Branches;
261 # Don't bother with files matching this regexp.
262 my @Ignore_Files;
264 # How exactly we match entries. We definitely want "o",
265 # and user might add "i" by using --case-insensitive option.
266 my $Case_Insensitive = 0;
268 # Maybe only show log messages matching a certain regular expression.
269 my $Regexp_Gate = '';
271 # Pass this global option string along to cvs, to the left of `log':
272 my $Global_Opts = '';
274 # Pass this option string along to the cvs log subcommand:
275 my $Command_Opts = '';
277 # Read log output from stdin instead of invoking cvs log?
278 my $Input_From_Stdin = 0;
280 # Don't show filenames in output.
281 my $Hide_Filenames = 0;
283 # Don't shorten directory names from filenames.
284 my $Common_Dir = 1;
286 # Max checkin duration. CVS checkin is not atomic, so we may have checkin
287 # times that span a range of time. We assume that checkins will last no
288 # longer than $Max_Checkin_Duration seconds, and that similarly, no
289 # checkins will happen from the same users with the same message less
290 # than $Max_Checkin_Duration seconds apart.
291 my $Max_Checkin_Duration = 180;
293 # What to put at the front of [each] ChangeLog.
294 my $ChangeLog_Header = '';
296 # Whether to enable 'delta' mode, and for what start/end tags.
297 my $Delta_Mode = 0;
298 my $Delta_From = '';
299 my $Delta_To = '';
301 my $TestCode;
303 # Whether to parse filenames from the RCS filename, and if so what
304 # prefix to strip.
305 my $RCS_Root;
307 ## end vars set by options.
309 # latest observed times for the start/end tags in delta mode
310 my $Delta_StartTime = 0;
311 my $Delta_EndTime = 0;
313 # In 'cvs log' output, one long unbroken line of equal signs separates
314 # files:
315 my $file_separator = "======================================="
316 . "======================================";
318 # In 'cvs log' output, a shorter line of dashes separates log messages
319 # within a file:
320 my $logmsg_separator = "----------------------------";
322 my $No_Ancestors = 0;
324 my $No_Extra_Indent = 0;
326 my $GroupWithinDate = 0;
328 # ----------------------------------------------------------------------------
330 package CVS::Utils::ChangeLog::EntrySet;
332 sub new {
333 my $class = shift;
334 my %self;
335 bless \%self, $class;
338 # -------------------------------------
340 sub output_changelog {
341 my $output_type = $XML_Output ? 'XML' : 'Text';
342 my $output_class = "CVS::Utils::ChangeLog::EntrySet::Output::${output_type}";
343 $output_class->new->output_changelog(@_);
346 # ----------------------------------------------------------------------------
348 package CVS::Utils::ChangeLog::EntrySet::Output::Text;
350 use base qw( CVS::Utils::ChangeLog::EntrySet::Output );
352 use File::Basename qw( fileparse );
354 sub new {
355 my $class = shift;
356 bless \(my($ self)), $class;
359 # -------------------------------------
361 sub wday {
362 my $self = shift; my $class = ref $self;
363 my ($wday) = @_;
365 return $Show_Day_Of_Week ? ' ' . $class->weekday_en($wday) : '';
368 # -------------------------------------
370 sub header_line {
371 my $self = shift;
372 my ($time, $author, $lastdate) = @_;
374 my $header_line = '';
376 my (undef,$min,$hour,$mday,$mon,$year,$wday)
377 = $UTC_Times ? gmtime($time) : localtime($time);
379 my $date = $self->fdatetime($time);
381 if ($Show_Times) {
382 $header_line =
383 sprintf "%s %s\n\n", $date, $author;
384 } else {
385 if ( ! defined $lastdate or $date ne $lastdate or ! $GroupWithinDate ) {
386 if ( $GroupWithinDate ) {
387 $header_line = "$date\n\n";
388 } else {
389 $header_line = "$date $author\n\n";
391 } else {
392 $header_line = '';
397 # -------------------------------------
399 sub preprocess_msg_text {
400 my $self = shift;
401 my ($text) = @_;
403 $text = $self->SUPER::preprocess_msg_text($text);
405 unless ( $No_Wrap ) {
406 # Strip off lone newlines, but only for lines that don't begin with
407 # whitespace or a mail-quoting character, since we want to preserve
408 # that kind of formatting. Also don't strip newlines that follow a
409 # period; we handle those specially next. And don't strip
410 # newlines that precede an open paren.
411 1 while $text =~ s/(^|\n)([^>\s].*[^.\n])\n([^>\n])/$1$2 $3/g;
413 # If a newline follows a period, make sure that when we bring up the
414 # bottom sentence, it begins with two spaces.
415 1 while $text =~ s/(^|\n)([^>\s].*)\n([^>\n])/$1$2 $3/g;
418 return $text;
421 # -------------------------------------
423 # Here we take a bunch of qunks and convert them into printed
424 # summary that will include all the information the user asked for.
425 sub pretty_file_list {
426 my $self = shift;
428 return ''
429 if $Hide_Filenames;
431 my $qunksref = shift;
433 my @filenames;
434 my $beauty = ''; # The accumulating header string for this entry.
435 my %non_unanimous_tags; # Tags found in a proper subset of qunks
436 my %unanimous_tags; # Tags found in all qunks
437 my %all_branches; # Branches found in any qunk
438 my $fbegun = 0; # Did we begin printing filenames yet?
440 my ($common_dir, $qunkrefs) =
441 $self->_pretty_file_list(\(%unanimous_tags, %non_unanimous_tags, %all_branches), $qunksref);
443 my @qunkrefs = @$qunkrefs;
445 # Not XML output, so complexly compactify for chordate consumption. At this
446 # point we have enough global information about all the qunks to organize
447 # them non-redundantly for output.
449 if ($common_dir) {
450 # Note that $common_dir still has its trailing slash
451 $beauty .= "$common_dir: ";
454 if ($Show_Branches)
456 # For trailing revision numbers.
457 my @brevisions;
459 foreach my $branch (keys (%all_branches))
461 foreach my $qunkref (@qunkrefs)
463 if ((defined ($qunkref->branch))
464 and ($qunkref->branch eq $branch))
466 if ($fbegun) {
467 # kff todo: comma-delimited in XML too? Sure.
468 $beauty .= ", ";
470 else {
471 $fbegun = 1;
473 my $fname = substr ($qunkref->filename, length ($common_dir));
474 $beauty .= $fname;
475 $qunkref->{'printed'} = 1; # Just setting a mark bit, basically
477 if ( $Show_Tags and defined $qunkref->tags ) {
478 my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags});
480 if (@tags) {
481 $beauty .= " (tags: ";
482 $beauty .= join (', ', @tags);
483 $beauty .= ")";
487 if ($Show_Revisions) {
488 # Collect the revision numbers' last components, but don't
489 # print them -- they'll get printed with the branch name
490 # later.
491 $qunkref->revision =~ /.+\.([\d]+)$/;
492 push (@brevisions, $1);
494 # todo: we're still collecting branch roots, but we're not
495 # showing them anywhere. If we do show them, it would be
496 # nifty to just call them revision "0" on a the branch.
497 # Yeah, that's the ticket.
501 $beauty .= " ($branch";
502 if (@brevisions) {
503 if ((scalar (@brevisions)) > 1) {
504 $beauty .= ".[";
505 $beauty .= (join (',', @brevisions));
506 $beauty .= "]";
508 else {
509 # Square brackets are spurious here, since there's no range to
510 # encapsulate
511 $beauty .= ".$brevisions[0]";
514 $beauty .= ")";
518 # Okay; any qunks that were done according to branch are taken care
519 # of, and marked as printed. Now print everyone else.
521 my %fileinfo_printed;
522 foreach my $qunkref (@qunkrefs)
524 next if (defined ($qunkref->{'printed'})); # skip if already printed
526 my $b = substr ($qunkref->filename, length ($common_dir));
527 # todo: Shlomo's change was this:
528 # $beauty .= substr ($qunkref->filename,
529 # (($common_dir eq "./") ? '' : length ($common_dir)));
530 $qunkref->{'printed'} = 1; # Set a mark bit.
532 if ($Show_Revisions || $Show_Tags || $Show_Dead)
534 my $started_addendum = 0;
536 if ($Show_Revisions) {
537 $started_addendum = 1;
538 $b .= " (";
539 $b .= $qunkref->revision;
541 if ($Show_Dead && $qunkref->state =~ /dead/)
543 # Deliberately not using $started_addendum. Keeping it simple.
544 $b .= "[DEAD]";
546 if ($Show_Tags && (defined $qunkref->tags)) {
547 my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags});
548 if ((scalar (@tags)) > 0) {
549 if ($started_addendum) {
550 $b .= ", ";
552 else {
553 $b .= " (tags: ";
555 $b .= join (', ', @tags);
556 $started_addendum = 1;
559 if ($started_addendum) {
560 $b .= ")";
564 unless ( exists $fileinfo_printed{$b} ) {
565 if ($fbegun) {
566 $beauty .= ", ";
567 } else {
568 $fbegun = 1;
570 $beauty .= $b, $fileinfo_printed{$b} = 1;
574 # Unanimous tags always come last.
575 if ($Show_Tags && %unanimous_tags)
577 $beauty .= " (utags: ";
578 $beauty .= join (', ', sort keys (%unanimous_tags));
579 $beauty .= ")";
582 # todo: still have to take care of branch_roots?
584 $beauty = "$beauty:";
586 return $beauty;
589 # -------------------------------------
591 sub output_tagdate {
592 my $self = shift;
593 my ($fh, $time, $tag) = @_;
595 my $fdatetime = $self->fdatetime($time);
596 print $fh "$fdatetime tag $tag\n\n";
597 return;
600 # -------------------------------------
602 sub format_body {
603 my $self = shift;
604 my ($msg, $files, $qunklist) = @_;
606 my $body;
608 if ( $No_Wrap and ! $Summary ) {
609 $msg = $self->preprocess_msg_text($msg);
610 $files = $self->mywrap("\t", "\t ", "* $files");
611 $msg =~ s/\n(.+)/\n$Indent$1/g;
612 unless ($After_Header eq " ") {
613 $msg =~ s/^(.+)/$Indent$1/g;
615 if ( $Hide_Filenames ) {
616 $body = $After_Header . $msg;
617 } else {
618 $body = $files . $After_Header . $msg;
620 } elsif ( $Summary ) {
621 my ($filelist, $qunk);
622 my (@DeletedQunks, @AddedQunks, @ChangedQunks);
624 $msg = $self->preprocess_msg_text($msg);
626 # Sort the files (qunks) according to the operation that was
627 # performed. Files which were added have no line change
628 # indicator, whereas deleted files have state dead.
630 foreach $qunk ( @$qunklist ) {
631 if ( "dead" eq $qunk->state) {
632 push @DeletedQunks, $qunk;
633 } elsif ( ! defined $qunk->lines ) {
634 push @AddedQunks, $qunk;
635 } else {
636 push @ChangedQunks, $qunk;
640 # The qunks list was originally in tree search order. Let's
641 # get that back. The lists, if they exist, will be reversed upon
642 # processing.
646 # Now write the three sections onto $filelist
648 if ( @DeletedQunks ) {
649 $filelist .= "\tDeleted:\n";
650 foreach $qunk ( @DeletedQunks ) {
651 $filelist .= "\t\t" . $qunk->filename;
652 $filelist .= " (" . $qunk->revision . ")";
653 $filelist .= "\n";
655 undef @DeletedQunks;
658 if ( @AddedQunks ) {
659 $filelist .= "\tAdded:\n";
660 foreach $qunk (@AddedQunks) {
661 $filelist .= "\t\t" . $qunk->filename;
662 $filelist .= " (" . $qunk->revision . ")";
663 $filelist .= "\n";
665 undef @AddedQunks ;
668 if ( @ChangedQunks ) {
669 $filelist .= "\tChanged:\n";
670 foreach $qunk (@ChangedQunks) {
671 $filelist .= "\t\t" . $qunk->filename;
672 $filelist .= " (" . $qunk->revision . ")";
673 $filelist .= ", \"" . $qunk->state . "\"";
674 $filelist .= ", lines: " . $qunk->lines;
675 $filelist .= "\n";
677 undef @ChangedQunks;
680 chomp $filelist;
682 if ( $Hide_Filenames ) {
683 $filelist = '';
686 $msg =~ s/\n(.*)/\n$Indent$1/g;
687 unless ( $After_Header eq " " or $FSF_Style ) {
688 $msg =~ s/^(.*)/$Indent$1/g;
691 unless ( $No_Wrap ) {
692 if ( $FSF_Style ) {
693 $msg = $self->wrap_log_entry($msg, '', 69, 69);
694 chomp($msg);
695 chomp($msg);
696 } else {
697 $msg = $self->mywrap('', $Indent, "$msg");
698 $msg =~ s/[ \t]+\n/\n/g;
702 $body = $filelist . $After_Header . $msg;
703 } else { # do wrapping, either FSF-style or regular
704 my $latter_wrap = $No_Extra_Indent ? $Indent : "$Indent ";
706 if ( $FSF_Style ) {
707 $files = $self->mywrap($Indent, $latter_wrap, "* $files");
709 my $files_last_line_len = 0;
710 if ( $After_Header eq " " ) {
711 $files_last_line_len = $self->last_line_len($files);
712 $files_last_line_len += 1; # for $After_Header
715 $msg = $self->wrap_log_entry($msg, $latter_wrap, 69-$files_last_line_len, 69);
716 $body = $files . $After_Header . $msg;
717 } else { # not FSF-style
718 $msg = $self->preprocess_msg_text($msg);
719 $body = $files . $After_Header . $msg;
720 $body = $self->mywrap($Indent, $latter_wrap, "* $body");
721 $body =~ s/[ \t]+\n/\n/g;
725 return $body;
728 # ----------------------------------------------------------------------------
730 package CVS::Utils::ChangeLog::EntrySet::Output::XML;
732 use base qw( CVS::Utils::ChangeLog::EntrySet::Output );
734 use File::Basename qw( fileparse );
736 sub new {
737 my $class = shift;
738 bless \(my($ self)), $class;
741 # -------------------------------------
743 sub header_line {
744 my $self = shift;
745 my ($time, $author, $lastdate) = @_;
747 my $header_line = '';
749 my $isoDate;
751 my ($y, $m, $d, $H, $M, $S) = (gmtime($time))[5,4,3,2,1,0];
753 # Ideally, this would honor $UTC_Times and use +HH:MM syntax
754 $isoDate = sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",
755 $y + 1900, $m + 1, $d, $H, $M, $S);
757 my (undef,$min,$hour,$mday,$mon,$year,$wday)
758 = $UTC_Times ? gmtime($time) : localtime($time);
760 my $date = $self->fdatetime($time);
761 $wday = $self->wday($wday);
763 $header_line =
764 sprintf ("<date>%4u-%02u-%02u</date>\n${wday}<time>%02u:%02u</time>\n",
765 $year+1900, $mon+1, $mday, $hour, $min);
766 $header_line .= "<isoDate>$isoDate</isoDate>\n"
767 unless $No_XML_ISO_Date;
768 $header_line .= sprintf("<author>%s</author>\n" , $author);
771 # -------------------------------------
773 sub wday {
774 my $self = shift; my $class = ref $self;
775 my ($wday) = @_;
777 return '<weekday>' . $class->weekday_en($wday) . "</weekday>\n";
780 # -------------------------------------
782 sub escape {
783 my $self = shift;
785 my $txt = shift;
786 $txt =~ s/&/&amp;/g;
787 $txt =~ s/</&lt;/g;
788 $txt =~ s/>/&gt;/g;
789 return $txt;
792 # -------------------------------------
794 sub output_header {
795 my $self = shift;
796 my ($fh) = @_;
798 my $encoding =
799 length $XML_Encoding ? qq'encoding="$XML_Encoding"' : '';
800 my $version = 'version="1.0"';
801 my $declaration =
802 sprintf '<?xml %s?>', join ' ', grep length, $version, $encoding;
803 my $root =
804 $No_XML_Namespace ?
805 '<changelog>' :
806 '<changelog xmlns="http://www.red-bean.com/xmlns/cvs2cl/">';
807 print $fh "$declaration\n\n$root\n\n";
810 # -------------------------------------
812 sub output_footer {
813 my $self = shift;
814 my ($fh) = @_;
816 print $fh "</changelog>\n";
819 # -------------------------------------
821 sub preprocess_msg_text {
822 my $self = shift;
823 my ($text) = @_;
825 $text = $self->SUPER::preprocess_msg_text($text);
827 $text = $self->escape($text);
828 chomp $text;
829 $text = "<msg>${text}</msg>\n";
831 return $text;
834 # -------------------------------------
836 # Here we take a bunch of qunks and convert them into printed
837 # summary that will include all the information the user asked for.
838 sub pretty_file_list {
839 my $self = shift;
840 my ($qunksref) = @_;
842 my $beauty = ''; # The accumulating header string for this entry.
843 my %non_unanimous_tags; # Tags found in a proper subset of qunks
844 my %unanimous_tags; # Tags found in all qunks
845 my %all_branches; # Branches found in any qunk
846 my $fbegun = 0; # Did we begin printing filenames yet?
848 my ($common_dir, $qunkrefs) =
849 $self->_pretty_file_list(\(%unanimous_tags, %non_unanimous_tags, %all_branches),
850 $qunksref);
852 my @qunkrefs = @$qunkrefs;
854 # If outputting XML, then our task is pretty simple, because we
855 # don't have to detect common dir, common tags, branch prefixing,
856 # etc. We just output exactly what we have, and don't worry about
857 # redundancy or readability.
859 foreach my $qunkref (@qunkrefs)
861 my $filename = $qunkref->filename;
862 my $state = $qunkref->state;
863 my $revision = $qunkref->revision;
864 my $tags = $qunkref->tags;
865 my $branch = $qunkref->branch;
866 my $branchroots = $qunkref->roots;
868 $filename = $self->escape($filename); # probably paranoia
869 $revision = $self->escape($revision); # definitely paranoia
871 $beauty .= "<file>\n";
872 $beauty .= "<name>${filename}</name>\n";
873 $beauty .= "<cvsstate>${state}</cvsstate>\n";
874 $beauty .= "<revision>${revision}</revision>\n";
875 if ($branch) {
876 $branch = $self->escape($branch); # more paranoia
877 $beauty .= "<branch>${branch}</branch>\n";
879 foreach my $tag (@$tags) {
880 $tag = $self->escape($tag); # by now you're used to the paranoia
881 $beauty .= "<tag>${tag}</tag>\n";
883 foreach my $root (@$branchroots) {
884 $root = $self->escape($root); # which is good, because it will continue
885 $beauty .= "<branchroot>${root}</branchroot>\n";
887 $beauty .= "</file>\n";
890 # Theoretically, we could go home now. But as long as we're here,
891 # let's print out the common_dir and utags, as a convenience to
892 # the receiver (after all, earlier code calculated that stuff
893 # anyway, so we might as well take advantage of it).
895 if ((scalar (keys (%unanimous_tags))) > 1) {
896 foreach my $utag ((keys (%unanimous_tags))) {
897 $utag = $self->escape($utag); # the usual paranoia
898 $beauty .= "<utag>${utag}</utag>\n";
901 if ($common_dir) {
902 $common_dir = $self->escape($common_dir);
903 $beauty .= "<commondir>${common_dir}</commondir>\n";
906 # That's enough for XML, time to go home:
907 return $beauty;
910 # -------------------------------------
912 sub output_tagdate {
913 # NOT YET DONE
916 # -------------------------------------
918 sub output_entry {
919 my $self = shift;
920 my ($fh, $entry) = @_;
921 print $fh "<entry>\n$entry</entry>\n\n";
924 # -------------------------------------
926 sub format_body {
927 my $self = shift;
928 my ($msg, $files, $qunklist) = @_;
930 $msg = $self->preprocess_msg_text($msg);
931 return $files . $msg;
934 # ----------------------------------------------------------------------------
936 package CVS::Utils::ChangeLog::EntrySet::Output;
938 use Carp qw( croak );
939 use File::Basename qw( fileparse );
941 # Class Utility Functions -------------
943 { # form closure
945 my @weekdays = (qw(Sunday Monday Tuesday Wednesday Thursday Friday Saturday));
946 sub weekday_en {
947 my $class = shift;
948 return $weekdays[$_[0]];
953 # Abstract Subrs ----------------------
955 sub wday { croak "Whoops. Abtract method call (wday).\n" }
956 sub pretty_file_list { croak "Whoops. Abtract method call (pretty_file_list).\n" }
957 sub output_tagdate { croak "Whoops. Abtract method call (output_tagdate).\n" }
958 sub header_line { croak "Whoops. Abtract method call (header_line).\n" }
960 # Instance Subrs ----------------------
962 sub output_header { }
964 # -------------------------------------
966 sub output_entry {
967 my $self = shift;
968 my ($fh, $entry) = @_;
969 print $fh "$entry\n";
972 # -------------------------------------
974 sub output_footer { }
976 # -------------------------------------
978 sub escape { return $_[1] }
980 # -------------------------------------
982 sub output_changelog {
983 my $self = shift; my $class = ref $self;
984 my ($grand_poobah) = @_;
985 ### Process each ChangeLog
987 while (my ($dir,$authorhash) = each %$grand_poobah)
989 &main::debug ("DOING DIR: $dir\n");
991 # Here we twist our hash around, from being
992 # author => time => message => filelist
993 # in %$authorhash to
994 # time => author => message => filelist
995 # in %changelog.
997 # This is also where we merge entries. The algorithm proceeds
998 # through the timeline of the changelog with a sliding window of
999 # $Max_Checkin_Duration seconds; within that window, entries that
1000 # have the same log message are merged.
1002 # (To save space, we zap %$authorhash after we've copied
1003 # everything out of it.)
1005 my %changelog;
1006 while (my ($author,$timehash) = each %$authorhash)
1008 my %stamptime;
1009 foreach my $time (sort {$a <=> $b} (keys %$timehash))
1011 my $msghash = $timehash->{$time};
1012 while (my ($msg,$qunklist) = each %$msghash)
1014 my $stamptime = $stamptime{$msg};
1015 if ((defined $stamptime)
1016 and (($time - $stamptime) < $Max_Checkin_Duration)
1017 and (defined $changelog{$stamptime}{$author}{$msg}))
1019 push(@{$changelog{$stamptime}{$author}{$msg}}, $qunklist->files);
1021 else {
1022 $changelog{$time}{$author}{$msg} = $qunklist->files;
1023 $stamptime{$msg} = $time;
1028 undef (%$authorhash);
1030 ### Now we can write out the ChangeLog!
1032 my ($logfile_here, $logfile_bak, $tmpfile);
1033 my $lastdate;
1035 if (! $Output_To_Stdout) {
1036 $logfile_here = $dir . $Log_File_Name;
1037 $logfile_here =~ s/^\.\/\//\//; # fix any leading ".//" problem
1038 $tmpfile = "${logfile_here}.cvs2cl$$.tmp";
1039 $logfile_bak = "${logfile_here}.bak";
1041 open (LOG_OUT, ">$tmpfile") or die "Unable to open \"$tmpfile\"";
1043 else {
1044 open (LOG_OUT, ">-") or die "Unable to open stdout for writing";
1047 print LOG_OUT $ChangeLog_Header;
1049 my %tag_date_printed;
1051 $self->output_header(\*LOG_OUT);
1053 my @key_list = ();
1054 if($Chronological_Order) {
1055 @key_list = sort {$a <=> $b} (keys %changelog);
1056 } else {
1057 @key_list = sort {$b <=> $a} (keys %changelog);
1059 foreach my $time (@key_list)
1061 next if ($Delta_Mode &&
1062 (($time <= $Delta_StartTime) ||
1063 ($time > $Delta_EndTime && $Delta_EndTime)));
1065 # Set up the date/author line.
1066 # kff todo: do some more XML munging here, on the header
1067 # part of the entry:
1068 my (undef,$min,$hour,$mday,$mon,$year,$wday)
1069 = $UTC_Times ? gmtime($time) : localtime($time);
1071 $wday = $self->wday($wday);
1072 # XML output includes everything else, we might as well make
1073 # it always include Day Of Week too, for consistency.
1074 my $authorhash = $changelog{$time};
1075 if ($Show_Tag_Dates) {
1076 my %tags;
1077 while (my ($author,$mesghash) = each %$authorhash) {
1078 while (my ($msg,$qunk) = each %$mesghash) {
1079 foreach my $qunkref2 (@$qunk) {
1080 if (defined ($qunkref2->tags)) {
1081 foreach my $tag (@{$qunkref2->tags}) {
1082 $tags{$tag} = 1;
1088 # Sort here for determinism to ease testing
1089 foreach my $tag (sort keys %tags) {
1090 if ( ! defined $tag_date_printed{$tag} ) {
1091 $tag_date_printed{$tag} = $time;
1092 $self->output_tagdate(\*LOG_OUT, $time, $tag);
1096 while (my ($author,$mesghash) = each %$authorhash)
1098 # If XML, escape in outer loop to avoid compound quoting:
1099 $author = $self->escape($author);
1101 FOOBIE:
1102 # We sort here to enable predictable ordering for the testing porpoises
1103 for my $msg (sort keys %$mesghash)
1105 my $qunklist = $mesghash->{$msg};
1107 ## MJP: 19.xii.01 : Exclude @ignore_tags
1108 for my $ignore_tag (keys %ignore_tags) {
1109 next FOOBIE
1110 if grep($_ eq $ignore_tag, map(@{$_->{tags}},
1111 grep(defined $_->{tags},
1112 @$qunklist)));
1114 ## MJP: 19.xii.01 : End exclude @ignore_tags
1116 # show only files with tag --show-tag $show_tag
1117 if ( keys %show_tags ) {
1118 next FOOBIE
1119 if !grep(exists $show_tags{$_}, map(@{$_->{tags}},
1120 grep(defined $_->{tags},
1121 @$qunklist)));
1124 my $files = $self->pretty_file_list($qunklist);
1125 my $header_line; # date and author
1126 my $wholething; # $header_line + $body
1128 my $date = $self->fdatetime($time);
1129 $header_line = $self->header_line($time, $author, $lastdate);
1130 $lastdate = $date;
1132 $Text::Wrap::huge = 'overflow'
1133 if $Text::Wrap::VERSION >= 2001.0130;
1134 # Reshape the body according to user preferences.
1135 my $body = $self->format_body($msg, $files, $qunklist);
1137 $body =~ s/[ \t]+\n/\n/g;
1138 $wholething = $header_line . $body;
1140 # One last check: make sure it passes the regexp test, if the
1141 # user asked for that. We have to do it here, so that the
1142 # test can match against information in the header as well
1143 # as in the text of the log message.
1145 # How annoying to duplicate so much code just because I
1146 # can't figure out a way to evaluate scalars on the trailing
1147 # operator portion of a regular expression. Grrr.
1148 if ($Case_Insensitive) {
1149 unless ( $Regexp_Gate and ( $wholething !~ /$Regexp_Gate/oi ) ) {
1150 $self->output_entry(\*LOG_OUT, $wholething);
1153 else {
1154 unless ( $Regexp_Gate and ( $wholething !~ /$Regexp_Gate/o ) ) {
1155 $self->output_entry(\*LOG_OUT, $wholething);
1162 $self->output_footer(\*LOG_OUT);
1164 close (LOG_OUT);
1166 if ( ! $Output_To_Stdout ) {
1167 # If accumulating, append old data to new before renaming. But
1168 # don't append the most recent entry, since it's already in the
1169 # new log due to CVS's idiosyncratic interpretation of "log -d".
1170 if ($Cumulative && -f $logfile_here) {
1171 open NEW_LOG, ">>$tmpfile"
1172 or die "trouble appending to $tmpfile ($!)";
1174 open OLD_LOG, "<$logfile_here"
1175 or die "trouble reading from $logfile_here ($!)";
1177 my $started_first_entry = 0;
1178 my $passed_first_entry = 0;
1179 while (<OLD_LOG>) {
1180 if ( ! $passed_first_entry ) {
1181 if ( ( ! $started_first_entry )
1182 and /^(\d\d\d\d-\d\d-\d\d\s+\d\d:\d\d)/ ) {
1183 $started_first_entry = 1;
1184 } elsif ( /^(\d\d\d\d-\d\d-\d\d\s+\d\d:\d\d)/ ) {
1185 $passed_first_entry = 1;
1186 print NEW_LOG $_;
1188 } else {
1189 print NEW_LOG $_;
1193 close NEW_LOG;
1194 close OLD_LOG;
1197 if ( -f $logfile_here ) {
1198 rename $logfile_here, $logfile_bak;
1200 rename $tmpfile, $logfile_here;
1205 # -------------------------------------
1207 # Don't call this wrap, because with 5.5.3, that clashes with the
1208 # (unconditional :-( ) export of wrap() from Text::Wrap
1209 sub mywrap {
1210 my $self = shift;
1211 my ($indent1, $indent2, @text) = @_;
1212 # If incoming text looks preformatted, don't get clever
1213 my $text = Text::Wrap::wrap($indent1, $indent2, @text);
1214 if ( grep /^\s+/m, @text ) {
1215 return $text;
1217 my @lines = split /\n/, $text;
1218 $indent2 =~ s!^((?: {8})+)!"\t" x (length($1)/8)!e;
1219 $lines[0] =~ s/^$indent1\s+/$indent1/;
1220 s/^$indent2\s+/$indent2/
1221 for @lines[1..$#lines];
1222 my $newtext = join "\n", @lines;
1223 $newtext .= "\n"
1224 if substr($text, -1) eq "\n";
1225 return $newtext;
1228 # -------------------------------------
1230 sub preprocess_msg_text {
1231 my $self = shift;
1232 my ($text) = @_;
1234 # Strip out carriage returns (as they probably result from DOSsy editors).
1235 $text =~ s/\r\n/\n/g;
1236 # If it *looks* like two newlines, make it *be* two newlines:
1237 $text =~ s/\n\s*\n/\n\n/g;
1239 return $text;
1242 # -------------------------------------
1244 sub last_line_len {
1245 my $self = shift;
1247 my $files_list = shift;
1248 my @lines = split (/\n/, $files_list);
1249 my $last_line = pop (@lines);
1250 return length ($last_line);
1253 # -------------------------------------
1255 # A custom wrap function, sensitive to some common constructs used in
1256 # log entries.
1257 sub wrap_log_entry {
1258 my $self = shift;
1260 my $text = shift; # The text to wrap.
1261 my $left_pad_str = shift; # String to pad with on the left.
1263 # These do NOT take left_pad_str into account:
1264 my $length_remaining = shift; # Amount left on current line.
1265 my $max_line_length = shift; # Amount left for a blank line.
1267 my $wrapped_text = ''; # The accumulating wrapped entry.
1268 my $user_indent = ''; # Inherited user_indent from prev line.
1270 my $first_time = 1; # First iteration of the loop?
1271 my $suppress_line_start_match = 0; # Set to disable line start checks.
1273 my @lines = split (/\n/, $text);
1274 while (@lines) # Don't use `foreach' here, it won't work.
1276 my $this_line = shift (@lines);
1277 chomp $this_line;
1279 if ($this_line =~ /^(\s+)/) {
1280 $user_indent = $1;
1282 else {
1283 $user_indent = '';
1286 # If it matches any of the line-start regexps, print a newline now...
1287 if ($suppress_line_start_match)
1289 $suppress_line_start_match = 0;
1291 elsif (($this_line =~ /^(\s*)\*\s+[a-zA-Z0-9]/)
1292 || ($this_line =~ /^(\s*)\* [a-zA-Z0-9_\.\/\+-]+/)
1293 || ($this_line =~ /^(\s*)\([a-zA-Z0-9_\.\/\+-]+(\)|,\s*)/)
1294 || ($this_line =~ /^(\s+)(\S+)/)
1295 || ($this_line =~ /^(\s*)- +/)
1296 || ($this_line =~ /^()\s*$/)
1297 || ($this_line =~ /^(\s*)\*\) +/)
1298 || ($this_line =~ /^(\s*)[a-zA-Z0-9](\)|\.|\:) +/))
1300 # Make a line break immediately, unless header separator is set
1301 # and this line is the first line in the entry, in which case
1302 # we're getting the blank line for free already and shouldn't
1303 # add an extra one.
1304 unless (($After_Header ne " ") and ($first_time))
1306 if ($this_line =~ /^()\s*$/) {
1307 $suppress_line_start_match = 1;
1308 $wrapped_text .= "\n${left_pad_str}";
1311 $wrapped_text .= "\n${left_pad_str}";
1314 $length_remaining = $max_line_length - (length ($user_indent));
1317 # Now that any user_indent has been preserved, strip off leading
1318 # whitespace, so up-folding has no ugly side-effects.
1319 $this_line =~ s/^\s*//;
1321 # Accumulate the line, and adjust parameters for next line.
1322 my $this_len = length ($this_line);
1323 if ($this_len == 0)
1325 # Blank lines should cancel any user_indent level.
1326 $user_indent = '';
1327 $length_remaining = $max_line_length;
1329 elsif ($this_len >= $length_remaining) # Line too long, try breaking it.
1331 # Walk backwards from the end. At first acceptable spot, break
1332 # a new line.
1333 my $idx = $length_remaining - 1;
1334 if ($idx < 0) { $idx = 0 };
1335 while ($idx > 0)
1337 if (substr ($this_line, $idx, 1) =~ /\s/)
1339 my $line_now = substr ($this_line, 0, $idx);
1340 my $next_line = substr ($this_line, $idx);
1341 $this_line = $line_now;
1343 # Clean whitespace off the end.
1344 chomp $this_line;
1346 # The current line is ready to be printed.
1347 $this_line .= "\n${left_pad_str}";
1349 # Make sure the next line is allowed full room.
1350 $length_remaining = $max_line_length - (length ($user_indent));
1352 # Strip next_line, but then preserve any user_indent.
1353 $next_line =~ s/^\s*//;
1355 # Sneak a peek at the user_indent of the upcoming line, so
1356 # $next_line (which will now precede it) can inherit that
1357 # indent level. Otherwise, use whatever user_indent level
1358 # we currently have, which might be none.
1359 my $next_next_line = shift (@lines);
1360 if ((defined ($next_next_line)) && ($next_next_line =~ /^(\s+)/)) {
1361 $next_line = $1 . $next_line if (defined ($1));
1362 # $length_remaining = $max_line_length - (length ($1));
1363 $next_next_line =~ s/^\s*//;
1365 else {
1366 $next_line = $user_indent . $next_line;
1368 if (defined ($next_next_line)) {
1369 unshift (@lines, $next_next_line);
1371 unshift (@lines, $next_line);
1373 # Our new next line might, coincidentally, begin with one of
1374 # the line-start regexps, so we temporarily turn off
1375 # sensitivity to that until we're past the line.
1376 $suppress_line_start_match = 1;
1378 last;
1380 else
1382 $idx--;
1386 if ($idx == 0)
1388 # We bottomed out because the line is longer than the
1389 # available space. But that could be because the space is
1390 # small, or because the line is longer than even the maximum
1391 # possible space. Handle both cases below.
1393 if ($length_remaining == ($max_line_length - (length ($user_indent))))
1395 # The line is simply too long -- there is no hope of ever
1396 # breaking it nicely, so just insert it verbatim, with
1397 # appropriate padding.
1398 $this_line = "\n${left_pad_str}${this_line}";
1400 else
1402 # Can't break it here, but may be able to on the next round...
1403 unshift (@lines, $this_line);
1404 $length_remaining = $max_line_length - (length ($user_indent));
1405 $this_line = "\n${left_pad_str}";
1409 else # $this_len < $length_remaining, so tack on what we can.
1411 # Leave a note for the next iteration.
1412 $length_remaining = $length_remaining - $this_len;
1414 if ($this_line =~ /\.$/)
1416 $this_line .= " ";
1417 $length_remaining -= 2;
1419 else # not a sentence end
1421 $this_line .= " ";
1422 $length_remaining -= 1;
1426 # Unconditionally indicate that loop has run at least once.
1427 $first_time = 0;
1429 $wrapped_text .= "${user_indent}${this_line}";
1432 # One last bit of padding.
1433 $wrapped_text .= "\n";
1435 return $wrapped_text;
1438 # -------------------------------------
1440 sub _pretty_file_list {
1441 my $self = shift;
1443 my ($unanimous_tags, $non_unanimous_tags, $all_branches, $qunksref) = @_;
1445 my @qunkrefs =
1446 grep +( ( ! $_->tags_exists
1448 ! grep exists $ignore_tags{$_}, @{$_->tags})
1450 ( ! keys %show_tags
1452 ( $_->tags_exists
1454 grep exists $show_tags{$_}, @{$_->tags} )
1457 @$qunksref;
1459 my $common_dir; # Dir prefix common to all files ('' if none)
1461 # First, loop over the qunks gathering all the tag/branch names.
1462 # We'll put them all in non_unanimous_tags, and take out the
1463 # unanimous ones later.
1464 QUNKREF:
1465 foreach my $qunkref (@qunkrefs)
1467 # Keep track of whether all the files in this commit were in the
1468 # same directory, and memorize it if so. We can make the output a
1469 # little more compact by mentioning the directory only once.
1470 if ($Common_Dir && (scalar (@qunkrefs)) > 1)
1472 if (! (defined ($common_dir)))
1474 my ($base, $dir);
1475 ($base, $dir, undef) = fileparse ($qunkref->filename);
1477 if ((! (defined ($dir))) # this first case is sheer paranoia
1478 or ($dir eq '')
1479 or ($dir eq "./")
1480 or ($dir eq ".\\"))
1482 $common_dir = '';
1484 else
1486 $common_dir = $dir;
1489 elsif ($common_dir ne '')
1491 # Already have a common dir prefix, so how much of it can we preserve?
1492 $common_dir = &main::common_path_prefix ($qunkref->filename, $common_dir);
1495 else # only one file in this entry anyway, so common dir not an issue
1497 $common_dir = '';
1500 if (defined ($qunkref->branch)) {
1501 $all_branches->{$qunkref->branch} = 1;
1503 if (defined ($qunkref->tags)) {
1504 foreach my $tag (@{$qunkref->tags}) {
1505 $non_unanimous_tags->{$tag} = 1;
1510 # Any tag held by all qunks will be printed specially... but only if
1511 # there are multiple qunks in the first place!
1512 if ((scalar (@qunkrefs)) > 1) {
1513 foreach my $tag (keys (%$non_unanimous_tags)) {
1514 my $everyone_has_this_tag = 1;
1515 foreach my $qunkref (@qunkrefs) {
1516 if ((! (defined ($qunkref->tags)))
1517 or (! (grep ($_ eq $tag, @{$qunkref->tags})))) {
1518 $everyone_has_this_tag = 0;
1521 if ($everyone_has_this_tag) {
1522 $unanimous_tags->{$tag} = 1;
1523 delete $non_unanimous_tags->{$tag};
1528 return $common_dir, \@qunkrefs;
1531 # -------------------------------------
1533 sub fdatetime {
1534 my $self = shift;
1536 my ($year, $mday, $mon, $wday, $hour, $min);
1538 if ( @_ > 1 ) {
1539 ($year, $mday, $mon, $wday, $hour, $min) = @_;
1540 } else {
1541 my ($time) = @_;
1542 (undef, $min, $hour, $mday, $mon, $year, $wday) =
1543 $UTC_Times ? gmtime($time) : localtime($time);
1545 $year += 1900;
1546 $mon += 1;
1547 $wday = $self->wday($wday);
1550 my $fdate = $self->fdate($year, $mon, $mday, $wday);
1552 if ($Show_Times) {
1553 my $ftime = $self->ftime($hour, $min);
1554 return "$fdate $ftime";
1555 } else {
1556 return $fdate;
1560 # -------------------------------------
1562 sub fdate {
1563 my $self = shift;
1565 my ($year, $mday, $mon, $wday);
1567 if ( @_ > 1 ) {
1568 ($year, $mon, $mday, $wday) = @_;
1569 } else {
1570 my ($time) = @_;
1571 (undef, undef, undef, $mday, $mon, $year, $wday) =
1572 $UTC_Times ? gmtime($time) : localtime($time);
1574 $year += 1900;
1575 $mon += 1;
1576 $wday = $self->wday($wday);
1579 return sprintf '%4u-%02u-%02u%s', $year, $mon, $mday, $wday;
1582 # -------------------------------------
1584 sub ftime {
1585 my $self = shift;
1587 my ($hour, $min);
1589 if ( @_ > 1 ) {
1590 ($hour, $min) = @_;
1591 } else {
1592 my ($time) = @_;
1593 (undef, $min, $hour) = $UTC_Times ? gmtime($time) : localtime($time);
1596 return sprintf '%02u:%02u', $hour, $min;
1599 # ----------------------------------------------------------------------------
1601 package CVS::Utils::ChangeLog::Message;
1603 sub new {
1604 my $class = shift;
1605 my ($msg) = @_;
1607 my %self = (msg => $msg, files => []);
1609 bless \%self, $class;
1612 sub add_fileentry {
1613 my $self = shift;
1614 my ($fileentry) = @_;
1616 die "Not a fileentry: $fileentry"
1617 unless $fileentry->isa('CVS::Utils::ChangeLog::FileEntry');
1619 push @{$self->{files}}, $fileentry;
1622 sub files { wantarray ? @{$_[0]->{files}} : $_[0]->{files} }
1624 # ----------------------------------------------------------------------------
1626 package CVS::Utils::ChangeLog::FileEntry;
1628 # Each revision of a file has a little data structure (a `qunk')
1629 # associated with it. That data structure holds not only the
1630 # file's name, but any additional information about the file
1631 # that might be needed in the output, such as the revision
1632 # number, tags, branches, etc. The reason to have these things
1633 # arranged in a data structure, instead of just appending them
1634 # textually to the file's name, is that we may want to do a
1635 # little rearranging later as we write the output. For example,
1636 # all the files on a given tag/branch will go together, followed
1637 # by the tag in parentheses (so trunk or otherwise non-tagged
1638 # files would go at the end of the file list for a given log
1639 # message). This rearrangement is a lot easier to do if we
1640 # don't have to reparse the text.
1642 # A qunk looks like this:
1645 # filename => "hello.c",
1646 # revision => "1.4.3.2",
1647 # time => a timegm() return value (moment of commit)
1648 # tags => [ "tag1", "tag2", ... ],
1649 # branch => "branchname" # There should be only one, right?
1650 # roots => [ "branchtag1", "branchtag2", ... ]
1653 # Single top-level ChangeLog, or one per subdirectory?
1654 my $distributed;
1655 sub distributed { $#_ ? ($distributed = $_[1]) : $distributed; }
1657 sub new {
1658 my $class = shift;
1659 my ($path, $time, $revision, $state, $lines,
1660 $branch_names, $branch_roots, $symbolic_names) = @_;
1662 my %self = (time => $time,
1663 revision => $revision,
1664 state => $state,
1665 lines => $lines,
1668 if ( $distributed ) {
1669 @self{qw(filename dir_key)} = fileparse($path);
1670 } else {
1671 @self{qw(filename dir_key)} = ($path, './');
1674 # Grab the branch, even though we may or may not need it:
1675 (my ($branch_prefix) = ($revision =~ /((?:\d+\.)+)\d+/));
1676 $branch_prefix =~ s/\.$//;
1677 $self{branch} = $branch_names->{$branch_prefix}
1678 if $branch_names->{$branch_prefix};
1680 # If there's anything in the @branch_roots array, then this
1681 # revision is the root of at least one branch. We'll display
1682 # them as branch names instead of revision numbers, the
1683 # substitution for which is done directly in the array:
1684 $self{'roots'} = [ map { $branch_names->{$_} } @$branch_roots ]
1685 if @$branch_roots;
1687 if ( exists $symbolic_names->{$revision} ) {
1688 $self{tags} = delete $symbolic_names->{$revision};
1689 &main::delta_check($time, $self{tags});
1692 bless \%self, $class;
1695 sub filename { $_[0]->{filename} }
1696 sub dir_key { $_[0]->{dir_key} }
1697 sub revision { $_[0]->{revision} }
1698 sub branch { $_[0]->{branch} }
1699 sub state { $_[0]->{state} }
1700 sub lines { $_[0]->{lines} }
1701 sub roots { $_[0]->{roots} }
1703 sub tags { $_[0]->{tags} }
1704 sub tags_exists {
1705 exists $_[0]->{tags};
1708 # This may someday be used in a more sophisticated calculation of what other
1709 # files are involved in this commit. For now, we don't use it much except for
1710 # delta mode, because the common-commit-detection algorithm is hypothesized to
1711 # be "good enough" as it stands.
1712 sub time { $_[0]->{time} }
1714 package main;
1716 # Subrs ----------------------------------------------------------------------
1718 sub delta_check {
1719 my ($time, $tags) = @_;
1721 # If we're in 'delta' mode, update the latest observed times for the
1722 # beginning and ending tags, and when we get around to printing output, we
1723 # will simply restrict ourselves to that timeframe...
1724 return
1725 unless $Delta_Mode;
1727 $Delta_StartTime = $time
1728 if $time > $Delta_StartTime and grep { $_ eq $Delta_From } @$tags;
1730 $Delta_EndTime = $time
1731 if $time > $Delta_EndTime and grep { $_ eq $Delta_To } @$tags;
1734 sub run_ext {
1735 my ($cmd) = @_;
1736 $cmd = [$cmd]
1737 unless ref $cmd;
1738 local $" = ' ';
1739 my $out = qx"@$cmd 2>&1";
1740 my $rv = $?;
1741 my ($sig, $core, $exit) = ($? & 127, $? & 128, $? >> 8);
1742 return $out, $exit, $sig, $core;
1745 # -------------------------------------
1747 # If accumulating, grab the boundary date from pre-existing ChangeLog.
1748 sub maybe_grab_accumulation_date {
1749 if (! $Cumulative || $Update) {
1750 return '';
1753 # else
1755 open (LOG, "$Log_File_Name")
1756 or die ("trouble opening $Log_File_Name for reading ($!)");
1758 my $boundary_date;
1759 while (<LOG>)
1761 if (/^(\d\d\d\d-\d\d-\d\d\s+\d\d:\d\d)/)
1763 $boundary_date = "$1";
1764 last;
1768 close (LOG);
1770 # convert time from utc to local timezone if the ChangeLog has
1771 # dates/times in utc
1772 if ($UTC_Times && $boundary_date)
1774 # convert the utc time to a time value
1775 my ($year,$mon,$mday,$hour,$min) = $boundary_date =~
1776 m#(\d+)-(\d+)-(\d+)\s+(\d+):(\d+)#;
1777 my $time = timegm(0,$min,$hour,$mday,$mon-1,$year-1900);
1778 # print the timevalue in the local timezone
1779 my ($ignore,$wday);
1780 ($ignore,$min,$hour,$mday,$mon,$year,$wday) = localtime($time);
1781 $boundary_date=sprintf ("%4u-%02u-%02u %02u:%02u",
1782 $year+1900,$mon+1,$mday,$hour,$min);
1785 return $boundary_date;
1788 # -------------------------------------
1790 sub maybe_read_user_map_file {
1791 my %expansions;
1792 my $User_Map_Input;
1794 if ($User_Map_File)
1796 if ( $User_Map_File =~ m{^([-\w\@+=.,\/]+):([-\w\@+=.,\/:]+)} and
1797 !-f $User_Map_File )
1799 my $rsh = (exists $ENV{'CVS_RSH'} ? $ENV{'CVS_RSH'} : 'ssh');
1800 $User_Map_Input = "$rsh $1 'cat $2' |";
1801 &debug ("(run \"${User_Map_Input}\")\n");
1803 else
1805 $User_Map_Input = "<$User_Map_File";
1808 open (MAPFILE, $User_Map_Input)
1809 or die ("Unable to open $User_Map_File ($!)");
1811 while (<MAPFILE>)
1813 next if /^\s*#/; # Skip comment lines.
1814 next if not /:/; # Skip lines without colons.
1816 # It is now safe to split on ':'.
1817 my ($username, $expansion) = split ':';
1818 chomp $expansion;
1819 $expansion =~ s/^'(.*)'$/$1/;
1820 $expansion =~ s/^"(.*)"$/$1/;
1822 # If it looks like the expansion has a real name already, then
1823 # we toss the username we got from CVS log. Otherwise, keep
1824 # it to use in combination with the email address.
1826 if ($expansion =~ /^\s*<{0,1}\S+@.*/) {
1827 # Also, add angle brackets if none present
1828 if (! ($expansion =~ /<\S+@\S+>/)) {
1829 $expansions{$username} = "$username <$expansion>";
1831 else {
1832 $expansions{$username} = "$username $expansion";
1835 else {
1836 $expansions{$username} = $expansion;
1838 } # fi ($User_Map_File)
1840 close (MAPFILE);
1843 if (defined $User_Passwd_File)
1845 if ( ! defined $Domain ) {
1846 if ( -e MAILNAME ) {
1847 chomp($Domain = slurp_file(MAILNAME));
1848 } else {
1849 MAILDOMAIN_CMD:
1850 for ([qw(hostname -d)], 'dnsdomainname', 'domainname') {
1851 my ($text, $exit, $sig, $core) = run_ext($_);
1852 if ( $exit == 0 && $sig == 0 && $core == 0 ) {
1853 chomp $text;
1854 if ( length $text ) {
1855 $Domain = $text;
1856 last MAILDOMAIN_CMD;
1863 die "No mail domain found\n"
1864 unless defined $Domain;
1866 open (MAPFILE, "<$User_Passwd_File")
1867 or die ("Unable to open $User_Passwd_File ($!)");
1868 while (<MAPFILE>)
1870 # all lines are valid
1871 my ($username, $pw, $uid, $gid, $gecos, $homedir, $shell) = split ':';
1872 my $expansion = '';
1873 ($expansion) = split (',', $gecos)
1874 if defined $gecos && length $gecos;
1876 my $mailname = $Domain eq '' ? $username : "$username\@$Domain";
1877 $expansions{$username} = "$expansion <$mailname>";
1879 close (MAPFILE);
1882 return %expansions;
1885 # -------------------------------------
1887 sub read_file_path {
1888 my ($line) = @_;
1890 my $path;
1892 if ( $line =~ /^Working file: (.*)/ ) {
1893 $path = $1;
1894 } elsif ( defined $RCS_Root
1896 $line =~ m|^RCS file: $RCS_Root[/\\](.*),v$| ) {
1897 $path = $1;
1898 $path =~ s!Attic/!!;
1899 } else {
1900 return;
1903 if ( @Ignore_Files ) {
1904 my $base;
1905 ($base, undef, undef) = fileparse($path);
1907 my $xpath = $Case_Insensitive ? lc($path) : $path;
1908 if ( grep index($path, $_) > -1, @Ignore_Files ) {
1909 return;
1913 return $path;
1916 # -------------------------------------
1918 sub read_symbolic_name {
1919 my ($line, $branch_names, $branch_numbers, $symbolic_names) = @_;
1921 # All tag names are listed with whitespace in front in cvs log
1922 # output; so if see non-whitespace, then we're done collecting.
1923 if ( /^\S/ ) {
1924 return 0;
1925 } else {
1926 # we're looking at a tag name, so parse & store it
1928 # According to the Cederqvist manual, in node "Tags", tag names must start
1929 # with an uppercase or lowercase letter and can contain uppercase and
1930 # lowercase letters, digits, `-', and `_'. However, it's not our place to
1931 # enforce that, so we'll allow anything CVS hands us to be a tag:
1932 my ($tag_name, $tag_rev) = ($line =~ /^\s+([^:]+): ([\d.]+)$/);
1934 # A branch number either has an odd number of digit sections
1935 # (and hence an even number of dots), or has ".0." as the
1936 # second-to-last digit section. Test for these conditions.
1937 my $real_branch_rev = '';
1938 if ( $tag_rev =~ /^(\d+\.\d+\.)+\d+$/ # Even number of dots...
1940 $tag_rev !~ /^(1\.)+1$/ ) { # ...but not "1.[1.]1"
1941 $real_branch_rev = $tag_rev;
1942 } elsif ($tag_rev =~ /(\d+\.(\d+\.)+)0.(\d+)/) { # Has ".0."
1943 $real_branch_rev = $1 . $3;
1946 # If we got a branch, record its number.
1947 if ( $real_branch_rev ) {
1948 $branch_names->{$real_branch_rev} = $tag_name;
1949 if ( @Follow_Branches ) {
1950 if ( grep $_ eq $tag_name, @Follow_Branches ) {
1951 $branch_numbers->{$tag_name} = $real_branch_rev;
1954 } else {
1955 # Else it's just a regular (non-branch) tag.
1956 push @{$symbolic_names->{$tag_rev}}, $tag_name;
1960 return 1;
1963 # -------------------------------------
1965 sub read_revision {
1966 my ($line, $branch_numbers) = @_;
1968 my ($revision) = ( $line =~ /^revision (\d+\.[\d.]+)/ );
1970 return
1971 unless $revision;
1973 return $revision
1974 unless @Follow_Branches;
1976 foreach my $branch (@Follow_Branches) {
1977 # Special case for following trunk revisions
1978 return $revision
1979 if $branch =~ /^trunk$/i and $revision =~ /^[0-9]+\.[0-9]+$/;
1981 if ( my $branch_number = $branch_numbers->{$branch} ) {
1982 # Are we on one of the follow branches or an ancestor of same?
1984 # If this revision is a prefix of the branch number, or possibly is less
1985 # in the minormost number, OR if this branch number is a prefix of the
1986 # revision, then yes. Otherwise, no.
1988 # So below, we determine if any of those conditions are met.
1990 # Trivial case: is this revision on the branch? (Compare this way to
1991 # avoid regexps that screw up Emacs indentation, argh.)
1992 if ( substr($revision, 0, (length($branch_number) + 1))
1994 ($branch_number . ".") ) {
1995 return $revision;
1996 } elsif ( length($branch_number) > length($revision)
1998 $No_Ancestors ) {
1999 # Non-trivial case: check if rev is ancestral to branch
2001 # r_left still has the trailing "."
2002 my ($r_left, $r_end) = ($revision =~ /^((?:\d+\.)+)(\d+)$/);
2004 # b_left still has trailing "."
2005 # b_mid has no trailing "."
2006 my ($b_left, $b_mid) = ($branch_number =~ /^((?:\d+\.)+)(\d+)\.\d+$/);
2008 return $revision
2009 if $r_left eq $b_left and $r_end <= $b_mid;
2014 # Else we are following branches, but this revision isn't on the
2015 # path. So skip it.
2016 return;
2019 # -------------------------------------
2021 { # Closure over %gecos_warned
2022 my %gecos_warned;
2023 sub read_date_author_and_state {
2024 my ($line, $usermap) = @_;
2026 my ($time, $author, $state, $lines) = parse_date_author_and_state($line);
2028 if ( defined($usermap->{$author}) and $usermap->{$author} ) {
2029 $author = $usermap->{$author};
2030 } elsif ( defined $Domain or $Gecos == 1 ) {
2031 my $email = $author;
2032 $email = $author."@".$Domain
2033 if defined $Domain && $Domain ne '';
2035 my $pw = getpwnam($author);
2036 my ($fullname, $office, $workphone, $homephone);
2037 if ( defined $pw ) {
2038 ($fullname, $office, $workphone, $homephone) =
2039 split /\s*,\s*/, $pw->gecos;
2040 } else {
2041 warn "Couldn't find gecos info for author '$author'\n"
2042 unless $gecos_warned{$author}++;
2043 $fullname = '';
2045 for (grep defined, $fullname, $office, $workphone, $homephone) {
2046 s/&/ucfirst(lc($pw->name))/ge;
2048 $author = $fullname . " <" . $email . ">"
2049 if $fullname ne '';
2052 return $time, $author, $state, $lines;
2056 # -------------------------------------
2058 sub read_branches {
2059 my ($line) = @_;
2061 if ( $Show_Branches ) {
2062 my $lst = $1;
2063 $lst =~ s/(1\.)+1;|(1\.)+1$//; # ignore the trivial branch 1.1.1
2064 if ( $lst ) {
2065 return split (/;\s+/, $lst);
2066 } else {
2067 return;
2069 } else {
2070 # Ugh. This really bothers me. Suppose we see a log entry
2071 # like this:
2073 # ----------------------------
2074 # revision 1.1
2075 # date: 1999/10/17 03:07:38; author: jrandom; state: Exp;
2076 # branches: 1.1.2;
2077 # Intended first line of log message begins here.
2078 # ----------------------------
2080 # The question is, how we can tell the difference between that
2081 # log message and a *two*-line log message whose first line is
2083 # "branches: 1.1.2;"
2085 # See the problem? The output of "cvs log" is inherently
2086 # ambiguous.
2088 # For now, we punt: we liberally assume that people don't
2089 # write log messages like that, and just toss a "branches:"
2090 # line if we see it but are not showing branches. I hope no
2091 # one ever loses real log data because of this.
2092 return;
2096 # -------------------------------------
2098 sub read_changelog {
2099 my ($command) = @_;
2101 my $grand_poobah = CVS::Utils::ChangeLog::EntrySet->new;
2103 my $file_full_path;
2104 my $detected_file_separator;
2105 my $author;
2106 my $revision;
2107 my $time;
2108 my $state;
2109 my $lines;
2110 my $msg_txt;
2112 # We might be expanding usernames
2113 my %usermap = maybe_read_user_map_file;
2115 # In general, it's probably not very maintainable to use state
2116 # variables like this to tell the loop what it's doing at any given
2117 # moment, but this is only the first one, and if we never have more
2118 # than a few of these, it's okay.
2119 my $collecting_symbolic_names = 0;
2120 my %symbolic_names; # Where tag names get stored.
2121 my %branch_names; # We'll grab branch names while we're at it.
2122 my %branch_numbers; # Save some revisions for @Follow_Branches
2123 my @branch_roots; # For showing which files are branch ancestors.
2125 if (! $Input_From_Stdin) {
2126 my $Log_Source_Command = join(' ', @$command);
2127 &debug ("(run \"${Log_Source_Command}\")\n");
2128 open (LOG_SOURCE, "$Log_Source_Command |")
2129 or die "unable to run \"${Log_Source_Command}\"";
2131 else {
2132 open (LOG_SOURCE, "-") or die "unable to open stdin for reading";
2135 binmode LOG_SOURCE;
2137 XX_Log_Source:
2138 while (<LOG_SOURCE>) {
2139 # Canonicalize line endings
2140 s/\r$//;
2142 # If on a new file and don't see filename, skip until we find it, and
2143 # when we find it, grab it.
2144 if ( ! defined $file_full_path ) {
2145 $file_full_path = read_file_path($_);
2146 next XX_Log_Source;
2147 } elsif ( /^symbolic names:$/ ) {
2148 # Collect tag names in case we're asked to print them in the output.
2149 $collecting_symbolic_names = 1;
2150 next XX_Log_Source; # There's no more info on this line, so skip to next
2151 } elsif ($collecting_symbolic_names) {
2152 $collecting_symbolic_names =
2153 read_symbolic_name($_,
2154 \(%branch_names, %branch_numbers, %symbolic_names));
2155 next XX_Log_Source;
2158 # If have file name, but not revision, and see revision, then grab
2159 # it. (We collect unconditionally, even though we may or may not
2160 # ever use it.)
2161 if ( ( ! defined $revision) ) {
2162 $revision = read_revision($_, \%branch_numbers);
2163 # This breaks, because files with no messages don't get to call clear
2164 # and so the file picks up messages from the next file in sequence
2165 # next XX_Log_Source;
2168 # If we don't have a revision right now, we couldn't possibly
2169 # be looking at anything useful.
2170 if (! (defined ($revision))) {
2171 $detected_file_separator = /^$file_separator$/o;
2172 if ($detected_file_separator) {
2173 # No revisions for this file; can happen, e.g. "cvs log -d DATE"
2174 goto XX_Clear;
2176 else {
2177 next XX_Log_Source;
2181 # If have file name but not date and author, and see date or
2182 # author, then grab them:
2183 unless (defined $time) {
2184 if (/^date: .*/) {
2185 ($time, $author, $state, $lines) =
2186 read_date_author_and_state($_, \%usermap);
2187 } else {
2188 $detected_file_separator = /^$file_separator$/o;
2189 goto XX_Clear
2190 # No revisions for this file; can happen, e.g. "cvs log -d DATE"
2191 if $detected_file_separator;
2194 # If the date/time/author hasn't been found yet, we couldn't
2195 # possibly care about anything we see. So skip:
2196 next XX_Log_Source;
2199 # A "branches: ..." line here indicates that one or more branches
2200 # are rooted at this revision. If we're showing branches, then we
2201 # want to show that fact as well, so we collect all the branches
2202 # that this is the latest ancestor of and store them in
2203 # @branch_roots. Just for reference, the format of the line we're
2204 # seeing at this point is:
2206 # branches: 1.5.2; 1.5.4; ...;
2208 # Okay, here goes:
2209 if ( /^branches:\s+(.*);$/ ) {
2210 @branch_roots = read_branches($_);
2211 next XX_Log_Source;
2214 # If have file name, time, and author, then we're just grabbing
2215 # log message texts:
2216 $detected_file_separator = /^$file_separator$/o;
2217 if ($detected_file_separator && ! (defined $revision)) {
2218 # No revisions for this file; can happen, e.g. "cvs log -d DATE"
2219 goto XX_Clear;
2221 unless ($detected_file_separator || /^$logmsg_separator$/o)
2223 $msg_txt .= $_; # Normally, just accumulate the message...
2224 next XX_Log_Source;
2226 # ... until a msg separator is encountered:
2227 # Ensure the message contains something:
2228 if ((! $msg_txt)
2229 || ($msg_txt =~ /^\s*\.\s*$|^\s*$/)
2230 || ($msg_txt =~ /\*\*\* empty log message \*\*\*/))
2232 if ($Prune_Empty_Msgs) {
2233 goto XX_Clear;
2235 # else
2236 $msg_txt = "[no log message]\n";
2239 ### Store it all in the Grand Poobah:
2241 my $qunk = CVS::Utils::ChangeLog::FileEntry->new($file_full_path, $time, $revision,
2242 $state, $lines,
2243 \%branch_names, \@branch_roots,
2244 \%symbolic_names);
2246 # We might be including revision numbers and/or tags and/or
2247 # branch names in the output. Most of the code from here to
2248 # loop-end deals with organizing these in qunk.
2250 unless ( $Hide_Branch_Additions
2252 $msg_txt =~ /file .+ was initially added on branch \S+./ ) {
2253 # Add this file to the list
2254 # (We use many spoonfuls of autovivication magic. Hashes and arrays
2255 # will spring into existence if they aren't there already.)
2257 &debug ("(pushing log msg for ". $qunk->dir_key . $qunk->filename . ")\n");
2259 # Store with the files in this commit. Later we'll loop through
2260 # again, making sure that revisions with the same log message
2261 # and nearby commit times are grouped together as one commit.
2262 $grand_poobah->{$qunk->dir_key}{$author}{$time}{$msg_txt} =
2263 CVS::Utils::ChangeLog::Message->new($msg_txt)
2264 unless exists $grand_poobah->{$qunk->dir_key}{$author}{$time}{$msg_txt};
2265 $grand_poobah->{$qunk->dir_key}{$author}{$time}{$msg_txt}->add_fileentry($qunk);
2269 XX_Clear:
2270 # Make way for the next message
2271 undef $msg_txt;
2272 undef $time;
2273 undef $revision;
2274 undef $author;
2275 undef @branch_roots;
2277 # Maybe even make way for the next file:
2278 if ($detected_file_separator) {
2279 undef $file_full_path;
2280 undef %branch_names;
2281 undef %branch_numbers;
2282 undef %symbolic_names;
2286 close LOG_SOURCE
2287 or die sprintf("Problem reading log input (exit/signal/core: %d/%d/%d)\n",
2288 $? >> 8, $? & 127, $? & 128);
2290 return $grand_poobah;
2293 # -------------------------------------
2295 # Fills up a ChangeLog structure in the current directory.
2296 sub derive_changelog {
2297 my ($command) = @_;
2299 # See "The Plan" above for a full explanation.
2301 # Might be adding to an existing ChangeLog
2302 my $accumulation_date = maybe_grab_accumulation_date;
2303 if ($accumulation_date) {
2304 # Insert -d immediately after 'cvs log'
2305 my $Log_Date_Command = "-d\'>${accumulation_date}\'";
2307 my ($log_index) = grep $command->[$_] eq 'log', 0..$#$command;
2308 splice @$command, $log_index+1, 0, $Log_Date_Command;
2309 &debug ("(adding log msg starting from $accumulation_date)\n");
2312 # output_changelog(read_changelog($command));
2313 read_changelog($command)->output_changelog;
2316 # -------------------------------------
2318 sub parse_date_author_and_state {
2319 # Parses the date/time and author out of a line like:
2321 # date: 1999/02/19 23:29:05; author: apharris; state: Exp;
2323 my $line = shift;
2325 my ($year, $mon, $mday, $hours, $min, $secs, $author, $state, $rest) =
2326 $line =~
2327 m#(\d+)/(\d+)/(\d+)\s+(\d+):(\d+):(\d+);\s+author:\s+([^;]+);\s+state:\s+([^;]+);(.*)#
2328 or die "Couldn't parse date ``$line''";
2329 die "Bad date or Y2K issues" unless ($year > 1969 and $year < 2258);
2330 # Kinda arbitrary, but useful as a sanity check
2331 my $time = timegm($secs,$min,$hours,$mday,$mon-1,$year-1900);
2332 my $lines;
2333 if ( $rest =~ m#\s+lines:\s+(.*)# )
2335 $lines =$1;
2337 return ($time, $author, $state, $lines);
2340 # -------------------------------------
2342 sub min { $_[0] < $_[1] ? $_[0] : $_[1] }
2344 # -------------------------------------
2346 sub common_path_prefix {
2347 my ($path1, $path2) = @_;
2349 # For compatibility (with older versions of cvs2cl.pl), we think in UN*X
2350 # terms, and mould windoze filenames to match. Is this really appropriate?
2351 # If a file is checked in under UN*X, and cvs log run on windoze, which way
2352 # do the path separators slope? Can we use fileparse as per the local
2353 # conventions? If so, we should probably have a user option to specify an
2354 # OS to emulate to handle stdin-fed logs. If we did this, we could avoid
2355 # the nasty \-/ transmogrification below.
2357 my ($dir1, $dir2) = map +(fileparse($_))[1], $path1, $path2;
2359 # Transmogrify Windows filenames to look like Unix.
2360 # (It is far more likely that someone is running cvs2cl.pl under
2361 # Windows than that they would genuinely have backslashes in their
2362 # filenames.)
2363 tr!\\!/!
2364 for $dir1, $dir2;
2366 my ($accum1, $accum2, $last_common_prefix) = ('') x 3;
2368 my @path1 = grep length($_), split qr!/!, $dir1;
2369 my @path2 = grep length($_), split qr!/!, $dir2;
2371 my @common_path;
2372 for (0..min($#path1,$#path2)) {
2373 if ( $path1[$_] eq $path2[$_]) {
2374 push @common_path, $path1[$_];
2375 } else {
2376 last;
2380 return join '', map "$_/", @common_path;
2383 # -------------------------------------
2384 sub parse_options {
2385 # Check this internally before setting the global variable.
2386 my $output_file;
2388 # If this gets set, we encountered unknown options and will exit at
2389 # the end of this subroutine.
2390 my $exit_with_admonishment = 0;
2392 # command to generate the log
2393 my @log_source_command = qw( cvs log );
2395 my (@Global_Opts, @Local_Opts);
2397 Getopt::Long::Configure(qw( bundling permute no_getopt_compat
2398 pass_through no_ignore_case ));
2399 GetOptions('help|usage|h' => \$Print_Usage,
2400 'debug' => \$Debug, # unadvertised option, heh
2401 'version' => \$Print_Version,
2403 'file|f=s' => \$output_file,
2404 'accum' => \$Cumulative,
2405 'update' => \$Update,
2406 'fsf' => \$FSF_Style,
2407 'rcs=s' => \$RCS_Root,
2408 'usermap|U=s' => \$User_Map_File,
2409 'gecos' => \$Gecos,
2410 'domain=s' => \$Domain,
2411 'passwd=s' => \$User_Passwd_File,
2412 'window|W=i' => \$Max_Checkin_Duration,
2413 'chrono' => \$Chronological_Order,
2414 'ignore|I=s' => \@Ignore_Files,
2415 'case-insensitive|C' => \$Case_Insensitive,
2416 'regexp|R=s' => \$Regexp_Gate,
2417 'stdin' => \$Input_From_Stdin,
2418 'stdout' => \$Output_To_Stdout,
2419 'distributed|d' => sub { CVS::Utils::ChangeLog::FileEntry->distributed(1) },
2420 'prune|P' => \$Prune_Empty_Msgs,
2421 'no-wrap' => \$No_Wrap,
2422 'gmt|utc' => \$UTC_Times,
2423 'day-of-week|w' => \$Show_Day_Of_Week,
2424 'revisions|r' => \$Show_Revisions,
2425 'show-dead' => \$Show_Dead,
2426 'tags|t' => \$Show_Tags,
2427 'tagdates|T' => \$Show_Tag_Dates,
2428 'branches|b' => \$Show_Branches,
2429 'follow|F=s' => \@Follow_Branches,
2430 'xml-encoding=s' => \$XML_Encoding,
2431 'xml' => \$XML_Output,
2432 'noxmlns' => \$No_XML_Namespace,
2433 'no-xml-iso-date' => \$No_XML_ISO_Date,
2434 'no-ancestors' => \$No_Ancestors,
2436 'no-indent' => sub {
2437 $Indent = '';
2440 'summary' => sub {
2441 $Summary = 1;
2442 $After_Header = "\n\n"; # Summary implies --separate-header
2445 'no-times' => sub {
2446 $Show_Times = 0;
2449 'no-hide-branch-additions' => sub {
2450 $Hide_Branch_Additions = 0;
2453 'no-common-dir' => sub {
2454 $Common_Dir = 0;
2457 'ignore-tag=s' => sub {
2458 $ignore_tags{$_[1]} = 1;
2461 'show-tag=s' => sub {
2462 $show_tags{$_[1]} = 1;
2465 # Deliberately undocumented. This is not a public interface, and
2466 # may change/disappear at any time.
2467 'test-code=s' => \$TestCode,
2469 'delta=s' => sub {
2470 my $arg = $_[1];
2471 if ( $arg =~
2472 /^([A-Za-z][A-Za-z0-9_\-]*):([A-Za-z][A-Za-z0-9_\-]*)$/ ) {
2473 $Delta_From = $1;
2474 $Delta_To = $2;
2475 $Delta_Mode = 1;
2476 } else {
2477 die "--delta FROM_TAG:TO_TAG is what you meant to say.\n";
2481 'FSF' => sub {
2482 $Show_Times = 0;
2483 $Common_Dir = 0;
2484 $No_Extra_Indent = 1;
2485 $Indent = "\t";
2488 'header=s' => sub {
2489 my $narg = $_[1];
2490 $ChangeLog_Header = &slurp_file ($narg);
2491 if (! defined ($ChangeLog_Header)) {
2492 $ChangeLog_Header = '';
2496 'global-opts|g=s' => sub {
2497 my $narg = $_[1];
2498 push @Global_Opts, $narg;
2499 splice @log_source_command, 1, 0, $narg;
2502 'log-opts|l=s' => sub {
2503 my $narg = $_[1];
2504 push @Local_Opts, $narg;
2505 push @log_source_command, $narg;
2508 'mailname=s' => sub {
2509 my $narg = $_[1];
2510 warn "--mailname is deprecated; please use --domain instead\n";
2511 $Domain = $narg;
2514 'separate-header|S' => sub {
2515 $After_Header = "\n\n";
2516 $No_Extra_Indent = 1;
2519 'group-within-date' => sub {
2520 $GroupWithinDate = 1;
2521 $Show_Times = 0;
2524 'hide-filenames' => sub {
2525 $Hide_Filenames = 1;
2526 $After_Header = '';
2529 or die "options parsing failed\n";
2531 push @log_source_command, map "'$_'", @ARGV;
2533 ## Check for contradictions...
2535 if ($Output_To_Stdout && CVS::Utils::ChangeLog::FileEntry->distributed) {
2536 print STDERR "cannot pass both --stdout and --distributed\n";
2537 $exit_with_admonishment = 1;
2540 if ($Output_To_Stdout && $output_file) {
2541 print STDERR "cannot pass both --stdout and --file\n";
2542 $exit_with_admonishment = 1;
2545 if ($Input_From_Stdin && @Global_Opts) {
2546 print STDERR "cannot pass both --stdin and -g\n";
2547 $exit_with_admonishment = 1;
2550 if ($Input_From_Stdin && @Local_Opts) {
2551 print STDERR "cannot pass both --stdin and -l\n";
2552 $exit_with_admonishment = 1;
2555 if ($XML_Output && $Cumulative) {
2556 print STDERR "cannot pass both --xml and --accum\n";
2557 $exit_with_admonishment = 1;
2560 # Other consistency checks and option-driven logic
2562 # Bleargh. Compensate for a deficiency of custom wrapping.
2563 if ( ($After_Header ne " ") and $FSF_Style ) {
2564 $After_Header .= "\t";
2567 @Ignore_Files = map lc, @Ignore_Files
2568 if $Case_Insensitive;
2570 # Or if any other error message has already been printed out, we
2571 # just leave now:
2572 if ($exit_with_admonishment) {
2573 &usage ();
2574 exit (1);
2576 elsif ($Print_Usage) {
2577 &usage ();
2578 exit (0);
2580 elsif ($Print_Version) {
2581 &version ();
2582 exit (0);
2585 ## Else no problems, so proceed.
2587 if ($output_file) {
2588 $Log_File_Name = $output_file;
2591 return \@log_source_command;
2594 # -------------------------------------
2596 sub slurp_file {
2597 my $filename = shift || die ("no filename passed to slurp_file()");
2598 my $retstr;
2600 open (SLURPEE, "<${filename}") or die ("unable to open $filename ($!)");
2601 my $saved_sep = $/;
2602 undef $/;
2603 $retstr = <SLURPEE>;
2604 $/ = $saved_sep;
2605 close (SLURPEE);
2606 return $retstr;
2609 # -------------------------------------
2611 sub debug {
2612 if ($Debug) {
2613 my $msg = shift;
2614 print STDERR $msg;
2618 # -------------------------------------
2620 sub version {
2621 print "cvs2cl.pl version ${VERSION}; distributed under the GNU GPL.\n";
2624 # -------------------------------------
2626 sub usage {
2627 &version ();
2629 eval "use Pod::Usage qw( pod2usage )";
2631 if ( $@ ) {
2632 print <<'END';
2634 * Pod::Usage was not found. The formatting may be suboptimal. Consider
2635 upgrading your Perl --- Pod::Usage is standard from 5.6 onwards, and
2636 versions of perl prior to 5.6 are getting rather rusty, now. Alternatively,
2637 install Pod::Usage direct from CPAN.
2640 local $/ = undef;
2641 my $message = <DATA>;
2642 $message =~ s/^=(head1|item) //gm;
2643 $message =~ s/^=(over|back).*\n//gm;
2644 $message =~ s/\n{3,}/\n\n/g;
2645 print $message;
2646 } else {
2647 print "\n";
2648 pod2usage( -exitval => 'NOEXIT',
2649 -verbose => 1,
2650 -output => \*STDOUT,
2654 return;
2657 # Main -----------------------------------------------------------------------
2659 my $log_source_command = parse_options;
2660 if ( defined $TestCode ) {
2661 eval $TestCode;
2662 die "Eval failed: '$@'\n"
2663 if $@;
2664 } else {
2665 derive_changelog($log_source_command);
2668 __DATA__
2670 =head1 NAME
2672 cvs2cl.pl - convert cvs log messages to changelogs
2674 =head1 SYNOPSIS
2676 B<cvs2cl> [I<options>] [I<FILE1> [I<FILE2> ...]]
2678 =head1 DESCRIPTION
2680 cvs2cl produces a GNU-style ChangeLog for CVS-controlled sources by
2681 running "cvs log" and parsing the output. Duplicate log messages get
2682 unified in the Right Way.
2684 The default output of cvs2cl is designed to be compact, formally unambiguous,
2685 but still easy for humans to read. It should be largely self-explanatory; the
2686 one abbreviation that might not be obvious is "utags". That stands for
2687 "universal tags" -- a universal tag is one held by all the files in a given
2688 change entry.
2690 If you need output that's easy for a program to parse, use the B<--xml> option.
2691 Note that with XML output, just about all available information is included
2692 with each change entry, whether you asked for it or not, on the theory that
2693 your parser can ignore anything it's not looking for.
2695 If filenames are given as arguments cvs2cl only shows log information for the
2696 named files.
2698 =head1 OPTIONS
2700 =over 4
2702 =item B<-h>, B<-help>, B<--help>, B<-?>
2704 Show a short help and exit.
2706 =item B<--version>
2708 Show version and exit.
2710 =item B<-r>, B<--revisions>
2712 Show revision numbers in output.
2714 =item B<-b>, B<--branches>
2716 Show branch names in revisions when possible.
2718 =item B<-t>, B<--tags>
2720 Show tags (symbolic names) in output.
2722 =item B<-T>, B<--tagdates>
2724 Show tags in output on their first occurance.
2726 =item B<--show-dead>
2728 Show dead files.
2730 =item B<--stdin>
2732 Read from stdin, don't run cvs log.
2734 =item B<--stdout>
2736 Output to stdout not to ChangeLog.
2738 =item B<-d>, B<--distributed>
2740 Put ChangeLogs in subdirs.
2742 =item B<-f> I<FILE>, B<--file> I<FILE>
2744 Write to I<FILE> instead of ChangeLog.
2746 =item B<--fsf>
2748 Use this if log data is in FSF ChangeLog style.
2750 =item B<--FSF>
2752 Attempt strict FSF-standard compatible output.
2754 =item B<-W> I<SECS>, B<--window> I<SECS>
2756 Window of time within which log entries unify.
2758 =item -B<U> I<UFILE>, B<--usermap> I<UFILE>
2760 Expand usernames to email addresses from I<UFILE>.
2762 =item B<--passwd> I<PASSWORDFILE>
2764 Use system passwd file for user name expansion. If no mail domain is provided
2765 (via B<--domain>), it tries to read one from B</etc/mailname>, output of B<hostname
2766 -d>, B<dnsdomainname>, or B<domain-name>. cvs2cl exits with an error if none of
2767 those options is successful. Use a domain of '' to prevent the addition of a
2768 mail domain.
2770 =item B<--domain> I<DOMAIN>
2772 Domain to build email addresses from.
2774 =item B<--gecos>
2776 Get user information from GECOS data.
2778 =item B<-R> I<REGEXP>, B<--regexp> I<REGEXP>
2780 Include only entries that match I<REGEXP>. This option may be used multiple
2781 times.
2783 =item B<-I> I<REGEXP>, B<--ignore> I<REGEXP>
2785 Ignore files whose names match I<REGEXP>. This option may be used multiple
2786 times.
2788 =item B<-C>, B<--case-insensitive>
2790 Any regexp matching is done case-insensitively.
2792 =item B<-F> I<BRANCH>, B<--follow> I<BRANCH>
2794 Show only revisions on or ancestral to I<BRANCH>.
2796 =item B<--no-ancestors>
2798 When using B<-F>, only track changes since the I<BRANCH> started.
2800 =item B<--no-hide-branch-additions>
2802 By default, entries generated by cvs for a file added on a branch (a dead 1.1
2803 entry) are not shown. This flag reverses that action.
2805 =item B<-S>, B<--separate-header>
2807 Blank line between each header and log message.
2809 =item B<--summary>
2811 Add CVS change summary information.
2813 =item B<--no-wrap>
2815 Don't auto-wrap log message (recommend B<-S> also).
2817 =item B<--no-indent>
2819 Don't indent log message
2821 =item B<--gmt>, B<--utc>
2823 Show times in GMT/UTC instead of local time.
2825 =item B<--accum>
2827 Add to an existing ChangeLog (incompatible with B<--xml>).
2829 =item B<-w>, B<--day-of-week>
2831 Show day of week.
2833 =item B<--no-times>
2835 Don't show times in output.
2837 =item B<--chrono>
2839 Output log in chronological order (default is reverse chronological order).
2841 =item B<--header> I<FILE>
2843 Get ChangeLog header from I<FILE> ("B<->" means stdin).
2845 =item B<--xml>
2847 Output XML instead of ChangeLog format.
2849 =item B<--xml-encoding> I<ENCODING.>
2851 Insert encoding clause in XML header.
2853 =item B<--noxmlns>
2855 Don't include xmlns= attribute in root element.
2857 =item B<--hide-filenames>
2859 Don't show filenames (ignored for XML output).
2861 =item B<--no-common-dir>
2863 Don't shorten directory names from filenames.
2865 =item B<--rcs> I<CVSROOT>
2867 Handle filenames from raw RCS, for instance those produced by "cvs rlog"
2868 output, stripping the prefix I<CVSROOT>.
2870 =item B<-P>, B<--prune>
2872 Don't show empty log messages.
2874 =item B<--ignore-tag> I<TAG>
2876 Ignore individual changes that are associated with a given tag.
2877 May be repeated, if so, changes that are associated with any of
2878 the given tags are ignored.
2880 =item B<--show-tag> I<TAG>
2882 Log only individual changes that are associated with a given
2883 tag. May be repeated, if so, changes that are associated with
2884 any of the given tags are logged.
2886 =item B<--delta> I<FROM_TAG>B<:>I<TO_TAG>
2888 Attempt a delta between two tags (since I<FROM_TAG> up to and
2889 including I<TO_TAG>). The algorithm is a simple date-based one
2890 (this is a hard problem) so results are imperfect.
2892 =item B<-g> I<OPTS>, B<--global-opts> I<OPTS>
2894 Pass I<OPTS> to cvs like in "cvs I<OPTS> log ...".
2896 =item B<-l> I<OPTS>, B<--log-opts> I<OPTS>
2898 Pass I<OPTS> to cvs log like in "cvs ... log I<OPTS>".
2900 =back
2902 Notes about the options and arguments:
2904 =over 4
2906 =item *
2908 The B<-I> and B<-F> options may appear multiple times.
2910 =item *
2912 To follow trunk revisions, use "B<-F trunk>" ("B<-F TRUNK>" also works). This is
2913 okay because no would ever, ever be crazy enough to name a branch "trunk",
2914 right? Right.
2916 =item *
2918 For the B<-U> option, the I<UFILE> should be formatted like CVSROOT/users. That is,
2919 each line of I<UFILE> looks like this:
2921 jrandom:jrandom@red-bean.com
2923 or maybe even like this
2925 jrandom:'Jesse Q. Random <jrandom@red-bean.com>'
2927 Don't forget to quote the portion after the colon if necessary.
2929 =item *
2931 Many people want to filter by date. To do so, invoke cvs2cl.pl like this:
2933 cvs2cl.pl -l "-d'DATESPEC'"
2935 where DATESPEC is any date specification valid for "cvs log -d". (Note that
2936 CVS 1.10.7 and below requires there be no space between -d and its argument).
2938 =item *
2940 Dates/times are interpreted in the local time zone.
2942 =item *
2944 Remember to quote the argument to `B<-l>' so that your shell doesn't interpret
2945 spaces as argument separators.
2947 =item *
2949 See the 'Common Options' section of the cvs manual ('info cvs' on UNIX-like
2950 systems) for more information.
2952 =item *
2954 Note that the rules for quoting under windows shells are different.
2956 =back
2958 =head1 EXAMPLES
2960 Some examples (working on UNIX shells):
2962 # logs after 6th March, 2003 (inclusive)
2963 cvs2cl.pl -l "-d'>2003-03-06'"
2964 # logs after 4:34PM 6th March, 2003 (inclusive)
2965 cvs2cl.pl -l "-d'>2003-03-06 16:34'"
2966 # logs between 4:46PM 6th March, 2003 (exclusive) and
2967 # 4:34PM 6th March, 2003 (inclusive)
2968 cvs2cl.pl -l "-d'2003-03-06 16:46>2003-03-06 16:34'"
2970 Some examples (on non-UNIX shells):
2972 # Reported to work on windows xp/2000
2973 cvs2cl.pl -l "-d"">2003-10-18;today<"""
2975 =head1 AUTHORS
2977 =over 4
2979 =item Karl Fogel
2981 =item Melissa O'Neal
2983 =item Martyn J. Pearce
2985 =back
2987 Contributions from
2989 =over 4
2991 =item Mike Ayers
2993 =item Tim Bradshaw
2995 =item Richard Broberg
2997 =item Nathan Bryant
2999 =item Oswald Buddenhagen
3001 =item Arthur de Jong
3003 =item Mark W. Eichin
3005 =item Dave Elcock
3007 =item Reid Ellis
3009 =item Simon Josefsson
3011 =item Robin Hugh Johnson
3013 =item Terry Kane
3015 =item Akos Kiss
3017 =item Claus Klein
3019 =item Eddie Kohler
3021 =item Richard Laager
3023 =item Kevin Lilly
3025 =item Karl-Heinz Marbaise
3027 =item Mitsuaki Masuhara
3029 =item Henrik Nordstrom
3031 =item Joe Orton
3033 =item Peter Palfrader
3035 =item Thomas Parmelan
3037 =item Johanne Stezenbach
3039 =item Joseph Walton
3041 =item Ernie Zapata
3043 =back
3045 =head1 BUGS
3047 Please report bugs to C<bug-cvs2cl@red-bean.com>.
3049 =head1 PREREQUISITES
3051 This script requires C<Text::Wrap>, C<Time::Local>, and C<File::Basename>. It
3052 also seems to require C<Perl 5.004_04> or higher.
3054 =head1 OPERATING SYSTEM COMPATIBILITY
3056 Should work on any OS.
3058 =head1 SCRIPT CATEGORIES
3060 Version_Control/CVS
3062 =head1 COPYRIGHT
3064 (C) 2001,2002,2003,2004 Martyn J. Pearce <fluffy@cpan.org>, under the GNU GPL.
3066 (C) 1999 Karl Fogel <kfogel@red-bean.com>, under the GNU GPL.
3068 cvs2cl.pl is free software; you can redistribute it and/or modify
3069 it under the terms of the GNU General Public License as published by
3070 the Free Software Foundation; either version 2, or (at your option)
3071 any later version.
3073 cvs2cl.pl is distributed in the hope that it will be useful,
3074 but WITHOUT ANY WARRANTY; without even the implied warranty of
3075 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3076 GNU General Public License for more details.
3078 You may have received a copy of the GNU General Public License
3079 along with cvs2cl.pl; see the file COPYING. If not, write to the
3080 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
3081 Boston, MA 02111-1307, USA.
3083 =head1 SEE ALSO
3085 cvs(1)