tools/dev/contribulyze.py

   1 #!/usr/bin/env python
   2
   3 # See usage() for details, or run with --help option.
   4 #
   5 #          .-------------------------------------------------.
   6 #          |  "An ad hoc format deserves an ad hoc parser."  |
   7 #          `-------------------------------------------------'
   8 #
   9 # Some Subversion project log messages include parseable data to help
  10 # track who's contributing what.  The exact syntax is described in
  11 # hacking.html#crediting, but here's an example, indented by three
  12 # spaces, i.e., the "Patch by:" starts at the beginning of a line:
  13 #
  14 #    Patch by: David Anderson <david.anderson@calixo.net>
  15 #              <justin@erenkrantz.com>
  16 #              me
  17 #    (I wrote the regression tests.)
  18 #    Found by: Phineas T. Phinder <phtph@ph1nderz.com>
  19 #    Suggested by: Snosbig Q. Ptermione <sqptermione@example.com>
  20 #    Review by: Justin Erenkrantz <justin@erenkrantz.com>
  21 #               rooneg
  22 #    (They caught an off-by-one error in the main loop.)
  23 #
  24 # This is a pathological example, but it shows all the things we might
  25 # need to parse.  We need to:
  26 #
  27 #   - Detect the officially-approved "WORD by: " fields.
  28 #   - Grab every name (one per line) in each field.
  29 #   - Handle names in various formats, unifying where possible.
  30 #   - Expand "me" to the committer name for this revision.
  31 #   - Associate a parenthetical aside following a field with that field.
  32 #
  33 # NOTES: You might be wondering, why not take 'svn log --xml' input?
  34 # Well, that would be the Right Thing to do, but in practice this was
  35 # a lot easier to whip up for straight 'svn log' output.  I'd have no
  36 # objection to it being rewritten to take XML input.
  37
  38 import os
  39 import sys
  40 import re
  41 import getopt
  42 try:
  43   my_getopt = getopt.gnu_getopt
  44 except AttributeError:
  45   my_getopt = getopt.getopt
  46 from urllib import quote as url_encode
  47
# Pretend we have true booleans on older python versions: if the name
# True cannot be looked up, define True and False as the integers 1
# and 0.
# NOTE(review): the bare "except:" presumably only needs to catch
# NameError here -- confirm before narrowing it.
try:
  True
except:
  True = 1
  False = 0
  54
  55 # Warnings and errors start with these strings.  They are typically
  56 # followed by a colon and a space, as in "%s: " ==> "WARNING: ".
  57 warning_prefix = 'WARNING'
  58 error_prefix = 'ERROR'
  59
  60 def complain(msg, fatal=False):
  61   """Print MSG as a warning, or if FATAL is true, print it as an error
  62   and exit."""
  63   prefix = 'WARNING: '
  64   if fatal:
  65     prefix = 'ERROR: '
  66   sys.stderr.write(prefix + msg + '\n')
  67   if fatal:
  68     sys.exit(1)
  69
  70
  71 def html_spam_guard(addr, entities_only=False):
  72   """Return a spam-protected version of email ADDR that renders the
  73   same in HTML as the original address.  If ENTITIES_ONLY, use a less
  74   thorough mangling scheme involving entities only, avoiding the use
  75   of tags."""
  76   if entities_only:
  77     def mangle(x):
  78       return "&#%d;" % ord (x)
  79   else:
  80     def mangle(x):
  81       return "<span>&#%d;</span>" % ord(x)
  82   return "".join(map(mangle, addr))
  83
  84
  85 def escape_html(str):
  86   """Return an HTML-escaped version of STR."""
  87   return str.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
  88
  89
  90 _spam_guard_in_html_block_re = re.compile(r'&lt;([^&]*@[^&]*)&gt;')
  91 def _spam_guard_in_html_block_func(m):
  92   return "&lt;%s&gt;" % html_spam_guard(m.group(1))
  93 def spam_guard_in_html_block(str):
  94   """Take a block of HTML data, and run html_spam_guard() on parts of it."""
  95   return _spam_guard_in_html_block_re.subn(_spam_guard_in_html_block_func,
  96                                            str)[0]
  97
  98 def html_header(title, page_heading=None):
  99   """Write HTML file header.  TITLE and PAGE_HEADING parameters are
 100   expected to already by HTML-escaped if needed."""
 101   if not page_heading:
 102     page_heading = title
 103   s  = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"\n'
 104   s += ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n'
 105   s += '<html><head>\n'
 106   s += '<meta http-equiv="Content-Type"'
 107   s += ' content="text/html; charset=UTF-8" />\n'
 108   s += '<title>%s</title>\n' % title
 109   s += '</head>\n\n'
 110   s += '<body style="text-color: black; background-color: white">\n\n'
 111   s += '<h1 style="text-align: center">%s</h1>\n\n' % page_heading
 112   s += '<hr />\n\n'
 113   return s
 114
 115
 116 def html_footer():
 117   return '\n</body>\n</html>\n'
 118
 119
 120 class Contributor:
 121   # Map contributor names to contributor instances, so that there
 122   # exists exactly one instance associated with a given name.
 123   # Fold names with email addresses.  That is, if we see someone
 124   # listed first with just an email address, but later with a real
 125   # name and that same email address together, we create only one
 126   # instance, and store it under both the email and the real name.
 127   all_contributors = { }
 128
 129   # See __hash__() for why this is necessary.
 130   hash_value = 1
 131
 132   def __init__(self, username, real_name, email):
 133     """Instantiate a contributor.  Don't use this to generate a
 134     Contributor for an external caller, though, use .get() instead."""
 135     self.real_name = real_name
 136     self.username  = username
 137     self.email     = email
 138     self.is_committer = False       # Assume not until hear otherwise.
 139     self.is_full_committer = False  # Assume not until hear otherwise.
 140     # Map verbs (e.g., "Patch", "Suggested", "Review") to lists of
 141     # LogMessage objects.  For example, the log messages stored under
 142     # "Patch" represent all the revisions for which this contributor
 143     # contributed a patch.
 144     self.activities = { }
 145     # Sigh.
 146     self.unique_hash_value = Contributor.hash_value
 147     Contributor.hash_value += 1
 148
 149   def add_activity(self, field_name, log):
 150     """Record that this contributor was active in FIELD_NAME in LOG."""
 151     logs = self.activities.get(field_name)
 152     if not logs:
 153       logs = [ ]
 154       self.activities[field_name] = logs
 155     if not log in logs:
 156       logs.append(log)
 157
 158   def get(username, real_name, email):
 159     """If this contributor is already registered, just return it;
 160     otherwise, register it then return it.  Hint: use parse() to
 161     generate the arguments."""
 162     c = None
 163     for key in username, real_name, email:
 164       if key and Contributor.all_contributors.has_key(key):
 165         c = Contributor.all_contributors[key]
 166         break
 167     # If we didn't get a Contributor, create one now.
 168     if not c:
 169       c = Contributor(username, real_name, email)
 170     # If we know identifying information that the Contributor lacks,
 171     # then give it to the Contributor now.
 172     if username:
 173       if not c.username:
 174         c.username = username
 175       Contributor.all_contributors[username]  = c
 176     if real_name:
 177       if not c.real_name:
 178         c.real_name = real_name
 179       Contributor.all_contributors[real_name] = c
 180     if email:
 181       if not c.email:
 182         c.email = email
 183       Contributor.all_contributors[email]     = c
 184     # This Contributor has never been in better shape; return it.
 185     return c
 186   get = staticmethod(get)
 187
 188   def score(self):
 189     """Return a contribution score for this contributor."""
 190     # Right now we count a patch as 2, anything else as 1.
 191     score = 0
 192     for activity in self.activities.keys():
 193       if activity == 'Patch':
 194         score += len(self.activities[activity]) * 2
 195       else:
 196         score += len(self.activities[activity])
 197     return score
 198
 199   def score_str(self):
 200     """Return a contribution score HTML string for this contributor."""
 201     patch_score = 0
 202     other_score = 0
 203     for activity in self.activities.keys():
 204       if activity == 'Patch':
 205         patch_score += len(self.activities[activity])
 206       else:
 207         other_score += len(self.activities[activity])
 208     if patch_score == 0:
 209       patch_str = ""
 210     elif patch_score == 1:
 211       patch_str = "1&nbsp;patch"
 212     else:
 213       patch_str = "%d&nbsp;patches" % patch_score
 214     if other_score == 0:
 215       other_str = ""
 216     elif other_score == 1:
 217       other_str = "1&nbsp;non-patch"
 218     else:
 219       other_str = "%d&nbsp;non-patches" % other_score
 220     if patch_str:
 221       if other_str:
 222         return ",&nbsp;".join((patch_str, other_str))
 223       else:
 224         return patch_str
 225     else:
 226       return other_str
 227
 228   def __cmp__(self, other):
 229     if self.is_full_committer and not other.is_full_committer:
 230       return 1
 231     if other.is_full_committer and not self.is_full_committer:
 232       return -1
 233     result = cmp(self.score(), other.score())
 234     if result == 0:
 235       return cmp(self.big_name(), other.big_name())
 236     else:
 237       return 0 - result
 238
 239   def __hash__(self):
 240     """See LogMessage.__hash__() for why this exists."""
 241     return self.hash_value
 242
 243   def parse(name):
 244     """Parse NAME, which can be
 245
 246        - A committer username, or
 247        - A space-separated real name, or
 248        - A space-separated real name followed by an email address in
 249            angle brackets, or
 250        - Just an email address in angle brackets.
 251
 252      (The email address may have '@' disguised as '{_AT_}'.)
 253
 254      Return a tuple of (committer_username, real_name, email_address)
 255      any of which can be None if not available in NAME."""
 256     username  = None
 257     real_name = None
 258     email     = None
 259     name_components = name.split()
 260     if len(name_components) == 1:
 261       name = name_components[0] # Effectively, name = name.strip()
 262       if name[0] == '<' and name[-1] == '>':
 263         email = name[1:-1]
 264       elif name.find('@') != -1 or name.find('{_AT_}') != -1:
 265         email = name
 266       else:
 267         username = name
 268     elif name_components[-1][0] == '<' and name_components[-1][-1] == '>':
 269       real_name = ' '.join(name_components[0:-1])
 270       email = name_components[-1][1:-1]
 271     else:
 272       real_name = ' '.join(name_components)
 273
 274     if email is not None:
 275       # We unobfuscate here and work with the '@' internally, since
 276       # we'll obfuscate it again (differently) before writing it out.
 277       email = email.replace('{_AT_}', '@')
 278
 279     return username, real_name, email
 280   parse = staticmethod(parse)
 281
 282   def canonical_name(self):
 283     """Return a canonical name for this contributor.  The canonical
 284     name may or may not be based on the contributor's actual email
 285     address.
 286
 287     The canonical name will not contain filename-unsafe characters.
 288
 289     This method is guaranteed to return the same canonical name every
 290     time only if no further contributions are recorded from this
 291     contributor after the first call.  This is because a contribution
 292     may bring a new form of the contributor's name, one which affects
 293     the algorithm used to construct canonical names."""
 294     retval = None
 295     if self.username:
 296       retval = self.username
 297     elif self.email:
 298       # Take some rudimentary steps to shorten the email address, to
 299       # make it more manageable.  If this is ever discovered to result
 300       # in collisions, we can always just use to the full address.
 301       try:
 302         at_posn = self.email.index('@')
 303         first_dot_after_at = self.email.index('.', at_posn)
 304         retval = self.email[0:first_dot_after_at]
 305       except ValueError:
 306         retval = self.email
 307     elif self.real_name:
 308       # Last resort: construct canonical name based on real name.
 309       retval = ''.join(self.real_name.lower().split(' '))
 310     if retval is None:
 311       complain('Unable to construct a canonical name for Contributor.', True)
 312     return url_encode(retval, safe="!#$&'()+,;<=>@[]^`{}~")
 313
 314   def big_name(self, html=False, html_eo=False):
 315     """Return as complete a name as possible for this contributor.
 316     If HTML, then call html_spam_guard() on email addresses.
 317     If HTML_EO, then do the same, but specifying entities_only mode."""
 318     html = html or html_eo
 319     name_bits = []
 320     if self.real_name:
 321       if html:
 322         name_bits.append(escape_html(self.real_name))
 323       else:
 324         name_bits.append(self.real_name)
 325     if self.email:
 326       if not self.real_name and not self.username:
 327         name_bits.append(self.email)
 328       elif html:
 329         name_bits.append("&lt;%s&gt;" % html_spam_guard(self.email, html_eo))
 330       else:
 331         name_bits.append("<%s>" % self.email)
 332     if self.username:
 333       if not self.real_name and not self.email:
 334         name_bits.append(self.username)
 335       else:
 336         name_bits.append("(%s)" % self.username)
 337     return " ".join(name_bits)
 338
 339   def __str__(self):
 340     s = 'CONTRIBUTOR: '
 341     s += self.big_name()
 342     s += "\ncanonical name: '%s'" % self.canonical_name()
 343     if len(self.activities) > 0:
 344       s += '\n   '
 345     for activity in self.activities.keys():
 346       val = self.activities[activity]
 347       s += '[%s:' % activity
 348       for log in val:
 349         s += ' %s' % log.revision
 350       s += ']'
 351     return s
 352
 353   def html_out(self, revision_url_pattern, filename):
 354     """Create an HTML file named FILENAME, showing all the revisions in which
 355     this contributor was active."""
 356     out = open(filename, 'w')
 357     out.write(html_header(self.big_name(html_eo=True),
 358                           self.big_name(html=True)))
 359     unique_logs = { }
 360
 361     sorted_activities = self.activities.keys()
 362     sorted_activities.sort()
 363
 364     out.write('<div class="h2" id="activities" title="activities">\n\n')
 365     out.write('<table border="1">\n')
 366     out.write('<tr>\n')
 367     for activity in sorted_activities:
 368       out.write('<td>%s</td>\n\n' % activity)
 369     out.write('</tr>\n')
 370     out.write('<tr>\n')
 371     for activity in sorted_activities:
 372       out.write('<td>\n')
 373       first_activity = True
 374       for log in self.activities[activity]:
 375         s = ',\n'
 376         if first_activity:
 377           s = ''
 378           first_activity = False
 379         out.write('%s<a href="#%s">%s</a>' % (s, log.revision, log.revision))
 380         unique_logs[log] = True
 381       out.write('</td>\n')
 382     out.write('</tr>\n')
 383     out.write('</table>\n\n')
 384     out.write('</div>\n\n')
 385
 386     sorted_logs = unique_logs.keys()
 387     sorted_logs.sort()
 388     for log in sorted_logs:
 389       out.write('<hr />\n')
 390       out.write('<div class="h3" id="%s" title="%s">\n' % (log.revision,
 391                                                            log.revision))
 392       out.write('<pre>\n')
 393       if revision_url_pattern:
 394         revision_url = revision_url_pattern % log.revision[1:]
 395         revision = '<a href="%s">%s</a>' \
 396             % (escape_html(revision_url), log.revision)
 397       else:
 398         revision = log.revision
 399       out.write('<b>%s | %s | %s</b>\n\n' % (revision,
 400                                              escape_html(log.committer),
 401                                              escape_html(log.date)))
 402       out.write(spam_guard_in_html_block(escape_html(log.message)))
 403       out.write('</pre>\n')
 404       out.write('</div>\n\n')
 405     out.write('<hr />\n')
 406
 407     out.write(html_footer())
 408     out.close()
 409
 410
 411 class Field:
 412   """One field in one log message."""
 413   def __init__(self, name, alias = None):
 414     # The name of this field (e.g., "Patch", "Review", etc).
 415     self.name = name
 416     # An alias for the name of this field (e.g., "Reviewed").
 417     self.alias = alias
 418     # A list of contributor objects, in the order in which they were
 419     # encountered in the field.
 420     self.contributors = [ ]
 421     # Any parenthesized asides immediately following the field.  The
 422     # parentheses and trailing newline are left on.  In theory, this
 423     # supports concatenation of consecutive asides.  In practice, the
 424     # parser only detects the first one anyway, because additional
 425     # ones are very uncommon and furthermore by that point one should
 426     # probably be looking at the full log message.
 427     self.addendum = ''
 428   def add_contributor(self, contributor):
 429     self.contributors.append(contributor)
 430   def add_endum(self, addendum):
 431     self.addendum += addendum
 432   def __str__(self):
 433     s = 'FIELD: %s (%d contributors)\n' % (self.name, len(self.contributors))
 434     for contributor in self.contributors:
 435       s += str(contributor) + '\n'
 436     s += self.addendum
 437     return s
 438
 439
 440 class LogMessage:
 441   # Maps revision strings (e.g., "r12345") onto LogMessage instances,
 442   # holding all the LogMessage instances ever created.
 443   all_logs = { }
 444   # Keep track of youngest rev.
 445   max_revnum = 0
 446   def __init__(self, revision, committer, date):
 447     """Instantiate a log message.  All arguments are strings,
 448     including REVISION, which should retain its leading 'r'."""
 449     self.revision = revision
 450     self.committer = committer
 451     self.date = date
 452     self.message = ''
 453     # Map field names (e.g., "Patch", "Review", "Suggested") onto
 454     # Field objects.
 455     self.fields = { }
 456     if LogMessage.all_logs.has_key(revision):
 457       complain("Revision '%s' seen more than once" % revision, True)
 458     LogMessage.all_logs[revision] = self
 459     rev_as_number = int(revision[1:])
 460     if rev_as_number > LogMessage.max_revnum:
 461        LogMessage.max_revnum = rev_as_number
 462   def add_field(self, field):
 463     self.fields[field.name] = field
 464   def accum(self, line):
 465     """Accumulate one more line of raw message."""
 466     self.message += line
 467
 468   def __cmp__(self, other):
 469     """Compare two log messages by revision number, for sort().
 470     Return -1, 0 or 1 depending on whether a > b, a == b, or a < b.
 471     Note that this is reversed from normal sorting behavior, but it's
 472     what we want for reverse chronological ordering of revisions."""
 473     a = int(self.revision[1:])
 474     b = int(other.revision[1:])
 475     if a > b: return -1
 476     if a < b: return 1
 477     else:     return 0
 478
 479   def __hash__(self):
 480     """I don't really understand why defining __cmp__() but not
 481     __hash__() renders an object type unfit to be a dictionary key,
 482     especially in light of the recommendation that if a class defines
 483     mutable objects and implements __cmp__() or __eq__(), then it
 484     should not implement __hash__().  See these for details:
 485     http://mail.python.org/pipermail/python-dev/2004-February/042580.html
 486     http://mail.python.org/pipermail/python-bugs-list/2003-December/021314.html
 487
 488     In the meantime, I think it's safe to use the revision as a hash value."""
 489     return int(self.revision[1:])
 490
 491   def __str__(self):
 492     s = '=' * 15
 493     header = ' LOG: %s | %s ' % (self.revision, self.committer)
 494     s += header
 495     s += '=' * 15
 496     s += '\n'
 497     for field_name in self.fields.keys():
 498       s += str(self.fields[field_name]) + '\n'
 499     s += '-' * 15
 500     s += '-' * len(header)
 501     s += '-' * 15
 502     s += '\n'
 503     return s
 504
 505
 506 \f
 507 ### Code to parse the logs. ##
 508
 509 log_separator = '-' * 72 + '\n'
 510 log_header_re = re.compile\
 511                 ('^(r[0-9]+) \| ([^|]+) \| ([^|]+) \| ([0-9]+)[^0-9]')
 512 field_re = re.compile('^(Patch|Review(ed)?|Suggested|Found) by:\s*(.*)')
 513 field_aliases = { 'Reviewed' : 'Review' }
 514 parenthetical_aside_re = re.compile('^\s*\(.*\)\s*$')
 515
 516 def graze(input):
 517   just_saw_separator = False
 518
 519   while True:
 520     line = input.readline()
 521     if line == '': break
 522     if line == log_separator:
 523       if just_saw_separator:
 524         sys.stderr.write('Two separators in a row.\n')
 525         sys.exit(1)
 526       else:
 527         just_saw_separator = True
 528         num_lines = None
 529         continue
 530     else:
 531       if just_saw_separator:
 532         m = log_header_re.match(line)
 533         if not m:
 534           sys.stderr.write('Could not match log message header.\n')
 535           sys.stderr.write('Line was:\n')
 536           sys.stderr.write("'%s'\n" % line)
 537           sys.exit(1)
 538         else:
 539           log = LogMessage(m.group(1), m.group(2), m.group(3))
 540           num_lines = int(m.group(4))
 541           just_saw_separator = False
 542           line = input.readline()
 543           # Handle 'svn log -v' by waiting for the blank line.
 544           while line != '\n':
 545             line = input.readline()
 546           # Parse the log message.
 547           field = None
 548           while num_lines > 0:
 549             line = input.readline()
 550             log.accum(line)
 551             m = field_re.match(line)
 552             if m:
 553               # We're on the first line of a field.  Parse the field.
 554               while m:
 555                 if not field:
 556                   ident = m.group(1)
 557                   if field_aliases.has_key(ident):
 558                     field = Field(field_aliases[ident], ident)
 559                   else:
 560                     field = Field(ident)
 561                 # Each line begins either with "WORD by:", or with whitespace.
 562                 in_field_re = re.compile('^('
 563                                          + (field.alias or field.name)
 564                                          + ' by:\s+|\s+)([^\s(].*)')
 565                 m = in_field_re.match(line)
 566                 user, real, email = Contributor.parse(m.group(2))
 567                 if user == 'me':
 568                   user = log.committer
 569                 c = Contributor.get(user, real, email)
 570                 c.add_activity(field.name, log)
 571                 field.add_contributor(c)
 572                 line = input.readline()
 573                 log.accum(line)
 574                 num_lines -= 1
 575                 m = in_field_re.match(line)
 576                 if not m:
 577                   m = field_re.match(line)
 578                   if not m:
 579                     aside_match = parenthetical_aside_re.match(line)
 580                     if aside_match:
 581                       field.add_endum(line)
 582                   log.add_field(field)
 583                   field = None
 584             num_lines -= 1
 585         continue
 586
# Introductory HTML for the index page; drop() writes it to index.html
# right after the page header and before the list of contributors.
index_introduction = '''
<p>The following list of contributors and their contributions is meant
to help us keep track of whom to consider for commit access.  The list
was generated from "svn&nbsp;log" output by <a
href="http://svn.collab.net/repos/svn/trunk/tools/dev/contribulyze.py"
>contribulyze.py</a>, which looks for log messages that use the <a
href="http://subversion.tigris.org/hacking.html#crediting">special
contribution format</a>.</p>

<p><i>Please do not use this list as a generic guide to who has
contributed what to Subversion!</i> It omits existing <a
href="http://svn.collab.net/repos/svn/trunk/COMMITTERS"
>full committers</a>, for example, because they are irrelevant to our
search for new committers.  Also, it merely counts changes, it does
not evaluate them.  To truly understand what someone has contributed,
you have to read their changes in detail.  This page can only assist
human judgement, not substitute for it.</p>

'''
 606
 607 def drop(revision_url_pattern):
 608   # Output the data.
 609   #
 610   # The data structures are all linked up nicely to one another.  You
 611   # can get all the LogMessages, and each LogMessage contains all the
 612   # Contributors involved with that commit; likewise, each Contributor
 613   # points back to all the LogMessages it contributed to.
 614   #
 615   # However, the HTML output is pretty simple right now.  It's not take
 616   # full advantage of all that cross-linking.  For each contributor, we
 617   # just create a file listing all the revisions contributed to; and we
 618   # build a master index of all contributors, each name being a link to
 619   # that contributor's individual file.  Much more is possible... but
 620   # let's just get this up and running first.
 621
 622   for key in LogMessage.all_logs.keys():
 623     # You could print out all log messages this way, if you wanted to.
 624     pass
 625     # print LogMessage.all_logs[key]
 626
 627   detail_subdir = "detail"
 628   if not os.path.exists(detail_subdir):
 629     os.mkdir(detail_subdir)
 630
 631   index = open('index.html', 'w')
 632   index.write(html_header('Contributors as of r%d' % LogMessage.max_revnum))
 633   index.write(index_introduction)
 634   index.write('<ol>\n')
 635   # The same contributor appears under multiple keys, so uniquify.
 636   seen_contributors = { }
 637   # Sorting alphabetically is acceptable, but even better would be to
 638   # sort by number of contributions, so the most active people appear at
 639   # the top -- that way we know whom to look at first for commit access
 640   # proposals.
 641   sorted_contributors = Contributor.all_contributors.values()
 642   sorted_contributors.sort()
 643   for c in sorted_contributors:
 644     if not seen_contributors.has_key(c):
 645       if c.score() > 0:
 646         if c.is_full_committer:
 647           # Don't even bother to print out full committers.  They are
 648           # a distraction from the purposes for which we're here.
 649           continue
 650         else:
 651           committerness = ''
 652           if c.is_committer:
 653             committerness = '&nbsp;(partial&nbsp;committer)'
 654           urlpath = "%s/%s.html" % (detail_subdir, c.canonical_name())
 655           fname = os.path.join(detail_subdir, "%s.html" % c.canonical_name())
 656           index.write('<li><p><a href="%s">%s</a>&nbsp;[%s]%s</p></li>\n'
 657                       % (url_encode(urlpath),
 658                          c.big_name(html=True),
 659                          c.score_str(), committerness))
 660           c.html_out(revision_url_pattern, fname)
 661     seen_contributors[c] = True
 662   index.write('</ol>\n')
 663   index.write(html_footer())
 664   index.close()
 665
 666
 667 def process_committers(committers):
 668   """Read from open file handle COMMITTERS, which should be in
 669   the same format as the Subversion 'COMMITTERS' file.  Create
 670   Contributor objects based on the contents."""
 671   line = committers.readline()
 672   while line != 'Blanket commit access:\n':
 673     line = committers.readline()
 674   in_full_committers = True
 675   matcher = re.compile('(\S+)\s+([^\(\)]+)\s+(\([^()]+\)){0,1}')
 676   line = committers.readline()
 677   while line:
 678     # Every @-sign we see after this point indicates a committer line...
 679     if line == 'Commit access for specific areas:\n':
 680       in_full_committers = False
 681     # ...except in the "dormant committers" area, which comes last anyway.
 682     if line == 'Committers who have asked to be listed as dormant:\n':
 683       in_full_committers = True
 684     elif line.find('@') >= 0:
 685       line = line.strip()
 686       m = matcher.match(line)
 687       user = m.group(1)
 688       real_and_email = m.group(2).strip()
 689       ignored, real, email = Contributor.parse(real_and_email)
 690       c = Contributor.get(user, real, email)
 691       c.is_committer = True
 692       c.is_full_committer = in_full_committers
 693     line = committers.readline()
 694
 695
 696 def usage():
 697   print 'USAGE: %s [-C COMMITTERS_FILE] < SVN_LOG_OR_LOG-V_OUTPUT' \
 698         % os.path.basename(sys.argv[0])
 699   print ''
 700   print 'Create HTML files in the current directory, rooted at index.html,'
 701   print 'in which you can browse to see who contributed what.'
 702   print ''
 703   print 'The log input should use the contribution-tracking format defined'
 704   print 'in http://subversion.tigris.org/hacking.html#crediting.'
 705   print ''
 706   print 'Options:'
 707   print ''
 708   print '  -h, -H, -?, --help   Print this usage message and exit'
 709   print '  -C FILE              Use FILE as the COMMITTERS file'
 710   print '  -U URL               Use URL as a Python interpolation pattern to'
 711   print '                       generate URLs to link revisions to some kind'
 712   print '                       of web-based viewer (e.g. ViewCVS).  The'
 713   print '                       interpolation pattern should contain exactly'
 714   print '                       one format specifier, \'%s\', which will be'
 715   print '                       replaced with the revision number.'
 716   print ''
 717
 718
 719 def main():
 720   try:
 721     opts, args = my_getopt(sys.argv[1:], 'C:U:hH?', [ 'help' ])
 722   except getopt.GetoptError, e:
 723     complain(str(e) + '\n\n')
 724     usage()
 725     sys.exit(1)
 726
 727   # Parse options.
 728   revision_url_pattern = None
 729   for opt, value in opts:
 730     if opt in ('--help', '-h', '-H', '-?'):
 731       usage()
 732       sys.exit(0)
 733     elif opt == '-C':
 734       process_committers(open(value))
 735     elif opt == '-U':
 736       revision_url_pattern = value
 737
 738   # Gather the data.
 739   graze(sys.stdin)
 740
 741   # Output the data.
 742   drop(revision_url_pattern)
 743
 744 if __name__ == '__main__':
 745   main()