#!/usr/bin/env python
# -*- coding: ascii -*-

# MPY SVN STATS - Subversion Repository Statistics Generator
# Copyright (C) 2004 Maciej Pietrzak
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

"""
mpy-svn-stats is a simple statistics generator (log analyser) for
Subversion repositories.

It aims to be easy to use while still providing some interesting information.
It is possible that the profile of the generated stats will encourage
friendly rivalry within the project.

Usage::

    mpy-svn-stats [-h] [-o dir] <url>

    -h --help       - print this help message
    -o --output-dir - set the output directory
    -i --input      - read the log from a file instead of calling svn; use - for stdin
    --svn-binary    - use a different svn client instead of ``svn``
    <url>           - repository url

Authors: Maciej Pietrzak, Joanna Chmiel, Marcin Mankiewicz
MPY SVN STATS is licensed under GPL. See http://www.gnu.org/copyleft/gpl.html
for more details.
Project homepage is http://mpy-svn-stats.berlios.de/
You can contact the authors by email at mpietrzak@users.berlios.de
"""
__docformat__ = 'restructuredtext'

import sys
import os
import time
import datetime
import getopt
import xml.dom
import locale
import math
from cgi import escape
from xml.dom.minidom import parseString

# conditional imports
try:
    import Image, ImageDraw, ImageFont
    _have_pil = True
except ImportError:
    _have_pil = False

# constants
week_seconds = 7 * 24 * 60 * 60
month_seconds = 30 * 24 * 60 * 60
year_seconds = 365.25 * 24 * 60 * 60
def main(argv):
    config = Config(argv)
    if config.is_not_good(): return config.usage()
    if config.want_help(): return config.show_help()

    stats = AllStatistics(config)
    stats.configure(config)

    print "getting data"
    xmldata = get_data(config)
    print "done"

    run_time_start = time.time()

    print "parsing data"
    revision_data = RevisionData(config.get_repository_url(), parseString(xmldata))
    print "done"

    print "calculating stats"
    stats.calculate(revision_data)
    print "done"

    run_time_end = time.time()

    print "writing data"
    stats.write(run_time=(run_time_end - run_time_start))
    print "done"

    print "Have %d stats objects, %d of them are wanted." % (
        stats.count_all(),
        stats.count_wanted())

def get_data(config):
    """Get the analysis source data.
    The data source definition is in the config variable.
    Data is obtained either by calling an external svn
    binary or by reading from standard input.
    TODO: use the Python bindings for Subversion (although
    that would increase neither the functionality nor the
    security of the script, so this is not critical).
    """
    if config.input_file:
        xml_data = get_data_from_file(config)
    else:
        xml_data = get_data_from_svn_binary(config)
    return xml_data

def get_data_from_file(config):
    """Read XML data (bytes) from a file."""
    fname = config.input_file
    if fname == '-':
        f = sys.stdin
    else:
        f = file(fname)
    return f.read()

def get_data_from_svn_binary(config):
    svn_binary = config.get_svn_binary()
    svn_repository = config.get_repository_url()
    assert(svn_binary)
    assert(svn_repository)
    command = '%s -v --xml log %s' % (svn_binary, svn_repository)
    print 'running command: "%s"' % command
    f = os.popen(command)
    return f.read()
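
# For reference, the command built above expands to something like
# (hypothetical URL):
#
#   svn -v --xml log http://svn.example.org/repos/myproject
#
# i.e. a verbose XML log of the whole repository, which is then parsed
# with xml.dom.minidom.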
def generate_stats(config, data):
    try:
        dom = parseString(data)
    except Exception, x:
        print "failed to parse:\n%s\n" % data
        raise x
    return Stats(config, dom)

def _create_output_dir(dir):
    """Create the output dir."""
    if not os.path.isdir(dir):
        os.mkdir(dir)

class Config:
    """This class contains all data about configuration, environment
    and parameters.
    Statistics may choose to tune their parameters or even disable
    themselves based on this information.
    """

    def __init__(self, argv):
        """Init based on argv from the command line."""
        self._argv = argv
        self._broken = False
        self._repository = None
        self._want_help = False
        self._error_message = None
        self._svn_binary = 'svn'
        self.input_file = None
        self._output_dir = 'mpy-svn-stats'

        self._enabled_stats = []
        self._disabled_stats = []

        self.have_pil = _have_pil
        if not self.have_pil:
            self._print_warning_about_pil()
        else:
            print "Will generate PIL graphs."

        try:
            optlist, args = getopt.getopt(
                argv[1:],
                'ho:i:e:',
                [
                    'help',
                    'output-dir=',
                    'input=',
                    'svn-binary=',
                    'enable=',
                ])
        except getopt.GetoptError, e:
            self._broken = True
            self._error_message = str(e)
            return None

        #print "optlist: %s" % str(optlist)
        #print "args: %s" % str(args)

        optdict = {}
        for k, v in optlist:
            optdict[k] = v

        if optdict.has_key('-h') or optdict.has_key('--help'):
            self._want_help = True
            return None
        if optdict.has_key('--with-diff-stats'):
            self._stats_to_generate.update('author_by_diff_size')

        for key, value in optlist:
            if key == '-o': self._output_dir = value
            elif key == '--output-dir': self._output_dir = value
            elif key == '--svn-binary': self._svn_binary = value
            elif key == '-i' or key == '--input': self.input_file = value

        if self.input_file is None:
            if len(args) != 1:
                self._broken = True
                self._repository = None
                return None

            self._repository = args[0]
        else:
            self._repository = None

        # by default we will generate stats from the beginning to now
        self.start_date = None
        self.end_date = time.time()
    def is_not_good(self):
        return self._broken

    def usage(self):
        if self._error_message is not None: print >>sys.stderr, 'Error: %s' % self._error_message
        print >>sys.stderr, 'Usage: %s [params] <repository-url>' % (self._argv[0])
        print >>sys.stderr, 'Use %s --help to get help.' % (self._argv[0])
        return -1

    def get_repository_url(self):
        return self._repository

    def get_svn_binary(self):
        if self.input_file:
            return None
        else:
            return self._svn_binary

    def get_output_dir(self):
        return self._output_dir

    def want_statistic(self, statistic_type):
        """Test whether a statistic of type statistic_type is wanted."""
        if self._generate_all: return True
        else: return statistic_type in self._stats_to_generate

    def want_help(self):
        return self._want_help

    def show_help(self):
        print __doc__
        return None

    def _print_warning_about_pil(self):
        """Print a warning."""
        print """Python Imaging Library could not be found - graphs are disabled."""
class Statistic:
    """Abstract base class for Stats' elements."""

    wanted_by_default = True
    requires_graphics = False

    def __init__(self, config, name, title):
        assert isinstance(name, basestring), ValueError("name must be a string, now: %s (%s)" % (
            repr(name),
            repr(type(name))))
        assert isinstance(title, basestring), ValueError("title must be a string")
        self._name = name
        self._title = title
        self._writers = {}
        self._wanted_output_modes = []

    def title(self):
        assert isinstance(self._title, basestring), 'Title of the statistic must be specified!'
        return self._title

    def name(self):
        assert isinstance(self._name, basestring), ValueError('Name must be a string')
        return self._name

    def is_wanted(self, mode=None):
        """Check if a particular output mode is wanted (either by default or
        explicitly requested).
        If mode is None, return True if there is at least one wanted output mode.
        """
        if mode is not None:
            return mode in self._wanted_output_modes
        else:
            return len(self._wanted_output_modes) > 0

    def _want_output_mode(self, name, setting=True):
        if setting:
            if name not in self._wanted_output_modes:
                self._wanted_output_modes.append(name)
        else:
            if name in self._wanted_output_modes:
                self._wanted_output_modes.remove(name)

    def _set_writer(self, mode, writer):
        """Set the writer object for mode."""
        assert isinstance(mode, str), ValueError("Mode must be a short string (identifier)")
        assert isinstance(writer, StatisticWriter), ValueError("Writer must be a Writer instance")
        self._writers[mode] = writer

    def configure(self, config):
        self._configure_writers(config)
        if self.requires_graphics and not config.have_pil:
            print "%s requires graphics - disabling." % str(self)
            self._want_output_mode('html', False)

    def _configure_writers(self, config):
        for writer in self._writers.values():
            writer.configure(config)

    def write(self, run_time):
        """Write out stats using all wanted modes."""
        for mode in self._wanted_output_modes:
            writer = self._writers[mode]
            writer.write(run_time=run_time)

    def output(self, mode):
        writer = self._writers[mode]
        return writer.output()

    def __str__(self):
        """Return a human-readable representation."""
        return "Statistic(title='%(title)s', name='%(name)s')" % {
            'title': self.title(),
            'name': self.name()
        }
class TableStatistic(Statistic):
    """A statistic that is presented as a table."""

    def __init__(self, config, name, title):
        Statistic.__init__(self, config, name, title)

        # we want to be printed with TableHTMLWriter by default
        self._set_writer('html', TableHTMLWriter(self))
        self._want_output_mode('html')

    def rows(self):
        return self._data

    def show_numbers(self):
        return True

    def show_th(self):
        return True

class GeneralStatistics(Statistic):
    """General (opening) statistics (like first commit, last commit, total commit count etc).
    Output as simple text.
    """

    def __init__(self, config):
        """Initialise."""
        Statistic.__init__(self, config, "general_statistics", "General statistics")
        self._set_writer('html', GeneralStatisticsHTMLWriter(self))
        self._want_output_mode('html')

    def configure(self, config):
        pass

    def calculate(self, revision_data):
        self._first_rev_number = revision_data.get_first_revision().get_number()
        self._last_rev_number = revision_data.get_last_revision().get_number()
        self._revision_count = len(revision_data)
        self._repository_url = revision_data.get_repository_url()
        self._first_rev_date = revision_data.get_first_revision().get_date()
        self._last_rev_date = revision_data.get_last_revision().get_date()

    def get_first_rev_number(self):
        return self._first_rev_number

    def get_last_rev_number(self):
        return self._last_rev_number

    def get_revision_count(self):
        return self._revision_count

    def get_repository_url(self):
        return self._repository_url

    def get_first_rev_date(self):
        return self._first_rev_date

    def get_last_rev_date(self):
        return self._last_rev_date
class AuthorsByCommits(TableStatistic):
    """Specific statistic - a table of author -> commit count, sorted
    by commit count.
    """

    def __init__(self, config, start_date=None, end_date=None, id=None, title=None):
        """Generate statistics out of revision data."""
        if id is None:
            id = "authors_by_number_od_commits"
            if start_date:
                id += "_fromdate_" + str(int(start_date))
            if end_date:
                id += "_todate_" + str(int(end_date))
        if title is None:
            title = "Authors by total number of commits"
            if start_date:
                title += " from " + str(start_date)
            if end_date:
                title += " to " + str(end_date)
        TableStatistic.__init__(self, config, id, title)
        self.start_date = start_date
        self.end_date = end_date

    def column_names(self):
        return ('Author', 'Total number of commits', 'Percentage of total commit count')

    def configure(self, config):
        """Handle configuration - decide whether we are wanted and/or possible to
        be calculated and output.
        """

    def calculate(self, revision_data):
        """Do calculations based on the revision data passed as
        a parameter (which must be a RevisionData instance).

        This method sets the internal _data member.
        The output writer can then get it by calling rows().
        """
        assert isinstance(revision_data, RevisionData), ValueError(
            "Expected RevisionData instance, got %s", repr(revision_data)
        )

        abc = {}

        for rv in revision_data.get_revisions():
            if self.start_date:
                if rv.get_date() < self.start_date:
                    continue
            if self.end_date:
                if rv.get_date() > self.end_date:
                    continue
            author = rv.get_author()
            if not abc.has_key(author): abc[author] = 1
            else: abc[author] += 1

        data = [(a, abc[a]) for a in abc.keys()]
        data.sort(lambda x, y: cmp(y[1], x[1]))

        rows = []

        for k, v in data:
            rows.append([k,
                str(v),
                "%.2f%%" % (float(v) * 100.0 / float(len(revision_data)))])

        self._data = rows
#class AuthorsByChangedPaths(TableStatistic):
#    """Authors sorted by total number of changed paths."""
#
#    def __init__(self, config):
#        """Generate statistics out of revision data."""
#        TableStatistic.__init__(self, config, 'authors_number_of_paths', 'Authors by total number of changed paths')
#
#    def configure(self, config):
#        pass
#
#    def calculate(self, revision_data):
#        """Perform calculations."""
#        assert(isinstance(revision_data, RevisionData))
#
#        abp = {}
#        max = 0
#
#        for rv in revision_data.get_revisions():
#            author = rv.get_author()
#            modified_path_count = len(rv.get_modified_paths())
#            if not abp.has_key(author): abp[author] = modified_path_count
#            else: abp[author] += modified_path_count
#            max += modified_path_count
#
#        data = [(a, abp[a]) for a in abp.keys()]
#        data.sort(lambda x, y: cmp(y[1], x[1]))
#
#        self._data = data
#
#        rows = []
#
#        for k, v in data:
#            percentage = float(v) * 100.0 / float(max)
#            assert percentage >= 0.0
#            assert percentage <= 100.0
#            rows.append([k,
#                str(v),
#                "%.2f%%" % percentage])
#
#        self._data = rows
#
#    def column_names(self):
#        """Return names of columns."""
#        return ('Author', 'Total number of changed paths', 'Percentage of all changed paths')
class GraphStatistic(Statistic):
    """These stats are presented as a graph.

    This class holds abstract graph data.
    This is always an f(x) -> y graph, so
    there is a dict of (x, y) pairs.

    GraphStatistic does not do any output itself;
    GraphImageHTMLWriter and possibly others
    translate the logical data into an image file.
    """

    requires_graphics = True

    _x_axis_is_time = True
    """Default, since most graphs are time based."""

    def __init__(self, config, name, title):
        Statistic.__init__(self, config, name, title)
        self._set_writer('html', GraphImageHTMLWriter(self))
        self._want_output_mode('html')

    def keys(self):
        return self._keys

    def __getitem__(self, key):
        return self._data[key]

    def get_x_range(self):
        return (self._min_x, self._max_x)

    def get_y_range(self):
        return (self._min_y, self._max_y)

    def x_labels(self):
        """Return a dictionary of labels for
        the horizontal axis of graphs.
        Keys should be values that are not
        less than self._min_x and not
        greater than _max_x.
        Values are strings that should
        be attached to the axis.

        The default implementation calls labels_for_time_span
        if self._x_axis_is_time is True, which is the default.
        """
        if self._x_axis_is_time:
            return labels_for_time_span(
                datetime.datetime.fromtimestamp(self._min_x),
                datetime.datetime.fromtimestamp(self._max_x))
        else:
            return {}
class CommitsByWeekGraphStatistic(GraphStatistic):
    """Graph showing the number of commits by week."""

    def __init__(self, config):
        """Initialise."""
        GraphStatistic.__init__(self, config, "commits_by_week_graph", "Number of commits in week")

    def calculate(self, revision_data):
        """Calculate statistic."""
        assert len(self._wanted_output_modes) > 0

        week_in_seconds = 7 * 24 * 60 * 60 * 1.0

        start_of_week = revision_data.get_first_revision().get_date()
        end_of_week = start_of_week + week_in_seconds

        self._min_x = revision_data.get_first_revision().get_date()
        self._max_x = revision_data.get_last_revision().get_date()
        self._min_y = 0
        self._max_y = 0

        values = {}

        while start_of_week < revision_data.get_last_revision().get_date():
            commits = revision_data.get_revisions_by_date(start_of_week, end_of_week)
            y = len(commits)
            fx = float(start_of_week + (end_of_week - start_of_week) / 2)
            fy = float(y) * float(end_of_week - start_of_week) / float(week_in_seconds)
            values[fx] = fy

            if y > self._max_y: self._max_y = y

            start_of_week += week_in_seconds
            end_of_week = start_of_week + week_in_seconds
            if end_of_week > revision_data.get_last_revision().get_date():
                end_of_week = revision_data.get_last_revision().get_date()

        self.series_names = ['number_of_commits']
        self._values = {}
        self._values['number_of_commits'] = values
        self.colors = {}
        self.colors['number_of_commits'] = (0, 0, 0)

    def horizontal_axis_title(self):
        return "Time"

    def vertical_axis_title(self):
        return "Number of commits"
#class CommitsByWeekPerUserGraphStatistic(GraphStatistic):
#    """Show how many commits were made by the most active
#    users."""
#
#    def __init__(self, config):
#        """Initialise."""
#        self.number_of_users_to_show = 7
#        GraphStatistic.__init__(self, config,
#            "commits_by_week_per_user_graph",
#            "Number of commits in week made by most active users")
#
#    def _get_users(self, revision_data):
#        """Find users to be included in the graph."""
#        return revision_data.get_users_sorted_by_commit_count()[:self.number_of_users_to_show]
#
#    def _make_colors(self, users):
#        """Create a different color for each series."""
#        saturation = 1.0
#        brightness = 0.75
#        self.colors = {}
#        n = 0
#        for user in self.series_names:
#
#            hue = float(n) / float(len(self.series_names))
#            n += 1
#
#            assert hue >= 0.0 and hue <= 1.0
#
#            i = int(hue * 6.0)
#            f = hue * 6.0 - float(i)
#            p = brightness * (1.0 - saturation)
#            q = brightness * (1.0 - saturation * f)
#            t = brightness * (1.0 - saturation * (1.0 - f))
#
#            o = {
#                0: (brightness, t, p),
#                1: (q, brightness, p),
#                2: (p, brightness, t),
#                3: (p, q, brightness),
#                4: (t, p, brightness),
#                5: (brightness, p, q)
#            }
#
#            (r, g, b) = o[i]
#
#            assert r >= 0.0 and r <= 1.0
#            assert g >= 0.0 and g <= 1.0
#            assert b >= 0.0 and b <= 1.0
#
#            self.colors[user] = (int(r*256.0), int(g*256.0), int(b*256.0))
#
#    def calculate(self, revision_data):
#        """Calculate statistic."""
#        assert len(self._wanted_output_modes) > 0
#
#        users = self._get_users(revision_data)
#        self.series_names = users
#        self._make_colors(users)
#
#        week_in_seconds = 7 * 24 * 60 * 60 * 1.0
#
#        start_of_week = revision_data.get_first_revision().get_date()
#        end_of_week = start_of_week + week_in_seconds
#
#        self._min_x = revision_data.get_first_revision().get_date()
#        self._max_x = revision_data.get_last_revision().get_date()
#        self._min_y = 0
#        self._max_y = 0
#        self._values = {}
#
#        for user in users:
#            self._values[user] = {}
#
#        i = 1
#        while start_of_week < revision_data.get_last_revision().get_date():
#            for user in users:
#                commits = revision_data.get_revisions_by_date(start_of_week, end_of_week)
#                y = len([rv for rv in revision_data.revisions_by_users[user] if (
#                    (rv.get_date() > start_of_week and rv.get_date() < end_of_week))])
#                fx = float(start_of_week + (end_of_week - start_of_week) / 2)
#                fy = float(y) * float(end_of_week - start_of_week) / float(week_in_seconds)
#
#                self._values[user][fx] = fy
#
#                if y > self._max_y: self._max_y = y
#
#            start_of_week += week_in_seconds
#            end_of_week = start_of_week + week_in_seconds
#            if end_of_week > revision_data.get_last_revision().get_date():
#                end_of_week = revision_data.get_last_revision().get_date()
#            i += 1
#
#    def horizontal_axis_title(self):
#        return "Time"
#
#    def vertical_axis_title(self):
#        return "Number of commits"
class GroupStatistic(Statistic):
    """Statistic class for grouping other statistics.
    Each object of this class can contain other statistics.
    Rendering this type of statistic means rendering
    all child stats and putting them in one group
    (for example - in a web page section).
    """

    def __init__(self, config, name, title):
        """Initialize internal variables. Must be called."""
        Statistic.__init__(self, config, name, title)
        self._child_stats = []

    def __getitem__(self, number):
        """Get a child Statistic object."""
        return self._child_stats[number]

    def append(self, statistic):
        """Append the given statistic to the child list.

        Parameters:
        - statistic - must be an instance of Statistic
        """
        assert isinstance(statistic, Statistic), ValueError(
            "Wrong parameter, expected Statistic instance, got %s" % (
                repr(statistic)))

        self._child_stats.append(statistic)

    def children(self):
        """Get children."""
        return self._child_stats

    def descendants(self):
        d = []
        for child in self.children():
            if isinstance(child, GroupStatistic):
                d += child.descendants()
            else:
                d.append(child)
        return d

    def configure(self, config):
        Statistic.configure(self, config)
        for child in self._child_stats:
            child.configure(config)

    def count_all(self):
        """Return the total number of leaf statistics in the group/tree.
        That is, group statistics are not included.
        """
        total = 0
        for stat in self._child_stats:
            if isinstance(stat, GroupStatistic):
                total += stat.count_all()
            else:
                total += 1
        return total

    def count_wanted(self):
        return len([descendant for descendant in self.descendants() if descendant.is_wanted()])

    def calculate(self, revision_data):
        """Pass data to children."""
        for child in self._child_stats:
            if child.is_wanted():
                child.calculate(revision_data)
class AllStatistics(GroupStatistic):
    """This is a special type of group statistic - it
    is created at startup and creates the
    whole tree of statistics objects.

    After that, objects are queried whether they
    are to be calculated, and then written
    out using writers.
    """

    def __init__(self, config):
        """Build the statistics tree."""
        GroupStatistic.__init__(self, config, "mpy_svn_stats", "MPY SVN Statistics")
        self.append(GeneralStatistics(config))
        self.append(CommitsGroup(config))
        self.append(ChangedPathsGroup(config))
        self.append(LogMessageLengthGroup(config))
#        self.append(AuthorsByChangedPaths(config))
#        self.append(AuthorsByCommitLogSize(config))
#        self.append(CommitsByWeekGraphStatistic(config))
        self._set_writer('html', TopLevelGroupStatisticHTMLWriter(self))
        self._want_output_mode('html')
class SimpleFunctionGroup(GroupStatistic):
    """A statistic for measuring one function of a revision for each author
    (for example: commit count, changed paths, log message size etc).
    Includes:
     - authors sorted by value for:
       * total repo life
       * last month
       * last 7 days
     - graph for authors
     - graph for the function's value
    """

    class SimpleTable(TableStatistic):
        """Specific statistic - a table of author -> value, sorted
        by value.
        """

        def __init__(self, config, parent, start_date=None, end_date=None, subtitle=None):
            id = parent.id + '_simple_table'
            self.parent = parent
            if start_date:
                id += "_fromdate_" + str(int(start_date))
            if end_date:
                id += "_todate_" + str(int(end_date))

            title = 'Table of authors'
            if subtitle:
                title += ': ' + subtitle

            TableStatistic.__init__(self, config, id, title)
            self.start_date = start_date
            self.end_date = end_date

        def column_names(self):
            return ('Author', 'Number', 'Percentage')

        def configure(self, config):
            """Handle configuration - decide whether we are wanted and/or possible to
            be calculated and output.
            """
            pass

        def calculate(self, revision_data):
            """Do calculations based on the revision data passed as
            a parameter (which must be a RevisionData instance).

            This method sets the internal _data member.
            The output writer can then get it by calling rows().
            """
            assert isinstance(revision_data, RevisionData), ValueError(
                "Expected RevisionData instance, got %s", repr(revision_data))

            abc = {}

            for rv in revision_data.get_revisions():
                if self.start_date:
                    if rv.get_date() < self.start_date:
                        continue
                if self.end_date:
                    if rv.get_date() > self.end_date:
                        continue
                # revision_function always returns an author -> some_value relation (dict)
                values = self.parent.revision_function(rv)
                for (author, value) in values.iteritems():
                    if not abc.has_key(author): abc[author] = value
                    else: abc[author] += value

            data = [(a, abc[a]) for a in abc.keys()]
            data.sort(lambda x, y: cmp(y[1], x[1]))

            total_sum = sum(abc.values())

            rows = []

            for k, v in data:
                percentage = float(v) * 100.0 / float(total_sum)
                assert percentage >= 0.0
                assert percentage <= 100.0
                rows.append([k,
                    str(v),
                    "%.2f%%" % percentage])

            self._data = rows
    class SimpleMultiAuthorGraphStatistic(GraphStatistic):

        def __init__(self, config, parent):
            """Initialise."""
            self.parent = parent
            self.id = parent.id + '_multi_author_graph'
            self.number_of_users_to_show = 9
            GraphStatistic.__init__(self, config,
                self.id,
                "Graph for most active committers")

        def _get_users(self, revision_data):
            """Find users to be included in the graph."""
            return revision_data.get_users_sorted_by_commit_count()[:self.number_of_users_to_show]

        def _make_colors(self, users):
            """Create a different color for each series."""
            saturation = 1.0
            brightness = 0.75
            self.colors = {}
            n = 0
            for user in self.series_names:

                hue = float(n) / float(len(self.series_names))
                n += 1

                assert hue >= 0.0 and hue <= 1.0

                i = int(hue * 6.0)
                f = hue * 6.0 - float(i)
                p = brightness * (1.0 - saturation)
                q = brightness * (1.0 - saturation * f)
                t = brightness * (1.0 - saturation * (1.0 - f))

                o = {
                    0: (brightness, t, p),
                    1: (q, brightness, p),
                    2: (p, brightness, t),
                    3: (p, q, brightness),
                    4: (t, p, brightness),
                    5: (brightness, p, q)
                }

                (r, g, b) = o[i]

                assert r >= 0.0 and r <= 1.0
                assert g >= 0.0 and g <= 1.0
                assert b >= 0.0 and b <= 1.0

                self.colors[user] = (int(r*256.0), int(g*256.0), int(b*256.0))

        def calculate(self, revision_data):
            """Calculate statistic."""
            assert len(self._wanted_output_modes) > 0

            users = self._get_users(revision_data)
            self.series_names = users
            self._make_colors(users)

            week_in_seconds = 7 * 24 * 60 * 60 * 1.0

            start_of_week = revision_data.get_first_revision().get_date()
            end_of_week = start_of_week + week_in_seconds

            self._min_x = revision_data.get_first_revision().get_date()
            self._max_x = revision_data.get_last_revision().get_date()
            self._min_y = 0
            self._max_y = 0
            self._values = {}

            for user in users:
                self._values[user] = {}

            i = 1
            while start_of_week < revision_data.get_last_revision().get_date():
                for user in users:
                    fx = float(start_of_week + (end_of_week - start_of_week) / 2)
                    this_week_revisions = revision_data.get_revisions_by_date(start_of_week, end_of_week)
                    users_revisions = [revision for revision in this_week_revisions if revision.get_author() == user]

                    y = 0.0

                    for revision in users_revisions:
                        values = self.parent.revision_function(revision)
                        for (author, value) in values.iteritems():
                            assert(author == user)
                            y += value

                    fy = float(y) * float(end_of_week - start_of_week) / float(week_in_seconds)
                    self._values[user][fx] = fy
                    if y > self._max_y: self._max_y = y

                start_of_week += week_in_seconds
                end_of_week = start_of_week + week_in_seconds
                if end_of_week > revision_data.get_last_revision().get_date():
                    end_of_week = revision_data.get_last_revision().get_date()
                i += 1

        def horizontal_axis_title(self):
            return "Time"

        def vertical_axis_title(self):
            return self.parent.value_description
    def __init__(self, config, id, name):
        self.id = id
        self.name = name
        self.value_description = name
        GroupStatistic.__init__(self, config, id, name)

        self.append(self.SimpleTable(config, self))
        self.append(self.SimpleTable(config, self,
            config.end_date - month_seconds, config.end_date,
            'Last month'))
        self.append(self.SimpleTable(config, self,
            config.end_date - week_seconds, config.end_date,
            'Last week'))
        self.append(self.SimpleMultiAuthorGraphStatistic(config, self))

        self._set_writer('html', GroupStatisticHTMLWriter(self))
        self._want_output_mode('html')
#class CommitsGroup(GroupStatistic):
#    """This class defines a group of statistics that shows authors with
#    their commit counts."""
#
#    def __init__(self, config):
#        """Create group contents."""
#        GroupStatistic.__init__(self, config, "authors_by_commits_group", "Number of commits")
#        self.append(AuthorsByCommits(config))
#        self.append(AuthorsByCommits(config,
#            config.end_date - month_seconds, config.end_date,
#            title='Authors by commits - last month'))
#        self.append(AuthorsByCommits(config,
#            config.end_date - week_seconds, config.end_date,
#            title='Authors by commits - last week'))
#        self.append(CommitsByWeekPerUserGraphStatistic(config))
#        self._set_writer('html', GroupStatisticHTMLWriter(self))
#        self._want_output_mode('html')
class CommitsGroup(SimpleFunctionGroup):
    """This class defines a group of statistics that shows authors with
    their commit counts.
    """

    def __init__(self, config):
        SimpleFunctionGroup.__init__(self, config, 'commits_group', 'Number of commits')

    def revision_function(self, revision):
        """Return a dictionary of values derived from the revision."""
        return {revision.get_author(): 1}
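
# Note: revision_function is the hook that each SimpleFunctionGroup subclass
# overrides.  For a single revision it returns an {author: value} dict, e.g.
# (hypothetical author name) {'alice': 1} for CommitsGroup; the nested
# SimpleTable and graph statistics then sum these values per author and per week.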
class ChangedPathsGroup(SimpleFunctionGroup):
    """Implementation of SimpleFunctionGroup; gives info about changed paths."""

    def __init__(self, config):
        SimpleFunctionGroup.__init__(self, config, 'changed_paths', 'Number of changed paths')

    def revision_function(self, revision):
        return {revision.get_author(): len(revision.get_modified_paths())}

class LogMessageLengthGroup(SimpleFunctionGroup):
    """Log message length."""

    def __init__(self, config):
        SimpleFunctionGroup.__init__(self, config, 'log_message_length_group', 'Log message length')

    def revision_function(self, revision):
        return {
            revision.author: len(revision.log_message)
        }
class StatisticWriter:
    """Abstract class for all output generators."""
    pass

class HTMLWriter(StatisticWriter):
    """An abstract class for HTML writing."""

    def _standard_statistic_header(self):
        """Make all statistic headers look the same."""
        r = ''

        h2 = "<h2><a name=\"%s\"></a>%s</h2>\n" % (
            escape(self._statistic.name()),
            escape(self._statistic.title())
        )

        goToTopLink = "<a class=\"topLink\" href=\"#top\">top</a>\n"

        r = "<table class=\"statisticHeader\"><tr><td>%s</td><td class=\"topLink\">%s</td></table>\n\n" % (
            h2, goToTopLink)

        return r

    def _standard_statistic_footer(self):
        """Make all statistic footers look the same."""
        return "<hr class=\"statisticDelimiter\"/>"

    def configure(self, config):
        self.is_configured = True
class GroupStatisticHTMLWriter(HTMLWriter):
    """Class for writing group statistics (abstract)."""

    def __init__(self, group_statistic=None):
        self._statistic = group_statistic

    def set_statistic(self, statistic):
        self._statistic = statistic

class TopLevelGroupStatisticHTMLWriter(GroupStatisticHTMLWriter):
    """Class for writing the one, top level
    GroupStatistic.
    """

    output_mode = 'html'

    def __init__(self, statistic=None):
        GroupStatisticHTMLWriter.__init__(self, statistic)

    def configure(self, config):
        """Configure - generally - get the output directory."""
        self._output_dir = config.get_output_dir()

    def write(self, run_time):
        """Write out the generated statistics."""
        _create_output_dir(self._output_dir)
        filename = self._output_dir + '/index.html'
        output_file = file(filename, "w")
        output_file.write(
            self._page_head()
            + self._page_body()
            + self._page_foot(run_time=run_time)
        )
        output_file.close()
    def _page_head(self):
        """Return the HTML page head."""
        return """\
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>
    <meta name="Generator" content="mpy-svn-stats v. 0.1"/>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <title>mpy-svn-stats</title>
    <style type="text/css">
        body, td, li {
            font-size: 12px;
        }

        table.statistic {
            width: 80%;
            float: center;
        }

        table.statistic tr td {
            border-style: solid;
            border-width: 1px;
            border-color: black;
            text-align: center;
        }

        table.statistic tr th {
            border-style: solid;
            border-width: 2px;
            border-color: black;
            background-color: lightgray;
        }

        p.foot {
            font-size: 75%;
            text-align: center;
        }

        h1,h2,th,caption {
            font-family: Arial;
        }

        h1,h2 {
            text-align: center;
            background-color: lightgray;
            font-style: italic;
        }

        table.statisticHeader {
            margin-left: 0;
            margin-right: 0;
            clear: both;
            width: 100%;
        }

        table.statisticHeader td {
            background-color: lightgray;
            border-spacing: 0px;
            margin: 0px;
            padding: 0px;
        }

        table.statisticHeader tr td.topLink {
            text-align: center;
            width: 3em;
        }

        table.statisticHeader td h2 {
            margin-top: 1px;
            margin-bottom: 1px;
        }

        td.menu_column {
            padding-left: 1em;
            padding-right: 1em;
            vertical-align: top;
        }

        td.statistics_column {
            vertical-align: top;
        }

        .topLink a:link, .topLink a:active, .topLink a:visited {
            color: black;
        }

        .topLink a:hover {
            color: black;
        }

        a.menuLink:link, a.menuLink:active, a.menuLink:visited {
            color: blue;
        }

        hr.statisticDelimiter {
            border-spacing: 0px;
            border-width: 2px 0px 0px 0px;
            border-style: solid;
            margin-bottom: 40pt;
            border-color: lightgray;
        }

        table.legend tr {
        }

        table.legend td.name {
            border-width: 1px 0px 1px 1px;
            border-style: solid;
            border-color: black;
            padding: 0.2em 1em;
        }

        table.legend td.color {
            border-width: 1px 1px 1px 0px;
            border-style: solid;
            border-color: black;
            width: 4em;
        }

        ul.menu {
            padding-left: 0px;
        }
    </style>
</head>
<body>
<h1><a name="top"></a>mpy-svn-stats</h1>
"""
    def _page_foot(self, run_time):
        """Return the HTML page foot."""
        return """
<hr/>
<p class="foot">
Stats generated by <a href="http://mpy-svn-stats.berlios.de">mpy-svn-stats</a> in %(run_time).2f seconds.
</p>
</body></html>
""" % {
            'run_time': run_time
        }

    def _page_body(self):
        return "<table><tr><td class=\"menu_column\">%(menu_column)s</td><td>%(body_column)s</td></tr></table>" % {
            'menu_column': self._page_menu(),
            'body_column': self._page_main()
        }

    def _page_menu(self):
        return "<ul class=\"menu\">" + self._recursive_menu(self._statistic) + "</ul>\n"

    def _recursive_menu(self, statistic):
        """Return the statistic as an li tag."""
        if not statistic.is_wanted(self.output_mode):
            return ''

        r = ""
        if isinstance(statistic, GroupStatistic):

            # count wanted children
            wanted_children = len([child for child in statistic.children() if child.is_wanted(self.output_mode)])
            if wanted_children == 0:
                return ''

            r += "<li>%s:\n<ul>\n" % statistic.title()
            for child in statistic.children():
                r += self._recursive_menu(child)

            r += "</ul>\n</li>\n"
        else:
            r += "<li><a class=\"menuLink\" href=\"#%s\">%s</a></li>\n" % (
                statistic.name(),
                statistic.title())
        return r

    def _page_main(self):
        flat = []
        stack = [self._statistic]

        while len(stack) > 0:
            stat = stack.pop()
            if not isinstance(stat, GroupStatistic):
                flat.append(stat)
            else:
                children = stat.children()
                children.reverse()
                stack.extend(children)

        r = ''

        for stat in flat:
            if stat.is_wanted('html'):
                r += stat.output('html')

        return r
class TableHTMLWriter(HTMLWriter):
    """Output table."""

    def __init__(self, stat):
        assert isinstance(stat, TableStatistic), ValueError()
        self._statistic = stat

    def output(self):
        r = '\n'
        r += self._standard_statistic_header()
        r += "<table class=\"statistic\">\n%s\n%s\n</table>\n\n" % (
            self._table_header(),
            self._table_body())
        r += self._standard_statistic_footer()
        return r

    def _table_header(self):
        r = "<tr>\n"
        r += " <th>No</th>\n"
        for column_name in self._statistic.column_names():
            r += " <th>" + escape(column_name) + "</th>\n"
        r += "</tr>\n"
        return r

    def _table_body(self):
        r = ''
        i = 1
        for row in self._statistic.rows():
            r += "<tr>\n"
            r += " <td>%d</td>\n" % i
            for cell in row:
                r += " <td>" + escape(cell) + "</td>\n"
            i += 1
            r += "</tr>\n"
        return r
class GeneralStatisticsHTMLWriter(HTMLWriter):
    """Specialised GeneralStatistics HTML Writer class."""

    def __init__(self, stat):
        self._statistic = stat

    def output(self):
        statistic = self._statistic
        return """
<h2><a name="%(statistic_name)s"></a>%(statistic_title)s</h2>
<p>
Statistics for repository at: <b>%(repository_url)s</b>.<br/>
Smallest revision number: %(first_rev_number)s.<br/>
Biggest revision number: %(last_rev_number)s.<br/>
Revision count: %(revision_count)s.<br/>
First revision date: %(first_rev_date)s.<br/>
Last revision date: %(last_rev_date)s.<br/>
Age of the repository (from first to last revision): %(age_of_repository)s.<br/>
Commits per year: %(commits_per_year)s.<br/>
Commits per month: %(commits_per_month)s.<br/>
Commits per day: %(commits_per_day)s.
</p>
""" % {
            'repository_url': escape(statistic.get_repository_url()),
            'statistic_name': escape(statistic.name()),
            'statistic_title': escape(statistic.title()),
            'revision_count': str(statistic.get_revision_count()),
            'first_rev_number': str(statistic.get_first_rev_number()),
            'last_rev_number': str(statistic.get_last_rev_number()),
            'first_rev_date': time.strftime('%c', time.gmtime(statistic.get_first_rev_date())),
            'last_rev_date': time.strftime('%c', time.gmtime(statistic.get_last_rev_date())),
            'age_of_repository': self._format_time_span(
                statistic.get_first_rev_date(),
                statistic.get_last_rev_date()
            ),
            'commits_per_year': ("%.2f" % (statistic.get_revision_count() * 365.25 * 24 * 60 * 60
                / (statistic.get_last_rev_date() - statistic.get_first_rev_date()))
            ),
            'commits_per_month': ("%.2f" % (statistic.get_revision_count() * 30 * 24 * 60 * 60
                / (statistic.get_last_rev_date() - statistic.get_first_rev_date()))
            ),
            'commits_per_day': ("%.2f" % (statistic.get_revision_count() * 24 * 60 * 60
                / (statistic.get_last_rev_date() - statistic.get_first_rev_date()))
            ),
        }

    def _format_time_span(self, from_time, to_time):
        """Format a time span as a string."""
        seconds = to_time - from_time
        remainder = seconds
        s = ''

        steps = [
            ('years', 365.25 * 24 * 60 * 60),
            ('months', 30 * 24 * 60 * 60),
            ('days', 24 * 60 * 60),
            ('hours', 60 * 60),
            ('minutes', 60),
        ]

        have_nonzero_step = False

        for step in steps:
            n = remainder / step[1]
            if int(n) > 0:
                have_nonzero_step = True
                remainder -= int(n) * step[1]
                s += '%d %s' % (int(n), step[0])
            if have_nonzero_step:
                if step is steps[len(steps)-1]:
                    s += ' and '
                else:
                    s += ' '

        s += '%d seconds' % int(remainder)

        return s
class GraphImageHTMLWriter(HTMLWriter):
    """A class that writes graphs to image files.
    Basically, a GraphStatistic contains data that
    makes it possible to draw a graph.
    That is: axis max, axis min, axis label,
    argument -> value pairs that define the function.
    It may also contain a type in future releases.

    Fields include:
     - _statistic - parent statistic (the one the data
       comes from)
    """

    def __init__(self, statistic):
        """Initialise the instance. The statistic name will be used for
        the image filename."""
        assert isinstance(statistic, GraphStatistic)
        self._statistic = statistic
        # only load the font when PIL is actually available; without it the
        # graph output mode gets disabled in configure() anyway
        if _have_pil:
            self.font = ImageFont.load_default()

    def configure(self, config):
        """Configure the Graph Image HTML Writer."""
        self._image_width = 600
        self._image_height = 400
        self._margin_bottom = 125
        self._margin_top = 20
        self._margin_left = 50
        self._margin_right = 20
        self._image_dir = config.get_output_dir()

    def get_image_fname(self):
        return self._image_dir + '/' + self._statistic.name() + '.png'

    def get_image_html_src(self):
        return self._statistic.name() + '.png'

    def _write_image(self):
        """Write image files."""
        image_size = (self._image_width, self._image_height)
        im = Image.new("RGB", image_size, 'white')
        draw = ImageDraw.Draw(im)

        self._draw_axes(im, draw)
        self._draw_axes_labels(im, draw)
        self._paint_content(im, draw)

        del draw
        self._save(im)

    def _save(self, im):
        im.save(self.get_image_fname())
    def _paint_content(self, im, draw):
        for k, values in self._statistic._values.iteritems():
            keys = values.keys()
            keys.sort()
            color = self._statistic.colors[k]

            last_pair = (None, None)
            for key in keys:
                value = values[key]
                last_pair = self._plot(draw, last_pair, (key, value), color)

    def _plot(self, draw, from_tuple, to_tuple, color='black'):
        if from_tuple != (None, None):
            (imx1, imy1) = self._graph_to_image(from_tuple)
            (imx2, imy2) = self._graph_to_image(to_tuple)
            draw.line((imx1, imy1, imx2, imy2), color)
        return to_tuple

    def _graph_to_image(self, point):
        """Convert a position from
        theoretical (data) coordinates to
        image coordinates.
        Points are tuples of doubles.
        TODO: rewrite, it's too long.
        """
        gx = float(point[0])
        gy = float(point[1])
        point = (gx, gy)

        assert gx >= self._statistic._min_x
        assert gx <= self._statistic._max_x, AssertionError("bad gx: %f (should be smaller than %f)" % (gx, self._statistic._max_x))
        assert gy >= self._statistic._min_y
        assert gy <= self._statistic._max_y

        margin_left = self._margin_left
        margin_right = self._margin_right
        margin_top = self._margin_top
        margin_bottom = self._margin_bottom

        image_width = self._image_width
        image_height = self._image_height

        range_x = float(self._statistic._max_x - self._statistic._min_x)
        range_y = float(self._statistic._max_y - self._statistic._min_y)

        assert range_x >= 0
        assert range_y >= 0

        min_x = float(self._statistic._min_x)
        min_y = float(self._statistic._min_y)

        x = margin_left + (gx - min_x) * (image_width - margin_left - margin_right) / range_x

        pcy = (gy - min_y) / range_y
        graph_height = image_height - margin_top - margin_bottom
        pxy = pcy * graph_height
        y = margin_top + graph_height - pxy

        return (x, y)
    def _draw_axes(self, image, draw):
        self._draw_horizontal_axis(image, draw)
        self._draw_vertical_axis(image, draw)
        self._draw_horizontal_axis_title(image, draw)
        self._draw_vertical_axis_title(image, draw)

    def _draw_horizontal_axis(self, image, draw):
        start_x = self._margin_left
        start_y = self._image_height - self._margin_bottom
        end_x = self._image_width - self._margin_right
        end_y = start_y

        length = self._image_width - self._margin_left - self._margin_right

        draw.line((start_x, start_y, end_x, end_y), '#999')
        draw.line((end_x, end_y, end_x - 5, end_y - 3), '#999')
        draw.line((end_x, end_y, end_x - 5, end_y + 3), '#999')

    def _draw_vertical_axis(self, image, draw):
        start_x = self._margin_left
        start_y = self._image_height - self._margin_bottom
        end_x = start_x
        end_y = self._margin_top

        draw.line((start_x, start_y, end_x, end_y), '#999')
        draw.line((end_x, end_y, end_x + 3, end_y + 5), '#999')
        draw.line((end_x, end_y, end_x - 3, end_y + 5), '#999')

    def _draw_horizontal_axis_title(self, image, draw):
        text = self._statistic.horizontal_axis_title()
        (text_width, text_height) = draw.textsize(text, font=self.font)

        corner_x = self._image_width - self._margin_right
        corner_y = self._image_height - self._margin_bottom

        pos_x = corner_x - text_width
        pos_y = corner_y - 15

        draw.text((pos_x, pos_y), text, fill='black', font=self.font)

    def _draw_vertical_axis_title(self, image, draw):
        text_im_width = 300
        text_im_height = 200

        textim = Image.new('RGBA',
            (text_im_width, text_im_height), 'white')
        textdraw = ImageDraw.Draw(textim)

        text = self._statistic.vertical_axis_title()
        (text_width, text_height) = textdraw.textsize(text, font=self.font)

        textdraw.text((0, 0), text, fill='black', font=self.font)

        del textdraw

        textim = textim.crop((0, 0, text_width, text_height))
        textim = textim.rotate(90)

        corner_x = self._margin_left
        corner_y = self._margin_top

        pos_x = corner_x - text_height - 10
        pos_y = corner_y + 10

        image.paste(textim,
            (
                pos_x, pos_y,
                pos_x + text_height,
                pos_y + text_width
            )
        )

        del textim
    def _draw_axes_labels(self, image, draw):
        labels = self._statistic.x_labels()
        if len(labels) == 0: return
        #print "%s: have %d labels" % (self, len(labels))
        for label_datetime, label in labels.iteritems():
            label_position = time.mktime(label_datetime.timetuple())
            label_text = label.text
            position = self._graph_to_image((label_position, 0))

            #print " putting '%s' at %s" % (label_text, position)
            self._draw_text(image, draw, (position[0], position[1] + 4), label_text,
                angle=90)
            draw.line(
                (int(position[0]), int(position[1] - 4), int(position[0]), int(position[1] + 2)),
                'black')

    def _draw_text(self, im, draw, position, text, fill='black', angle=0):
        """Create rotated text. This must be done
        by creating a temporary image, drawing the text on it,
        rotating it and then pasting it onto the original
        image.
        """
        textsize = draw.textsize(text)
        tim = Image.new('RGBA', textsize, (0, 0, 0, 0))
        tdraw = ImageDraw.Draw(tim)

        tdraw.text((0, 0), text, fill=fill, font=self.font)
        del tdraw

        tim = tim.rotate(-90)

        #print position
        im.paste(tim, (int(position[0] - textsize[1] / 2), int(position[1])), tim)

        del tim
    def output(self):
        """Write the image file and return the HTML fragment that embeds it."""
        r = ''
        self._write_image()

        r += self._standard_statistic_header()
        r += """
<p>
<img border="1" src="%(image_src)s"/>
</p>
""" % {
            'image_src': self.get_image_html_src()
        }

        if len(self._statistic.colors.keys()) > 1:
            r += self._legend()

        r += self._standard_statistic_footer()

        return r

    def _legend(self):
        o = ''
        i = 0
        cols = 3
        colors = self._statistic.colors
        names = self._statistic.series_names

        o += "<table class=\"legend\">\n"

        while True:
            o += " <tr>\n"
            for col_num in range(0, cols):

                if i < len(colors.keys()):

                    name = names[i]
                    (r, g, b) = colors[name]

                    color = '#%s%s%s' % (
                        hex(r)[2:].zfill(2),
                        hex(g)[2:].zfill(2),
                        hex(b)[2:].zfill(2))

                    o += " <td class=\"name\">%s</td>\n<td class=\"color\" style=\"background-color: %s\">&nbsp;</td>\n" % (
                        name,
                        color)

                else:
                    o += " <td></td>\n<td>\n</td>"

                i += 1

            o += " </tr>\n"

            if i >= len(colors.keys()):
                break

        o += "</table>"
        return o
class RevisionData:
    """Data about all revisions."""

    def __init__(self, url, dom):
        """Create revision data from an xml.dom.Document."""

        self._repository_url = url

        log = dom.childNodes[0]
        revisions = []

        for logentry in log.childNodes:
            if logentry.nodeType != logentry.ELEMENT_NODE: continue
            if logentry.nodeType == logentry.ELEMENT_NODE and logentry.nodeName != 'logentry':
                raise Exception('%s found, logentry expected' % str(logentry))
            revisions.append(RevisionInfo(logentry))

        self._revisions = revisions
        self._revisions_by_keys = {}
        for rv in self._revisions:
            self._revisions_by_keys[rv.get_revision_number()] = rv

        self._revisions.sort(lambda r1, r2: cmp(r1.get_revision_number(), r2.get_revision_number()))

        self._generate_user_data()

    def _generate_user_data(self):
        self.users = []
        self.revisions_by_users = {}

        for rv in self._revisions:
            user = rv.get_author()
            if not user in self.users:
                self.users.append(user)
                self.revisions_by_users[user] = []
            self.revisions_by_users[user].append(rv)

        self.users_sorted_by_revision_count = self.users
        self.users_sorted_by_revision_count.sort(lambda u1, u2: cmp(len(self.revisions_by_users[u2]), len(self.revisions_by_users[u1])))

    def get_users_sorted_by_commit_count(self):
        return self.users_sorted_by_revision_count

    def get_revision(self, number):
        return self._revisions_by_keys[number]

    def get_first_revision(self):
        return self._revisions[0]

    def get_last_revision(self):
        return self._revisions[len(self._revisions) - 1]

    def get_repository_url(self):
        if self._repository_url:
            return self._repository_url
        else:
            return "unknown"

    def __len__(self):
        return len(self._revisions)

    def __getitem__(self, index):
        return self._revisions_by_keys[index]

    def keys(self):
        return self._revisions_by_keys.keys()

    def get_revisions(self):
        return self._revisions

    def values(self):
        return self.get_revisions()

    def get_revisions_by_date(self, start_date, end_date):
        revisions = []
        for rv in self.get_revisions():
            if start_date <= rv.get_date() < end_date:
                revisions.append(rv)
        return revisions
class RevisionInfo:
    """All known data about a single revision."""

    def __init__(self, message):
        self._modified_paths = []
        self._parse_message(message)
        self._have_diffs = False
        self._diffs = []

    def get_author(self):
        return self.author

    def get_revision_number(self):
        return self._revision_number

    def get_number(self):
        """Same as get_revision_number."""
        return self._revision_number

    def get_modified_paths(self):
        return self._modified_paths

    def get_date(self):
        return self._date

    def _parse_message(self, message):
        assert(isinstance(message, xml.dom.Node))
        self.author = self._parse_author(message)
        self._revision_number = self._parse_revision_number(message)
        self._modified_paths = self._parse_paths(message)
        self._date = self._parse_date(message)
        self.log_message = self._parse_commit_log_message(message)

    def _parse_author(self, message):
        """Get the author out of a logentry.
        Surprisingly, not all logentries have authors.
        In that case, the author is set to '' (empty string).
        cvs2svn does that.
        """
        a = message.getElementsByTagName('author')
        assert len(a) <= 1, AssertionError(
            'There should be at most one author in a revision.\nXML is:\n%s' % (
                message.toprettyxml())
        )
        if len(a) == 1:
            a[0].normalize()
            assert(len(a[0].childNodes) == 1)
            return a[0].childNodes[0].data
        else:
            return ''

    def _parse_commit_log_message(self, message):
        l = message.getElementsByTagName('msg')
        l[0].normalize()
        try:
            return l[0].childNodes[0].data
        except:
            return ''

    def _parse_revision_number(self, message):
        return int(message.getAttribute('revision'))

    def _parse_paths(self, message):
        path_nodes = message.getElementsByTagName('path')
        modified_paths = []
        for path_node in path_nodes:
            path_node.normalize()
            action = path_node.getAttribute('action')
            path = self._get_element_contents(path_node)
            modified_paths.append(ModifiedPath(action, path))
        return modified_paths

    def _parse_date(self, message):
        date_element = message.getElementsByTagName('date')[0]
        isodate = self._get_element_contents(date_element)
        return time.mktime(time.strptime(isodate[:19], '%Y-%m-%dT%H:%M:%S'))
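
    # For reference, svn log --xml emits ISO 8601 dates such as
    # "2004-05-12T13:46:25.000000Z" (hypothetical sample value); only the
    # first 19 characters are parsed above, so sub-second precision and the
    # trailing "Z" are ignored.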
    def _get_element_contents(self, node):
        assert(isinstance(node, xml.dom.Node))
        children = node.childNodes
        contents = ''
        for child in children:
            if child.nodeType == child.TEXT_NODE:
                contents += child.data
        return contents

    def get_revision_number(self):
        return self._revision_number

class ModifiedPath:
    def __init__(self, action, path):
        assert isinstance(action, basestring) and len(action) == 1, \
            'should be a one-letter string, is: %s' % str(action)
        assert isinstance(path, basestring), 'should be a modified path, is: %s' % path
        self._action = action
        self._path = path

    def get_action(self):
        return self._action

    def get_path(self):
        return self._path
#class AuthorsByCommitLogSize(TableStatistic):
#    """Specific statistic - show table author -> commit log, sorted
#    by commit log size.
#    """
#    def __init__(self, config):
#        """Generate statistics out of revision data."""
#        TableStatistic.__init__(self, config, 'authors_by_log_size', """Authors by total size of commit log messages""")
#
#    def configure(self, config):
#        """Handle configuration."""
#        pass
#
#    def column_names(self):
#        return ('Author',
#            'Total number of characters in all log messages',
#            'Percentage of all log messages')
#
#    def calculate(self, revision_data):
#        """Do calculations."""
#        assert(isinstance(revision_data, RevisionData))
#
#        abc = {}
#        sum = 0
#
#        for rv in revision_data.get_revisions():
#            author = rv.get_author()
#            log = rv.get_commit_log()
#            size = len(log)
#            if not abc.has_key(author): abc[author] = size
#            else: abc[author] += size
#            sum += size
#
#        data = [(a, abc[a]) for a in abc.keys()]
#        data.sort(lambda x, y: cmp(y[1], x[1]))
#
#        rows = []
#
#        for k, v in data:
#            rows.append([k,
#                str(v),
#                "%2.2f%%" % (float(v) * 100.0 / float(sum))])
#
#        self._data = rows
#
#class AuthorsByDiffSize(TableStatistic):
#    """Specific statistic - shows table author -> diffs size, sorted by
#    size.
#    """
#
#    wanted_by_default = False
#
#    def __init__(self, config, revision_data):
#        """Generate statistics out of revision data and `svn diff`."""
#        TableStatistic.__init__(self, 'author_by_diff_size', 'Authors by total size of diffs')
#        assert(isinstance(revision_data, RevisionData))
#
#        abc = {}
#
#        for rv in revision_data.get_revisions():
#            author = rv.get_author()
#            rev_number = rv.get_revision_number()
#            command = "%s -r %d:%d diff %s" % (config.get_svn_binary(),
#                rev_number-1, rev_number,
#                config.get_repository_url())
#            f = os.popen(command)
#            result = f.read()
#            f.close()
#            if not abc.has_key(author):
#                abc[author] = (len(result), len(result.split()))
#            else:
#                abc[author] = (abc[author][0] + len(result), abc[author][1] + len(result.split()))
#
#        data = [(a, abc[a][0], abc[a][1]) for a in abc.keys()]
#        data.sort(lambda x, y: cmp(y[1], x[1]))
#
#        self._data = data
#
#    def column_names(self):
#        return ('Author', 'Size of diffs', 'Number of lines in diffs')
labels_for_time_span_cache = {}

def labels_for_time_span(start_time, end_time, max_labels=20):

    if labels_for_time_span_cache.has_key((start_time, end_time, max_labels)):
        return labels_for_time_span_cache[(start_time, end_time, max_labels)]

    labels = {}
    for unit in RoundedTimeIterator.units:

        units_labels = {}
        for t in RoundedTimeIterator(start_time, end_time, unit):
            units_labels[t] = GraphTimeLabel(t, unit)

        labels_candidate = {}
        labels_candidate.update(units_labels)
        labels_candidate.update(labels)

        if len(labels_candidate) > max_labels:
            break
        else:
            labels = labels_candidate

    labels_for_time_span_cache[(start_time, end_time, max_labels)] = labels
    return labels
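
# Sketch of how this is used by GraphStatistic.x_labels() (the dates are made
# up for illustration):
#
#   labels = labels_for_time_span(datetime.datetime(2004, 1, 1),
#                                 datetime.datetime(2004, 6, 1))
#
# The result maps datetime objects to GraphTimeLabel instances, starting with
# coarse units (decades, years) and refining to finer ones until adding the
# next unit would exceed max_labels.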
class GraphTimeLabel(object):
    """Handle a graph's time labels.
    Used as a value in the labels dict.
    Actually created to have a "weight" or
    "importance" attached to the label - so
    we can draw bigger strokes for years,
    and smaller ones for days.
    """

    def __init__(self, label_datetime, unit, weight=None):
        """Initialise the instance.
        Weight means the "importance" of the label; for example a
        year is more important than a month and gets a bigger
        "stroke" or "tick" on the graph's axis.

        If weight is not specified as a parameter it is taken from
        the unit's index in RoundedTimeIterator.units.
        """
        self.datetime = label_datetime
        tt = label_datetime.timetuple()

        if tt[3] == 0 and tt[4] == 0 and tt[5] == 0:
            # only y-m-d
            self.text = '%04d-%02d-%02d' % (tt[0], tt[1], tt[2])
        else:
            # full
            self.text = '%04d-%02d-%02d %02d:%02d:%02d' % tuple(tt[0:6])

        if weight:
            self.weight = weight
        else:
            self.weight = list(RoundedTimeIterator.units).index(unit)

        self.unit = unit

    def __str__(self):
        return self.text
class RoundedTimeIterator(object):
    """Provide an object that iterates over a time period by fuzzy "round"
    time intervals like months, weeks etc.

    TODO: Write a doctest or unit test for this to define the behaviour strictly.
    Then, rewrite again. The problem with this is that I'm not sure what results it
    should give in the first place.
    """

    _unit_settings = {
        'decade': (10, 0),
        'fiveyears': (5, 0),
        'twoyears': (2, 0),
        'year': (1, 0),
        'sixmonths': (6, 1),
        'quarter': (3, 1),
        'month': (1, 1),
        'day': (1, 2),
        'hour': (1, 3),
        'minute': (1, 4),
        'second': (1, 5),
    }

    units = (
        'decade',
        'fiveyears',
        'twoyears',
        'year',
        'sixmonths',
        'quarter',
        'month',
        'day',
        'hour',
        'minute',
        'second',
    )

    def __init__(self, start_datetime, end_datetime, unit):
        """Create the object.
        unit is a string - the name of a unit.
        start_datetime and end_datetime are datetime.datetime objects.
        """
        if unit not in self.units:
            raise Exception('illegal unit value: %s' % repr(unit))
        assert isinstance(start_datetime, datetime.datetime)
        assert isinstance(end_datetime, datetime.datetime)
        self.unit = unit
        self.start_datetime = start_datetime
        self.end_datetime = end_datetime
        self.first = True
        self.current_datetime = start_datetime

    def _find_next(self, current_datetime):
        return self._increase_date(self._reset_date(current_datetime))

    def _increase_date(self, date):
        tl = list(date.timetuple())
        ch = self._unit_settings[self.unit]
        tl[ch[1]] += ch[0]
        nts = time.mktime(tuple(tl))
        return datetime.datetime.fromtimestamp(nts, date.tzinfo)

    def _reset_date(self, date):
        tl = list(date.timetuple())
        ch = self._unit_settings[self.unit]
        default_values = (0, 1, 1, 0, 0, 0)
        for i in range(ch[1] + 1, len(default_values)):
            tl[i] = default_values[i]

        return datetime.datetime.fromtimestamp(
            time.mktime(tuple(tl)), date.tzinfo)
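
    # For each unit, _unit_settings maps to (step, timetuple index): e.g.
    # 'quarter' -> (3, 1) means "add 3 to the month field".  _reset_date
    # zeroes out every field finer than that index, so for 'month' a date
    # like 2004-05-17 14:30 becomes 2004-05-01 00:00 (illustrative values).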
    def next(self):
        """Return the next rounded datetime within the iterated span."""
        if self.first and self.current_datetime == self._reset_date(self.current_datetime):
            # don't increase
            pass
        else:
            self.current_datetime = self._find_next(self.current_datetime)

        self.first = False

        if self.current_datetime >= self.start_datetime and self.current_datetime < self.end_datetime:
            return self.current_datetime
        else:
            raise StopIteration()

    def __iter__(self):
        return self

if __name__ == '__main__':
    locale.setlocale(locale.LC_ALL, '')
    main(sys.argv)