#!/usr/bin/env python
# -*- Python -*-
"""find-fix.py: produce a find/fix report for Subversion's IZ database

For simple text summary:
     find-fix.py query-set-1.tsv YYYY-MM-DD YYYY-MM-DD
Statistics will be printed for bugs found or fixed within the
time frame.

For gnuplot presentation:
     find-fix.py query-set-1.tsv outfile
Gnuplot provides its own way to select date ranges.

Either way, get a query-set-1.tsv from:
     http://subversion.tigris.org/iz-data/query-set-1.tsv  (updated nightly)
See http://subversion.tigris.org/iz-data/README for more info on that file.

For more usage info on this script:
     find-fix.py --help
"""
_version = "$Revision$"
# This can be run over the data file found at:
#   http://subversion.tigris.org/iz-data/query-set-1.tsv
import getopt
try:
  my_getopt = getopt.gnu_getopt
except AttributeError:
  my_getopt = getopt.getopt
import operator
import os
import os.path
import pydoc
import re
import string
import sys
import time

me = os.path.basename(sys.argv[0])
# Long options and their usage strings; "=" means it takes an argument.
# To get a list suitable for getopt, just do
#
#   [x[0] for x in long_opts]
#
# Make sure to sacrifice a lamb to Guido for each element of the list.
long_opts = [
  ["milestones=", """Optional, milestones NOT to report on
                     (one or more of Beta, 1.0, Post-1.0, cvs2svn-1.0,
                     cvs2svn-opt, inapplicable)"""],
  ["update",      """Optional, update the statistics first."""],
  ["doc",         """Optional, print pydocs."""],
  ["help",        """Optional, print usage (this text)."""],
  ["verbose",     """Optional, print more progress messages."""],
  ]

help = 0
verbose = 0
update = 0

DATA_FILE = "http://subversion.tigris.org/iz-data/query-set-1.tsv"
ONE_WEEK = 7 * 24 * 60 * 60
_types = []
_milestone_filter = []

noncore_milestone_filter = [
  'Post-1.0',
  '1.1',
  'cvs2svn-1.0',
  'cvs2svn-opt',
  'inapplicable',
  'no milestone',
  ]

one_point_oh_milestone_filter = noncore_milestone_filter + []

beta_milestone_filter = one_point_oh_milestone_filter + ['1.0']
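# (The "+ []" and "+ ['1.0']" above build fresh lists, so mutating one
# filter never changes the others.  The 1.0 filter currently matches the
# noncore set exactly; beta additionally hides the '1.0' milestone.)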
_types = [
  'DEFECT',
  'TASK',
  'FEATURE',
  'ENHANCEMENT',
  'PATCH',
  ]
def main():
  """Report bug find/fix rate statistics for Subversion."""

  global verbose
  global update
  global _types
  global _milestone_filter
  global noncore_milestone_filter

  try:
    opts, args = my_getopt(sys.argv[1:], "", [x[0] for x in long_opts])
  except getopt.GetoptError, e:
    sys.stderr.write("Error: %s\n" % e.msg)
    shortusage()
    sys.stderr.write("%s --help for options.\n" % me)
    sys.exit(1)

  for opt, arg in opts:
    if opt == "--help":
      usage()
      sys.exit(0)
    elif opt == "--verbose":
      verbose = 1
    elif opt == "--milestones":
      for mstone in string.split(arg, ","):
        if mstone == "noncore":
          _milestone_filter = noncore_milestone_filter
        elif mstone == "beta":
          _milestone_filter = beta_milestone_filter
        elif mstone == "one":
          _milestone_filter = one_point_oh_milestone_filter
        elif mstone[0] == '-':
          # a leading '-' removes a milestone from the current filter
          if mstone[1:] in _milestone_filter:
            spot = _milestone_filter.index(mstone[1:])
            _milestone_filter = _milestone_filter[:spot] \
                                + _milestone_filter[(spot+1):]
        else:
          _milestone_filter += [mstone]

    elif opt == "--update":
      update = 1
    elif opt == "--doc":
      pydoc.doc(pydoc.importfile(sys.argv[0]))
      sys.exit(0)

  if len(_milestone_filter) == 0:
    _milestone_filter = noncore_milestone_filter

  if verbose:
    sys.stderr.write("%s: Filtering out milestones %s.\n"
                     % (me, string.join(_milestone_filter, ", ")))

  if len(args) == 2:
    if verbose:
      sys.stderr.write("%s: Generating gnuplot data.\n" % me)
    if update:
      if verbose:
        sys.stderr.write("%s: Updating %s from %s.\n"
                         % (me, args[0], DATA_FILE))
      # fetch with curl; fall back to wget if curl fails
      if os.system("curl " + DATA_FILE + " > " + args[0]):
        os.system("wget -O " + args[0] + " " + DATA_FILE)
    plot(args[0], args[1])

  elif len(args) == 3:
    if verbose:
      sys.stderr.write("%s: Generating summary from %s to %s.\n"
                       % (me, args[1], args[2]))
    if update:
      if verbose:
        sys.stderr.write("%s: Updating %s from %s.\n"
                         % (me, args[0], DATA_FILE))
      # fetch with curl; fall back to wget if curl fails
      if os.system("curl " + DATA_FILE + " > " + args[0]):
        os.system("wget -O " + args[0] + " " + DATA_FILE)

    try:
      t_start = parse_time(args[1] + " 00:00:00")
    except ValueError:
      sys.stderr.write('%s: ERROR: bad time value: %s\n' % (me, args[1]))
      sys.exit(1)

    try:
      t_end = parse_time(args[2] + " 00:00:00")
    except ValueError:
      sys.stderr.write('%s: ERROR: bad time value: %s\n' % (me, args[2]))
      sys.exit(1)

    summary(args[0], t_start, t_end)
  else:
    usage()

  sys.exit(0)
def summary(datafile, d_start, d_end):
  "Prints a summary of activity within a specified date range."

  data = load_data(datafile)

  # activity during the requested period
  found, fixed, inval, dup, other = extract(data, 1, d_start, d_end)

  # activity from the beginning of time to the end of the request,
  # used to compute the number remaining open
  # XXX It would be faster to change extract() to collect this in one
  # pass, but we don't presently have enough data, nor use this
  # enough, to justify that rework.
  fromzerofound, fromzerofixed, fromzeroinval, fromzerodup, fromzeroother \
      = extract(data, 1, 0, d_end)

  alltypes_found = alltypes_fixed = alltypes_inval = alltypes_dup \
      = alltypes_other = alltypes_rem = 0
  for t in _types:
    fromzerorem_t = fromzerofound[t] \
                    - (fromzerofixed[t] + fromzeroinval[t] + fromzerodup[t]
                       + fromzeroother[t])
    print '%12s: found=%3d fixed=%3d inval=%3d dup=%3d ' \
          'other=%3d remain=%3d' \
          % (t, found[t], fixed[t], inval[t], dup[t], other[t], fromzerorem_t)
    alltypes_found = alltypes_found + found[t]
    alltypes_fixed = alltypes_fixed + fixed[t]
    alltypes_inval = alltypes_inval + inval[t]
    alltypes_dup = alltypes_dup + dup[t]
    alltypes_other = alltypes_other + other[t]
    alltypes_rem = alltypes_rem + fromzerorem_t

  print '-' * 77
  print '%12s: found=%3d fixed=%3d inval=%3d dup=%3d ' \
        'other=%3d remain=%3d' \
        % ('totals', alltypes_found, alltypes_fixed, alltypes_inval,
           alltypes_dup, alltypes_other, alltypes_rem)
  # print '%12s find/fix ratio: %g%%' \
  #       % (" "*12, (alltypes_found*100.0/(alltypes_fixed
  #          + alltypes_inval + alltypes_dup + alltypes_other)))
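# A sample summary() output line, from the format string above (the values
# here are illustrative only; real numbers come from the data file):
#
#       DEFECT: found= 10 fixed=  7 inval=  1 dup=  2 other=  0 remain= 42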
def plot(datafile, outbase):
  "Generates data files intended for use by gnuplot."

  global _types

  data = load_data(datafile)

  t_min = 1L<<32
  for issue in data:
    if issue.created < t_min:
      t_min = issue.created

  # break the time up into a tuple, then back up to Sunday
  t_start = time.localtime(t_min)
  t_start = time.mktime((t_start[0], t_start[1], t_start[2] - t_start[6] - 1,
                         0, 0, 0, 0, 0, 0))
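  # (tm_wday is 0 for Monday, so tm_mday - tm_wday - 1 is the preceding
  # Sunday; mktime() normalizes a zero or negative day-of-month for us.)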
  plots = { }
  for t in _types:
    # for each issue type, we will record per-week stats, compute a moving
    # average of the find/fix delta, and track the number of open issues
    plots[t] = [ [ ], MovingAverage(), 0 ]

  week = 0
  for date in range(int(t_start), int(time.time()), ONE_WEEK):
    ### this is quite inefficient, as we could just sort by date, but
    ### I'm being lazy
    found, fixed = extract(data, None, date, date + ONE_WEEK - 1)

    for t in _types:
      per_week, avg, open_issues = plots[t]
      delta = found[t] - fixed[t]
      per_week.append((week, date,
                       found[t], -fixed[t], avg.add(delta), open_issues))
      plots[t][2] = open_issues + delta

    week = week + 1

  for t in _types:
    week_data = plots[t][0]
    write_file(week_data, outbase, t, 'found', 2)
    write_file(week_data, outbase, t, 'fixed', 3)
    write_file(week_data, outbase, t, 'avg', 4)
    write_file(week_data, outbase, t, 'open', 5)
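# A minimal gnuplot sketch for the files plot() writes (file names follow
# write_file() below; 'out' stands in for whatever outbase you passed --
# this invocation is an illustration, not part of the original tooling):
#
#   gnuplot> plot 'out.found.DEFECT' with lines, 'out.fixed.DEFECT' with lines
#
# Column 1 is the week index, column 2 the per-week value; the trailing
# "# <date>" on each data line is a gnuplot comment and is ignored.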
def write_file(week_data, base, type, tag, idx):
  f = open('%s.%s.%s' % (base, tag, type), 'w')
  for info in week_data:
    f.write('%s %s # %s\n' % (info[0], info[idx], time.ctime(info[1])))
  f.close()
class MovingAverage:
  "Helper class to compute moving averages."

  def __init__(self, n=4):
    self.n = n
    self.data = [ 0 ] * n

  def add(self, value):
    self.data.pop(0)
    self.data.append(float(value) / self.n)
    return self.avg()

  def avg(self):
    return reduce(operator.add, self.data)
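# A quick sanity check of MovingAverage (a hypothetical helper, never called
# by the script): a single spike of 4 holds the 4-sample average at 1.0
# until it ages out of the window.
def _moving_average_example():
  ma = MovingAverage(n=4)
  return [ma.add(v) for v in (4, 0, 0, 0, 0)]  # => [1.0, 1.0, 1.0, 1.0, 0.0]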
def extract(data, details, d_start, d_end):
  """Extract found/fixed counts for each issue type within the date range.

  If DETAILS is false, then return two dictionaries:

    found, fixed

  ...each mapping issue types to the number of issues of that type
  found or fixed respectively.

  If DETAILS is true, return five dictionaries:

    found, fixed, invalid, duplicate, other

  The first is still the found issues, but the other four break down
  the resolution into 'FIXED', 'INVALID', 'DUPLICATE', and a grab-bag
  category for 'WORKSFORME', 'LATER', 'REMIND', and 'WONTFIX'."""

  global _types
  global _milestone_filter

  found = { }
  fixed = { }
  invalid = { }
  duplicate = { }
  other = { }  # "WORKSFORME", "LATER", "REMIND", and "WONTFIX"

  for t in _types:
    found[t] = fixed[t] = invalid[t] = duplicate[t] = other[t] = 0

  for issue in data:
    # filter out disrespected milestones
    if issue.milestone in _milestone_filter:
      continue

    # record the found/fixed counts
    if d_start <= issue.created <= d_end:
      found[issue.type] = found[issue.type] + 1
    if d_start <= issue.resolved <= d_end:
      if details:
        if issue.resolution == "FIXED":
          fixed[issue.type] = fixed[issue.type] + 1
        elif issue.resolution == "INVALID":
          invalid[issue.type] = invalid[issue.type] + 1
        elif issue.resolution == "DUPLICATE":
          duplicate[issue.type] = duplicate[issue.type] + 1
        else:
          other[issue.type] = other[issue.type] + 1
      else:
        fixed[issue.type] = fixed[issue.type] + 1

  if details:
    return found, fixed, invalid, duplicate, other
  else:
    return found, fixed
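# A hypothetical usage sketch (assumes query-set-1.tsv has been downloaded;
# not called by the script): weekly found/fixed DEFECT counts for the first
# week of 2004.
def _extract_example():
  data = load_data('query-set-1.tsv')
  found, fixed = extract(data, None,
                         parse_time('2004-01-01 00:00:00'),
                         parse_time('2004-01-08 00:00:00'))
  return found['DEFECT'], fixed['DEFECT']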
def load_data(datafile):
  "Return a list of Issue objects for the specified data file."
  return map(Issue, open(datafile).readlines())
class Issue:
  "Represents a single issue from the exported IssueZilla data."

  def __init__(self, line):
    row = string.split(string.strip(line), '\t')

    self.id = int(row[0])
    self.type = row[1]
    self.reporter = row[2]
    if row[3] == 'NULL':
      self.assigned = None
    else:
      self.assigned = row[3]
    self.milestone = row[4]
    self.created = parse_time(row[5])
    self.resolution = row[7]
    if not self.resolution:
      # If the resolution is empty, then force the resolved date to None.
      # When an issue is reopened, there will still be activity showing
      # a "RESOLVED", thus we get a resolved date. But we simply want to
      # ignore that date.
      self.resolved = None
    else:
      self.resolved = parse_time(row[6])
    self.summary = row[8]
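# For reference, the tab-separated columns Issue.__init__ expects, derived
# from the parsing above:
#
#   0:id  1:type  2:reporter  3:assigned  4:milestone
#   5:created  6:resolved  7:resolution  8:summary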
parse_time_re = re.compile('([0-9]{4})-([0-9]{2})-([0-9]{2}) '
                           '([0-9]{2}):([0-9]{2}):([0-9]{2})')

def parse_time(t):
  "Convert an exported MySQL timestamp into seconds since the epoch."

  global parse_time_re

  if t == 'NULL':
    return None
  try:
    matches = parse_time_re.match(t)
    return time.mktime((int(matches.group(1)),
                        int(matches.group(2)),
                        int(matches.group(3)),
                        int(matches.group(4)),
                        int(matches.group(5)),
                        int(matches.group(6)),
                        0, 0, -1))
  except ValueError:
    sys.stderr.write('ERROR: bad time value: %s\n' % t)
    sys.exit(1)
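# Example: parse_time('2004-06-01 12:00:00') returns the local-time epoch
# seconds for noon on 2004-06-01, and parse_time('NULL') returns None.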
def shortusage():
  print pydoc.synopsis(sys.argv[0])
  print """
For simple text summary:
   find-fix.py [options] query-set-1.tsv YYYY-MM-DD YYYY-MM-DD

For gnuplot presentation:
   find-fix.py [options] query-set-1.tsv outfile
"""
def usage():
  shortusage()
  for x in long_opts:
    padding_limit = 18
    if x[0][-1:] == '=':
      print "  --" + x[0][:-1],
      padding_limit = 19
    else:
      print "  --" + x[0],
    print (' ' * (padding_limit - len(x[0]))), x[1]
  print '''
Option keywords may be abbreviated to any unique prefix.
Most options require "=xxx" arguments.
Option order is not important.'''
if __name__ == '__main__':
  main()