3 """find-fix.py: produce a find/fix report for Subversion's IZ database
5 For simple text summary:
6 find-fix.py query-set-1.tsv YYYY-MM-DD YYYY-MM-DD
7 Statistics will be printed for bugs found or fixed within the
10 For gnuplot presentation:
11 find-fix.py query-set-1.tsv outfile
12 Gnuplot provides its own way to select date ranges.
14 Either way, get a query-set-1.tsv from:
15 http://subversion.tigris.org/iz-data/query-set-1.tsv (updated nightly)
16 See http://subversion.tigris.org/iz-data/README for more info on that file.
18 For more usage info on this script:
_version = "$Revision:"

#
# This can be run over the data file found at:
#   http://subversion.tigris.org/iz-data/query-set-1.tsv
#

import getopt
try:
  # gnu_getopt allows options and positional arguments to be interleaved;
  # it is not available on very old Pythons, so fall back to the strict
  # POSIX-style parser there.
  my_getopt = getopt.gnu_getopt
except AttributeError:
  my_getopt = getopt.getopt
import operator
import os
import os.path
import pydoc
import re
import string
import sys
import time

# Short name of this script, used as a prefix in progress/error messages.
me = os.path.basename(sys.argv[0])
# Long options and their usage strings; "=" means it takes an argument.
# To get a list suitable for getopt, just do
#
#   [x[0] for x in long_opts]
#
# Make sure to sacrifice a lamb to Guido for each element of the list.
long_opts = [
  ["milestones=", """Optional, milestones NOT to report on
(one or more of Beta, 1.0, Post-1.0, cvs2svn-1.0, cvs2svn-opt,
inkdrops)"""],
  ["update", """Optional, update the statistics first."""],
  ["doc", """Optional, print pydocs."""],
  ["help", """Optional, print usage (this text)."""],
  ["verbose", """Optional, print more progress messages."""],
  ]
# Canonical source of the nightly-updated issue export.
DATA_FILE = "http://subversion.tigris.org/iz-data/query-set-1.tsv"

# One week, in seconds -- the granularity of the gnuplot data.
ONE_WEEK = 7 * 24 * 60 * 60

# Module-level flags set from the command line in main().
_verbose = 0
_update = 0

# Milestones to exclude from the report; filled in by --milestones or
# defaulted in main().
_milestone_filter = []

# Milestones that are not part of the core 1.0 effort.
noncore_milestone_filter = [
  'Post-1.0',
  '1.1',
  'cvs2svn-1.0',
  'cvs2svn-opt',
  'inkdrops',
  ]

# The "+ []" makes an independent copy, so later list surgery on one
# filter cannot affect the other.
one_point_oh_milestone_filter = noncore_milestone_filter + []

beta_milestone_filter = one_point_oh_milestone_filter + ['1.0']
95 """Report bug find/fix rate statistics for Subversion."""
100 global _milestone_filter
101 global noncore_milestone_filter
104 opts
, args
= my_getopt(sys
.argv
[1:], "", [x
[0] for x
in long_opts
])
105 except getopt
.GetoptError
, e
:
106 sys
.stderr
.write("Error: %s\n" % e
.msg
)
108 sys
.stderr
.write("%s --help for options.\n" % me
)
111 for opt
, arg
in opts
:
115 elif opt
== "--verbose":
117 elif opt
== "--milestones":
118 for mstone
in string
.split(arg
, ","):
119 if mstone
== "noncore":
120 _milestone_filter
= noncore_milestone_filter
121 elif mstone
== "beta":
122 _milestone_filter
= beta_milestone_filter
123 elif mstone
== "one":
124 _milestone_filter
= one_point_oh_milestone_filter
125 elif mstone
[0] == '-':
126 if mstone
[1:] in _milestone_filter
:
127 spot
= _milestone_filter
.index(mstone
[1:])
128 _milestone_filter
= _milestone_filter
[:spot
] \
129 + _milestone_filter
[(spot
+1):]
131 _milestone_filter
+= [mstone
]
133 elif opt
== "--update":
136 pydoc
.doc(pydoc
.importfile(sys
.argv
[0]))
139 if len(_milestone_filter
) == 0:
140 _milestone_filter
= noncore_milestone_filter
143 sys
.stderr
.write("%s: Filtering out milestones %s.\n"
144 % (me
, string
.join(_milestone_filter
, ", ")))
148 sys
.stderr
.write("%s: Generating gnuplot data.\n" % me
)
151 sys
.stderr
.write("%s: Updating %s from %s.\n" % (me
, args
[0], DATA_FILE
))
152 if os
.system("curl " + DATA_FILE
+ "> " + args
[0]):
153 os
.system("wget " + DATA_FILE
)
154 plot(args
[0], args
[1])
158 sys
.stderr
.write("%s: Generating summary from %s to %s.\n"
159 % (me
, args
[1], args
[2]))
162 sys
.stderr
.write("%s: Updating %s from %s.\n" % (me
, args
[0], DATA_FILE
))
163 if os
.system("curl " + DATA_FILE
+ "> " + args
[0]):
164 os
.system("wget " + DATA_FILE
)
167 t_start
= parse_time(args
[1] + " 00:00:00")
169 sys
.stderr
.write('%s: ERROR: bad time value: %s\n' % (me
, args
[1]))
173 t_end
= parse_time(args
[2] + " 00:00:00")
175 sys
.stderr
.write('%s: ERROR: bad time value: %s\n' % (me
, args
[2]))
178 summary(args
[0], t_start
, t_end
)
185 def summary(datafile
, d_start
, d_end
):
186 "Prints a summary of activity within a specified date range."
188 data
= load_data(datafile
)
190 # activity during the requested period
191 found
, fixed
, inval
, dup
, other
= extract(data
, 1, d_start
, d_end
)
193 # activity from the beginning of time to the end of the request
194 # used to compute remaining
195 # XXX It would be faster to change extract to collect this in one
196 # pass. But we don't presently have enough data, nor use this
197 # enough, to justify that rework.
198 fromzerofound
, fromzerofixed
, fromzeroinval
, fromzerodup
, fromzeroother \
199 = extract(data
, 1, 0, d_end
)
201 alltypes_found
= alltypes_fixed
= alltypes_inval
= alltypes_dup \
202 = alltypes_other
= alltypes_rem
= 0
204 fromzerorem_t
= fromzerofound
[t
]\
205 - (fromzerofixed
[t
] + fromzeroinval
[t
] + fromzerodup
[t
]
207 print '%12s: found=%3d fixed=%3d inval=%3d dup=%3d ' \
208 'other=%3d remain=%3d' \
209 % (t
, found
[t
], fixed
[t
], inval
[t
], dup
[t
], other
[t
], fromzerorem_t
)
210 alltypes_found
= alltypes_found
+ found
[t
]
211 alltypes_fixed
= alltypes_fixed
+ fixed
[t
]
212 alltypes_inval
= alltypes_inval
+ inval
[t
]
213 alltypes_dup
= alltypes_dup
+ dup
[t
]
214 alltypes_other
= alltypes_other
+ other
[t
]
215 alltypes_rem
= alltypes_rem
+ fromzerorem_t
218 print '%12s: found=%3d fixed=%3d inval=%3d dup=%3d ' \
219 'other=%3d remain=%3d' \
220 % ('totals', alltypes_found
, alltypes_fixed
, alltypes_inval
,
221 alltypes_dup
, alltypes_other
, alltypes_rem
)
222 # print '%12s find/fix ratio: %g%%' \
223 # % (" "*12, (alltypes_found*100.0/(alltypes_fixed
224 # + alltypes_inval + alltypes_dup + alltypes_other)))
def plot(datafile, outbase):
  "Generates data files intended for use by gnuplot."

  data = load_data(datafile)

  # find the earliest creation time in the data set
  t_min = 1 << 32
  for issue in data:
    if issue.created < t_min:
      t_min = issue.created

  # break the time up into a tuple, then back up to Sunday
  t_start = time.localtime(t_min)
  t_start = time.mktime((t_start[0], t_start[1], t_start[2] - t_start[6] - 1,
                         0, 0, 0, 0, 0, 0))

  plots = { }
  for t in issue_types:
    # for each issue type, we will record per-week stats, compute a moving
    # average of the find/fix delta, and track the number of open issues
    plots[t] = [ [ ], MovingAverage(), 0 ]

  week = 0
  for date in range(t_start, time.time(), ONE_WEEK):
    ### this is quite inefficient, as we could just sort by date, but
    ### it works
    found, fixed = extract(data, None, date, date + ONE_WEEK - 1)

    for t in issue_types:
      per_week, avg, open_issues = plots[t]
      delta = found[t] - fixed[t]
      per_week.append((week, date,
                       found[t], -fixed[t], avg.add(delta), open_issues))
      plots[t][2] = open_issues + delta

    week = week + 1

  for t in issue_types:
    week_data = plots[t][0]
    write_file(week_data, outbase, t, 'found', 2)
    write_file(week_data, outbase, t, 'fixed', 3)
    write_file(week_data, outbase, t, 'avg', 4)
    write_file(week_data, outbase, t, 'open', 5)
def write_file(week_data, base, type, tag, idx):
  """Write one gnuplot data file named '<base>.<tag>.<type>'.

  Each line is '<week-number> <value> # <human-readable date>', where the
  value comes from column IDX of the per-week tuples in WEEK_DATA.
  """
  f = open('%s.%s.%s' % (base, tag, type), 'w')
  try:
    for info in week_data:
      f.write('%s %s # %s\n' % (info[0], info[idx], time.ctime(info[1])))
  finally:
    # Previously the handle was never closed (left to the GC); close it
    # deterministically so the data is flushed even on error.
    f.close()
279 "Helper class to compute moving averages."
280 def __init__(self
, n
=4):
282 self
.data
= [ 0 ] * n
283 def add(self
, value
):
285 self
.data
.append(float(value
) / self
.n
)
288 return reduce(operator
.add
, self
.data
)
def extract(data, details, d_start, d_end):
  """Extract found/fixed counts for each issue type within the data range.

If DETAILS is false, then return two dictionaries:

  found, fixed

...each mapping issue types to the number of issues of that type
found or fixed respectively.

If DETAILS is true, return five dictionaries:

  found, fixed, invalid, duplicate, other

The first is still the found issues, but the other four break down
the resolution into 'FIXED', 'INVALID', 'DUPLICATE', and a grab-bag
category for 'WORKSFORME', 'LATER', 'REMIND', and 'WONTFIX'."""

  global _milestone_filter

  found = { }
  fixed = { }
  invalid = { }
  duplicate = { }
  other = { }  # "WORKSFORME", "LATER", "REMIND", and "WONTFIX"

  for t in issue_types:
    found[t] = fixed[t] = invalid[t] = duplicate[t] = other[t] = 0

  for issue in data:
    # filter out disrespected milestones
    if issue.milestone in _milestone_filter:
      continue

    # record the found/fixed counts
    if d_start <= issue.created <= d_end:
      found[issue.type] = found[issue.type] + 1
    if d_start <= issue.resolved <= d_end:
      if details:
        if issue.resolution == "FIXED":
          fixed[issue.type] = fixed[issue.type] + 1
        elif issue.resolution == "INVALID":
          invalid[issue.type] = invalid[issue.type] + 1
        elif issue.resolution == "DUPLICATE":
          duplicate[issue.type] = duplicate[issue.type] + 1
        else:
          other[issue.type] = other[issue.type] + 1
      else:
        fixed[issue.type] = fixed[issue.type] + 1

  if details:
    return found, fixed, invalid, duplicate, other
  else:
    return found, fixed
def load_data(datafile):
  "Return a list of Issue objects for the specified data."
  # Open/close explicitly: the original left the handle for the GC.
  f = open(datafile)
  try:
    return map(Issue, f.readlines())
  finally:
    f.close()
354 "Represents a single issue from the exported IssueZilla data."
356 def __init__(self
, line
):
357 row
= string
.split(string
.strip(line
), '\t')
359 self
.id = int(row
[0])
361 self
.reporter
= row
[2]
365 self
.assigned
= row
[3]
366 self
.milestone
= row
[4]
367 self
.created
= parse_time(row
[5])
368 self
.resolution
= row
[7]
369 if not self
.resolution
:
370 # If the resolution is empty, then force the resolved date to None.
371 # When an issue is reopened, there will still be activity showing
372 # a "RESOLVED", thus we get a resolved date. But we simply want to
376 self
.resolved
= parse_time(row
[6])
377 self
.summary
= row
[8]
# Matches an exported MySQL timestamp: 'YYYY-MM-DD hh:mm:ss', with one
# capture group per field.
parse_time_re = re.compile('([0-9]{4})-([0-9]{2})-([0-9]{2}) '
                           '([0-9]{2}):([0-9]{2}):([0-9]{2})')
384 "Convert an exported MySQL timestamp into seconds since the epoch."
391 matches
= parse_time_re
.match(t
)
392 return time
.mktime((int(matches
.group(1)),
393 int(matches
.group(2)),
394 int(matches
.group(3)),
395 int(matches
.group(4)),
396 int(matches
.group(5)),
397 int(matches
.group(6)),
400 sys
.stderr
.write('ERROR: bad time value: %s\n'% t
)
404 print pydoc
.synopsis(sys
.argv
[0])
406 For simple text summary:
407 find-fix.py [options] query-set-1.tsv YYYY-MM-DD YYYY-MM-DD
409 For gnuplot presentation:
410 find-fix.py [options] query-set-1.tsv outfile
418 print " --" + x
[0][:-1],
422 print (' ' * (padding_limit
- len(x
[0]))), x
[1]
424 Option keywords may be abbreviated to any unique prefix.
425 Most options require "=xxx" arguments.
426 Option order is not important.'''
if __name__ == '__main__':
  main()