Follow-up to r29036: Now that the "mergeinfo" transaction file is no
[svn.git] / tools / dev / trails.py
blob56a9bfe07d99867b59d440c70ca96f57fc19b4fa
1 #!/usr/bin/env python
3 ## See the usage() function for operating instructions. ##
5 import re
6 import sys
7 import operator
# One logged trail looks like:
#   (txn_body, filename, lineno, txn): (table, op)(table, op)...
# where txn is the literal 0 or 1.  Raw strings keep the regex backslashes
# from being read as (invalid) string escape sequences.
_re_trail = re.compile(r'\((?P<txn_body>[a-z_]*), (?P<filename>[a-z_\-./]*), (?P<lineno>[0-9]*), (?P<txn>0|1)\): (?P<ops>.*)')

# A single "(table, op)" entry inside the ops portion of a trail line.
_re_table_op = re.compile(r'\(([a-z]*), ([a-z]*)\)')

# Horizontal rule written above and below each report heading.
_seperator = '------------------------------------------------------------\n'
def parse_trails_log(infile):
    """Parse a trail debug log into a list of (txn_body, trail) pairs.

    infile is a file-like object providing readlines().  Every line must
    match _re_trail; on the first line that does not, an error is written
    to stderr and the process exits with status 1.

    Each returned pair holds:
      txn_body -- a (name, filename, line number) tuple taken from the line
      trail    -- the list of (table, op) string pairs found on the line,
                  in the reverse of the order they appear in the log

    Lines whose txn flag is 0 are dropped.
    """
    parsed = []
    for number, text in enumerate(infile.readlines(), 1):
        found = _re_trail.match(text)
        if found is None:
            sys.stderr.write('Invalid input, line %u:\n%s\n' % (number, text))
            sys.exit(1)

        ### We're not interested in trails that don't use txns at this point.
        if int(found.group('txn')) == 0:
            continue

        body_id = (found.group('txn_body'),
                   found.group('filename'),
                   int(found.group('lineno')))

        # Store the operations in the reverse of their logged order.
        operations = _re_table_op.findall(found.group('ops'))[::-1]

        # An empty trail is reported but still recorded.
        if len(operations) == 0:
            sys.stderr.write('Warning! Empty trail at line %u:\n%s' % (number, text))

        parsed.append((body_id, operations))

    return parsed
def output_summary(trails, outfile):
    """Write overall trail statistics to outfile.

    trails is a sequence of (txn_body, trail) pairs; only the length of
    each trail is used.  The report lists the number of trails, the total
    number of ops, and the maximum, median and average ops per trail.

    When trails is empty every figure is reported as zero instead of
    failing on the empty list.
    """
    ops = sorted(len(trail) for (txn_body, trail) in trails)

    total_trails = len(ops)
    total_ops = sum(ops)
    if total_trails:
        max_ops = ops[-1]
        # Floor division keeps the index an integer under true division;
        # for an even count this selects the upper of the two middle values.
        median_ops = ops[total_trails // 2]
        average_ops = float(total_ops) / total_trails
    else:
        # No trails at all: nothing to index into or divide by.
        max_ops = 0
        median_ops = 0
        average_ops = 0.0

    outfile.write(_seperator)
    outfile.write('Summary\n')
    outfile.write(_seperator)
    outfile.write('Total number of trails: %10i\n' % total_trails)
    outfile.write('Total number of ops: %10i\n' % total_ops)
    outfile.write('max ops/trail: %10i\n' % max_ops)
    outfile.write('median ops/trail: %10i\n' % median_ops)
    outfile.write('average ops/trail: %10.2f\n' % average_ops)
    outfile.write('\n')
# custom sort key
def _freqtable_key(entry):
    """Return the sort key for an (item, frequency) pair.

    Negating the frequency makes an ascending sort put the most frequent
    items first; the item itself breaks ties in ascending order.
    """
    item, frequency = entry
    return (-frequency, item)


def list_frequencies(list):
    """
    Given a list, return a list composed of (item, frequency)
    pairs, ordered by descending frequency and then by ascending item.

    Items must be hashable (they are used as dictionary keys) and
    mutually comparable (they are used to break frequency ties).
    """
    # NOTE: the parameter name shadows the builtin; it is kept unchanged
    # so that callers passing it by keyword continue to work.
    counter = {}
    for item in list:
        counter[item] = counter.get(item, 0) + 1

    return sorted(counter.items(), key=_freqtable_key)
def output_trail_length_frequencies(trails, outfile):
    """Write a table of how often each trail length occurs.

    trails is a sequence of (txn_body, trail) pairs.  One row is written
    to outfile per distinct trail length, giving the length, the number of
    trails of that length, and that number as a percentage of all trails.
    Rows appear in the order returned by list_frequencies().
    """
    lengths = [len(trail) for (txn_body, trail) in trails]
    trail_count = len(lengths)
    table = list_frequencies(lengths)

    for heading in (_seperator,
                    'Trail length frequencies\n',
                    _seperator,
                    'ops/trail frequency percentage\n'):
        outfile.write(heading)

    for (length, count) in table:
        percent = float(count) * 100 / trail_count
        outfile.write('%4i %6i %5.2f\n' % (length, count, percent))

    outfile.write('\n')
def output_trail(outfile, trail, column=0):
    """Write trail to outfile as a comma-separated, wrapped list.

    Each element is rendered with str().  An element is placed on a new
    line when the current line plus that element would be longer than
    75 - column characters; continuation lines are indented by column
    spaces.  The first line is not indented here.  A blank line follows
    the trail.  An empty trail is written as '<empty>' with no blank line.
    """
    if not trail:
        outfile.write('<empty>\n')
        return

    budget = 75 - column
    indent = ' ' * column

    current = str(trail[0])
    for entry in trail[1:]:
        text = str(entry)
        if len(current) + len(text) <= budget:
            # Still fits: keep accumulating on the current line.
            current = current + ', ' + text
            continue
        # Flush the full line and start a new, indented one.
        outfile.write('%s,\n' % current)
        outfile.write(indent)
        current = text
    outfile.write('%s\n' % current)

    outfile.write('\n')
def output_trail_frequencies(trails, outfile):
    """Write how often each distinct (txn_body, trail) combination occurs.

    trails is a sequence of (txn_body, trail) pairs, where txn_body is a
    (name, filename, line number) tuple.  For every distinct combination
    a heading line identifying the txn_body is written to outfile,
    followed by the occurrence count, its percentage of all trails, the
    trail length, and the trail itself (via output_trail, column 37).
    """
    trail_count = len(trails)

    # Convert each trail to a tuple so the pair can be counted by
    # list_frequencies().
    countable = [(txn_body, tuple(trail)) for (txn_body, trail) in trails]
    table = list_frequencies(countable)

    outfile.write(_seperator)
    outfile.write('Trail frequencies\n')
    outfile.write(_seperator)
    outfile.write('frequency percentage ops/trail trail\n')

    for (key, count) in table:
        (body_id, ops) = key
        (body_name, source_file, source_line) = body_id
        share = float(count) * 100 / trail_count
        outfile.write('-- %s - %s:%u --\n' % (body_name, source_file, source_line))
        outfile.write('%6i %5.2f %4i ' % (count, share, len(ops)))
        output_trail(outfile, ops, 37)
def output_txn_body_frequencies(trails, outfile):
    """Write how often each txn_body occurs among the trails.

    trails is a sequence of (txn_body, trail) pairs, where txn_body is a
    (name, filename, line number) tuple.  One row is written to outfile
    per distinct txn_body: its occurrence count, that count as a
    percentage of all trails, and the txn_body's name and location.
    """
    trail_count = len(trails)
    table = list_frequencies([txn_body for (txn_body, trail) in trails])

    for heading in (_seperator,
                    'txn_body frequencies\n',
                    _seperator,
                    'frequency percentage txn_body\n'):
        outfile.write(heading)

    for (body_id, count) in table:
        (body_name, source_file, source_line) = body_id
        share = float(count) * 100 / trail_count
        row = '%6i %5.2f %s - %s:%u\n' % (
            count, share, body_name, source_file, source_line)
        outfile.write(row)
def usage(pgm):
    """Write operating instructions to stderr.

    pgm is the program name to show in the message.
    """
    w = sys.stderr.write
    w("%s: a program for analyzing Subversion trail usage statistics.\n" % pgm)
    w("\n")
    w("Usage:\n")
    w("\n")
    w(" Compile Subversion with -DSVN_FS__TRAIL_DEBUG, which will cause it\n")
    w(" to print trail statistics to stderr. Save the stats to a file,\n")
    w(" invoke %s on the file, and ponder the output.\n" % pgm)
    w("\n")
if __name__ == '__main__':
    # Accept at most one argument: the log file to analyze.
    if len(sys.argv) > 2:
        sys.stderr.write("Error: too many arguments\n\n")
        usage(sys.argv[0])
        sys.exit(1)

    if len(sys.argv) == 1:
        # No file named: read the log from standard input.
        trails = parse_trails_log(sys.stdin)
    else:
        try:
            infile = open(sys.argv[1])
        except IOError:
            sys.stderr.write("Error: unable to open '%s'\n\n" % sys.argv[1])
            usage(sys.argv[0])
            sys.exit(1)
        # Close the log as soon as parsing finishes, including when
        # parse_trails_log() exits on invalid input.
        try:
            trails = parse_trails_log(infile)
        finally:
            infile.close()

    output_summary(trails, sys.stdout)
    output_trail_length_frequencies(trails, sys.stdout)
    output_trail_frequencies(trails, sys.stdout)
    output_txn_body_frequencies(trails, sys.stdout)