Follow-up to r29036: Now that the "mergeinfo" transaction file is no
[svn.git] / tools / dev / normalize-dump.py
blob2a0e51de2164a29e268300b5b09285ce73d4e959
1 #!/usr/bin/env python
3 import sys
4 import re
5 import string
# Matches one 'Name: value' header line.  Group 1 is the header name
# (everything before the first colon); group 2 is the value (everything
# after the colon and one optional space).
header_re = re.compile(r'^([^:]*): ?(.*)$')
class NodePath:
    """One node record of a revision: its path plus its header dictionary."""

    def __init__(self, path, headers):
        """Store the node's path and headers.

        path    -- path of the node (parse_file passes the value of the
                   record's 'Node-path' header)
        headers -- dict mapping header names to header values
        """
        self.path = path
        self.headers = headers

    def dump(self):
        """Print the path, then every header sorted by name, to stdout.

        The path line is indented three spaces and each 'name: value'
        line six spaces.
        """
        # Single-argument print(...) behaves the same as a statement on
        # Python 2 and as a function call on Python 3.
        print((' ' * 3) + self.path)
        # sorted() instead of keys() + .sort(): on Python 3 keys() returns
        # a view that has no sort() method.
        for header in sorted(self.headers):
            print((' ' * 6) + header + ': ' + self.headers[header])
def dump_revision(rev, nodepaths):
    """Print the normalized summary of one revision.

    rev       -- the revision number, as a string
    nodepaths -- dict mapping each node path to an object with a dump()
                 method

    'Revision <rev>' is printed to stdout, followed by the dump() output
    of every node in sorted path order.  Progress messages are written to
    stderr, separately from the summary on stdout.
    """
    sys.stderr.write('* Normalizing revision ' + rev + '...')
    # Single-argument print(...) works on both Python 2 and Python 3.
    print('Revision ' + rev)
    # sorted() instead of keys() + .sort(), which fails on Python 3 where
    # keys() is a view rather than a list.
    for path in sorted(nodepaths):
        nodepaths[path].dump()
    sys.stderr.write('done\n')
def parse_header_block(fp):
    """Read one block of 'Name: value' header lines from FP.

    Returns a (headers, eof) pair.  HEADERS is a dict of the header lines
    read; a repeated header name keeps its last value.  EOF is 1 when the
    block was ended by the end of the file and 0 when it was ended by a
    blank line.

    FP may yield either text or bytes lines; bytes lines are decoded as
    UTF-8 before matching (assumes the headers are UTF-8 encoded --
    TODO confirm against the dumpfile format notes).

    Raises Exception if a non-blank line does not match header_re.
    """
    headers = {}
    while True:
        line = fp.readline()
        # Truthiness rather than == '' so that the b'' a binary stream
        # returns at end of file on Python 3 is recognised too.
        if not line:
            return headers, 1
        # On Python 2 bytes is str, so this branch is only taken for
        # Python 3 binary streams.
        if isinstance(line, bytes) and not isinstance(line, str):
            line = line.decode('utf-8')
        # str.strip() replaces string.strip(), which Python 3 removed.
        line = line.strip()
        if not line:
            return headers, 0
        matches = header_re.match(line)
        if not matches:
            raise Exception('Malformed header block')
        headers[matches.group(1)] = matches.group(2)
def parse_file(fp):
    """Parse the dumpfile open on FP and print every revision, normalized.

    Header blocks are read one at a time with parse_header_block().  A
    block containing 'Revision-number' starts a new revision; a block
    containing 'Node-path' adds a node to the current revision.  The
    content that follows a block is skipped by reading the number of
    units announced in its '*-content-length' headers.  Each revision is
    passed to dump_revision() once the next revision, or the end of the
    file, shows that it is complete.

    NOTE(review): the lengths are presumably byte counts, so FP should be
    a binary stream (main() opens the file with 'rb') -- confirm.

    Raises Exception if, after the first revision block, a non-empty
    header block is neither a revision block nor a node block.
    """
    nodepaths = {}
    current_rev = None

    while True:
        # Parse a block of headers
        headers, eof = parse_header_block(fp)

        # 'in' replaces dict.has_key(), which Python 3 removed.
        if 'Revision-number' in headers:
            # This is a revision header block.  If there was a previous
            # revision, it is now complete, so dump it.
            if current_rev:
                dump_revision(current_rev, nodepaths)

            # Reset the data for this revision
            current_rev = headers['Revision-number']
            nodepaths = {}

            # Skip the contents
            prop_len = headers.get('Prop-content-length', 0)
            fp.read(int(prop_len))

        elif 'Node-path' in headers:
            # This is a node header block.  Make a new NodePath object,
            # and add it to the dictionary thereof.
            path = headers['Node-path']
            nodepaths[path] = NodePath(path, headers)

            # Skip the content
            text_len = headers.get('Text-content-length', 0)
            prop_len = headers.get('Prop-content-length', 0)
            fp.read(int(text_len) + int(prop_len))

        # Not a revision, not a node -- if we've already seen at least
        # one revision block, we are in an errorful state.  (Blocks seen
        # before the first revision are ignored.)
        elif current_rev and headers:
            raise Exception('Header block from outta nowhere')

        if eof:
            # Flush the final revision before stopping.
            if current_rev:
                dump_revision(current_rev, nodepaths)
            break
def usage():
    """Print the usage message to stdout and exit with status 0."""
    lines = (
        'Usage: ' + sys.argv[0] + ' [DUMPFILE]',
        '',
        'Reads a Subversion dumpfile from DUMPFILE (or, if not provided,',
        'from stdin) and normalizes the metadata contained therein,',
        'printing summarized and sorted information. This is useful for',
        'generating data about dumpfiles in a diffable fashion.',
    )
    # One single-argument print(...) works as a statement on Python 2 and
    # as a function call on Python 3.
    print('\n'.join(lines))
    sys.exit(0)
def main():
    """Normalize the dumpfile named on the command line, or stdin if none.

    If the first argument is '--help', usage() prints the usage message
    and exits.  Otherwise the first argument is opened as the dumpfile.
    With no arguments the dumpfile is read from standard input.
    """
    if len(sys.argv) > 1:
        if sys.argv[1] == '--help':
            usage()  # does not return: usage() calls sys.exit()
        # The with-block closes the file as soon as parsing finishes or
        # fails, instead of leaving the handle open.
        with open(sys.argv[1], 'rb') as fp:
            parse_file(fp)
    else:
        # Python 3 exposes the binary stream behind sys.stdin as .buffer,
        # which matches the 'rb' mode used for a named file.  Python 2's
        # sys.stdin has no such attribute and is used directly.
        parse_file(getattr(sys.stdin, 'buffer', sys.stdin))
# Run the normalizer only when this file is executed as a script, so that
# importing it has no side effects.
if __name__ == '__main__':
    main()