Follow-up to r29036: Now that the "mergeinfo" transaction file is no
[svn.git] / tools / dev / mlpatch.py
blob53411608b07c17f723303d2b4aae75567a527eff
1 #!/usr/bin/env python
3 # mlpatch.py: Run with no arguments for usage
5 import sys, os
6 import sgmllib
7 from htmlentitydefs import entitydefs
8 import fileinput
9 from urllib2 import urlopen
# Number of bytes requested from the message URL per read() call in main().
CHUNKSIZE = 8 * 1024
13 class MyParser(sgmllib.SGMLParser):
14 def __init__(self):
15 self.baseclass = sgmllib.SGMLParser
16 self.baseclass.__init__(self)
17 self.entitydefs = entitydefs
18 self.entitydefs["nbsp"] = " "
19 self.inbody = False
20 self.complete_line = False
21 self.discard_gathered()
23 def discard_gathered(self):
24 self.gather_data = False
25 self.gathered_data = ""
27 def noop(self):
28 pass
30 def out(self, data):
31 sys.stdout.write(data)
33 def handle_starttag(self, tag, method, attrs):
34 if not self.inbody: return
35 self.baseclass.handle_starttag(self, tag, method, attrs)
37 def handle_endtag(self, tag, method):
38 if not self.inbody: return
39 self.baseclass.handle_endtag(self, tag, method)
41 def handle_data(self, data):
42 if not self.inbody: return
43 data = data.replace('\n','')
44 if len(data) == 0: return
45 if self.gather_data:
46 self.gathered_data += data
47 else:
48 if self.complete_line:
49 if data[0] in ('+', '-', ' ', '#') \
50 or data.startswith("Index:") \
51 or data.startswith("@@ ") \
52 or data.startswith("======"):
53 # Real new line
54 self.out('\n')
55 else:
56 # Presume that we are wrapped
57 self.out(' ')
58 self.complete_line = False
59 self.out(data)
61 def handle_charref(self, ref):
62 if not self.inbody: return
63 self.baseclass.handle_charref(self, ref)
65 def handle_entityref(self, ref):
66 if not self.inbody: return
67 self.baseclass.handle_entityref(self, ref)
69 def handle_comment(self, comment):
70 if comment == ' body="start" ':
71 self.inbody = True
72 elif comment == ' body="end" ':
73 self.inbody = False
75 def handle_decl(self, data):
76 if not self.inbody: return
77 print "DECL: " + data
79 def unknown_starttag(self, tag, attrs):
80 if not self.inbody: return
81 print "UNKTAG: %s %s" % (tag, attrs)
83 def unknown_endtag(self, tag):
84 if not self.inbody: return
85 print "UNKTAG: /%s" % (tag)
87 def do_br(self, attrs):
88 self.complete_line = True
90 def do_p(self, attrs):
91 if self.complete_line:
92 self.out('\n')
93 self.out(' ')
94 self.complete_line = True
96 def start_a(self, attrs):
97 self.gather_data = True
99 def end_a(self):
100 self.out(self.gathered_data.replace('_at_', '@'))
101 self.discard_gathered()
103 def close(self):
104 if self.complete_line:
105 self.out('\n')
106 self.baseclass.close(self)
def main():
    """Command-line entry point: fetch one archived message and demangle it.

    With no arguments, writes the usage text to stderr and exits with
    status 0.  With anything other than exactly four arguments, writes an
    error to stderr and exits with status 1.  Otherwise the arguments are
    taken as (list, year, month, msgno); the matching page is fetched from
    http://svn.haxx.se/, its URL is printed as a "MsgUrl: " line, and the
    page is streamed through MyParser, which writes the result to stdout.
    """
    if len(sys.argv) == 1:
        sys.stderr.write(
            "usage: mlpatch.py dev|users year month msgno > foobar.patch\n" +
            "example: mlpatch.py dev 2005 01 0001 > issue-XXXX.patch\n" +
            """
Very annoyingly, the http://svn.haxx.se/ subversion mailing list archives
mangle inline patches, and provide no raw message download facility
(other than for an entire month's email as an mbox).

So, I wrote this script, to demangle them. It's not perfect, as it has to
guess about whitespace, but it does an acceptable job.\n""")
        sys.exit(0)
    elif len(sys.argv) != 5:
        sys.stderr.write("error: mlpatch.py: Bad parameters - run with no "
                         + "parameters for usage\n")
        sys.exit(1)

    # Explicit names instead of "list"/"buffer", which shadow builtins.
    list_name, year, month, msgno = sys.argv[1:]
    url = ("http://svn.haxx.se/%s/archive-%s-%s/%s.shtml"
           % (list_name, year, month, msgno))
    sys.stdout.write("MsgUrl: " + url + "\n")

    msgfile = urlopen(url)
    try:
        parser = MyParser()
        # Feed the page to the parser in CHUNKSIZE-byte pieces; an empty
        # read marks the end of the response.
        chunk = msgfile.read(CHUNKSIZE)
        while chunk:
            parser.feed(chunk)
            chunk = msgfile.read(CHUNKSIZE)
        parser.close()
    finally:
        # Release the connection even if reading or parsing fails.
        msgfile.close()
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()