Make HTMLParser handle Unicode.
[jira-zephyrbot.git] / jirabot.py
blob2fee41dd45cbc2e9637005ff554d51ca8aeff4c5
1 #!/usr/bin/python
2 import calendar
3 import feedparser
4 import formatter
5 import htmlentitydefs
6 import htmllib
7 import mechanize
8 import os
9 import random
10 import string
11 import StringIO
12 import time
13 import traceback
14 import urlparse
15 import zephyr
# Sender name stamped on every notice this bot emits.
zephyr_sender = 'jira'
# Zephyr class that all notices are sent to.
zephyr_class = 'andersk-test'
# File holding the timestamp (integer seconds) of the newest feed entry
# already processed; the main loop also renames it to act as a lock.
time_file = 'jirabot.time'
21 class UnicodeHTMLParser(htmllib.HTMLParser):
22 entitydefs = dict((k, unichr(v)) for (k, v) in htmlentitydefs.name2codepoint.items())
24 def convert_charref(self, name):
25 try:
26 n = int(name)
27 except ValueError:
28 return
29 return self.convert_codepoint(n)
31 def convert_codepoint(self, codepoint):
32 return unichr(codepoint)
def jira_init():
    """Build and return the mechanize browser used for all Jira requests.

    The browser ignores robots.txt, presents the client certificate in
    ``cert.pem`` to the MIT identity provider, and asks for English pages.
    """
    browser = mechanize.Browser()
    browser.set_handle_robots(False)
    # The same PEM file is passed as both the key file and the certificate file.
    browser.add_client_certificate("https://idp.mit.edu:9443", "cert.pem", "cert.pem")
    browser.addheaders = [("Accept-Language", "en-us,en;q=0.5")]
    return browser
def jira_login(b):
    """Log the browser *b* in to Jira via the "MIT Touchstone" link.

    Returns early when the dashboard has no Touchstone link or when following
    it lands straight back on jira.mit.edu; otherwise submits the three forms
    of the login flow in order.
    """
    b.open("https://jira.mit.edu/jira/secure/Dashboard.jspa")
    try:
        b.follow_link(text="MIT Touchstone")
    except mechanize.LinkNotFoundError:
        # No login link on the dashboard -- presumably already logged in.
        return

    landed_on = urlparse.urlparse(b.geturl()).netloc
    if landed_on == "jira.mit.edu":
        # Bounced back to Jira without any intermediate login pages.
        return

    def offers_certificate_login(form):
        # True for the form that contains a 'login_certificate' control.
        return any(c.name == 'login_certificate' for c in form.controls)

    b.select_form("wayfForm1")
    b.submit()
    b.select_form(predicate=offers_certificate_login)
    b.submit()
    # Submit the first form on the page that follows.
    b.select_form(nr=0)
    b.submit()
def feed_to_zephyrs(thing, rss, parse):
    """Turn the new entries of an RSS feed into a list of (time, notice) pairs.

    thing -- label used in error messages ('issue' or 'comment' at the
             call sites in this file).
    rss   -- the raw feed document handed to feedparser.parse.
    parse -- callable converting one feed entry into a notice.

    Entries whose timestamp is not later than the global ``old_time`` are
    skipped; the global ``new_time`` is raised to the newest timestamp seen.
    A failure in *parse* is reported as an error notice for that entry, and a
    failure while reading the feed itself as an error notice with timestamp 0.
    """
    global old_time, new_time
    zephyrs = []
    try:
        feed = feedparser.parse(rss)
        for e in feed.entries:
            t = int(calendar.timegm(e.date_parsed))
            if t <= old_time:
                continue
            if t > new_time:
                new_time = t
            try:
                z = parse(e)
            except Exception:
                # Catch Exception rather than everything so that
                # KeyboardInterrupt/SystemExit can still stop the bot.
                z = zerror("Error parsing " + thing + ":\n" + e.id + "\n" + traceback.format_exc())
            zephyrs.append((t, z))
    except Exception:
        zephyrs.append((0, zerror("Error parsing " + thing + "s feed:\n" + traceback.format_exc())))
    return zephyrs
def parse_issue(e):
    """Build the notice announcing that the issue in feed entry *e* was updated.

    The zephyr instance is the last path component of the entry's id URL;
    the notice fields are the entry title and the id followed by a fixed
    "This issue was updated." line.
    """
    issue = urlparse.urlparse(e.id)[2].rsplit('/', 1)[1]
    msg = e.id + "\nThis issue was updated."

    return zephyr.ZNotice(
        sender=zephyr_sender,
        auth=False,
        opcode='auto',
        cls=zephyr_class,
        instance=issue,
        fields=[e.title, msg],
    )
def parse_comment(e):
    """Build the notice for a newly added comment described by feed entry *e*.

    The zephyr instance is the last path component of the entry's id URL
    (with query and fragment discarded).  The comment body is the entry's
    HTML summary, cut at its last '<table>' and rendered to plain text.
    """
    scheme_host_path = urlparse.urlparse(e.id)[0:3]
    # Rebuild the URL with the query and fragment dropped.
    url = urlparse.urlunsplit(scheme_host_path + (None,None))
    issue = url.rsplit('/', 1)[1]

    # Render the HTML into an in-memory buffer as plain text.
    rendered = StringIO.StringIO()
    writer = formatter.DumbWriter(rendered)
    parser = UnicodeHTMLParser(formatter.AbstractFormatter(writer))
    html = e.summary.rsplit('<table>', 1)[0]
    parser.feed(html)
    parser.close()
    comment = rendered.getvalue()

    msg = e.author + " added a comment:\n" + comment.rstrip()

    return zephyr.ZNotice(
        sender=zephyr_sender,
        auth=False,
        opcode='auto',
        cls=zephyr_class,
        instance=issue,
        fields=[e.title, msg],
    )
def zerror(msg):
    """Return a notice on the 'jira-error' instance carrying the text *msg*."""
    error_fields = ['Jira bot error', msg]
    return zephyr.ZNotice(
        sender=zephyr_sender,
        auth=False,
        opcode='auto',
        cls=zephyr_class,
        instance='jira-error',
        fields=error_fields,
    )
122 b = jira_init()
123 zephyr.init()
125 while True:
126 time_file_new = time_file + '.' + ''.join(random.sample(string.letters, 8))
128 try:
129 os.rename(time_file, time_file_new)
130 except OSError:
131 print "warning: could not acquire timestamp lock"
132 time.sleep(17)
133 continue
135 jira_login(b)
136 b.open("https://jira.mit.edu/jira/sr/jira.issueviews:searchrequest-rss/temp/SearchRequest.xml?&pid=10185&updated%3Aprevious=-1w&sorter/field=updated&sorter/order=DESC&tempMax=1000")
137 issues_rss = b.response().read()
138 b.open("https://jira.mit.edu/jira/sr/jira.issueviews:searchrequest-comments-rss/temp/SearchRequest.xml?&pid=10185&updated%3Aprevious=-1w&sorter/field=updated&sorter/order=DESC&tempMax=1000")
139 comments_rss = b.response().read()
141 old_time = int(open(time_file_new).read())
142 new_time = old_time
144 zephyrs = (feed_to_zephyrs('issue', issues_rss, parse_issue) +
145 feed_to_zephyrs('comment', comments_rss, parse_comment))
146 zephyrs.sort(key=lambda tz: tz[0])
147 for (t, z) in zephyrs:
148 z.send()
150 open(time_file_new, 'w').write(str(new_time))
151 os.rename(time_file_new, time_file)
153 time.sleep(60)