bump product version to 4.1.6.2
[LibreOffice.git] / bin / get-bugzilla-attachments-by-mimetype
blob93928843c776156411e3f150e07a5a2d966c10c6
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
4 # This file is part of the LibreOffice project.
6 # This Source Code Form is subject to the terms of the Mozilla Public
7 # License, v. 2.0. If a copy of the MPL was not distributed with this
8 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
11 #This digs through a pile of bugzillas and populates the cwd with a big
12 #collection of bug-docs in per-filetype dirs with bug-ids as names with
13 #prefixes to indicate which bug-tracker, e.g.
15 #fdo-bugid-X.suffix
16 #rhbz-bugid-X.suffix
17 #moz-bugid-X.suffix
19 #where X is the n'th attachment of that type in the bug
21 import urllib
22 import feedparser
23 import base64
24 import re
25 import os, os.path
26 import sys
27 import xmlrpclib
28 from xml.dom import minidom
29 from xml.sax.saxutils import escape
def urlopen_retry(url):
    """Open ``url`` with ``urllib.urlopen``, retrying when IOError is raised.

    The first attempt is followed by up to three retries.  Every failure is
    reported on stdout; the IOError of the last attempt is re-raised.
    Returns whatever ``urllib.urlopen`` returns.
    """
    maxretries = 3
    attempt = 0
    while True:
        try:
            return urllib.urlopen(url)
        except IOError as error:
            print("%s %s" % ("caught IOError: ", error))
            if attempt == maxretries:
                # Out of retries: let the caller see the original error.
                raise
            print("retrying...")
        attempt += 1
def get_from_bug_url_via_xml(url, mimetype, prefix, suffix):
    """Download the attachments of one bug that have the given mimetype.

    The bug is fetched as XML (``url`` + ``&ctype=xml``); each ``attachment``
    element is expected to carry a ``type`` child and a base64 encoded
    ``data`` child.  A matching attachment is decoded and written to
    ``<suffix>/<prefix><bugid>-<n>.<suffix>``, where ``n`` is the 1-based
    position of the attachment within the bug.

    url      -- bug URL whose last ``=`` is followed by the bug id
    mimetype -- mimetype to keep (compared case-insensitively)
    prefix   -- tracker prefix used in the file name
    suffix   -- file extension; also the directory the file is written to

    If ``<suffix>/<prefix><bugid>-1.<suffix>`` already exists, the bug is
    assumed to be up to date and nothing is fetched.  Raises IndexError when
    ``url`` contains no ``=``.
    """
    # Take what follows the *last* '=' so that additional query parameters
    # in front of the id cannot be mistaken for it.
    bugid = url.rsplit('=', 1)[1]
    print("id is %s %s %s" % (prefix, bugid, suffix))
    if os.path.isfile(suffix + '/' + prefix + bugid + '-1.' + suffix):
        print("assuming %s is up to date" % bugid)
        return

    print("parsing %s" % bugid)
    sock = urlopen_retry(url + "&ctype=xml")
    try:
        dom = minidom.parse(sock)
    finally:
        # Release the connection even when the XML cannot be parsed.
        sock.close()

    for attachmentid, attachment in enumerate(dom.getElementsByTagName('attachment'), 1):
        # Pieces of the one-line progress report for this attachment.
        status = [" mimetype is"]
        for node in attachment.childNodes:
            if node.nodeName == 'type':
                found = node.firstChild.nodeValue
                status.append(found)
                if found.lower() != mimetype.lower():
                    status.append('skipping')
                    break
            elif node.nodeName == 'data':
                # A deleted attachment has an empty data element
                # (e.g. https://bugs.kde.org/show_bug.cgi?id=53343&ctype=xml).
                if not node.firstChild:
                    status.append('deleted attachment, skipping')
                    continue

                download = suffix + '/' + prefix + bugid + '-' + str(attachmentid) + '.' + suffix
                status.append('downloading as ' + download)
                # Report before writing so the message is visible even if
                # the write fails.
                print(" ".join(status))
                status = []
                # The decoded payload is binary: write it untranslated.
                with open(download, 'wb') as f:
                    f.write(base64.b64decode(node.firstChild.nodeValue))
                break
        if status:
            # Always terminate the report line, whatever path was taken.
            print(" ".join(status))
def get_novell_bug_via_xml(url, mimetype, prefix, suffix):
    """Download the attachments of one Novell bug that have the given mimetype.

    The bug XML (``url`` + ``&ctype=xml``) is scanned for comments
    (``thetext`` elements) containing ``Created an attachment (id=N)``.
    Each referenced attachment is fetched from ``novellattach + N`` and,
    when the mimetype reported by the server matches, saved to
    ``<suffix>/<prefix><bugid>-<n>.<suffix>``; ``n`` counts the attachment
    references found in the bug, starting at 1.

    url      -- bug URL whose last ``=`` is followed by the bug id
    mimetype -- mimetype to keep (compared case-insensitively)
    prefix   -- tracker prefix used in the file name
    suffix   -- file extension; also the directory the file is written to

    If ``<suffix>/<prefix><bugid>-1.<suffix>`` already exists, the bug is
    assumed to be up to date and nothing is fetched.  Raises IndexError when
    ``url`` contains no ``=``.
    """
    # Take what follows the *last* '=' so that additional query parameters
    # in front of the id cannot be mistaken for it.
    bugid = url.rsplit('=', 1)[1]
    print("id is %s %s %s" % (prefix, bugid, suffix))
    if os.path.isfile(suffix + '/' + prefix + bugid + '-1.' + suffix):
        print("assuming %s is up to date" % bugid)
        return

    print("parsing %s" % bugid)
    sock = urlopen_retry(url + "&ctype=xml")
    try:
        dom = minidom.parse(sock)
    finally:
        # Release the connection even when the XML cannot be parsed.
        sock.close()

    attachmentid = 0
    for comment in dom.getElementsByTagName('thetext'):
        # An empty comment has no text node and cannot reference anything.
        if comment.firstChild is None:
            continue
        match = re.search(r".*Created an attachment \(id=([0-9]+)\)",
                          comment.firstChild.nodeValue)
        if not match:
            continue

        attachmentid += 1

        realAttachmentId = match.group(1)
        handle = urlopen_retry(novellattach + realAttachmentId)
        if not handle:
            print("attachment %s is not accessible" % realAttachmentId)
            continue
        try:
            remoteMime = handle.info().gettype()
            if remoteMime.lower() != mimetype.lower():
                print(" mimetype is %s skipping" % remoteMime)
                continue

            download = suffix + '/' + prefix + bugid + '-' + str(attachmentid) + '.' + suffix
            print(" mimetype is %s downloading as %s" % (remoteMime, download))
            # Attachment bodies are binary: write them untranslated.
            with open(download, 'wb') as f:
                f.write(handle.read())
        finally:
            # One connection per attachment; do not leave them open.
            handle.close()
def get_through_rpc_query(rpcurl, showurl, mimetype, prefix, suffix):
    """Download matching attachments from a tracker queried over XML-RPC.

    ``Bug.search`` is called on the server at ``rpcurl`` for bugs having an
    attachment of ``mimetype``; every bug returned is then handed to
    ``get_from_bug_url_via_xml`` using ``showurl`` + bug id as its URL.

    rpcurl   -- URL of the XML-RPC endpoint
    showurl  -- bug URL prefix to which the bug id is appended
    mimetype -- attachment mimetype to search for
    prefix   -- tracker prefix used in the file names
    suffix   -- file extension; also the directory the files are written to

    An ``xmlrpclib.Fault`` is reported on stdout and otherwise ignored.
    """
    # The downloads are written into ./<suffix>/, so make sure it exists
    # rather than relying on another tracker's pass having created it.
    if not os.path.isdir(suffix):
        try:
            os.mkdir(suffix)
        except OSError as error:
            print("cannot create directory %s: %s" % (suffix, error))

    try:
        proxy = xmlrpclib.ServerProxy(rpcurl)
        query = {
            'column_list': 'bug_id',
            'query_format': 'advanced',
            'field0-0-0': 'attachments.mimetype',
            'type0-0-0': 'equals',
            'value0-0-0': mimetype,
        }
        result = proxy.Bug.search(query)
        bugs = result['bugs']
        print("%s bugs to process" % len(bugs))
        for bug in bugs:
            url = showurl + str(bug['id'])
            get_from_bug_url_via_xml(url, mimetype, prefix, suffix)
    except xmlrpclib.Fault as err:
        print("A fault occurred")
        print("Fault code: %s" % err.faultCode)
        print(err.faultString)
def get_through_rss_query_url(url, mimetype, prefix, suffix):
    """Download matching attachments for every bug listed in an RSS feed.

    The feed at ``url`` is parsed with ``feedparser``; the ``id`` of each
    entry is used as the bug URL and passed to the per-bug download
    function.  A failure on one bug is reported on stdout and the remaining
    entries are still processed.

    url      -- URL of the RSS bug list
    mimetype -- attachment mimetype to keep
    prefix   -- tracker prefix used in the file names; ``"novell"`` selects
                the Novell specific download function
    suffix   -- file extension; also the directory the files are written to
    """
    # Best effort: a missing directory shows up again as a per-bug failure.
    if not os.path.isdir(suffix):
        try:
            os.mkdir(suffix)
        except OSError as error:
            print("cannot create directory %s: %s" % (suffix, error))

    d = feedparser.parse(url)

    # Getting detailed bug information and downloading an attachment body is
    # not possible without logging in to Novell bugzilla;
    # get_novell_bug_via_xml is a workaround for that situation.
    if prefix == "novell":
        get_bug_function = get_novell_bug_via_xml
    else:
        get_bug_function = get_from_bug_url_via_xml

    for entry in d['entries']:
        try:
            get_bug_function(entry['id'], mimetype, prefix, suffix)
        except Exception:
            # Deliberately not a bare except: KeyboardInterrupt and
            # SystemExit must still be able to stop the whole run.
            print("%s failed: %s" % (entry['id'], sys.exc_info()[0]))
def get_through_rss_query(queryurl, mimetype, prefix, suffix):
    """Query a tracker's buglist for a mimetype and download the matches.

    Builds the advanced-search RSS URL for ``queryurl`` that selects bugs
    having an attachment of ``mimetype``, prints it, and passes it to
    ``get_through_rss_query_url``.

    queryurl -- URL of the tracker's buglist.cgi
    mimetype -- attachment mimetype to search for
    prefix   -- tracker prefix used in the file names
    suffix   -- file extension; also the directory the files are written to
    """
    try:
        from urllib import quote  # Python 2
    except ImportError:
        from urllib.parse import quote  # Python 3

    # The value must be URL-quoted, not XML-escaped: a literal '+' in a
    # query string (as in 'application/xhtml+xml') is decoded as a space
    # by the server.  '/' is left as is by quote()'s default.
    url = (queryurl
           + '?query_format=advanced&field0-0-0=attachments.mimetype&type0-0-0=equals&value0-0-0='
           + quote(mimetype)
           + '&ctype=rss')
    print('url is %s' % url)
    get_through_rss_query_url(url, mimetype, prefix, suffix)
def get_launchpad_bugs(prefix):
    """Download matching attachments of Ubuntu's libreoffice package bugs.

    Every bug task of the ``libreoffice`` source package in the ``ubuntu``
    distribution is visited.  An attachment whose content type is a key of
    the module-level ``mimetypes`` table is saved as
    ``<suffix>/<prefix><bugid>-<n>.<suffix>``, where ``suffix`` is the table
    value and ``n`` the 1-based position of the attachment in the bug.  When
    the target file already exists the rest of that bug is skipped.

    prefix -- tracker prefix used in the file names

    Raises ImportError when the launchpadlib module is not installed.
    """
    # launchpadlib python module is required to download launchpad
    # attachments; it is imported here so that only this function needs it.
    from launchpadlib.launchpad import Launchpad

    launchpad = Launchpad.login_anonymously("attachmentdownload", "production")
    ubuntu = launchpad.distributions["ubuntu"]

    # Since searching bugs having attachments with specific mimetypes is not
    # available in the launchpad API, we iterate over all bugs of the
    # libreoffice source package.
    libo = ubuntu.getSourcePackage(name="libreoffice")
    libobugs = libo.getBugTasks()

    for bugtask in libobugs:
        bug = bugtask.bug
        bugid = str(bug.id)
        print("parsing  %s status: %s title: %s" % (bugid, bugtask.status, bug.title[:50]))
        for attachmentid, attachment in enumerate(bug.attachments, 1):
            handle = attachment.data.open()
            try:
                content_type = handle.content_type
                if content_type not in mimetypes:
                    continue

                suffix = mimetypes[content_type]
                # Best effort: a missing directory makes open() below fail.
                if not os.path.isdir(suffix):
                    try:
                        os.mkdir(suffix)
                    except OSError as error:
                        print("cannot create directory %s: %s" % (suffix, error))

                download = suffix + '/' + prefix + bugid + '-' + str(attachmentid) + '.' + suffix

                if os.path.isfile(download):
                    print("assuming %s is up to date" % bugid)
                    break

                print('mimetype is %s downloading as %s' % (content_type, download))

                # Attachment bodies are binary: write them untranslated.
                with open(download, "wb") as f:
                    f.write(handle.read())
            finally:
                # NOTE(review): assumes the hosted-file object returned by
                # open() is file-like and has close() - confirm against the
                # installed launchpadlib.
                handle.close()
# Bug list (buglist.cgi) URLs of the bugzilla trackers.
freedesktop = "http://bugs.freedesktop.org/buglist.cgi"
# abiword
abisource = "http://bugzilla.abisource.com/buglist.cgi"
# gnumeric
gnome = "http://bugzilla.gnome.org/buglist.cgi"
# koffice/calligra
kde = "http://bugs.kde.org/buglist.cgi"
openoffice = "https://issues.apache.org/ooo/buglist.cgi"
mozilla = "https://bugzilla.mozilla.org/buglist.cgi"

# Red Hat: XML-RPC endpoint, and the bug URL prefix the bug id is appended to.
redhatrpc = "https://bugzilla.redhat.com/xmlrpc.cgi"
redhatbug = "https://bugzilla.redhat.com/show_bug.cgi?id="

# Novell Bugzilla requires users to log in in order to get details of the
# bugs such as attachment bodies.  As a dirty workaround, comments containing
# "Created an attachment (id=xxxxxx)" are parsed and the attachments are
# downloaded manually.  python-bugzilla claims to support Novell bugzilla
# login, but that is not working right now and the Novell bugzilla login
# system is a nightmare.
novell = "https://bugzilla.novell.com/buglist.cgi"
novellattach = "https://bugzilla.novell.com/attachment.cgi?id="
# Attachment mimetypes to collect, mapped to the file extension that is used
# both as the download directory name and as the suffix of the saved files.
mimetypes = {
# ODF
    'application/vnd.oasis.opendocument.base': 'odb',
    'application/vnd.oasis.opendocument.database': 'odb',
    'application/vnd.oasis.opendocument.chart': 'odc',
    'application/vnd.oasis.opendocument.chart-template': 'otc',
    'application/vnd.oasis.opendocument.formula': 'odf',
    'application/vnd.oasis.opendocument.formula-template': 'otf',
    'application/vnd.oasis.opendocument.graphics': 'odg',
    'application/vnd.oasis.opendocument.graphics-template': 'otg',
    'application/vnd.oasis.opendocument.graphics-flat-xml': 'fodg',
    'application/vnd.oasis.opendocument.presentation': 'odp',
    'application/vnd.oasis.opendocument.presentation-template': 'otp',
    'application/vnd.oasis.opendocument.presentation-flat-xml': 'fodp',
    'application/vnd.oasis.opendocument.spreadsheet': 'ods',
    'application/vnd.oasis.opendocument.spreadsheet-template': 'ots',
    'application/vnd.oasis.opendocument.spreadsheet-flat-xml': 'fods',
    'application/vnd.oasis.opendocument.text': 'odt',
    'application/vnd.oasis.opendocument.text-flat-xml': 'fodt',
    'application/vnd.oasis.opendocument.text-master': 'odm',
    'application/vnd.oasis.opendocument.text-template': 'ott',
    'application/vnd.oasis.opendocument.text-web': 'oth',
# OOo XML
    'application/vnd.sun.xml.base': 'odb',
    'application/vnd.sun.xml.calc': 'sxc',
    'application/vnd.sun.xml.calc.template': 'stc',
    'application/vnd.sun.xml.chart': 'sxs',
    'application/vnd.sun.xml.draw': 'sxd',
    'application/vnd.sun.xml.draw.template': 'std',
    'application/vnd.sun.xml.impress': 'sxi',
    'application/vnd.sun.xml.impress.template': 'sti',
    'application/vnd.sun.xml.math': 'sxm',
    'application/vnd.sun.xml.writer': 'sxw',
    'application/vnd.sun.xml.writer.global': 'sxg',
    'application/vnd.sun.xml.writer.template': 'stw',
    'application/vnd.sun.xml.writer.web': 'stw',
# MSO
    'application/rtf': 'rtf',
    'text/rtf': 'rtf',
    'application/msword': 'doc',
    'application/vnd.ms-powerpoint': 'ppt',
    'application/vnd.ms-excel': 'xls',
    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
    'application/vnd.openxmlformats-officedocument.spreadsheetml.template': 'xltx',
    'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'pptx',
    # PowerPoint templates use the extension .potx
    'application/vnd.openxmlformats-officedocument.presentationml.template': 'potx',
    'application/vnd.openxmlformats-officedocument.presentationml.slideshow': 'ppsx',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.template': 'dotx',
    'application/vnd.visio': 'vsd',
    'application/vnd.visio.xml': 'vdx',
    'application/x-mspublisher': 'pub',
# W3C
    'application/xhtml+xml': 'xhtml',
    'application/mathml+xml': 'mml',
    'text/html': 'html',
    'application/docbook+xml': 'docbook',
# misc
    'text/spreadsheet': 'slk',
    'application/vnd.corel-draw': 'cdr',
    'application/vnd.lotus-wordpro': 'lwp',
    'application/vnd.lotus-1-2-3': 'wks',
    'application/vnd.wordperfect': 'wpd',
    'application/vnd.ms-works': 'wps',
    'application/x-hwp': 'hwp',
    'application/x-aportisdoc': 'pdb',
    'application/x-pocket-word': 'psw',
    'application/x-t602': '602',
# binfilter
    'application/x-starcalc': 'sdc',
    'application/vnd.stardivision.calc': 'sdc5',
    'application/x-starchart': 'sds',
    'application/vnd.stardivision.chart': 'sds5',
    'application/x-stardraw': 'sdd_d',
    'application/vnd.stardivision.draw': 'sda5',
    'application/x-starimpress': 'sdd_i',
    'application/vnd.stardivision.impress': 'sdd5',
    'application/vnd.stardivision.impress-packed': 'sdp5',
    'application/x-starmath': 'smf',
    'application/vnd.stardivision.math': 'smf5',
    'application/x-starwriter': 'sdw',
    'application/vnd.stardivision.writer': 'sdw5',
    'application/vnd.stardivision.writer-global': 'sgl5',
# relatively uncommon image mimetypes
    'image/cgm': 'cgm',
    'image/tiff': 'tiff',
    'image/vnd.dxf': 'dxf',
    'image/x-emf': 'emf',
    'image/x-targa': 'tga',
    'image/x-sgf': 'sgf',
    'image/x-svm': 'svm',
    'image/x-wmf': 'wmf',
    'image/x-pict': 'pict',
}
# (mimetype, extension) pairs that are currently not downloaded: enabling
# them would fetch gigabytes of pngs/jpegs...
common_noncore_mimetypes = [
    # graphics
    ("image/svg+xml", "svg"),
    ("image/x-MS-bmp", "bmp"),
    ("image/x-wpg", "wpg"),
    ("image/x-eps", "eps"),
    ("image/x-met", "met"),
    ("image/x-portable-bitmap", "pbm"),
    ("image/x-photo-cd", "pcd"),
    ("image/x-pcx", "pcx"),
    ("image/x-portable-graymap", "pgm"),
    ("image/x-portable-pixmap", "ppm"),
    ("image/vnd.adobe.photoshop", "psd"),
    ("image/x-cmu-raster", "ras"),
    ("image/x-xbitmap", "xbm"),
    ("image/x-xpixmap", "xpm"),
    ("image/gif", "gif"),
    ("image/jpeg", "jpeg"),
    ("image/png", "png"),
    # pdf, etc.
    ("application/pdf", "pdf"),
]
# Work through the trackers one after another; for each tracker every
# mimetype of the ``mimetypes`` table is queried in turn.

# freedesktop.org, via its RSS bug list
for mimetype, extension in mimetypes.items():
    get_through_rss_query(freedesktop, mimetype, "fdo", extension)

# Red Hat, via XML-RPC
for mimetype, extension in mimetypes.items():
    get_through_rpc_query(redhatrpc, redhatbug, mimetype, "rhbz", extension)

# The remaining trackers, via their RSS bug lists
for tracker_url, tracker_prefix in (
        (openoffice, "ooo"),
        (novell, "novell"),
        (gnome, "gnome"),
        (abisource, "abi"),
        (kde, "kde")):
    for mimetype, extension in mimetypes.items():
        get_through_rss_query(tracker_url, mimetype, tracker_prefix, extension)

# Launchpad needs the optional launchpadlib module.
try:
    get_launchpad_bugs("lp")
except ImportError:
    print("launchpadlib unavailable, skipping Ubuntu tracker")
360 # vim:set shiftwidth=4 softtabstop=4 expandtab: