2 # -*- coding: utf-8 -*-
4 # This file is part of the LibreOffice project.
6 # This Source Code Form is subject to the terms of the Mozilla Public
7 # License, v. 2.0. If a copy of the MPL was not distributed with this
8 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
#This digs through a pile of bugzillas and populates the cwd with a big
#collection of bug-docs in per-filetype dirs with bug-ids as names with
#prefixes to indicate which bug-tracker, e.g. odt/fdo12345-X.odt for an
#attachment of freedesktop.org bug 12345,
#where X is the n'th attachment of that type in the bug
import os
from xml.dom import minidom
from xml.sax.saxutils import escape
def urlopen_retry(url, maxretries=3):
    """Open *url* with ``urllib.urlopen``, retrying when an IOError is raised.

    url        -- address to open
    maxretries -- how many further attempts are made after a failed one

    Returns whatever ``urllib.urlopen`` returns.  Once the last permitted
    attempt has failed too, its IOError is re-raised to the caller.
    """
    # NOTE(review): the default of 3 is reconstructed; the line that set
    # maxretries is not visible in this listing -- confirm the value.
    for attempt in range(maxretries + 1):
        try:
            return urllib.urlopen(url)
        except IOError as e:
            print("caught IOError:  %s" % e)
            if attempt == maxretries:
                raise
            print("retrying...")
def get_from_bug_url_via_xml(url, mimetype, prefix, suffix):
    """Download the attachments of one bug that match *mimetype*.

    url      -- show_bug URL whose last '=' is followed by the bug id
    mimetype -- attachment type to keep (compared case-insensitively)
    prefix   -- tracker tag used at the start of each file name
    suffix   -- file extension, also the name of the target directory

    The bug is fetched as XML (url + "&ctype=xml") and every matching
    attachment is base64-decoded and stored as
    <suffix>/<prefix><bug id>-<n>.<suffix>, n being the 1-based position of
    the attachment inside the bug.  Nothing is fetched when the file for
    attachment 1 already exists; the bug is then assumed to be up to date.
    """
    # Split on the last '=' only, so other key=value pairs in the URL cannot
    # be mistaken for the bug id.
    bug_id = url.rsplit('=', 1)[1]
    print("id is %s %s %s" % (prefix, bug_id, suffix))
    if os.path.isfile(suffix + '/' + prefix + bug_id + '-1.' + suffix):
        print("assuming %s is up to date" % bug_id)
        return

    print("parsing %s" % bug_id)
    sock = urlopen_retry(url + "&ctype=xml")
    try:
        dom = minidom.parse(sock)
    finally:
        sock.close()

    wanted = mimetype.lower()
    attachmentid = 0
    for attachment in dom.getElementsByTagName('attachment'):
        attachmentid += 1
        # One progress line is assembled per attachment.
        progress = " mimetype is"
        for node in attachment.childNodes:
            if node.nodeName == 'type':
                found = node.firstChild.nodeValue
                progress += " " + found
                if found.lower() != wanted:
                    progress += " skipping"
                    break
            elif node.nodeName == 'data':
                # check if attachment is deleted (i.e. https://bugs.kde.org/show_bug.cgi?id=53343&ctype=xml)
                if not node.firstChild:
                    progress += " deleted attachment, skipping"
                    continue

                download = suffix + '/' + prefix + bug_id + '-' + str(attachmentid) + '.' + suffix
                print(progress + " downloading as " + download)
                progress = None
                # The decoded payload is binary, so write it in binary mode.
                with open(download, 'wb') as f:
                    f.write(base64.b64decode(node.firstChild.nodeValue))
                break
        if progress is not None:
            print(progress)
def get_novell_bug_via_xml(url, mimetype, prefix, suffix):
    """Download the attachments of one Novell bug that match *mimetype*.

    url      -- show_bug URL whose last '=' is followed by the bug id
    mimetype -- attachment type to keep (compared exactly)
    prefix   -- tracker tag used at the start of each file name
    suffix   -- file extension, also the name of the target directory

    The bug XML (url + "&ctype=xml") is scanned for comments of the form
    "Created an attachment (id=NNN)".  Each referenced attachment is opened
    through the ``novellattach`` URL and, when the reported content type
    equals *mimetype*, stored as <suffix>/<prefix><bug id>-<n>.<suffix>,
    n counting the attachment comments found so far.  Nothing is fetched
    when the file for attachment 1 already exists.
    """
    # Split on the last '=' only, so other key=value pairs in the URL cannot
    # be mistaken for the bug id.
    bug_id = url.rsplit('=', 1)[1]
    print("id is %s %s %s" % (prefix, bug_id, suffix))
    if os.path.isfile(suffix + '/' + prefix + bug_id + '-1.' + suffix):
        print("assuming %s is up to date" % bug_id)
        return

    print("parsing %s" % bug_id)
    sock = urlopen_retry(url + "&ctype=xml")
    try:
        dom = minidom.parse(sock)
    finally:
        sock.close()

    attachmentid = 0
    for comment in dom.getElementsByTagName('thetext'):
        # An empty <thetext/> element has no text node to inspect.
        if comment.firstChild is None:
            continue
        commentText = comment.firstChild.nodeValue
        match = re.search(r".*Created an attachment \(id=([0-9]+)\)", commentText)
        if not match:
            continue

        attachmentid += 1

        realAttachmentId = match.group(1)
        handle = urlopen_retry(novellattach + realAttachmentId)
        if not handle:
            print("attachment %s is not accessible" % realAttachmentId)
            continue

        try:
            remoteMime = handle.info().gettype()
            if remoteMime != mimetype:
                print(" mimetype is %s skipping" % remoteMime)
                continue

            download = suffix + '/' + prefix + bug_id + '-' + str(attachmentid) + '.' + suffix
            print(" mimetype is %s downloading as %s" % (remoteMime, download))
            # The attachment body is binary, so write it in binary mode.
            with open(download, 'wb') as f:
                f.write(handle.read())
        finally:
            handle.close()
def get_through_rpc_query(rpcurl, showurl, mimetype, prefix, suffix):
    """Find bugs with *mimetype* attachments over XML-RPC and download them.

    rpcurl   -- XML-RPC endpoint of the tracker
    showurl  -- URL prefix to which a bug id is appended to address one bug
    mimetype -- attachment type to search for
    prefix   -- tracker tag used at the start of each file name
    suffix   -- file extension, also the name of the target directory

    Every bug returned by ``Bug.search`` is handed to
    get_from_bug_url_via_xml.  An ``xmlrpclib.Fault`` is reported on stdout
    and ends the query; it is not propagated.
    """
    # The download step writes into <suffix>/, so make sure it exists even
    # when no other query has created it yet.
    if not os.path.isdir(suffix):
        try:
            os.mkdir(suffix)
        except OSError:
            pass

    try:
        proxy = xmlrpclib.ServerProxy(rpcurl)
        query = {
            'column_list': 'bug_id',
            'query_format': 'advanced',
            'field0-0-0': 'attachments.mimetype',
            'type0-0-0': 'equals',
            'value0-0-0': mimetype,
        }
        result = proxy.Bug.search(query)
        bugs = result['bugs']
        print("%d bugs to process" % len(bugs))
        for bug in bugs:
            url = showurl + str(bug['id'])
            get_from_bug_url_via_xml(url, mimetype, prefix, suffix)
    except xmlrpclib.Fault as err:
        print("A fault occurred")
        print("Fault code: %s" % err.faultCode)
        print(err.faultString)
def get_through_rss_query_url(url, mimetype, prefix, suffix):
    """Download matching attachments for every bug listed in an RSS feed.

    url      -- address of the RSS bug list
    mimetype -- attachment type to keep
    prefix   -- tracker tag used at the start of each file name
    suffix   -- file extension, also the name of the target directory

    Each feed entry's 'id' is used as the bug URL.  A failure while handling
    one bug is reported on stdout and does not stop the remaining bugs.
    """
    # Downloads are written into <suffix>/; create it on first use.
    if not os.path.isdir(suffix):
        try:
            os.mkdir(suffix)
        except OSError:
            pass

    d = feedparser.parse(url)

    #Getting detailed bug information and downloading an attachment body is not possible without logging in to Novell bugzilla
    #get_novell_bug_via_xml function is a workaround for that situation
    get_bug_function = get_novell_bug_via_xml if prefix == "novell" else get_from_bug_url_via_xml

    for entry in d['entries']:
        try:
            get_bug_function(entry['id'], mimetype, prefix, suffix)
        except Exception:
            # Best effort per bug, but let KeyboardInterrupt/SystemExit
            # through so the run can still be aborted.
            print("%s failed: %s" % (entry['id'], sys.exc_info()[0]))
def get_through_rss_query(queryurl, mimetype, prefix, suffix):
    """Build the RSS search URL for *mimetype* and process its results.

    queryurl -- buglist.cgi address of the tracker
    mimetype -- attachment type to search for
    prefix   -- tracker tag used at the start of each file name
    suffix   -- file extension, also the name of the target directory

    The resulting URL is passed to get_through_rss_query_url.
    """
    # A raw '+' in a query string is decoded as a space by the server, so
    # types such as image/svg+xml need it percent-encoded to match anything.
    value = escape(mimetype.replace('+', '%2B'))
    url = queryurl + '?query_format=advanced&field0-0-0=attachments.mimetype&type0-0-0=equals&value0-0-0=' + value + '&ctype=rss'
    print("url is %s" % url)
    get_through_rss_query_url(url, mimetype, prefix, suffix)
def get_launchpad_bugs(prefix):
    """Download known-mimetype attachments of Ubuntu's libreoffice bugs.

    prefix -- tracker tag used at the start of each file name

    Attachments whose content type is a key of the module-level
    ``mimetypes`` table are stored as <ext>/<prefix><bug id>-<n>.<ext>, n
    being the 1-based position of the attachment inside the bug.  As soon as
    a target file already exists, the rest of that bug is skipped.

    Raises ImportError when launchpadlib is not installed.
    """
    #launchpadlib python module is required to download launchpad attachments
    from launchpadlib.launchpad import Launchpad

    launchpad = Launchpad.login_anonymously("attachmentdownload", "production")
    ubuntu = launchpad.distributions["ubuntu"]

    #since searching bugs having attachments with specific mimetypes is not available in launchpad API
    #we're iterating over all bugs of the libreoffice source package
    libo = ubuntu.getSourcePackage(name="libreoffice")
    libobugs = libo.getBugTasks()

    for bugtask in libobugs:
        bug = bugtask.bug
        bug_id = str(bug.id)
        print("parsing  %s status: %s title: %s" % (bug_id, bugtask.status, bug.title[:50]))
        attachmentid = 0
        for attachment in bug.attachments:
            attachmentid += 1
            handle = attachment.data.open()
            if handle.content_type not in mimetypes:
                continue

            suffix = mimetypes[handle.content_type]
            if not os.path.isdir(suffix):
                try:
                    os.mkdir(suffix)
                except OSError:
                    # If the directory really is missing, open() below fails.
                    pass

            download = suffix + '/' + prefix + bug_id + '-' + str(attachmentid) + '.' + suffix

            if os.path.isfile(download):
                print("assuming %s is up to date" % bug_id)
                break

            print("mimetype is %s downloading as %s" % (handle.content_type, download))

            # Attachment bodies are binary, so write them in binary mode.
            with open(download, "wb") as f:
                f.write(handle.read())
# Bug list query addresses of the trackers that are searched.
freedesktop = 'http://bugs.freedesktop.org/buglist.cgi'
abisource = 'http://bugzilla.abisource.com/buglist.cgi' #added for abiword
gnome = 'http://bugzilla.gnome.org/buglist.cgi' #added for gnumeric
kde = 'http://bugs.kde.org/buglist.cgi' #added for koffice/calligra
openoffice = 'https://issues.apache.org/ooo/buglist.cgi'
# Red Hat is queried over XML-RPC; single bugs are addressed as redhatbug + id.
redhatrpc = 'https://bugzilla.redhat.com/xmlrpc.cgi'
redhatbug = 'https://bugzilla.redhat.com/show_bug.cgi?id='
mozilla = 'https://bugzilla.mozilla.org/buglist.cgi'

#Novell Bugzilla requires users to log in to get details of a bug, such as attachment bodies.
#As a dirty workaround, we parse comments containing "Created an attachment (id=xxxxxx)" and download attachments manually.
#python-bugzilla claims that it supports Novell bugzilla login, but it's not working right now and the novell bugzilla login
#system is a nightmare
novellattach = 'https://bugzilla.novell.com/attachment.cgi?id='
novell = 'https://bugzilla.novell.com/buglist.cgi'
217 'application/vnd.oasis.opendocument.base': 'odb',
218 'application/vnd.oasis.opendocument.database': 'odb',
219 'application/vnd.oasis.opendocument.chart': 'odc',
220 'application/vnd.oasis.opendocument.chart-template': 'otc',
221 'application/vnd.oasis.opendocument.formula': 'odf',
222 'application/vnd.oasis.opendocument.formula-template': 'otf',
223 'application/vnd.oasis.opendocument.graphics': 'odg',
224 'application/vnd.oasis.opendocument.graphics-template': 'otg',
225 'application/vnd.oasis.opendocument.graphics-flat-xml': 'fodg',
226 'application/vnd.oasis.opendocument.presentation': 'odp',
227 'application/vnd.oasis.opendocument.presentation-template': 'otp',
228 'application/vnd.oasis.opendocument.presentation-flat-xml': 'fodp',
229 'application/vnd.oasis.opendocument.spreadsheet': 'ods',
230 'application/vnd.oasis.opendocument.spreadsheet-template': 'ots',
231 'application/vnd.oasis.opendocument.spreadsheet-flat-xml': 'fods',
232 'application/vnd.oasis.opendocument.text': 'odt',
233 'application/vnd.oasis.opendocument.text-flat-xml': 'fodt',
234 'application/vnd.oasis.opendocument.text-master': 'odm',
235 'application/vnd.oasis.opendocument.text-template': 'ott',
236 'application/vnd.oasis.opendocument.text-web': 'oth',
238 'application/vnd.sun.xml.base': 'odb',
239 'application/vnd.sun.xml.calc': 'sxc',
240 'application/vnd.sun.xml.calc.template': 'stc',
241 'application/vnd.sun.xml.chart': 'sxs',
242 'application/vnd.sun.xml.draw': 'sxd',
243 'application/vnd.sun.xml.draw.template': 'std',
244 'application/vnd.sun.xml.impress': 'sxi',
245 'application/vnd.sun.xml.impress.template': 'sti',
246 'application/vnd.sun.xml.math': 'sxm',
247 'application/vnd.sun.xml.writer': 'sxw',
248 'application/vnd.sun.xml.writer.global': 'sxg',
249 'application/vnd.sun.xml.writer.template': 'stw',
250 'application/vnd.sun.xml.writer.web': 'stw',
252 'application/rtf': 'rtf',
254 'application/msword': 'doc',
255 'application/vnd.ms-powerpoint': 'ppt',
256 'application/vnd.ms-excel': 'xls',
257 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',
258 'application/vnd.openxmlformats-officedocument.spreadsheetml.template': 'xltx',
259 'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'pptx',
260 'application/vnd.openxmlformats-officedocument.presentationml.template': 'ppotx',
261 'application/vnd.openxmlformats-officedocument.presentationml.slideshow': 'ppsx',
262 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',
263 'application/vnd.openxmlformats-officedocument.wordprocessingml.template': 'dotx',
264 'application/vnd.visio': 'vsd',
265 'application/vnd.visio.xml': 'vdx',
266 'application/x-mspublisher': 'pub',
268 'application/xhtml+xml': 'xhtml',
269 'application/mathml+xml': 'mml',
271 'application/docbook+xml': 'docbook',
273 'text/spreadsheet': 'slk',
274 'application/vnd.corel-draw': 'cdr',
275 'application/vnd.lotus-wordpro': 'lwp',
276 'application/vnd.lotus-1-2-3': 'wks',
277 'application/vnd.wordperfect': 'wpd',
278 'application/vnd.ms-works': 'wps',
279 'application/x-hwp': 'hwp',
280 'application/x-aportisdoc': 'pdb',
281 'application/x-pocket-word': 'psw',
282 'application/x-t602': '602',
284 'application/x-starcalc': 'sdc',
285 'application/vnd.stardivision.calc': 'sdc5',
286 'application/x-starchart': 'sds',
287 'application/vnd.stardivision.chart': 'sds5',
288 'application/x-stardraw': 'sdd_d',
289 'application/vnd.stardivision.draw': 'sda5',
290 'application/x-starimpress': 'sdd_i',
291 'application/vnd.stardivision.impress': 'sdd5',
292 'application/vnd.stardivision.impress-packed': 'sdp5',
293 'application/x-starmath': 'smf',
294 'application/vnd.stardivision.math': 'smf5',
295 'application/x-starwriter': 'sdw',
296 'application/vnd.stardivision.writer': 'sdw5',
297 'application/vnd.stardivision.writer-global': 'sgl5',
298 # relatively uncommon image mimetypes
300 'image/tiff': 'tiff',
301 'image/vnd.dxf': 'dxf',
302 'image/x-emf': 'emf',
303 'image/x-targa': 'tga',
304 'image/x-sgf': 'sgf',
305 'image/x-svm': 'svm',
306 'image/x-wmf': 'wmf',
307 'image/x-pict': 'pict',
# disabled for now, this would download gigs of pngs/jpegs...
# (mimetype, extension) pairs in the same sense as the mimetypes table.
common_noncore_mimetypes = [
    # graphics
    ('image/svg+xml', 'svg'),
    ('image/x-MS-bmp', 'bmp'),
    ('image/x-wpg', 'wpg'),
    ('image/x-eps', 'eps'),
    ('image/x-met', 'met'),
    ('image/x-portable-bitmap', 'pbm'),
    ('image/x-photo-cd', 'pcd'),
    ('image/x-pcx', 'pcx'),
    ('image/x-portable-graymap', 'pgm'),
    ('image/x-portable-pixmap', 'ppm'),
    ('image/vnd.adobe.photoshop', 'psd'),
    ('image/x-cmu-raster', 'ras'),
    ('image/x-xbitmap', 'xbm'),
    ('image/x-xpixmap', 'xpm'),
    ('image/gif', 'gif'),
    ('image/jpeg', 'jpeg'),
    ('image/png', 'png'),
    # pdf
    ('application/pdf', 'pdf'),
]
# Every tracker is queried for every entry of the mimetypes table, one
# tracker at a time and in the order listed here.  Each row holds the query
# function, its leading endpoint argument(s) and the file name prefix.
_trackers = [
    (get_through_rss_query, (freedesktop,), "fdo"),
    (get_through_rpc_query, (redhatrpc, redhatbug), "rhbz"),
    (get_through_rss_query, (openoffice,), "ooo"),
    (get_through_rss_query, (novell,), "novell"),
    (get_through_rss_query, (gnome,), "gnome"),
    (get_through_rss_query, (abisource,), "abi"),
    (get_through_rss_query, (kde,), "kde"),
]

for (_query, _endpoints, _prefix) in _trackers:
    for (mimetype, extension) in mimetypes.items():
        _query(*(_endpoints + (mimetype, _prefix, extension)))

# Launchpad needs the optional launchpadlib module; skip it when unavailable.
try:
    get_launchpad_bugs("lp")
except ImportError:
    print("launchpadlib unavailable, skipping Ubuntu tracker")
360 # vim:set shiftwidth=4 softtabstop=4 expandtab: