python-pathvalidate: bump version to 0.14.1
[buildroot-gz.git] / support / scripts / scanpypi
blobe150ae588d2baeced47bc5b3c85299772fcf36b5
1 #!/usr/bin/python2
2 """
4 Utility for building Buildroot packages for existing PyPI packages
6 Any package built by scanpypi should be manually checked for
7 errors.
8 """
9 from __future__ import print_function
10 import argparse
11 import json
12 import urllib2
13 import sys
14 import os
15 import shutil
16 import StringIO
17 import tarfile
18 import zipfile
19 import errno
20 import hashlib
21 import re
22 import textwrap
23 import tempfile
24 import imp
25 from functools import wraps
def setup_decorator(func, method):
    """
    Build a stand-in for distutils.core.setup or setuptools.setup.

    The stand-in never calls the real setup function. It stores the
    keyword arguments it receives in BuildrootPackage.setup_args, keyed
    by the 'name' argument, and adds a 'method' entry to that dict.

    Keyword arguments:
    func -- either setuptools.setup or distutils.core.setup
    method -- either 'setuptools' or 'distutils'
    """
    def record_setup_call(*args, **kwargs):
        # Every python package calls setup() to get installed; its 'name'
        # argument is the package's name. Positional arguments are ignored.
        package_name = kwargs['name']
        kwargs['method'] = method
        BuildrootPackage.setup_args[package_name] = kwargs

    return wraps(func)(record_setup_call)
# Monkey patch both setup() entry points: once a package's setup.py is
# imported, its call to setup() only records the arguments it was given
# (see setup_decorator) instead of running the real setup function.
import setuptools
setuptools.setup = setup_decorator(setuptools.setup, 'setuptools')
# A bare "import distutils" does not guarantee that the distutils.core
# submodule is loaded, so import it explicitly before patching it.
import distutils.core
distutils.core.setup = setup_decorator(distutils.core.setup, 'distutils')
def find_file_upper_case(filenames, path='./'):
    """
    Generator:
    Walk path recursively and yield every file whose upper-cased name is
    one of the specified filenames.
    Each yielded path starts with the path argument.

    Keyword arguments:
    filenames -- List of (upper case) filenames to be found
    path -- Path to the directory to search
    """
    for dirpath, _subdirs, entries in os.walk(path):
        matches = (entry for entry in entries if entry.upper() in filenames)
        for match in matches:
            yield os.path.join(dirpath, match)
def pkg_buildroot_name(pkg_name):
    """
    Returns the Buildroot package name for the PyPI package pkg_name.
    The name is lowered, every character that is neither a word
    character nor '-' is removed, and the 'python-' prefix is added
    unless the name already starts with it.

    Keyword arguments:
    pkg_name -- String to rename
    """
    prefix = 'python-'
    cleaned = re.sub(r'[^\w-]', '', pkg_name.lower())
    # An empty name stays empty; an already prefixed name is kept as is.
    if not cleaned or cleaned.startswith(prefix):
        return cleaned
    return prefix + cleaned
class DownloadFailed(Exception):
    """Raised when no usable archive of a package could be downloaded."""
class BuildrootPackage(object):
    """
    Creates the Buildroot files (<name>.mk, <name>.hash and Config.in)
    of one PyPI package.

    This class's methods are not meant to be used individually please
    use them in the correct order:

    __init__

    fetch_package_info

    download_package

    extract_package

    load_setup

    get_requirements

    create_package_mk

    create_hash_file

    create_config_in

    """
    # Shared by all instances: filled by the monkey patched setup()
    # functions, keyed by the 'name' argument that was given to setup().
    setup_args = {}

    def __init__(self, real_name, pkg_folder):
        """
        Keyword arguments:
        real_name -- name of the package on PyPI
        pkg_folder -- directory that holds the Buildroot package
                      directories
        """
        self.real_name = real_name
        self.buildroot_name = pkg_buildroot_name(self.real_name)
        self.pkg_dir = os.path.join(pkg_folder, self.buildroot_name)
        self.mk_name = self.buildroot_name.upper().replace('-', '_')
        self.as_string = None
        self.md5_sum = None
        self.metadata = None
        self.metadata_name = None
        self.metadata_url = None
        self.pkg_req = None
        self.setup_metadata = None
        self.tmp_extract = None
        self.used_url = None
        self.filename = None
        self.url = None
        self.version = None

    def fetch_package_info(self):
        """
        Fetch a package's metadata from the python package index

        Sets metadata_url, metadata, version and metadata_name.
        Re-raises urllib2.URLError (urllib2.HTTPError is a subclass of it)
        after printing a diagnostic on stderr.
        """
        self.metadata_url = 'https://pypi.python.org/pypi/{pkg}/json'.format(
            pkg=self.real_name)
        try:
            pkg_json = urllib2.urlopen(self.metadata_url).read().decode()
        except urllib2.URLError as error:
            # Only HTTP errors carry a status code and message to report.
            if isinstance(error, urllib2.HTTPError):
                print('ERROR:', error.getcode(), error.msg, file=sys.stderr)
            print('ERROR: Could not find package {pkg}.\n'
                  'Check syntax inside the python package index:\n'
                  'https://pypi.python.org/pypi/ '
                  .format(pkg=self.real_name), file=sys.stderr)
            raise
        self.metadata = json.loads(pkg_json)
        self.version = self.metadata['info']['version']
        self.metadata_name = self.metadata['info']['name']

    def download_package(self):
        """
        Download a package using metadata from pypi

        Tries every non-bdist entry of metadata['urls'] in order and stops
        at the first one that downloads and, when an md5 digest is
        published, matches it. Sets as_string, md5_sum, used_url, filename
        and url.

        Raises urllib2.HTTPError if the last attempt failed with an HTTP
        error, DownloadFailed if nothing usable could be downloaded.
        """
        if not self.metadata['urls']:
            print(
                'Non-conventional package, ',
                'please check carefully after creation')
            fallback_url = self.metadata['info']['download_url']
            if not fallback_url:
                raise DownloadFailed('No download URL found for package {pkg}'
                                     .format(pkg=self.real_name))
            # In this case, we can't get the name of the downloaded file
            # from the pypi api, so we need to find it, this should work
            # urlparse().path give something like
            # /path/to/file-version.tar.gz
            # We use basename to remove /path/to
            urlpath = urllib2.urlparse.urlparse(fallback_url).path
            self.metadata['urls'] = [{
                'packagetype': 'sdist',
                'url': fallback_url,
                'md5_digest': None,
                'filename': os.path.basename(urlpath)}]

        # HTTP error of the most recent attempt, None if that attempt
        # downloaded something or if nothing was attempted at all.
        last_http_error = None
        for download_url in self.metadata['urls']:
            if 'bdist' in download_url['packagetype']:
                continue
            try:
                print('Downloading package {pkg} from {url}...'.format(
                      pkg=self.real_name, url=download_url['url']))
                download = urllib2.urlopen(download_url['url'])
            except urllib2.HTTPError as http_error:
                last_http_error = http_error
                continue
            last_http_error = None
            self.used_url = download_url
            self.as_string = download.read()
            if not download_url['md5_digest']:
                # Nothing to verify against, accept the download.
                break
            self.md5_sum = hashlib.md5(self.as_string).hexdigest()
            if self.md5_sum == download_url['md5_digest']:
                break
        else:
            # Reached when no URL was tried (only bdist entries), when every
            # attempt failed, or when every download had a wrong md5.
            if last_http_error is not None:
                raise last_http_error
            raise DownloadFailed('Failed to download package {pkg}'
                                 .format(pkg=self.real_name))
        self.filename = self.used_url['filename']
        self.url = self.used_url['url']

    @staticmethod
    def _make_empty_dir(path):
        """
        Create path as an empty directory, removing it first if it exists.

        Returns True on success, False (after printing the error on
        stderr) if the directory could not be created.
        """
        try:
            os.makedirs(path)
        except OSError as exception:
            # OSError.message is empty for (errno, strerror) errors, so
            # print the exception itself.
            if exception.errno != errno.EEXIST:
                print("ERROR: ", exception, file=sys.stderr)
                return False
            print('WARNING:', exception, file=sys.stderr)
            print('Removing {pkg}...'.format(pkg=path))
            shutil.rmtree(path)
            os.makedirs(path)
        return True

    def extract_package(self, tmp_path):
        """
        Extract the package contents into a directory

        Keyword arguments:
        tmp_path -- directory where you want the package to be extracted

        The archive is unpacked into <tmp_path>/<buildroot_name> and
        tmp_extract is set to the <name>-<version> directory inside it.
        Returns (None, None), leaving tmp_extract untouched, if that
        directory could not be created.
        """
        as_file = StringIO.StringIO(self.as_string)
        tmp_pkg = os.path.join(tmp_path, self.buildroot_name)
        # The archive is opened before the directory is touched so that a
        # corrupted download fails without side effects.
        if self.filename[-3:] == 'zip':
            opened = zipfile.ZipFile(as_file)
        else:
            opened = tarfile.open(fileobj=as_file)
        with opened as archive:
            if not self._make_empty_dir(tmp_pkg):
                return None, None
            archive.extractall(tmp_pkg)

        tmp_extract = '{folder}/{name}-{version}'
        self.tmp_extract = tmp_extract.format(
            folder=tmp_pkg,
            name=self.metadata_name,
            version=self.version)

    def load_setup(self):
        """
        Loads the corresponding setup and store its metadata

        Imports setup.py from tmp_extract (with tmp_extract as working
        directory and on sys.path) and stores the arguments recorded in
        setup_args for metadata_name in setup_metadata.
        The working directory, sys.path and sys.modules are restored even
        if loading fails.
        """
        current_dir = os.getcwd()
        os.chdir(self.tmp_extract)
        sys.path.append(self.tmp_extract)
        s_file = None
        try:
            s_file, s_path, s_desc = imp.find_module('setup',
                                                     [self.tmp_extract])
            setup = imp.load_module('setup', s_file, s_path, s_desc)
            try:
                self.setup_metadata = self.setup_args[self.metadata_name]
            except KeyError:
                # This means setup was not called which most likely mean
                # that it is called through the if __name__ == '__main__'
                # directive.
                # In this case, we can only pray that it is called through
                # a function called main() in setup.py.
                setup.main([])  # Will raise AttributeError if not found
                self.setup_metadata = self.setup_args[self.metadata_name]
        finally:
            # imp.find_module leaves closing the file to the caller.
            if s_file is not None:
                s_file.close()
            # Here we must remove the module the hard way.
            # We must do this because of a very specific case: if a package
            # calls setup from the __main__ but does not come with a
            # 'main()' function, for some reason setup.main([]) will
            # successfully call the main function of a previous package...
            sys.modules.pop('setup', None)
            os.chdir(current_dir)
            sys.path.remove(self.tmp_extract)

    def get_requirements(self, pkg_folder):
        """
        Retrieve dependencies from the metadata found in the setup.py script of
        a pypi package.

        Sets pkg_req to the Buildroot names of the dependencies (None if
        setup() was called without 'install_requires').
        Returns the set of PyPI names of the dependencies that have no
        directory in pkg_folder yet.

        Keyword Arguments:
        pkg_folder -- location of the already created packages
        """
        if 'install_requires' not in self.setup_metadata:
            self.pkg_req = None
            return set()
        # Keep only the leading name of each requirement, dropping what
        # follows it (e.g. a version specifier).
        req_names = [re.sub(r'([-.\w]+).*', r'\1', req)
                     for req in self.setup_metadata['install_requires']]
        self.pkg_req = [pkg_buildroot_name(req) for req in req_names]
        # Both names are needed: the Buildroot one to look for an existing
        # package directory, the PyPI one to add to the download list.
        return set(
            pypi_name
            for pypi_name, br_name in zip(req_names, self.pkg_req)
            if not os.path.isdir(os.path.join(pkg_folder, br_name)))

    def __create_mk_header(self):
        """
        Create the header of the <package_name>.mk file
        """
        return [
            '#' * 80 + '\n',
            '#\n',
            '# {name}\n'.format(name=self.buildroot_name),
            '#\n',
            '#' * 80 + '\n',
            '\n',
        ]

    def __create_mk_download_info(self):
        """
        Create the lines refering to the download information of the
        <package_name>.mk file
        """
        lines = []
        version_line = '{name}_VERSION = {version}\n'.format(
            name=self.mk_name,
            version=self.version)
        lines.append(version_line)

        # Refer to the version through the make variable in the file name
        targz = self.filename.replace(
            self.version,
            '$({name}_VERSION)'.format(name=self.mk_name))
        targz_line = '{name}_SOURCE = {filename}\n'.format(
            name=self.mk_name,
            filename=targz)
        lines.append(targz_line)

        if self.filename not in self.url:
            # Sometimes the filename is in the url, sometimes it's not
            site_url = self.url
        else:
            site_url = self.url[:self.url.find(self.filename)]
        site_line = '{name}_SITE = {url}'.format(name=self.mk_name,
                                                 url=site_url)
        site_line = site_line.rstrip('/') + '\n'
        lines.append(site_line)
        return lines

    def __create_mk_setup(self):
        """
        Create the line refering to the setup method of the package of the
        <package_name>.mk file

        There are two things you can use to make an installer
        for a python package: distutils or setuptools
        distutils comes with python but does not support dependencies.
        distutils is mostly still there for backward support.
        setuptools is what smart people use,
        but it is not shipped with python :(
        """
        setup_type_line = '{name}_SETUP_TYPE = {method}\n'.format(
            name=self.mk_name,
            method=self.setup_metadata['method'])
        return [setup_type_line]

    def __create_mk_license(self):
        """
        Create the lines referring to the package's license informations of the
        <package_name>.mk file

        The license is found using the metadata from pypi.
        In the metadata, the license can be found either with standard names in
        the classifiers part or with naming from the packager in the "License"
        part.

        From the classifiers, the license is "translated" according to
        buildroot standards if need be (i.e. from Apache Software License to
        Apache-2.0).

        From the License part, we cannot guess what formatting the packager
        used. Hence, it is likely to be incorrect. (i.e. Apache License 2.0
        instead of Apache-2.0).

        The license's files are found by searching the package for files named
        licence, license, license.txt, copying or copying.txt (case
        insensitive). All the files found are listed; a message is printed
        when there is more than one or none.
        """
        license_dict = {
            'Apache Software License': 'Apache-2.0',
            'BSD License': 'BSD',
            'European Union Public Licence 1.0': 'EUPLv1.0',
            'European Union Public Licence 1.1': 'EUPLv1.1',
            "GNU General Public License": "GPL",
            "GNU General Public License v2": "GPLv2",
            "GNU General Public License v2 or later": "GPLv2+",
            "GNU General Public License v3": "GPLv3",
            "GNU General Public License v3 or later": "GPLv3+",
            "GNU Lesser General Public License v2": "LGPLv2.1",
            "GNU Lesser General Public License v2 or later": "LGPLv2.1+",
            "GNU Lesser General Public License v3": "LGPLv3",
            "GNU Lesser General Public License v3 or later": "LGPLv3+",
            "GNU Library or Lesser General Public License": "LGPLv2",
            "ISC License": "ISC",
            "MIT License": "MIT",
            "Mozilla Public License 1.0": "MPL-1.0",
            "Mozilla Public License 1.1": "MPL-1.1",
            "Mozilla Public License 2.0": "MPL-2.0",
            "Zope Public License": "ZPL"
            }
        regexp = re.compile(r'^License :* *.* *:+ (.*)( \(.*\))?$')
        classifiers_licenses = [regexp.sub(r"\1", lic)
                                for lic in self.metadata['info']['classifiers']
                                if regexp.match(lic)]
        licenses = [license_dict.get(lic, lic)
                    for lic in classifiers_licenses]
        lines = []
        if not licenses:
            # No license classifier: fall back on the free-form field (which
            # may be missing) before warning, so that the warning shows the
            # value that is really written.
            licenses = [self.metadata['info']['license'] or '']
            print('WARNING: License has been set to "{license}". It is most'
                  ' likely wrong, please change it if need be'.format(
                      license=', '.join(licenses)))
        license_line = '{name}_LICENSE = {license}\n'.format(
            name=self.mk_name,
            license=', '.join(licenses))
        lines.append(license_line)

        filenames = ['LICENCE', 'LICENSE', 'LICENSE.TXT', 'COPYING',
                     'COPYING.TXT']
        # Paths are written relative to the root of the extracted package
        license_files = [os.path.relpath(found, self.tmp_extract)
                         for found in find_file_upper_case(filenames,
                                                           self.tmp_extract)]
        if license_files:
            if len(license_files) > 1:
                print('More than one file found for license:',
                      ', '.join(license_files))
            license_file_line = ('{name}_LICENSE_FILES ='
                                 ' {files}\n'.format(
                                     name=self.mk_name,
                                     files=' '.join(license_files)))
            lines.append(license_file_line)
        else:
            print('WARNING: No license file found,'
                  ' please specify it manually afterwards')

        return lines

    def __create_mk_requirements(self):
        """
        Create the lines referring to the dependencies of the
        <package_name>.mk file

        Returns an empty list when the package has no dependency (pkg_req
        is None or empty).
        """
        if not self.pkg_req:
            return []
        dependencies_line = ('{name}_DEPENDENCIES ='
                             ' {reqs}\n'.format(
                                 name=self.mk_name,
                                 reqs=' '.join(self.pkg_req)))
        return [dependencies_line]

    def create_package_mk(self):
        """
        Create the lines corresponding to the <package_name>.mk file
        and write them into pkg_dir
        """
        pkg_mk = '{name}.mk'.format(name=self.buildroot_name)
        path_to_mk = os.path.join(self.pkg_dir, pkg_mk)
        print('Creating {file}...'.format(file=path_to_mk))
        lines = self.__create_mk_header()
        lines += self.__create_mk_download_info()
        lines += self.__create_mk_setup()
        lines += self.__create_mk_license()

        lines.append('\n')
        lines.append('$(eval $(python-package))')
        lines.append('\n')
        with open(path_to_mk, 'w') as mk_file:
            mk_file.writelines(lines)

    def create_hash_file(self):
        """
        Create the lines corresponding to the <package_name>.hash files
        and write them into pkg_dir

        The md5 published in the metadata is written when there is one; the
        sha256 is always computed from the downloaded archive.
        """
        pkg_hash = '{name}.hash'.format(name=self.buildroot_name)
        path_to_hash = os.path.join(self.pkg_dir, pkg_hash)
        print('Creating {filename}...'.format(filename=path_to_hash))
        lines = []
        if self.used_url['md5_digest']:
            md5_comment = '# md5 from {url}, sha256 locally computed\n'.format(
                url=self.metadata_url)
            lines.append(md5_comment)
            hash_line = '{method}\t{digest}  {filename}\n'.format(
                method='md5',
                digest=self.used_url['md5_digest'],
                filename=self.filename)
            lines.append(hash_line)
        digest = hashlib.sha256(self.as_string).hexdigest()
        hash_line = '{method}\t{digest}  {filename}\n'.format(
            method='sha256',
            digest=digest,
            filename=self.filename)
        lines.append(hash_line)

        with open(path_to_hash, 'w') as hash_file:
            hash_file.writelines(lines)

    def create_config_in(self):
        """
        Creates the Config.in file of a package

        The help text is the summary found in the metadata, followed by
        the home page when the metadata has one.
        """
        path_to_config = os.path.join(self.pkg_dir, 'Config.in')
        print('Creating {file}...'.format(file=path_to_config))
        lines = []
        config_line = 'config BR2_PACKAGE_{name}\n'.format(
            name=self.mk_name)
        lines.append(config_line)

        bool_line = '\tbool "{name}"\n'.format(name=self.buildroot_name)
        lines.append(bool_line)
        if self.pkg_req:
            for dep in self.pkg_req:
                dep_line = '\tselect BR2_PACKAGE_{req} # runtime\n'.format(
                    req=dep.upper().replace('-', '_'))
                lines.append(dep_line)

        lines.append('\thelp\n')

        info = self.metadata['info']
        # The summary may be missing or empty, in which case there is no
        # help line to terminate.
        help_lines = textwrap.wrap(info['summary'] or '',
                                   initial_indent='\t  ',
                                   subsequent_indent='\t  ')

        # make sure a help text is terminated with a full stop
        if help_lines and not help_lines[-1].endswith('.'):
            help_lines[-1] += '.'

        if info['home_page']:
            help_lines.append('')
            help_lines.append('\t  ' + info['home_page'])
        lines += [line + '\n' for line in help_lines]

        with open(path_to_config, 'w') as config_file:
            config_file.writelines(lines)
def main():
    """
    Command line entry point.

    Creates a Buildroot package in the output directory for every package
    given on the command line, and for the dependencies found on the way
    that do not exist yet. A package that cannot be processed is reported
    and skipped.
    """
    # Building the parser
    parser = argparse.ArgumentParser(
        description="Creates buildroot packages from the metadata of "
                    "an existing PyPI packages and include it "
                    "in menuconfig")
    parser.add_argument("packages",
                        help="list of packages to be created",
                        nargs='+')
    parser.add_argument("-o", "--output",
                        help="""
                        Output directory for packages.
                        Default is ./package
                        """,
                        default='./package')

    args = parser.parse_args()
    packages = list(set(args.packages))

    # tmp_path is where we'll extract the files later
    tmp_prefix = 'scanpypi-'
    pkg_folder = args.output
    tmp_path = tempfile.mkdtemp(prefix=tmp_prefix)
    try:
        # 'packages' grows during the iteration: the dependencies that are
        # discovered are appended to it and processed by this same loop.
        for real_pkg_name in packages:
            package = BuildrootPackage(real_pkg_name, pkg_folder)
            print('buildroot package name for {}:'.format(package.real_name),
                  package.buildroot_name)
            # First we download the package
            # Most of the info we need can only be found inside the package
            print('Package:', package.buildroot_name)
            print('Fetching package', package.real_name)
            try:
                package.fetch_package_info()
            except urllib2.URLError:
                # Also covers urllib2.HTTPError, a subclass of URLError;
                # the error has already been reported.
                continue
            if package.metadata_name.lower() == 'setuptools':
                # setuptools imports itself, that does not work very well
                # with the monkey path at the begining
                print('Error: setuptools cannot be built using scanPyPI')
                continue

            try:
                package.download_package()
            except urllib2.HTTPError as error:
                print('Error: {code} {reason}'.format(code=error.code,
                                                      reason=error.reason))
                print('Error downloading package :', package.buildroot_name)
                print()
                continue
            except DownloadFailed as error:
                # One package without a usable archive must not abort the
                # whole run.
                print('Error: {reason}'.format(reason=error))
                print('Error downloading package :', package.buildroot_name)
                print()
                continue

            # extract the tarball
            try:
                package.extract_package(tmp_path)
            except (tarfile.ReadError, zipfile.BadZipfile):
                print('Error extracting package {}'.format(package.real_name))
                print()
                continue
            if package.tmp_extract is None:
                # extract_package could not create its directory and has
                # already reported why.
                continue

            # Loading the package install info from the package
            try:
                package.load_setup()
            except ImportError as err:
                if 'buildutils' in str(err):
                    print('This package needs buildutils')
                else:
                    raise
                continue
            except AttributeError:
                print('Error: Could not install package {pkg}'.format(
                    pkg=package.real_name))
                continue

            # Package requirement are an argument of the setup function
            req_not_found = package.get_requirements(pkg_folder)
            req_not_found = req_not_found.difference(packages)

            packages.extend(req_not_found)
            if req_not_found:
                print('Added packages \'{pkgs}\' as dependencies of {pkg}'
                      .format(pkgs=", ".join(req_not_found),
                              pkg=package.buildroot_name))
            print('Checking if package {name} already exists...'.format(
                name=package.pkg_dir))
            try:
                os.makedirs(package.pkg_dir)
            except OSError as exception:
                if exception.errno != errno.EEXIST:
                    # OSError.message is empty for (errno, strerror)
                    # errors, so print the exception itself.
                    print("ERROR: ", exception, file=sys.stderr)
                    continue
                print('Error: Package {name} already exists'
                      .format(name=package.pkg_dir))
                del_pkg = raw_input(
                    'Do you want to delete existing package ? [y/N]')
                if del_pkg.lower() == 'y':
                    shutil.rmtree(package.pkg_dir)
                    os.makedirs(package.pkg_dir)
                else:
                    continue
            package.create_package_mk()

            package.create_hash_file()

            package.create_config_in()
            # printing an empty line for visual comfort
            print()
    finally:
        shutil.rmtree(tmp_path)


if __name__ == "__main__":
    main()