pkgs/applications/office/libreoffice/generate-libreoffice-srcs.py

   1 #!/usr/bin/env python3
   2
   3 """
   4 Converts the LibreOffice `download.lst` file into a Nix expression.
   5
   6 Requires an environment variable named `downloadList` identifying the path
   7 of the input file, and writes the result to stdout.
   8
   9 todo - Ideally we would move as much as possible into derivation dependencies.
  10 """
  11 import collections, itertools, json, re, subprocess, sys, os
  12 import urllib.request, urllib.error
  13
  14 def main():
  15
  16     packages = list(get_packages())
  17
  18     for x in packages:
  19         print(x, file=sys.stderr)
  20
  21     print('[')
  22
  23     for x in packages:
  24
  25         md5 = x['md5']
  26         upstream_sha256 = x['sha256']
  27         if upstream_sha256:
  28             hash = upstream_sha256
  29             hashtype = 'sha256'
  30         else:
  31             hash = md5
  32             hashtype = 'md5'
  33         tarball = x['tarball']
  34
  35         url = construct_url(x)
  36         print('url: {}'.format(url), file=sys.stderr)
  37
  38         path = download(url, tarball, hash, hashtype)
  39         print('path: {}'.format(path), file=sys.stderr)
  40
  41         sha256 = get_sha256(path)
  42         print('sha256: {}'.format(sha256), file=sys.stderr)
  43
  44         print('  {')
  45         print('    name = "{}";'.format(tarball))
  46         print('    url = "{}";'.format(url))
  47         print('    sha256 = "{}";'.format(sha256))
  48         print('    md5 = "{}";'.format(md5))
  49         print('    md5name = "{}-{}";'.format(md5 or upstream_sha256,tarball))
  50         print('  }')
  51
  52     print(']')
  53
  54
  55 def construct_url(x):
  56     if x['brief']:
  57         url = 'https://dev-www.libreoffice.org/src/{}{}'.format(
  58             x.get('subdir', ''), x['tarball'])
  59     else:
  60         url = 'https://dev-www.libreoffice.org/src/{}{}-{}'.format(
  61             x.get('subdir', ''), x['md5'], x['tarball'])
  62
  63     if x['name'].startswith('FONT_NOTO_') and not probe_url(url):
  64         return 'https://noto-website-2.storage.googleapis.com/pkgs/{}'.format(x['tarball'])
  65
  66     if x['name'] == 'FONT_OPENDYSLEXIC':
  67         return 'https://github.com/antijingoist/opendyslexic/releases/download/v0.91.12/{}'.format(x['tarball'])
  68
  69     return url
  70
  71
  72 def probe_url(url: str) -> bool:
  73     request = urllib.request.Request(url, method='HEAD')
  74     try:
  75         with urllib.request.urlopen(request) as response:
  76             return response.status == 200
  77     except urllib.error.HTTPError as e:
  78         return False
  79
  80
  81 def download(url, name, hash, hashtype):
  82     cmd = ['nix-prefetch-url', url, hash, '--print-path',
  83            '--type', hashtype, '--name', name]
  84     proc = subprocess.run(cmd, stdout=subprocess.PIPE, check=True,
  85                           universal_newlines=True)
  86     return proc.stdout.split('\n')[1].strip()
  87
  88
  89 def get_sha256(path):
  90     cmd = ['sha256sum', path]
  91     proc = subprocess.run(cmd, stdout=subprocess.PIPE, check=True,
  92                           universal_newlines=True)
  93     return proc.stdout.split(' ')[0].strip()
  94
  95
  96 def get_packages():
  97     """
  98     All of the package data: What's parsed from download.lst,
  99     plus our additions.
 100     """
 101     return apply_additions(get_packages_from_download_list(),
 102                            get_additions())
 103
 104
 105 def get_additions():
 106     """
 107     A mapping from package name (the all-caps identifiers used in
 108     `download.lst`) to a dict of additional attributes to set on the package.
 109     """
 110     with open('./libreoffice-srcs-additions.json') as f:
 111         return json.load(f)
 112
 113
 114 def apply_additions(xs, additions):
 115     for x in xs:
 116         yield dict_merge([x,
 117                           additions.get(x['name'], {})])
 118
 119
def get_packages_from_download_list():
    """
    The result of parsing `download.lst`: An iterator of dicts containing
    keys 'name', 'tarball', 'md5', 'sha256', 'brief', ordered by the position
    of each package's earliest line in the file.
    """

    def lines():
        # Parsed, variable-substituted lines, each extended with its
        # interpretation ('name' and 'attrs').  Lines that no interpreter
        # recognizes are reported on stderr and dropped.
        for x in sub_symbols(parse_lines(get_lines())):

            interpretation = interpret(x)

            if interpretation == 'unrecognized':
                print_skipped_line(x)
            else:
                yield dict_merge([x,
                                  interpretation])

    def cluster(xs):
        """
        Groups lines according to their order within the file, to support
        packages that are listed in `download.lst` more than once.

        For each key, collects the lines whose attrs provide it, then zips
        the four lists together: the n-th tuple holds the n-th provider of
        each key.  A single line may appear several times in a tuple when it
        provides several keys.  Because `zip` stops at the shortest list, a
        name for which any of the four keys has no provider yields nothing.
        """
        keys = ['tarball', 'md5', 'sha256', 'brief']
        a = {k: [x for x in xs if k in x['attrs']] for k in keys}
        return zip(*[a[k] for k in keys])

    def packages():
        for (name, group) in groupby(lines(), lambda x: x['name']):
            for xs in cluster(group):
                # Earlier members of the tuple win on conflicting attrs
                # (see dict_merge); 'index' is the earliest contributing
                # line and is used only for the final ordering.
                yield {'name': name,
                       'attrs': dict_merge(x['attrs'] for x in xs),
                       'index': min(x['index'] for x in xs)}

    # groupby() sorts by name, so restore file order before flattening
    # each package into a single dict.
    for x in sorted(packages(), key=lambda x: x['index']):
        yield dict_merge([{'name': x['name']},
                          x['attrs']])
 156
 157
 158 def dict_merge(xs):
 159     """
 160     >>> dict_merge([{1: 2}, {3: 4}, {3: 5}])
 161     {1: 2, 3: 4}
 162     """
 163     return dict(collections.ChainMap(*xs))
 164
 165
 166 def groupby(xs, f):
 167     """
 168     >>> groupby([1, 2, 3, 4], lambda x: x % 2)
 169     [(0, [2, 4]), (1, [1, 3])]
 170     """
 171     for (k, iter) in itertools.groupby(sorted(xs, key=f), f):
 172         group = list(iter)
 173         yield (f(group[0]), group)
 174
 175
 176 def get_lines():
 177
 178     download_list = os.getenv('downloadList')
 179
 180     with open(download_list) as f:
 181         return f.read().splitlines()
 182
 183
 184 def print_skipped_line(x):
 185
 186     print('Skipped line {}: {}'.format(x['index'],
 187                                        x['original']),
 188           file=sys.stderr)
 189
 190
 191 def parse_lines(lines):
 192     """
 193     Input: List of strings (the lines from `download.lst`
 194     Output: Iterator of dicts with keys 'key', 'value', and 'index'
 195     """
 196     for (index, line) in enumerate(lines):
 197
 198         x = { 'index': index, 'original': line }
 199
 200         result = parse_line(line)
 201
 202         if result == 'nothing':
 203             pass
 204         elif result == 'unrecognized':
 205             print_skipped_line(x)
 206         else:
 207             yield dict_merge([x,
 208                              result])
 209
 210
 211 def parse_line(line):
 212     """
 213     Input: A string
 214     Output: One of 1. A dict with keys 'key', 'value'
 215                    2. 'nothing' (if the line contains no information)
 216                    2. 'unrecognized' (if parsing failed)
 217     """
 218
 219     if re.match('\s*(#.*)?$', line):
 220         return 'nothing'
 221
 222     match = re.match('([^:\s]+)\s*:=\s*(.*)$', line)
 223
 224     if match:
 225         return {
 226             'key': match.group(1),
 227             'value': match.group(2).strip()
 228         }
 229     else:
 230         return 'unrecognized'
 231
 232
 233 def sub_symbols(xs):
 234     """
 235     Do substitution of variables across all lines.
 236
 237     >>> sub_symbols([{'key': 'a', 'value': 'x'},
 238     ...              {'key': 'c': 'value': '$(a)yz'}])
 239     [{'key': 'a', 'value': 'x'}, {'key': 'c': 'value': 'xyz'}]
 240     """
 241
 242     xs = list(xs)
 243
 244     symbols = {x['key']: x for x in xs}
 245
 246     def get_value(k):
 247         x = symbols.get(k)
 248         return x['value'] if x is not None else ''
 249
 250     for x in xs:
 251         yield dict_merge([{'value': sub_str(x['value'], get_value)},
 252                           x])
 253
 254
 255 def sub_str(string, func):
 256     """
 257     Do substitution of variables in a single line.
 258
 259     >>> sub_str("x = $(x)", lambda k: {'x': 'a'}[k])
 260     "x = a"
 261     """
 262
 263     def func2(m):
 264         x = m.group(1)
 265         result = func(x)
 266         return result if result is not None else x
 267
 268     return re.sub(r'\$\(([^\$\(\)]+)\)', func2, string)
 269
 270
 271 def interpret(x):
 272     """
 273     Input: Dict with keys 'key' and 'value'
 274     Output: One of 1. Dict with keys 'name' and 'attrs'
 275                    2. 'unrecognized' (if interpretation failed)
 276     """
 277     for f in [interpret_md5, interpret_sha256, interpret_tarball_with_md5, interpret_tarball, interpret_jar]:
 278         result = f(x)
 279         if result is not None:
 280             return result
 281
 282     return 'unrecognized'
 283
 284
 285 def interpret_md5(x):
 286     """
 287     >>> interpret_md5("ODFGEN_MD5SUM", "32572ea48d9021bbd6fa317ddb697abc")
 288     {'name': 'ODFGEN', 'attrs': {'md5': '32572ea48d9021bbd6fa317ddb697abc'}}
 289     """
 290
 291     match = re.match('^(.*)_MD5SUM$', x['key'])
 292
 293     if match:
 294         return {'name': match.group(1),
 295                 'attrs': {'md5': x['value'], 'sha256': ''}}
 296
 297 def interpret_sha256(x):
 298     match = re.match('^(.*)_SHA256SUM$', x['key'])
 299
 300     if match:
 301         return {'name': match.group(1),
 302                 'attrs': {'sha256': x['value'], 'md5': ''}}
 303
 304 def interpret_tarball(x):
 305     """
 306     >>> interpret_tarball("FREEHAND_TARBALL", "libfreehand-0.1.1.tar.bz2")
 307     {'name': 'FREEHAND',
 308      'attrs': {'tarball': 'libfreehand-0.1.1.tar.bz2', 'brief': True}}
 309     """
 310
 311     match = re.match('^(.*)_TARBALL$', x['key'])
 312
 313     if match:
 314         return {'name': match.group(1),
 315                 'attrs': {'tarball': x['value'], 'brief': True}}
 316
 317 def interpret_jar(x):
 318     match = re.match('^(.*)_JAR$', x['key'])
 319
 320     if match:
 321         return {'name': match.group(1),
 322                 'attrs': {'tarball': x['value'], 'brief': True}}
 323
 324
 325 def interpret_tarball_with_md5(x):
 326     """
 327     >>> interpret_tarball_with_md5("CLUCENE_TARBALL",\
 328         "48d647fbd8ef8889e5a7f422c1bfda94-clucene-core-2.3.3.4.tar.gz")
 329     {'name': 'CLUCENE',
 330      'attrs': {'tarball': 'clucene-core-2.3.3.4.tar.gz',
 331                'md5': '48d647fbd8ef8889e5a7f422c1bfda94', 'brief': False}}
 332     """
 333
 334     match = {'key': re.match('^(.*)_(TARBALL|JAR)$', x['key']),
 335              'value': re.match('(?P<md5>[0-9a-fA-F]{32})-(?P<tarball>.+)$',
 336                                x['value'])}
 337
 338     if match['key'] and match['value']:
 339         return {'name': match['key'].group(1),
 340                 'attrs': {'tarball': match['value'].group('tarball'),
 341                           'md5': match['value'].group('md5'),
 342                           'sha256': '',
 343                           'brief': False}}
 344
 345
# Script entry point.  There is no `__name__` guard, so this also runs if the
# file is imported as a module.
main()