biome: 1.9.2 -> 1.9.3
[NixPkgs.git] / pkgs / applications / office / libreoffice / generate-libreoffice-srcs.py
blob7e2c13b5b17c9eb18f9aea76210666de0efaa0d1
1 #!/usr/bin/env python3
3 """
4 Converts the LibreOffice `download.lst` file into a Nix expression.
6 Requires an environment variable named `downloadList` identifying the path
7 of the input file, and writes the result to stdout.
9 todo - Ideally we would move as much as possible into derivation dependencies.
10 """
11 import collections, itertools, json, re, subprocess, sys, os
12 import urllib.request, urllib.error
def main():
    """
    Write a Nix list describing every LibreOffice source tarball to stdout.

    Every package returned by `get_packages()` is first echoed to stderr.
    Each one is then fetched through `download()`, checked against the
    upstream sha256 when one is present and against the md5 otherwise,
    re-hashed with `get_sha256()`, and printed as one attribute set.
    The url, downloaded path and sha256 of each package are logged to stderr.
    """
    pkgs = list(get_packages())

    for pkg in pkgs:
        print(pkg, file=sys.stderr)

    print('[')

    for pkg in pkgs:
        md5 = pkg['md5']
        upstream_sha256 = pkg['sha256']

        # An empty upstream sha256 means only the md5 is known.
        if upstream_sha256:
            expected, expected_type = upstream_sha256, 'sha256'
        else:
            expected, expected_type = md5, 'md5'

        tarball = pkg['tarball']

        url = construct_url(pkg)
        print('url: {}'.format(url), file=sys.stderr)

        path = download(url, tarball, expected, expected_type)
        print('path: {}'.format(path), file=sys.stderr)

        sha256 = get_sha256(path)
        print('sha256: {}'.format(sha256), file=sys.stderr)

        # NOTE(review): the whitespace inside these literals is reproduced
        # exactly as seen; confirm the indentation against the checked-in file.
        print(' {')
        for template, value in (
            (' name = "{}";', tarball),
            (' url = "{}";', url),
            (' sha256 = "{}";', sha256),
            (' md5 = "{}";', md5),
        ):
            print(template.format(value))
        # The prefix falls back to the upstream sha256 when there is no md5.
        print(' md5name = "{}-{}";'.format(md5 or upstream_sha256, tarball))
        print(' }')

    print(']')
def construct_url(x):
    """
    Work out the download URL for one package dict.

    Keys read from `x`: 'brief', 'tarball', 'name', the optional 'subdir'
    (defaults to ''), and 'md5' (only when 'brief' is false).

    The default location is under https://dev-www.libreoffice.org/src/;
    when 'brief' is false the file name is prefixed with '<md5>-'.
    Two exceptions apply:
      * 'FONT_OPENDYSLEXIC' always uses a fixed GitHub release URL;
      * names starting with 'FONT_NOTO_' use the Noto storage bucket when
        `probe_url` reports that the default URL is not available.
    """
    subdir = x.get('subdir', '')

    if not x['brief']:
        url = 'https://dev-www.libreoffice.org/src/{}{}-{}'.format(
            subdir, x['md5'], x['tarball'])
    else:
        url = 'https://dev-www.libreoffice.org/src/{}{}'.format(
            subdir, x['tarball'])

    name = x['name']

    if name == 'FONT_OPENDYSLEXIC':
        return 'https://github.com/antijingoist/opendyslexic/releases/download/v0.91.12/{}'.format(x['tarball'])

    # Only Noto fonts are probed, so other packages cost no network round trip.
    if name.startswith('FONT_NOTO_') and not probe_url(url):
        return 'https://noto-website-2.storage.googleapis.com/pkgs/{}'.format(x['tarball'])

    return url
def probe_url(url: str) -> bool:
    """
    Report whether a HEAD request for `url` is answered with status 200.

    An `urllib.error.HTTPError` is treated as "not available" and yields
    False; any other exception raised by `urlopen` propagates to the caller.
    """
    head_request = urllib.request.Request(url, method='HEAD')
    try:
        response = urllib.request.urlopen(head_request)
    except urllib.error.HTTPError:
        return False
    with response:
        return response.status == 200
def download(url, name, hash, hashtype):
    """
    Fetch `url` with `nix-prefetch-url` and return the second line it prints.

    `hash` is the expected digest and `hashtype` its algorithm (main passes
    'sha256' or 'md5'); `name` is passed as `--name`. Because `--print-path`
    is given, the second stdout line is presumably the store path; main logs
    it as 'path'.

    Raises subprocess.CalledProcessError when the command exits non-zero.
    """
    argv = [
        'nix-prefetch-url', url, hash,
        '--print-path',
        '--type', hashtype,
        '--name', name,
    ]
    completed = subprocess.run(argv, stdout=subprocess.PIPE, check=True,
                               universal_newlines=True)
    output_lines = completed.stdout.split('\n')
    return output_lines[1].strip()
def get_sha256(path):
    """
    Return the lowercase hex SHA-256 digest of the file at `path`.

    The file is hashed in-process with `hashlib` rather than by running the
    external `sha256sum` program. This removes the dependency on that binary
    being on PATH and gives the plain digest for any file name, including
    names that start with '-' or contain backslashes or newlines.

    Raises OSError if the file cannot be opened or read.
    """
    # Imported locally so this function does not rely on the module import line.
    import hashlib

    digest = hashlib.sha256()
    with open(path, 'rb') as f:
        # Read in 1 MiB chunks so large tarballs are never held in memory whole.
        for chunk in iter(lambda: f.read(1 << 20), b''):
            digest.update(chunk)
    return digest.hexdigest()
def get_packages():
    """
    All of the package data: what is parsed from `download.lst`,
    combined with our additions.

    Returns whatever `apply_additions` returns for those two inputs.
    """
    parsed = get_packages_from_download_list()
    extras = get_additions()
    return apply_additions(parsed, extras)
def get_additions():
    """
    A mapping from package name (the all-caps identifiers used in
    `download.lst`) to a dict of additional attributes to set on the package.

    The mapping is loaded from `./libreoffice-srcs-additions.json`, which is
    resolved relative to the current working directory.
    """
    with open('./libreoffice-srcs-additions.json') as handle:
        raw = handle.read()
    return json.loads(raw)
def apply_additions(xs, additions):
    """
    Yield each package of `xs` merged with its entry in `additions`.

    `additions` maps a package name to a dict of extra attributes; packages
    without an entry are merged with an empty dict. The package dict is
    passed to `dict_merge` first, ahead of the additions.
    """
    for package in xs:
        extra = additions.get(package['name'], {})
        yield dict_merge([package, extra])
def get_packages_from_download_list():
    """
    The result of parsing `download.lst`: yields one dict per package with
    the keys 'name', 'tarball', 'md5', 'sha256' and 'brief', ordered by the
    position of the package's first line in the file.
    """

    def lines():
        # Interpreted lines; lines nothing can interpret are reported and dropped.
        for entry in sub_symbols(parse_lines(get_lines())):
            meaning = interpret(entry)
            if meaning != 'unrecognized':
                yield dict_merge([entry, meaning])
            else:
                print_skipped_line(entry)

    def cluster(xs):
        """
        Groups lines according to their order within the file, to support
        packages that are listed in `download.lst` more than once.
        """
        keys = ['tarball', 'md5', 'sha256', 'brief']
        # One column per attribute: the lines that provide it, in file order.
        columns = []
        for k in keys:
            columns.append([x for x in xs if k in x['attrs']])
        # zip pairs the n-th provider of every attribute and stops at the
        # shortest column.
        return zip(*columns)

    def packages():
        by_name = groupby(lines(), lambda line: line['name'])
        for (name, group) in by_name:
            for members in cluster(group):
                attrs = dict_merge(m['attrs'] for m in members)
                first_index = min(m['index'] for m in members)
                yield {'name': name,
                       'attrs': attrs,
                       'index': first_index}

    ordered = sorted(packages(), key=lambda pkg: pkg['index'])
    for pkg in ordered:
        yield dict_merge([{'name': pkg['name']},
                          pkg['attrs']])
def dict_merge(xs):
    """
    Merge an iterable of dicts into one new dict.

    When a key occurs in more than one dict, the value from the earliest
    dict wins:

        dict_merge([{1: 2}, {3: 4}, {3: 5}]) == {1: 2, 3: 4}
    """
    layered = collections.ChainMap(*xs)
    return {key: layered[key] for key in layered}
def groupby(xs, f):
    """
    Group the items of `xs` by `f(item)`.

    Yields `(key, members)` pairs in ascending key order, where `members`
    is a list. The input is sorted by `f` first, so it need not be ordered.

    >>> list(groupby([1, 2, 3, 4], lambda x: x % 2))
    [(0, [2, 4]), (1, [1, 3])]
    """
    ordered = sorted(xs, key=f)
    for _, members in itertools.groupby(ordered, f):
        bucket = list(members)
        yield (f(bucket[0]), bucket)
def get_lines():
    """
    Return the lines of the file named by the `downloadList` environment
    variable, without their line terminators.
    """
    list_path = os.environ.get('downloadList')
    with open(list_path) as handle:
        contents = handle.read()
    return contents.splitlines()
def print_skipped_line(x):
    """
    Report on stderr that a line was skipped.

    `x` must provide 'index' (the line's position) and 'original' (its text).
    """
    message = 'Skipped line {}: {}'.format(x['index'], x['original'])
    print(message, file=sys.stderr)
def parse_lines(lines):
    """
    Input: List of strings (the lines from `download.lst`)
    Output: Iterator of dicts with keys 'key', 'value', 'index' and
            'original'

    'index' is the zero-based position of the line in `lines`. Lines that
    carry no information are dropped silently; lines that cannot be parsed
    are reported through `print_skipped_line` and dropped.
    """
    for (index, line) in enumerate(lines):
        outcome = parse_line(line)

        if outcome == 'nothing':
            continue

        context = {'index': index, 'original': line}

        if outcome == 'unrecognized':
            print_skipped_line(context)
            continue

        yield dict_merge([context, outcome])
def parse_line(line):
    """
    Input: A string
    Output: One of 1. A dict with keys 'key', 'value'
                   2. 'nothing' (if the line contains no information)
                   3. 'unrecognized' (if parsing failed)

    A line contains no information when it is empty, whitespace only, or a
    `#` comment. An assignment has the form `KEY := value`; the value is
    returned with surrounding whitespace stripped.
    """
    # Raw strings: `\s` in a normal string literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    if re.match(r'\s*(#.*)?$', line):
        return 'nothing'

    match = re.match(r'([^:\s]+)\s*:=\s*(.*)$', line)

    if match:
        return {
            'key': match.group(1),
            'value': match.group(2).strip()
        }

    return 'unrecognized'
def sub_symbols(xs):
    """
    Do substitution of variables across all lines.

    `xs` is an iterable of dicts with at least the keys 'key' and 'value'.
    Every `$(NAME)` in a value is replaced by the value of the line whose
    'key' is NAME, or by '' when no such line exists. If a key is defined
    more than once, the last definition is used. Substituted-in values are
    taken as written; they are not themselves expanded.

    >>> list(sub_symbols([{'key': 'a', 'value': 'x'},
    ...                   {'key': 'c', 'value': '$(a)yz'}]))
    [{'key': 'a', 'value': 'x'}, {'key': 'c', 'value': 'xyz'}]
    """
    # Materialise first: the input is walked twice (symbol table, then output).
    xs = list(xs)

    symbols = {x['key']: x for x in xs}

    def get_value(k):
        x = symbols.get(k)
        return x['value'] if x is not None else ''

    for x in xs:
        # The substituted 'value' comes first so it overrides the original
        # one; all other keys of the line are kept.
        yield dict_merge([{'value': sub_str(x['value'], get_value)},
                          x])
def sub_str(string, func):
    """
    Do substitution of variables in a single line.

    Each `$(NAME)` in `string` is replaced by `func(NAME)`; when `func`
    returns None, the bare NAME is used instead.

    >>> sub_str("x = $(x)", lambda k: {'x': 'a'}[k])
    'x = a'
    """

    def replace(found):
        name = found.group(1)
        value = func(name)
        if value is None:
            return name
        return value

    return re.sub(r'\$\(([^\$\(\)]+)\)', replace, string)
def interpret(x):
    """
    Input: Dict with keys 'key' and 'value'
    Output: One of 1. Dict with keys 'name' and 'attrs'
                   2. 'unrecognized' (if interpretation failed)

    The interpreters are tried in a fixed order and the first result that
    is not None is returned.
    """
    interpreters = (
        interpret_md5,
        interpret_sha256,
        interpret_tarball_with_md5,
        interpret_tarball,
        interpret_jar,
    )

    for attempt in interpreters:
        outcome = attempt(x)
        if outcome is None:
            continue
        return outcome

    return 'unrecognized'
def interpret_md5(x):
    """
    Recognise a `<NAME>_MD5SUM` line; returns None for any other key.

    >>> interpret_md5({'key': 'ODFGEN_MD5SUM',
    ...                'value': '32572ea48d9021bbd6fa317ddb697abc'})
    {'name': 'ODFGEN', 'attrs': {'md5': '32572ea48d9021bbd6fa317ddb697abc', 'sha256': ''}}
    """
    found = re.match('^(.*)_MD5SUM$', x['key'])

    if not found:
        return None

    # 'sha256' is set to '' so the package still carries that key.
    attrs = {'md5': x['value'], 'sha256': ''}
    return {'name': found.group(1), 'attrs': attrs}
def interpret_sha256(x):
    """
    Recognise a `<NAME>_SHA256SUM` line; returns None for any other key.

    On a match the result is {'name': NAME, 'attrs': {'sha256': value,
    'md5': ''}}; 'md5' is set to '' so the package still carries that key.
    """
    found = re.match('^(.*)_SHA256SUM$', x['key'])

    if not found:
        return None

    attrs = {'sha256': x['value'], 'md5': ''}
    return {'name': found.group(1), 'attrs': attrs}
def interpret_tarball(x):
    """
    Recognise a `<NAME>_TARBALL` line; returns None for any other key.

    >>> interpret_tarball({'key': 'FREEHAND_TARBALL',
    ...                    'value': 'libfreehand-0.1.1.tar.bz2'})
    {'name': 'FREEHAND', 'attrs': {'tarball': 'libfreehand-0.1.1.tar.bz2', 'brief': True}}
    """
    found = re.match('^(.*)_TARBALL$', x['key'])

    if not found:
        return None

    # 'brief': True marks a tarball name that has no md5 prefix.
    attrs = {'tarball': x['value'], 'brief': True}
    return {'name': found.group(1), 'attrs': attrs}
def interpret_jar(x):
    """
    Recognise a `<NAME>_JAR` line; returns None for any other key.

    The jar file name is stored under 'tarball' with 'brief' set to True,
    the same attributes `interpret_tarball` produces.
    """
    found = re.match('^(.*)_JAR$', x['key'])

    if not found:
        return None

    attrs = {'tarball': x['value'], 'brief': True}
    return {'name': found.group(1), 'attrs': attrs}
def interpret_tarball_with_md5(x):
    """
    Recognise a `<NAME>_TARBALL` or `<NAME>_JAR` line whose value has the
    form `<32 hex digits>-<file name>`; returns None otherwise.

    >>> interpret_tarball_with_md5(
    ...     {'key': 'CLUCENE_TARBALL',
    ...      'value': '48d647fbd8ef8889e5a7f422c1bfda94-clucene-core-2.3.3.4.tar.gz'})
    {'name': 'CLUCENE', 'attrs': {'tarball': 'clucene-core-2.3.3.4.tar.gz', 'md5': '48d647fbd8ef8889e5a7f422c1bfda94', 'sha256': '', 'brief': False}}
    """
    key_match = re.match('^(.*)_(TARBALL|JAR)$', x['key'])
    value_match = re.match('(?P<md5>[0-9a-fA-F]{32})-(?P<tarball>.+)$',
                           x['value'])

    if not (key_match and value_match):
        return None

    # 'brief': False records that the md5 is part of the file name.
    attrs = {'tarball': value_match.group('tarball'),
             'md5': value_match.group('md5'),
             'sha256': '',
             'brief': False}
    return {'name': key_match.group(1), 'attrs': attrs}
# Run only when executed as a script, so that importing this module (for
# example to run the docstring examples) does not start downloading sources.
if __name__ == '__main__':
    main()