4 Converts the LibreOffice `download.lst` file into a Nix expression.
6 Requires an environment variable named `downloadList` identifying the path
7 of the input file, and writes the result to stdout.
9 todo - Ideally we would move as much as possible into derivation dependencies.
11 import collections
, itertools
, json
, re
, subprocess
, sys
, os
12 import urllib
.request
, urllib
.error
16 packages
= list(get_packages())
19 print(x
, file=sys
.stderr
)
26 upstream_sha256
= x
['sha256']
28 hash = upstream_sha256
33 tarball
= x
['tarball']
35 url
= construct_url(x
)
36 print('url: {}'.format(url
), file=sys
.stderr
)
38 path
= download(url
, tarball
, hash, hashtype
)
39 print('path: {}'.format(path
), file=sys
.stderr
)
41 sha256
= get_sha256(path
)
42 print('sha256: {}'.format(sha256
), file=sys
.stderr
)
45 print(' name = "{}";'.format(tarball
))
46 print(' url = "{}";'.format(url
))
47 print(' sha256 = "{}";'.format(sha256
))
48 print(' md5 = "{}";'.format(md5
))
49 print(' md5name = "{}-{}";'.format(md5
or upstream_sha256
,tarball
))
57 url
= 'https://dev-www.libreoffice.org/src/{}{}'.format(
58 x
.get('subdir', ''), x
['tarball'])
60 url
= 'https://dev-www.libreoffice.org/src/{}{}-{}'.format(
61 x
.get('subdir', ''), x
['md5'], x
['tarball'])
63 if x
['name'].startswith('FONT_NOTO_') and not probe_url(url
):
64 return 'https://noto-website-2.storage.googleapis.com/pkgs/{}'.format(x
['tarball'])
66 if x
['name'] == 'FONT_OPENDYSLEXIC':
67 return 'https://github.com/antijingoist/opendyslexic/releases/download/v0.91.12/{}'.format(x
['tarball'])
72 def probe_url(url
: str) -> bool:
73 request
= urllib
.request
.Request(url
, method
='HEAD')
75 with urllib
.request
.urlopen(request
) as response
:
76 return response
.status
== 200
77 except urllib
.error
.HTTPError
as e
:
def download(url, name, hash, hashtype):
    """
    Fetch `url` with `nix-prefetch-url` and return the path it reports.

    The command is invoked as
    `nix-prefetch-url <url> <hash> --print-path --type <hashtype> --name <name>`.
    With `--print-path` the store path is printed on the second line of
    standard output; that line, stripped of surrounding whitespace, is the
    return value.

    Raises `subprocess.CalledProcessError` if the command exits with a
    non-zero status, and `IndexError` if it prints fewer than two lines.
    """
    prefetch_argv = [
        'nix-prefetch-url', url, hash,
        '--print-path',
        '--type', hashtype,
        '--name', name,
    ]
    completed = subprocess.run(
        prefetch_argv,
        stdout=subprocess.PIPE,
        check=True,
        universal_newlines=True,
    )
    output_lines = completed.stdout.split('\n')
    # Line 0 is the hash; line 1 is the store path requested via --print-path.
    store_path_line = output_lines[1]
    return store_path_line.strip()
90 cmd
= ['sha256sum', path
]
91 proc
= subprocess
.run(cmd
, stdout
=subprocess
.PIPE
, check
=True,
92 universal_newlines
=True)
93 return proc
.stdout
.split(' ')[0].strip()
98 All of the package data: What's parsed from download.lst,
101 return apply_additions(get_packages_from_download_list(),
107 A mapping from package name (the all-caps identifiers used in
108 `download.lst`) to a dict of additional attributes to set on the package.
110 with
open('./libreoffice-srcs-additions.json') as f
:
114 def apply_additions(xs
, additions
):
117 additions
.get(x
['name'], {})])
120 def get_packages_from_download_list():
122 The result of parsing `download.lst`: A list of dicts containing keys
123 'name', 'tarball', 'md5', 'brief'.
127 for x
in sub_symbols(parse_lines(get_lines())):
129 interpretation
= interpret(x
)
131 if interpretation
== 'unrecognized':
132 print_skipped_line(x
)
139 Groups lines according to their order within the file, to support
140 packages that are listed in `download.lst` more than once.
142 keys
= ['tarball', 'md5', 'sha256', 'brief']
143 a
= {k
: [x
for x
in xs
if k
in x
['attrs']] for k
in keys
}
144 return zip(*[a
[k
] for k
in keys
])
147 for (name
, group
) in groupby(lines(), lambda x
: x
['name']):
148 for xs
in cluster(group
):
150 'attrs': dict_merge(x
['attrs'] for x
in xs
),
151 'index': min(x
['index'] for x
in xs
)}
153 for x
in sorted(packages(), key
=lambda x
: x
['index']):
154 yield dict_merge([{'name': x
['name']},
160 >>> dict_merge([{1: 2}, {3: 4}, {3: 5}])
163 return dict(collections
.ChainMap(*xs
))
168 >>> groupby([1, 2, 3, 4], lambda x: x % 2)
169 [(0, [2, 4]), (1, [1, 3])]
171 for (k
, iter) in itertools
.groupby(sorted(xs
, key
=f
), f
):
173 yield (f(group
[0]), group
)
178 download_list
= os
.getenv('downloadList')
180 with
open(download_list
) as f
:
181 return f
.read().splitlines()
184 def print_skipped_line(x
):
186 print('Skipped line {}: {}'.format(x
['index'],
191 def parse_lines(lines
):
193 Input: List of strings (the lines from `download.lst`)
194 Output: Iterator of dicts with keys 'key', 'value', and 'index'
196 for (index
, line
) in enumerate(lines
):
198 x
= { 'index': index
, 'original': line
}
200 result
= parse_line(line
)
202 if result
== 'nothing':
204 elif result
== 'unrecognized':
205 print_skipped_line(x
)
211 def parse_line(line
):
214 Output: One of 1. A dict with keys 'key', 'value'
215 2. 'nothing' (if the line contains no information)
216 3. 'unrecognized' (if parsing failed)
219 if re
.match('\s*(#.*)?$', line
):
222 match
= re
.match('([^:\s]+)\s*:=\s*(.*)$', line
)
226 'key': match
.group(1),
227 'value': match
.group(2).strip()
230 return 'unrecognized'
235 Do substitution of variables across all lines.
237 >>> sub_symbols([{'key': 'a', 'value': 'x'},
238 ... {'key': 'c', 'value': '$(a)yz'}])
239 [{'key': 'a', 'value': 'x'}, {'key': 'c', 'value': 'xyz'}]
244 symbols
= {x
['key']: x
for x
in xs
}
248 return x
['value'] if x
is not None else ''
251 yield dict_merge([{'value': sub_str(x
['value'], get_value
)},
255 def sub_str(string
, func
):
257 Do substitution of variables in a single line.
259 >>> sub_str("x = $(x)", lambda k: {'x': 'a'}[k])
266 return result
if result
is not None else x
268 return re
.sub(r
'\$\(([^\$\(\)]+)\)', func2
, string
)
273 Input: Dict with keys 'key' and 'value'
274 Output: One of 1. Dict with keys 'name' and 'attrs'
275 2. 'unrecognized' (if interpretation failed)
277 for f
in [interpret_md5
, interpret_sha256
, interpret_tarball_with_md5
, interpret_tarball
, interpret_jar
]:
279 if result
is not None:
282 return 'unrecognized'
285 def interpret_md5(x
):
287 >>> interpret_md5({'key': 'ODFGEN_MD5SUM', 'value': '32572ea48d9021bbd6fa317ddb697abc'})
288 {'name': 'ODFGEN', 'attrs': {'md5': '32572ea48d9021bbd6fa317ddb697abc', 'sha256': ''}}
291 match
= re
.match('^(.*)_MD5SUM$', x
['key'])
294 return {'name': match
.group(1),
295 'attrs': {'md5': x
['value'], 'sha256': ''}}
297 def interpret_sha256(x
):
298 match
= re
.match('^(.*)_SHA256SUM$', x
['key'])
301 return {'name': match
.group(1),
302 'attrs': {'sha256': x
['value'], 'md5': ''}}
304 def interpret_tarball(x
):
306 >>> interpret_tarball({'key': 'FREEHAND_TARBALL', 'value': 'libfreehand-0.1.1.tar.bz2'})
308 'attrs': {'tarball': 'libfreehand-0.1.1.tar.bz2', 'brief': True}}
311 match
= re
.match('^(.*)_TARBALL$', x
['key'])
314 return {'name': match
.group(1),
315 'attrs': {'tarball': x
['value'], 'brief': True}}
317 def interpret_jar(x
):
318 match
= re
.match('^(.*)_JAR$', x
['key'])
321 return {'name': match
.group(1),
322 'attrs': {'tarball': x
['value'], 'brief': True}}
325 def interpret_tarball_with_md5(x
):
327 >>> interpret_tarball_with_md5({'key': 'CLUCENE_TARBALL', 'value':\
328 '48d647fbd8ef8889e5a7f422c1bfda94-clucene-core-2.3.3.4.tar.gz'})
330 'attrs': {'tarball': 'clucene-core-2.3.3.4.tar.gz',
331 'md5': '48d647fbd8ef8889e5a7f422c1bfda94', 'brief': False}}
334 match
= {'key': re
.match('^(.*)_(TARBALL|JAR)$', x
['key']),
335 'value': re
.match('(?P<md5>[0-9a-fA-F]{32})-(?P<tarball>.+)$',
338 if match
['key'] and match
['value']:
339 return {'name': match
['key'].group(1),
340 'attrs': {'tarball': match
['value'].group('tarball'),
341 'md5': match
['value'].group('md5'),