Merge pull request #329823 from ExpidusOS/fix/pkgsllvm/elfutils
[NixPkgs.git] / pkgs / desktops / gnome / extensions / update-extensions.py
blobef6687a06f736fbbb5db5df5148896680c351e37
1 #!/usr/bin/env nix-shell
2 #!nix-shell -I nixpkgs=../../../.. -i python3 -p python3
4 import base64
5 import json
6 import logging
7 import subprocess
8 import urllib.error
9 import urllib.request
10 from operator import itemgetter
11 from pathlib import Path
12 from typing import List, Dict, Optional, Any, Tuple, Set
# Only the GNOME Shell releases listed here are considered; extensions that
# exclusively target older (legacy) releases are ignored.
# Key: GNOME "major" version used to group extensions.
# Value: prefix that an upstream shell version string must start with to count
# as belonging to that major version.
supported_versions = {
    "38": "3.38",
    "40": "40",
    "41": "41",
    "42": "42",
    "43": "43",
    "44": "44",
    "45": "45",
    "46": "46",
}

# Type aliases that make the compound annotations below easier to read
PackageName = str
ShellVersion = str
Uuid = str
ExtensionVersion = int

# Registry of every package name handed out so far, used to detect collisions.
# Extensions are processed deterministically in historical order, which is what
# makes this bookkeeping meaningful.
# Duplicates are only tracked within the same Shell version, hence the outer level:
# key: shell version, value: Dict with key: pname, value: list of UUIDs with that pname
package_name_registry: Dict[ShellVersion, Dict[PackageName, List[Uuid]]] = {
    shell_version: {} for shell_version in supported_versions
}

# Directory that contains this script; the generated JSON files are written next to it.
updater_dir_path = Path(__file__).resolve().parent
def fetch_extension_data(uuid: str, version: str) -> Tuple[str, str]:
    """
    Download the extension and hash it. We use `nix-prefetch-url` for this for efficiency reasons.

    Returns a tuple with the hash (Nix-compatible) of the zip file's content and the
    base64-encoded content of its metadata.json.

    Raises RuntimeError if `nix-prefetch-url` exits with a non-zero status or does not
    print both the hash and the store path.
    """

    # The download URLs follow this schema
    uuid = uuid.replace("@", "")
    url: str = f"https://extensions.gnome.org/extension-data/{uuid}.v{version}.shell-extension.zip"

    # Download extension and add the zip content to nix-store
    process = subprocess.run(
        ["nix-prefetch-url", "--unpack", "--print-path", url],
        capture_output=True,
        text=True,
    )

    # Fail loudly with the tool's own diagnostics instead of tripping over
    # an IndexError on the (empty) output further down.
    if process.returncode != 0:
        raise RuntimeError(
            f"nix-prefetch-url failed for {url} "
            f"(exit code {process.returncode}): {process.stderr.strip()}"
        )

    lines = process.stdout.splitlines()
    if len(lines) < 2:
        raise RuntimeError(
            f"Unexpected nix-prefetch-url output for {url}: {process.stdout!r}"
        )

    # Get hash from first line of nix-prefetch-url output
    sha256 = lines[0].strip()

    # Get path from second line of nix-prefetch-url output
    store_path = Path(lines[1].strip())

    # Get metadata.json content from nix-store
    # NOTE(review): a metadata.json containing non-ASCII characters raises
    # UnicodeEncodeError here — confirm whether that is intended.
    with open(store_path / "metadata.json", "r") as metadata_file:
        metadata = base64.b64encode(metadata_file.read().encode("ascii")).decode()

    return sha256, metadata
def generate_extension_versions(
    extension_version_map: Dict[ShellVersion, ExtensionVersion], uuid: str
) -> Dict[ShellVersion, Dict[str, str]]:
    """
    Turn a mapping from shell versions to extension versions into the structure we store:
    - Keep a single extension version per GNOME Shell major version (see `supported_versions`),
      namely the highest one whose shell version starts with the configured prefix
    - Drop everything that matches none of the supported GNOME versions
    - Download every selected extension version once and record its hash and metadata

    The result maps each supported shell version to a dict with the keys
    "version", "sha256" and "metadata".
    """

    # Step 1: pick the newest compatible extension version for each shell version
    chosen: Dict[ShellVersion, ExtensionVersion] = {}
    for shell_version, version_prefix in supported_versions.items():
        candidates = [
            int(ext_ver)
            for shell_ver, ext_ver in extension_version_map.items()
            if shell_ver.startswith(version_prefix)
        ]
        newest: Optional[int] = max(candidates, default=None)
        # A falsy result means the extension is not compatible with this GNOME version
        if newest:
            chosen[shell_version] = newest

    # Step 2: fetch each distinct extension version exactly once
    downloaded: Dict[ExtensionVersion, Tuple[str, str]] = {}
    for extension_version in sorted(set(chosen.values())):
        logging.debug(f"[{uuid}] Downloading v{extension_version}")
        downloaded[extension_version] = fetch_extension_data(uuid, str(extension_version))

    # Step 3: assemble the final map
    result: Dict[ShellVersion, Dict[str, str]] = {}
    for shell_version, extension_version in chosen.items():
        sha256, metadata = downloaded[extension_version]
        entry = {"version": str(extension_version), "sha256": sha256}
        # The downloads are impure, their metadata.json may change at any time.
        # Thus, we back it up / pin it to remain deterministic
        # Upstream issue: https://gitlab.gnome.org/Infrastructure/extensions-web/-/issues/137
        entry["metadata"] = metadata
        result[shell_version] = entry

    return result
def pname_from_url(url: str) -> Tuple[str, str]:
    """
    Split an extension link into its name and numeric id.

    For example, "/extension/1475/battery-time/" yields ("battery-time", "1475").
    """

    # Index 0 is the empty string in front of the leading slash,
    # index 1 is "extension", then come the id and the name.
    segments = url.split("/")
    name, number = segments[3], segments[2]
    return name, number
def process_extension(extension: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """
    Process one extension: take the raw scraped data and download everything that
    buildGnomeExtension.nix requires.

    Input: a json object of one extension queried from the site. Schema (only the keys used here):
    {
        "uuid": str,
        "name": str,
        "description": str,
        "link": str,
        "shell_version_map": {
            str: { "version": int, … },
            …
        }
    }

    "uuid" is an extension UUID that looks like this (most of the time): "extension-name@username.domain.tld".
    Don't make any assumptions on it, and treat it like an opaque string!
    "link" follows the schema "/extension/$number/$string/".
    The number is monotonically increasing and unique to every extension.
    The string is usually derived from the extension name (but shortened, kebab-cased and URL friendly).
    It may diverge from the actual name.
    The keys of "shell_version_map" are GNOME Shell version numbers.

    Output: a json object to be stored, or None if the extension should be skipped. Schema:
    {
        "uuid": str,
        "name": str,
        "pname": str,
        "description": str,
        "link": str,
        "shell_version_map": {
            str: { "version": str, "sha256": str, "metadata": <base64> },
            …
        }
    }

    "uuid", "name" and "description" are passed along as they are; "link" gets prefixed with
    the site's origin. "pname" is derived from "link", and "shell_version_map" has a completely
    different structure than the input field with the same name.
    """
    uuid = extension["uuid"]
    raw_version_map = extension["shell_version_map"]

    # Yeah, there are some extensions without any releases
    if not raw_version_map:
        return None
    logging.info(f"Processing '{uuid}'")

    # Input is a mapping str -> { version: int, … };
    # reduce it to a mapping from shell version to extension version
    version_by_shell: Dict[ShellVersion, int] = {
        shell: release["version"] for shell, release in raw_version_map.items()
    }

    # Bring it into the shape we store and throw away unwanted versions
    shell_version_map = generate_extension_versions(version_by_shell, uuid)

    # No compatible versions found
    if not shell_version_map:
        return None

    # Derive a human-readable name for the package from its link
    pname, _pname_id = pname_from_url(extension["link"])

    # Record the name per shell version and complain when it is already taken
    for shell_version in shell_version_map:
        names_for_shell = package_name_registry[shell_version]
        if pname not in names_for_shell:
            names_for_shell[pname] = [uuid]
            continue
        logging.warning(f"Package name '{pname}' for GNOME '{shell_version}' is colliding.")
        names_for_shell[pname].append(uuid)

    return {
        "uuid": uuid,
        "name": extension["name"],
        "pname": pname,
        "description": extension["description"],
        "link": "https://extensions.gnome.org" + extension["link"],
        "shell_version_map": shell_version_map,
    }
def scrape_extensions_index() -> List[Dict[str, Any]]:
    """
    Collect the list of all extensions by walking the search API page by page.

    Scraping stops at the first page that is not full or when the server answers with a 404.
    Any other HTTP error is propagated to the caller.

    The returned list is sorted by the extensions' primary key so that the order is deterministic.
    """
    collected = []
    page = 1
    while True:
        logging.info("Scraping page " + str(page))
        try:
            with urllib.request.urlopen(
                f"https://extensions.gnome.org/extension-query/?n_per_page=25&page={page}"
            ) as response:
                body = response.read().decode()
        except urllib.error.HTTPError as error:
            if error.code != 404:
                raise
            # We reached past the last page and are done now
            break

        batch = json.loads(body)["extensions"]
        batch_size = len(batch)
        collected.extend(batch)

        # If our page isn't "full", it must have been the last one
        if batch_size < 25:
            logging.debug(
                f"\tThis page only has {batch_size} entries, so it must be the last one."
            )
            break

        page += 1

    # `pk` is the primary key in the extensions.gnome.org database. Sorting on it will give us a stable,
    # deterministic ordering.
    collected.sort(key=itemgetter("pk"))
    return collected
if __name__ == "__main__":
    # Script entry point: scrape the index, process every extension, then write
    # extensions.json and collisions.json next to this script.
    logging.basicConfig(level=logging.DEBUG)

    raw_extensions = scrape_extensions_index()

    logging.info(f"Downloaded {len(raw_extensions)} extensions. Processing …")
    processed_extensions: List[Dict[str, Any]] = []
    for num, raw_extension in enumerate(raw_extensions):
        processed_extension = process_extension(raw_extension)
        if processed_extension:
            processed_extensions.append(processed_extension)
            logging.debug(f"Processed {num + 1} / {len(raw_extensions)}")

    # We micro-manage a lot of the serialization process to keep the diffs optimal.
    # We generally want most of the attributes of an extension on one line,
    # but then each of its supported versions with metadata on a new line.
    # The encoding is pinned to UTF-8 because ensure_ascii=False lets non-ASCII
    # characters through verbatim.
    with open(updater_dir_path / "extensions.json", "w", encoding="utf-8") as out:
        for index, extension in enumerate(processed_extensions):
            # Manually pretty-print the outermost array level
            out.write("[ " if index == 0 else ", ")
            # Dump each extension into a single-line string first
            serialized = json.dumps(extension, ensure_ascii=False)
            # Inject line breaks for each supported version
            for version in supported_versions:
                # This one only matches the first entry
                serialized = serialized.replace(f"{{\"{version}\": {{", f"{{\n \"{version}\": {{")
                # All other entries
                serialized = serialized.replace(f", \"{version}\": {{", f",\n \"{version}\": {{")
            # One last line break around the closing braces
            serialized = serialized.replace("}}}", "}\n }}")

            out.write(serialized)
            out.write("\n")
        out.write("]\n")

    logging.info(
        f"Done. Writing results to extensions.json ({len(processed_extensions)} extensions in total)"
    )

    with open(updater_dir_path / "extensions.json", "r", encoding="utf-8") as out:
        # Check that the generated file actually is valid JSON, just to be sure
        json.load(out)

    with open(updater_dir_path / "collisions.json", "w", encoding="utf-8") as out:
        # Find the name collisions only for the last 3 shell versions
        last_3_versions = sorted(supported_versions.keys(), key=float, reverse=True)[:3]
        # Merge all package names of those versions into a single dictionary
        merged_names: Dict[PackageName, Set[Uuid]] = {}
        for shell_version, pkgs in package_name_registry.items():
            if shell_version not in last_3_versions:
                continue
            for pname, uuids in pkgs.items():
                merged_names.setdefault(pname, set()).update(uuids)
        # Keep only real duplicates. The UUIDs are sorted because set iteration
        # order is not stable across runs, which would produce spurious diffs.
        collisions: Dict[PackageName, List[Uuid]] = {
            pname: sorted(uuids) for pname, uuids in merged_names.items() if len(uuids) > 1
        }
        json.dump(collisions, out, indent=2, ensure_ascii=False)
        out.write("\n")

    logging.info(
        "Done. Writing name collisions to collisions.json (please check manually)"
    )