1 #!/usr/bin/env nix-shell
2 #!nix-shell -I nixpkgs=../../../.. -i python3 -p python3
import base64
import json
import logging
import subprocess
import urllib.error
import urllib.request
from operator import itemgetter
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple
14 # We don't want all those deprecated legacy extensions
15 # Group extensions by GNOME "major" version for compatibility reasons
16 supported_versions
= {
# Some type aliases to increase readability of complex compound types.
# All of these are plain strings/ints at runtime; the names only document intent.
PackageName = str
ShellVersion = str
Uuid = str
ExtensionVersion = int
# Keep track of all names that have been used till now to detect collisions.
# This works because we deterministically process all extensions in historical order
# The outer dict level is the shell version, as we are tracking duplicates only per same Shell version.
# key: shell version, value: Dict with key: pname, value: list of UUIDs with that pname
package_name_registry: Dict[ShellVersion, Dict[PackageName, List[Uuid]]] = {
    shell_version: {} for shell_version in supported_versions
}

# Directory this updater lives in; output files are written next to it.
updater_dir_path = Path(__file__).resolve().parent
def fetch_extension_data(uuid: str, version: str) -> Tuple[str, str]:
    """
    Download the extension and hash it. We use `nix-prefetch-url` for this for efficiency reasons.
    Returns a tuple with the hash (Nix-compatible) of the zip file's content and the base64-encoded content of its metadata.json.
    """
    # The download URLs follow this schema
    uuid = uuid.replace("@", "")
    url: str = f"https://extensions.gnome.org/extension-data/{uuid}.v{version}.shell-extension.zip"

    # Download extension and add the zip content to nix-store
    process = subprocess.run(
        ["nix-prefetch-url", "--unpack", "--print-path", url], capture_output=True, text=True
    )
    # NOTE(review): a failed nix-prefetch-url surfaces as an IndexError below;
    # consider check=True if a hard error is preferable.

    lines = process.stdout.splitlines()

    # Get hash from first line of nix-prefetch-url output
    # (renamed from `hash` so we don't shadow the builtin)
    sha256 = lines[0].strip()

    # Get path from second line of nix-prefetch-url output
    path = Path(lines[1].strip())

    # Get metadata.json content from nix-store
    with open(path / "metadata.json", "r") as out:
        # The downloads are impure; pin metadata.json by embedding it base64-encoded
        metadata = base64.b64encode(out.read().encode("ascii")).decode()

    return sha256, metadata
def generate_extension_versions(
    extension_version_map: Dict[ShellVersion, ExtensionVersion], uuid: str
) -> Dict[ShellVersion, Dict[str, str]]:
    """
    Takes in a mapping from shell versions to extension versions and transforms it the way we need it:
    - Only take one extension version per GNOME Shell major version (as per `supported_versions`)
    - Filter out versions that only support old GNOME versions
    - Download the extension and hash it
    """
    # Determine extension version per shell version
    extension_versions: Dict[ShellVersion, ExtensionVersion] = {}
    for shell_version, version_prefix in supported_versions.items():
        # Newest compatible extension version; None when no release matches this shell version
        extension_version: Optional[int] = max(
            (
                ext_ver
                for shell_ver, ext_ver in extension_version_map.items()
                if shell_ver.startswith(version_prefix)
            ),
            default=None,
        )
        # Extension is not compatible with this GNOME version
        if not extension_version:
            continue
        extension_versions[shell_version] = extension_version

    # Download information once for all extension versions chosen above
    # (several shell versions may share one extension version)
    extension_info_cache: Dict[ExtensionVersion, Tuple[str, str]] = {}
    for extension_version in sorted(set(extension_versions.values())):
        logging.debug(f"[{uuid}] Downloading v{extension_version}")
        extension_info_cache[extension_version] = fetch_extension_data(
            uuid, str(extension_version)
        )

    # Assemble the result: per shell version the chosen extension version plus its download info
    extension_versions_full: Dict[ShellVersion, Dict[str, str]] = {}
    for shell_version, extension_version in extension_versions.items():
        sha256, metadata = extension_info_cache[extension_version]
        extension_versions_full[shell_version] = {
            "version": str(extension_version),
            "sha256": sha256,
            # The downloads are impure, their metadata.json may change at any time.
            # Thus, we back it up / pin it to remain deterministic
            # Upstream issue: https://gitlab.gnome.org/Infrastructure/extensions-web/-/issues/137
            "metadata": metadata,
        }
    return extension_versions_full
def pname_from_url(url: str) -> Tuple[str, str]:
    """
    Parse something like "/extension/1475/battery-time/" and output ("battery-time", "1475")
    """
    # "/extension/1475/battery-time/".split("/") -> ["", "extension", "1475", "battery-time", ""]
    segments = url.split("/")
    return segments[3], segments[2]
def process_extension(extension: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """
    Process an extension. It takes in raw scraped data and downloads all the necessary information that buildGnomeExtension.nix requires

    Input: a json object of one extension queried from the site. It has the following schema (only important keys listed):
    {
        "uuid": str,
        "name": str,
        "description": str,
        "link": str,
        "shell_version_map": {
            str: { "version": int, … },
            …
        },
        "pk": int,
    }

    "uuid" is an extension UUID that looks like this (most of the time): "extension-name@username.domain.tld".
    Don't make any assumptions on it, and treat it like an opaque string!
    "link" follows the following schema: "/extension/$number/$string/"
    The number is monotonically increasing and unique to every extension.
    The string is usually derived from the extension name (but shortened, kebab-cased and URL friendly).
    It may diverge from the actual name.
    The keys of "shell_version_map" are GNOME Shell version numbers.

    Output: a json object to be stored, or None if the extension should be skipped. Schema:
    {
        "uuid": str,
        "name": str,
        "pname": str,
        "description": str,
        "link": str,
        "shell_version_map": {
            str: { "version": int, "sha256": str, "metadata": <hex> },
            …
        },
    }

    Only "uuid" gets passed along unmodified. "name", "description" and "link" are taken from the input, but sanitized.
    "pname" gets generated from other fields and "shell_version_map" has a completely different structure than the input
    field with the same name.
    """
    uuid = extension["uuid"]

    # Yeah, there are some extensions without any releases
    if not extension["shell_version_map"]:
        return None
    logging.info(f"Processing '{uuid}'")

    # Input is a mapping str -> { version: int, … }
    # We want to map shell versions to extension versions
    shell_version_map: Dict[ShellVersion, int] = {
        k: v["version"] for k, v in extension["shell_version_map"].items()
    }
    # Transform shell_version_map to be more useful for us. Also throw away unwanted versions
    shell_version_map: Dict[ShellVersion, Dict[str, str]] = generate_extension_versions(shell_version_map, uuid)  # type: ignore

    # No compatible versions found
    if not shell_version_map:
        return None

    # Fetch a human-readable name for the package.
    (pname, _pname_id) = pname_from_url(extension["link"])

    # Record the pname per shell version so collisions can be reported later
    for shell_version in shell_version_map.keys():
        if pname in package_name_registry[shell_version]:
            logging.warning(f"Package name '{pname}' for GNOME '{shell_version}' is colliding.")
            package_name_registry[shell_version][pname].append(uuid)
        else:
            package_name_registry[shell_version][pname] = [uuid]

    return {
        "uuid": uuid,
        "name": extension["name"],
        "pname": pname,
        "description": extension["description"],
        "link": "https://extensions.gnome.org" + extension["link"],
        "shell_version_map": shell_version_map,
    }
def scrape_extensions_index() -> List[Dict[str, Any]]:
    """
    Scrape the list of extensions by sending search queries to the API. We simply go over it
    page by page until we hit a non-full page or a 404 error.

    The returned list is sorted by the age of the extension, in order to be deterministic.
    """
    page = 0
    extensions: List[Dict[str, Any]] = []
    while True:
        page += 1
        logging.info("Scraping page " + str(page))
        try:
            with urllib.request.urlopen(
                f"https://extensions.gnome.org/extension-query/?n_per_page=25&page={page}"
            ) as response:
                data = json.loads(response.read().decode())["extensions"]
                response_length = len(data)

                for extension in data:
                    extensions.append(extension)

                # If our page isn't "full", it must have been the last one
                if response_length < 25:
                    logging.debug(
                        f"\tThis page only has {response_length} entries, so it must be the last one."
                    )
                    break
        except urllib.error.HTTPError as e:
            if e.code == 404:
                # We reached past the last page and are done now
                break
            raise

    # `pk` is the primary key in the extensions.gnome.org database. Sorting on it will give us a stable,
    # deterministic ordering.
    extensions.sort(key=itemgetter("pk"))
    return extensions
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)

    raw_extensions = scrape_extensions_index()

    logging.info(f"Downloaded {len(raw_extensions)} extensions. Processing …")
    processed_extensions: List[Dict[str, Any]] = []
    for num, raw_extension in enumerate(raw_extensions):
        processed_extension = process_extension(raw_extension)
        if processed_extension:
            processed_extensions.append(processed_extension)
        logging.debug(f"Processed {num + 1} / {len(raw_extensions)}")

    # We micro-manage a lot of the serialization process to keep the diffs optimal.
    # We generally want most of the attributes of an extension on one line,
    # but then each of its supported versions with metadata on a new line.
    with open(updater_dir_path / "extensions.json", "w") as out:
        # Manually pretty-print the outermost array level. Writing "[" / "]"
        # unconditionally keeps the output valid JSON even for an empty list.
        out.write("[")
        for index, extension in enumerate(processed_extensions):
            if index > 0:
                out.write(",")
            out.write("\n")
            # Dump each extension into a single-line string first
            extension = json.dumps(extension, ensure_ascii=False)
            # Inject line breaks for each supported version
            # NOTE(review): the indentation inside these replacement strings was
            # reconstructed from a whitespace-collapsed source — confirm against git history.
            for version in supported_versions:
                # This one only matches the first entry
                extension = extension.replace(f"{{\"{version}\": {{", f"{{\n \"{version}\": {{")
                # This one matches every following entry
                extension = extension.replace(f", \"{version}\": {{", f",\n \"{version}\": {{")
            # One last line break around the closing braces
            extension = extension.replace("}}}", "}\n }}")
            out.write(extension)
        out.write("\n]\n")
    logging.info(
        f"Done. Writing results to extensions.json ({len(processed_extensions)} extensions in total)"
    )

    with open(updater_dir_path / "extensions.json", "r") as out:
        # Check that the generated file actually is valid JSON, just to be sure
        json.load(out)

    with open(updater_dir_path / "collisions.json", "w") as out:
        # Find the name collisions only for the last 3 shell versions
        last_3_versions = sorted(supported_versions.keys(), key=lambda v: float(v), reverse=True)[:3]
        package_name_registry_for_versions = [
            v for k, v in package_name_registry.items() if k in last_3_versions
        ]
        # Merge all package names into a single dictionary
        package_name_registry_filtered: Dict[PackageName, Set[Uuid]] = {}
        for pkgs in package_name_registry_for_versions:
            for pname, uuids in pkgs.items():
                package_name_registry_filtered.setdefault(pname, set()).update(uuids)
        # Filter out those that are not duplicates
        package_name_registry_filtered = {
            k: v for k, v in package_name_registry_filtered.items() if len(v) > 1
        }
        # Convert set to list; sorted so the output file is deterministic
        # (plain list(set) ordering varies across runs due to hash randomization)
        collisions: Dict[PackageName, List[Uuid]] = {
            k: sorted(v) for k, v in package_name_registry_filtered.items()
        }
        json.dump(collisions, out, indent=2, ensure_ascii=False)
    logging.info(
        "Done. Writing name collisions to collisions.json (please check manually)"
    )