anvil-editor: init at 0.4
[NixPkgs.git] / pkgs / build-support / rust / fetch-cargo-vendor-util.py
blob33cac9b83d01e2607d1683efbe8715a8343487d6
1 import functools
2 import hashlib
3 import json
4 import multiprocessing as mp
5 import re
6 import shutil
7 import subprocess
8 import sys
9 import tomllib
10 from pathlib import Path
11 from typing import Any, TypedDict, cast
13 import requests
# Print to stderr, so progress/log output never mixes with data written to stdout.
eprint = functools.partial(print, file=sys.stderr)
18 def load_toml(path: Path) -> dict[str, Any]:
19 with open(path, "rb") as f:
20 return tomllib.load(f)
def download_file_with_checksum(url: str, destination_path: Path) -> str:
    """Stream *url* into *destination_path* and return the SHA-256 hex digest of the data.

    Raises an Exception when the HTTP response is not successful.
    """
    hasher = hashlib.sha256()
    with requests.get(url, stream=True) as response:
        if not response.ok:
            raise Exception(f"Failed to fetch file from {url}. Status code: {response.status_code}")
        with open(destination_path, "wb") as out_file:
            # download in chunks; empty chunks are keep-alives and carry no data
            for chunk in response.iter_content(1024):
                if not chunk:
                    continue
                out_file.write(chunk)
                hasher.update(chunk)
    # digest was accumulated while streaming, so the file is never re-read
    return hasher.hexdigest()
def get_download_url_for_tarball(pkg: dict[str, Any]) -> str:
    """Return the crates.io download URL for the locked package *pkg*."""
    # TODO: support other registries
    # maybe fetch config.json from the registry root and get the dl key
    # See: https://doc.rust-lang.org/cargo/reference/registry-index.html#index-configuration
    crates_io_source = "registry+https://github.com/rust-lang/crates.io-index"
    if pkg["source"] != crates_io_source:
        raise Exception("Only the default crates.io registry is supported.")

    name, version = pkg["name"], pkg["version"]
    return f"https://crates.io/api/v1/crates/{name}/{version}/download"
def download_tarball(pkg: dict[str, Any], out_dir: Path) -> None:
    """Download the crate tarball for *pkg* into out_dir/tarballs/ and verify it.

    Raises an Exception when the downloaded data does not match the checksum
    recorded in Cargo.lock.
    """
    url = get_download_url_for_tarball(pkg)
    filename = f"{pkg['name']}-{pkg['version']}.tar.gz"

    # TODO: allow legacy checksum specification, see importCargoLock for example
    # also, don't forget about the other usage of the checksum
    expected_checksum = pkg["checksum"]

    tarball_out_dir = out_dir / "tarballs" / filename
    # Bug fix: this log line previously printed the literal placeholder
    # "tarballs/(unknown)" instead of the actual destination filename.
    eprint(f"Fetching {url} -> tarballs/{filename}")

    calculated_checksum = download_file_with_checksum(url, tarball_out_dir)

    if calculated_checksum != expected_checksum:
        raise Exception(f"Hash mismatch! File fetched from {url} had checksum {calculated_checksum}, expected {expected_checksum}.")
def download_git_tree(url: str, git_sha_rev: str, out_dir: Path) -> None:
    """Fetch the git tree at *url* / *git_sha_rev* into out_dir/git/<rev> via nix-prefetch-git."""
    destination = out_dir / "git" / git_sha_rev
    eprint(f"Fetching {url}#{git_sha_rev} -> git/{git_sha_rev}")

    subprocess.check_output([
        "nix-prefetch-git",
        "--builder",
        "--quiet",
        "--url", url,
        "--rev", git_sha_rev,
        "--out", str(destination),
    ])
76 GIT_SOURCE_REGEX = re.compile("git\\+(?P<url>[^?]+)(\\?(?P<type>rev|tag|branch)=(?P<value>.*))?#(?P<git_sha_rev>.*)")
79 class GitSourceInfo(TypedDict):
80 url: str
81 type: str | None
82 value: str | None
83 git_sha_rev: str
86 def parse_git_source(source: str) -> GitSourceInfo:
87 match = GIT_SOURCE_REGEX.match(source)
88 if match is None:
89 raise Exception(f"Unable to process git source: {source}.")
90 return cast(GitSourceInfo, match.groupdict(default=None))
def create_vendor_staging(lockfile_path: Path, out_dir: Path) -> None:
    """Download every remote dependency listed in the lockfile into *out_dir*.

    Git trees land in out_dir/git/<sha>, registry crates in out_dir/tarballs/;
    the lockfile is copied alongside for the later create-vendor step.
    """
    lockfile = load_toml(lockfile_path)

    git_packages: list[dict[str, Any]] = []
    registry_packages: list[dict[str, Any]] = []

    for pkg in lockfile["package"]:
        # packages without a "source" key are local dependencies; nothing to fetch
        source = pkg.get("source")
        if source is None:
            eprint(f"Skipping local dependency: {pkg['name']}")
        elif source.startswith("git+"):
            git_packages.append(pkg)
        elif source.startswith("registry+"):
            registry_packages.append(pkg)
        else:
            raise Exception(f"Can't process source: {source}.")

    # several crates may share one git revision; fetch each revision only once
    git_sha_rev_to_url: dict[str, str] = {}
    for pkg in git_packages:
        info = parse_git_source(pkg["source"])
        git_sha_rev_to_url[info["git_sha_rev"]] = info["url"]

    out_dir.mkdir(exist_ok=True)
    shutil.copy(lockfile_path, out_dir / "Cargo.lock")

    # create a pool with at most 10 concurrent jobs
    with mp.Pool(min(10, mp.cpu_count())) as pool:
        if git_packages:
            (out_dir / "git").mkdir()
            # run download jobs in parallel
            pool.starmap(download_git_tree,
                         ((url, sha, out_dir) for sha, url in git_sha_rev_to_url.items()))

        if registry_packages:
            (out_dir / "tarballs").mkdir()
            # run download jobs in parallel
            pool.starmap(download_tarball,
                         ((pkg, out_dir) for pkg in registry_packages))
def get_manifest_metadata(manifest_path: Path) -> dict[str, Any]:
    """Return `cargo metadata` (format v1, without dependencies) for *manifest_path*."""
    cmd = [
        "cargo", "metadata",
        "--format-version", "1",
        "--no-deps",
        "--manifest-path", str(manifest_path),
    ]
    return json.loads(subprocess.check_output(cmd))
def try_get_crate_manifest_path_from_mainfest_path(manifest_path: Path, crate_name: str) -> Path | None:
    """Return the manifest path of *crate_name* within the workspace at *manifest_path*.

    Returns None when the crate is not part of that workspace.
    (NOTE(review): the "mainfest" typo in the name is kept for caller compatibility.)
    """
    metadata = get_manifest_metadata(manifest_path)
    candidates = (
        Path(pkg["manifest_path"])
        for pkg in metadata["packages"]
        if pkg["name"] == crate_name
    )
    return next(candidates, None)
def find_crate_manifest_in_tree(tree: Path, crate_name: str) -> Path:
    """Locate the Cargo.toml belonging to *crate_name* somewhere inside *tree*."""
    # in some cases Cargo.toml is not located at the top level, so we also look at subdirectories
    for candidate in tree.glob("**/Cargo.toml"):
        found = try_get_crate_manifest_path_from_mainfest_path(candidate, crate_name)
        if found is not None:
            return found

    raise Exception(f"Couldn't find manifest for crate {crate_name} inside {tree}.")
def copy_and_patch_git_crate_subtree(git_tree: Path, crate_name: str, crate_out_dir: Path) -> None:
    """Copy the subtree of *crate_name* out of a fetched git tree into *crate_out_dir*,
    then inline workspace-inherited manifest values so the crate builds standalone.
    """
    crate_manifest_path = find_crate_manifest_in_tree(git_tree, crate_name)
    crate_tree = crate_manifest_path.parent

    eprint(f"Copying to {crate_out_dir}")
    shutil.copytree(crate_tree, crate_out_dir)
    # presumably ensures the copied tree is writable (source may be read-only) — TODO confirm
    crate_out_dir.chmod(0o755)

    with open(crate_manifest_path, "r") as f:
        manifest_data = f.read()

    # cheap substring test: only run the metadata lookup + patch step when the
    # manifest mentions "workspace" anywhere at all
    if "workspace" in manifest_data:
        crate_manifest_metadata = get_manifest_metadata(crate_manifest_path)
        workspace_root = Path(crate_manifest_metadata["workspace_root"])

        root_manifest_path = workspace_root / "Cargo.toml"
        manifest_path = crate_out_dir / "Cargo.toml"

        # make the copied manifest writable before patching it in place
        manifest_path.chmod(0o644)
        eprint(f"Patching {manifest_path}")

        # external helper tool; replaces workspace-inherited values with concrete ones
        cmd = ["replace-workspace-values", str(manifest_path), str(root_manifest_path)]
        subprocess.check_output(cmd)
def extract_crate_tarball_contents(tarball_path: Path, crate_out_dir: Path) -> None:
    """Unpack *tarball_path* into a freshly created *crate_out_dir*, stripping the top-level dir."""
    eprint(f"Unpacking to {crate_out_dir}")
    crate_out_dir.mkdir()
    subprocess.check_output([
        "tar", "xf", str(tarball_path),
        "-C", str(crate_out_dir),
        "--strip-components=1",
    ])
def create_vendor(vendor_staging_dir: Path, out_dir: Path) -> None:
    """Turn a vendor-staging directory into a cargo vendor directory.

    Registry crates are unpacked from their downloaded tarballs; git crates are
    copied out of their fetched trees and patched.  Finally a
    .cargo/config.toml is written that redirects every source to the vendored
    copies (with "@vendor@" as a placeholder for the final directory).
    """
    lockfile_path = vendor_staging_dir / "Cargo.lock"
    out_dir.mkdir(exist_ok=True)
    shutil.copy(lockfile_path, out_dir / "Cargo.lock")

    lockfile = load_toml(lockfile_path)

    config_lines = [
        '[source.vendored-sources]',
        'directory = "@vendor@"',
        '[source.crates-io]',
        'replace-with = "vendored-sources"',
    ]

    seen_source_keys = set()
    for pkg in lockfile["package"]:

        # local dependencies carry no "source" key and are not vendored
        if "source" not in pkg:
            continue

        source: str = pkg["source"]

        crate_out_dir = out_dir / f"{pkg['name']}-{pkg['version']}"

        if source.startswith("git+"):

            source_info = parse_git_source(pkg["source"])
            git_sha_rev = source_info["git_sha_rev"]
            git_tree = vendor_staging_dir / "git" / git_sha_rev

            copy_and_patch_git_crate_subtree(git_tree, pkg["name"], crate_out_dir)

            # git based crates allow having no checksum information
            with open(crate_out_dir / ".cargo-checksum.json", "w") as checksum_file:
                json.dump({"files": {}}, checksum_file)

            # everything before "#" identifies the git source; emit each
            # source's config section only once
            source_key = source[0:source.find("#")]
            if source_key in seen_source_keys:
                continue
            seen_source_keys.add(source_key)

            config_lines.append(f'[source."{source_key}"]')
            config_lines.append(f'git = "{source_info["url"]}"')
            if source_info["type"] is not None:
                config_lines.append(f'{source_info["type"]} = "{source_info["value"]}"')
            config_lines.append('replace-with = "vendored-sources"')

        elif source.startswith("registry+"):

            tarball_path = vendor_staging_dir / "tarballs" / f"{pkg['name']}-{pkg['version']}.tar.gz"

            extract_crate_tarball_contents(tarball_path, crate_out_dir)

            # non-git based crates need the package checksum at minimum
            with open(crate_out_dir / ".cargo-checksum.json", "w") as checksum_file:
                json.dump({"files": {}, "package": pkg["checksum"]}, checksum_file)

        else:
            raise Exception(f"Can't process source: {source}.")

    (out_dir / ".cargo").mkdir()
    with open(out_dir / ".cargo" / "config.toml", "w") as config_file:
        config_file.writelines(line + "\n" for line in config_lines)
def main() -> None:
    """CLI entry point: dispatch on the subcommand given as the first argument."""
    subcommand = sys.argv[1]

    dispatch = {
        "create-vendor-staging": lambda: create_vendor_staging(lockfile_path=Path(sys.argv[2]), out_dir=Path(sys.argv[3])),
        "create-vendor": lambda: create_vendor(vendor_staging_dir=Path(sys.argv[2]), out_dir=Path(sys.argv[3])),
    }

    handler = dispatch.get(subcommand)
    if handler is None:
        raise Exception(f"Unknown subcommand: '{subcommand}'. Must be one of {list(dispatch.keys())}")

    handler()


if __name__ == "__main__":
    main()