4 import multiprocessing
as mp
10 from pathlib
import Path
11 from typing
import Any
, TypedDict
, cast
# Log helper: behaves exactly like print() but writes to stderr, keeping
# stdout clean for data output (this script logs progress while vendoring).
eprint = functools.partial(print, file=sys.stderr)
18 def load_toml(path
: Path
) -> dict[str, Any
]:
19 with
open(path
, "rb") as f
:
20 return tomllib
.load(f
)
def download_file_with_checksum(url: str, destination_path: Path) -> str:
    """Stream *url* to *destination_path* and return its SHA-256 hex digest.

    The checksum is computed incrementally while writing, so the file is
    never held in memory as a whole.

    Raises:
        Exception: if the HTTP response is not successful.
    """
    sha256_hash = hashlib.sha256()
    with requests.get(url, stream=True) as response:
        # Fail fast on any non-2xx status instead of hashing an error page.
        if not response.ok:
            raise Exception(f"Failed to fetch file from {url}. Status code: {response.status_code}")
        with open(destination_path, "wb") as file:
            for chunk in response.iter_content(1024):  # Download in chunks
                if chunk:  # Filter out keep-alive chunks
                    file.write(chunk)
                    sha256_hash.update(chunk)

    # Compute the final checksum
    checksum = sha256_hash.hexdigest()
    return checksum
def get_download_url_for_tarball(pkg: dict[str, Any]) -> str:
    """Build the crates.io download URL for a registry package entry.

    Raises:
        Exception: if the package comes from any registry other than the
            default crates.io index.
    """
    # TODO: support other registries
    # maybe fetch config.json from the registry root and get the dl key
    # See: https://doc.rust-lang.org/cargo/reference/registry-index.html#index-configuration
    if pkg["source"] != "registry+https://github.com/rust-lang/crates.io-index":
        raise Exception("Only the default crates.io registry is supported.")

    name = pkg["name"]
    version = pkg["version"]
    return f"https://crates.io/api/v1/crates/{name}/{version}/download"
def download_tarball(pkg: dict[str, Any], out_dir: Path) -> None:
    """Download the crate tarball for *pkg* into ``out_dir/tarballs`` and
    verify it against the lockfile checksum.

    Raises:
        Exception: if the download fails or the checksum does not match.
    """
    url = get_download_url_for_tarball(pkg)
    filename = f"{pkg['name']}-{pkg['version']}.tar.gz"

    # TODO: allow legacy checksum specification, see importCargoLock for example
    # also, don't forget about the other usage of the checksum
    expected_checksum = pkg["checksum"]

    tarball_out_dir = out_dir / "tarballs" / filename
    # Fixed: log the actual destination filename instead of a placeholder.
    eprint(f"Fetching {url} -> tarballs/{filename}")

    calculated_checksum = download_file_with_checksum(url, tarball_out_dir)

    if calculated_checksum != expected_checksum:
        raise Exception(f"Hash mismatch! File fetched from {url} had checksum {calculated_checksum}, expected {expected_checksum}.")
def download_git_tree(url: str, git_sha_rev: str, out_dir: Path) -> None:
    """Prefetch the git repository *url* at revision *git_sha_rev* into
    ``out_dir/git/<git_sha_rev>`` using nix-prefetch-git."""
    tree_out_dir = out_dir / "git" / git_sha_rev
    eprint(f"Fetching {url}#{git_sha_rev} -> git/{git_sha_rev}")

    subprocess.check_output([
        "nix-prefetch-git",
        "--builder",
        "--quiet",
        "--url", url,
        "--rev", git_sha_rev,
        "--out", str(tree_out_dir),
    ])
# Matches Cargo git source strings such as
#   git+https://host/repo?rev=abc123#deadbeef
# capturing the repository URL, an optional ref specifier (rev|tag|branch)
# with its value, and the resolved commit hash after '#'.
GIT_SOURCE_REGEX = re.compile(
    r"git\+(?P<url>[^?]+)(\?(?P<type>rev|tag|branch)=(?P<value>.*))?#(?P<git_sha_rev>.*)"
)
79 class GitSourceInfo(TypedDict
):
def parse_git_source(source: str) -> GitSourceInfo:
    """Split a Cargo git source string into its components.

    Raises:
        Exception: if *source* does not match GIT_SOURCE_REGEX.
    """
    match = GIT_SOURCE_REGEX.match(source)
    # Only raise when the regex did not match at all.
    if match is None:
        raise Exception(f"Unable to process git source: {source}.")
    # Unmatched optional groups (type/value) become None.
    return cast(GitSourceInfo, match.groupdict(default=None))
def create_vendor_staging(lockfile_path: Path, out_dir: Path) -> None:
    """Download every remote dependency from the Cargo lockfile into *out_dir*.

    Git sources are prefetched into ``out_dir/git/<rev>`` and registry crates
    into ``out_dir/tarballs``; the lockfile itself is copied alongside them.

    Raises:
        Exception: if a package source is neither git nor a registry.
    """
    cargo_toml = load_toml(lockfile_path)

    git_packages: list[dict[str, Any]] = []
    registry_packages: list[dict[str, Any]] = []

    for pkg in cargo_toml["package"]:
        # ignore local dependencies (they have no "source" entry)
        if "source" not in pkg.keys():
            eprint(f"Skipping local dependency: {pkg['name']}")
            continue
        source = pkg["source"]

        if source.startswith("git+"):
            git_packages.append(pkg)
        elif source.startswith("registry+"):
            registry_packages.append(pkg)
        else:
            raise Exception(f"Can't process source: {source}.")

    # Deduplicate git downloads: multiple packages may resolve to the same rev.
    git_sha_rev_to_url: dict[str, str] = {}
    for pkg in git_packages:
        source_info = parse_git_source(pkg["source"])
        git_sha_rev_to_url[source_info["git_sha_rev"]] = source_info["url"]

    out_dir.mkdir(exist_ok=True)
    shutil.copy(lockfile_path, out_dir / "Cargo.lock")

    # create a pool with at most 10 concurrent jobs
    with mp.Pool(min(10, mp.cpu_count())) as pool:
        if len(git_packages) != 0:
            (out_dir / "git").mkdir()
            # run download jobs in parallel
            git_args_gen = ((url, git_sha_rev, out_dir) for git_sha_rev, url in git_sha_rev_to_url.items())
            pool.starmap(download_git_tree, git_args_gen)

        if len(registry_packages) != 0:
            (out_dir / "tarballs").mkdir()
            # run download jobs in parallel
            tarball_args_gen = ((pkg, out_dir) for pkg in registry_packages)
            pool.starmap(download_tarball, tarball_args_gen)
def get_manifest_metadata(manifest_path: Path) -> dict[str, Any]:
    """Return `cargo metadata` (format version 1, without dependencies) for
    the manifest at *manifest_path*, parsed from JSON."""
    raw = subprocess.check_output([
        "cargo", "metadata",
        "--format-version", "1",
        "--no-deps",
        "--manifest-path", str(manifest_path),
    ])
    return json.loads(raw)
def try_get_crate_manifest_path_from_mainfest_path(manifest_path: Path, crate_name: str) -> Path | None:
    """Return the manifest path of *crate_name* as reported by the metadata
    of *manifest_path*, or None when the crate is not declared there."""
    metadata = get_manifest_metadata(manifest_path)
    candidates = (
        Path(entry["manifest_path"])
        for entry in metadata["packages"]
        if entry["name"] == crate_name
    )
    return next(candidates, None)
def find_crate_manifest_in_tree(tree: Path, crate_name: str) -> Path:
    """Locate the Cargo.toml that declares *crate_name* anywhere inside *tree*.

    Raises:
        Exception: if no manifest in the tree declares the crate.
    """
    # in some cases Cargo.toml is not located at the top level, so we also look at subdirectories
    manifest_paths = tree.glob("**/Cargo.toml")

    for manifest_path in manifest_paths:
        res = try_get_crate_manifest_path_from_mainfest_path(manifest_path, crate_name)
        # Return the first manifest that actually declares the crate.
        if res is not None:
            return res

    raise Exception(f"Couldn't find manifest for crate {crate_name} inside {tree}.")
def copy_and_patch_git_crate_subtree(git_tree: Path, crate_name: str, crate_out_dir: Path) -> None:
    """Copy the subtree of *git_tree* containing *crate_name* to *crate_out_dir*.

    When the crate's manifest mentions workspace inheritance, the copied
    Cargo.toml is patched in place via `replace-workspace-values`, using the
    workspace root manifest reported by cargo metadata.
    """
    manifest_in_tree = find_crate_manifest_in_tree(git_tree, crate_name)

    eprint(f"Copying to {crate_out_dir}")
    shutil.copytree(manifest_in_tree.parent, crate_out_dir)
    crate_out_dir.chmod(0o755)

    with open(manifest_in_tree, "r") as fh:
        manifest_text = fh.read()

    # Only crates that reference workspace values need patching.
    if "workspace" in manifest_text:
        metadata = get_manifest_metadata(manifest_in_tree)
        root_manifest = Path(metadata["workspace_root"]) / "Cargo.toml"
        copied_manifest = crate_out_dir / "Cargo.toml"

        copied_manifest.chmod(0o644)
        eprint(f"Patching {copied_manifest}")

        subprocess.check_output(
            ["replace-workspace-values", str(copied_manifest), str(root_manifest)]
        )
def extract_crate_tarball_contents(tarball_path: Path, crate_out_dir: Path) -> None:
    """Unpack *tarball_path* into a newly created *crate_out_dir*, stripping
    the tarball's single top-level directory."""
    eprint(f"Unpacking to {crate_out_dir}")
    crate_out_dir.mkdir()
    subprocess.check_output(
        ["tar", "xf", str(tarball_path), "-C", str(crate_out_dir), "--strip-components=1"]
    )
def create_vendor(vendor_staging_dir: Path, out_dir: Path) -> None:
    """Assemble a cargo vendor directory in *out_dir* from a staging dir
    produced by create_vendor_staging.

    Each remote crate is unpacked/copied into ``out_dir/<name>-<version>``
    with a ``.cargo-checksum.json``, and a ``.cargo/config.toml`` is written
    that redirects crates.io and every git source to the vendored copies.

    Raises:
        Exception: if a package source is neither git nor a registry.
    """
    lockfile_path = vendor_staging_dir / "Cargo.lock"
    out_dir.mkdir(exist_ok=True)
    shutil.copy(lockfile_path, out_dir / "Cargo.lock")

    cargo_toml = load_toml(lockfile_path)

    # Base config: replace crates.io with the vendored directory
    # (@vendor@ is substituted later by the build machinery).
    config_lines = [
        '[source.vendored-sources]',
        'directory = "@vendor@"',
        '[source.crates-io]',
        'replace-with = "vendored-sources"',
    ]

    seen_source_keys: set[str] = set()
    for pkg in cargo_toml["package"]:

        # ignore local dependencies (they have no "source" entry)
        if "source" not in pkg.keys():
            continue

        source: str = pkg["source"]

        dir_name = f"{pkg['name']}-{pkg['version']}"
        crate_out_dir = out_dir / dir_name

        if source.startswith("git+"):

            source_info = parse_git_source(pkg["source"])
            git_sha_rev = source_info["git_sha_rev"]
            git_tree = vendor_staging_dir / "git" / git_sha_rev

            copy_and_patch_git_crate_subtree(git_tree, pkg["name"], crate_out_dir)

            # git based crates allow having no checksum information
            with open(crate_out_dir / ".cargo-checksum.json", "w") as f:
                json.dump({"files": {}}, f)

            # The source key is the source string without the trailing
            # '#<commit>'; one config section per distinct key.
            source_key = source[0:source.find("#")]

            if source_key in seen_source_keys:
                continue

            seen_source_keys.add(source_key)

            config_lines.append(f'[source."{source_key}"]')
            config_lines.append(f'git = "{source_info["url"]}"')
            if source_info["type"] is not None:
                config_lines.append(f'{source_info["type"]} = "{source_info["value"]}"')
            config_lines.append('replace-with = "vendored-sources"')

        elif source.startswith("registry+"):

            filename = f"{pkg['name']}-{pkg['version']}.tar.gz"
            tarball_path = vendor_staging_dir / "tarballs" / filename

            extract_crate_tarball_contents(tarball_path, crate_out_dir)

            # non-git based crates need the package checksum at minimum
            with open(crate_out_dir / ".cargo-checksum.json", "w") as f:
                json.dump({"files": {}, "package": pkg["checksum"]}, f)

        else:
            raise Exception(f"Can't process source: {source}.")

    (out_dir / ".cargo").mkdir()
    with open(out_dir / ".cargo" / "config.toml", "w") as config_file:
        config_file.writelines(line + "\n" for line in config_lines)
268 subcommand
= sys
.argv
[1]
270 subcommand_func_dict
= {
271 "create-vendor-staging": lambda: create_vendor_staging(lockfile_path
=Path(sys
.argv
[2]), out_dir
=Path(sys
.argv
[3])),
272 "create-vendor": lambda: create_vendor(vendor_staging_dir
=Path(sys
.argv
[2]), out_dir
=Path(sys
.argv
[3]))
275 subcommand_func
= subcommand_func_dict
.get(subcommand
)
277 if subcommand_func
is None:
278 raise Exception(f
"Unknown subcommand: '{subcommand}'. Must be one of {list(subcommand_func_dict.keys())}")
283 if __name__
== "__main__":