ocamlPackages.hxd: 0.3.2 -> 0.3.3 (#364231)
[NixPkgs.git] / pkgs / build-support / node / fetch-npm-deps / src / parse / mod.rs
blob0bca33f039152945dc51e17b69078272a40acf21
1 use anyhow::{anyhow, bail, Context};
2 use lock::UrlOrString;
3 use log::{debug, info};
4 use rayon::prelude::*;
5 use serde_json::{Map, Value};
6 use std::{
7     fs,
8     io::Write,
9     process::{Command, Stdio},
11 use tempfile::{tempdir, TempDir};
12 use url::Url;
14 use crate::util;
16 pub mod lock;
18 pub fn lockfile(
19     content: &str,
20     force_git_deps: bool,
21     force_empty_cache: bool,
22 ) -> anyhow::Result<Vec<Package>> {
23     debug!("parsing lockfile with contents:\n{content}");
25     let mut packages = lock::packages(content)
26         .context("failed to extract packages from lockfile")?
27         .into_par_iter()
28         .map(|p| {
29             let n = p.name.clone().unwrap();
31             Package::from_lock(p).with_context(|| format!("failed to parse data for {n}"))
32         })
33         .collect::<anyhow::Result<Vec<_>>>()?;
35     if packages.is_empty() && !force_empty_cache {
36         bail!("No cacheable dependencies were found. Please inspect the upstream `package-lock.json` file and ensure that remote dependencies have `resolved` URLs and `integrity` hashes. If the lockfile is missing this data, attempt to get upstream to fix it via a tool like <https://github.com/jeslie0/npm-lockfile-fix>. If generating an empty cache is intentional and you would like to do it anyways, set `forceEmptyCache = true`.");
37     }
39     let mut new = Vec::new();
41     for pkg in packages
42         .iter()
43         .filter(|p| matches!(p.specifics, Specifics::Git { .. }))
44     {
45         let dir = match &pkg.specifics {
46             Specifics::Git { workdir } => workdir,
47             Specifics::Registry { .. } => unimplemented!(),
48         };
50         let path = dir.path().join("package");
52         info!("recursively parsing lockfile for {} at {path:?}", pkg.name);
54         let lockfile_contents = fs::read_to_string(path.join("package-lock.json"));
56         let package_json_path = path.join("package.json");
57         let mut package_json: Map<String, Value> =
58             serde_json::from_str(&fs::read_to_string(package_json_path)?)?;
60         if let Some(scripts) = package_json
61             .get_mut("scripts")
62             .and_then(Value::as_object_mut)
63         {
64             // https://github.com/npm/pacote/blob/272edc1bac06991fc5f95d06342334bbacfbaa4b/lib/git.js#L166-L172
65             for typ in [
66                 "postinstall",
67                 "build",
68                 "preinstall",
69                 "install",
70                 "prepack",
71                 "prepare",
72             ] {
73                 if scripts.contains_key(typ) && lockfile_contents.is_err() && !force_git_deps {
74                     bail!("Git dependency {} contains install scripts, but has no lockfile, which is something that will probably break. Open an issue if you can't feasibly patch this dependency out, and we'll come up with a workaround.\nIf you'd like to attempt to try to use this dependency anyways, set `forceGitDeps = true`.", pkg.name);
75                 }
76             }
77         }
79         if let Ok(lockfile_contents) = lockfile_contents {
80             new.append(&mut lockfile(
81                 &lockfile_contents,
82                 force_git_deps,
83                 // force_empty_cache is turned on here since recursively parsed lockfiles should be
84                 // allowed to have an empty cache without erroring by default
85                 true,
86             )?);
87         }
88     }
90     packages.append(&mut new);
92     packages.par_sort_by(|x, y| {
93         x.url
94             .partial_cmp(&y.url)
95             .expect("resolved should be comparable")
96     });
98     packages.dedup_by(|x, y| x.url == y.url);
100     Ok(packages)
103 #[derive(Debug)]
104 pub struct Package {
105     pub name: String,
106     pub url: Url,
107     specifics: Specifics,
110 #[derive(Debug)]
111 enum Specifics {
112     Registry { integrity: lock::Hash },
113     Git { workdir: TempDir },
116 impl Package {
117     fn from_lock(pkg: lock::Package) -> anyhow::Result<Package> {
118         let mut resolved = match pkg
119             .resolved
120             .expect("at this point, packages should have URLs")
121         {
122             UrlOrString::Url(u) => u,
123             UrlOrString::String(_) => panic!("at this point, all packages should have URLs"),
124         };
126         let specifics = match get_hosted_git_url(&resolved)? {
127             Some(hosted) => {
128                 let body = util::get_url_body_with_retry(&hosted)?;
130                 let workdir = tempdir()?;
132                 let tar_path = workdir.path().join("package");
134                 fs::create_dir(&tar_path)?;
136                 let mut cmd = Command::new("tar")
137                     .args(["--extract", "--gzip", "--strip-components=1", "-C"])
138                     .arg(&tar_path)
139                     .stdin(Stdio::piped())
140                     .spawn()?;
142                 cmd.stdin.take().unwrap().write_all(&body)?;
144                 let exit = cmd.wait()?;
146                 if !exit.success() {
147                     bail!(
148                         "failed to extract tarball for {}: tar exited with status code {}",
149                         pkg.name.unwrap(),
150                         exit.code().unwrap()
151                     );
152                 }
154                 resolved = hosted;
156                 Specifics::Git { workdir }
157             }
158             None => Specifics::Registry {
159                 integrity: pkg
160                     .integrity
161                     .expect("non-git dependencies should have associated integrity")
162                     .into_best()
163                     .expect("non-git dependencies should have non-empty associated integrity"),
164             },
165         };
167         Ok(Package {
168             name: pkg.name.unwrap(),
169             url: resolved,
170             specifics,
171         })
172     }
174     pub fn tarball(&self) -> anyhow::Result<Vec<u8>> {
175         match &self.specifics {
176             Specifics::Registry { .. } => Ok(util::get_url_body_with_retry(&self.url)?),
177             Specifics::Git { workdir } => Ok(Command::new("tar")
178                 .args([
179                     "--sort=name",
180                     "--mtime=@0",
181                     "--owner=0",
182                     "--group=0",
183                     "--numeric-owner",
184                     "--format=gnu",
185                     "-I",
186                     "gzip -n -9",
187                     "--create",
188                     "-C",
189                 ])
190                 .arg(workdir.path())
191                 .arg("package")
192                 .output()?
193                 .stdout),
194         }
195     }
197     pub fn integrity(&self) -> Option<&lock::Hash> {
198         match &self.specifics {
199             Specifics::Registry { integrity } => Some(integrity),
200             Specifics::Git { .. } => None,
201         }
202     }
205 #[allow(clippy::case_sensitive_file_extension_comparisons)]
206 fn get_hosted_git_url(url: &Url) -> anyhow::Result<Option<Url>> {
207     if ["git", "git+ssh", "git+https", "ssh"].contains(&url.scheme()) {
208         let mut s = url
209             .path_segments()
210             .ok_or_else(|| anyhow!("bad URL: {url}"))?;
212         let mut get_url = || match url.host_str()? {
213             "github.com" => {
214                 let user = s.next()?;
215                 let mut project = s.next()?;
216                 let typ = s.next();
217                 let mut commit = s.next();
219                 if typ.is_none() {
220                     commit = url.fragment();
221                 } else if typ.is_some() && typ != Some("tree") {
222                     return None;
223                 }
225                 if project.ends_with(".git") {
226                     project = project.strip_suffix(".git")?;
227                 }
229                 let commit = commit.unwrap();
231                 Some(
232                     Url::parse(&format!(
233                         "https://codeload.github.com/{user}/{project}/tar.gz/{commit}"
234                     ))
235                     .ok()?,
236                 )
237             }
238             "bitbucket.org" => {
239                 let user = s.next()?;
240                 let mut project = s.next()?;
241                 let aux = s.next();
243                 if aux == Some("get") {
244                     return None;
245                 }
247                 if project.ends_with(".git") {
248                     project = project.strip_suffix(".git")?;
249                 }
251                 let commit = url.fragment()?;
253                 Some(
254                     Url::parse(&format!(
255                         "https://bitbucket.org/{user}/{project}/get/{commit}.tar.gz"
256                     ))
257                     .ok()?,
258                 )
259             }
260             "gitlab.com" => {
261                 /* let path = &url.path()[1..];
263                 if path.contains("/~/") || path.contains("/archive.tar.gz") {
264                     return None;
265                 }
267                 let user = s.next()?;
268                 let mut project = s.next()?;
270                 if project.ends_with(".git") {
271                     project = project.strip_suffix(".git")?;
272                 }
274                 let commit = url.fragment()?;
276                 Some(
277                     Url::parse(&format!(
278                     "https://gitlab.com/{user}/{project}/repository/archive.tar.gz?ref={commit}"
279                 ))
280                     .ok()?,
281                 ) */
283                 // lmao: https://github.com/npm/hosted-git-info/pull/109
284                 None
285             }
286             "git.sr.ht" => {
287                 let user = s.next()?;
288                 let mut project = s.next()?;
289                 let aux = s.next();
291                 if aux == Some("archive") {
292                     return None;
293                 }
295                 if project.ends_with(".git") {
296                     project = project.strip_suffix(".git")?;
297                 }
299                 let commit = url.fragment()?;
301                 Some(
302                     Url::parse(&format!(
303                         "https://git.sr.ht/{user}/{project}/archive/{commit}.tar.gz"
304                     ))
305                     .ok()?,
306                 )
307             }
308             _ => None,
309         };
311         match get_url() {
312             Some(u) => Ok(Some(u)),
313             None => Err(anyhow!("This lockfile either contains a Git dependency with an unsupported host, or a malformed URL in the lockfile: {url}"))
314         }
315     } else {
316         Ok(None)
317     }
#[cfg(test)]
mod tests {
    use super::get_hosted_git_url;
    use url::Url;

    /// Checks the mapping from lockfile Git URLs to hosted tarball URLs.
    #[test]
    fn hosted_git_urls() {
        for (input, expected) in [
            (
                "git+ssh://git@github.com/castlabs/electron-releases.git#fc5f78d046e8d7cdeb66345a2633c383ab41f525",
                Some("https://codeload.github.com/castlabs/electron-releases/tar.gz/fc5f78d046e8d7cdeb66345a2633c383ab41f525"),
            ),
            (
                // The ref may also be given as a `/tree/<ref>` path instead of a fragment.
                "git+https://github.com/foo/bar/tree/branch",
                Some("https://codeload.github.com/foo/bar/tar.gz/branch"),
            ),
            (
                "git+ssh://bitbucket.org/foo/bar#branch",
                Some("https://bitbucket.org/foo/bar/get/branch.tar.gz"),
            ),
            (
                "git+ssh://git.sr.ht/~foo/bar#branch",
                Some("https://git.sr.ht/~foo/bar/archive/branch.tar.gz"),
            ),
            (
                // Non-Git schemes are not Git dependencies and map to nothing.
                "https://registry.npmjs.org/foo/-/foo-1.0.0.tgz",
                None,
            ),
        ] {
            assert_eq!(
                get_hosted_git_url(&Url::parse(input).unwrap()).unwrap(),
                expected.map(|u| Url::parse(u).unwrap())
            );
        }

        assert!(
            get_hosted_git_url(&Url::parse("ssh://git@gitlab.com/foo/bar.git#fix/bug").unwrap())
                .is_err(),
            "GitLab URLs should be marked as invalid (lol)"
        );
    }
}