biome: 1.9.2 -> 1.9.3
[NixPkgs.git] / pkgs / build-support / fetchpypilegacy / fetch-legacy.py
blobe031f244a77147851227ea667b9d514af8f2c41a
# Some repositories (such as Devpi) expose the PyPI legacy API
# (https://warehouse.pypa.io/api-reference/legacy.html).
#
# Note: pip cannot be used to simply download these files; see
# https://discuss.python.org/t/pip-download-just-the-source-packages-no-building-no-metadata-etc/4651/12
7 import base64
8 import argparse
9 import netrc
10 import os
11 import shutil
12 import ssl
13 import sys
14 import urllib.request
15 from html.parser import HTMLParser
16 from os.path import normpath
17 from typing import Optional
18 from urllib.parse import urlparse, urlunparse
21 # Parse the legacy index page to extract the href and package names
class Pep503(HTMLParser):
    """Collect the links of a legacy (PEP 503 style) index page.

    After the page's HTML has been passed to ``feed()``, ``sources`` maps the
    text of every ``<a href=...>`` element to the value of its ``href``
    attribute.

    Raises:
        ValueError: from ``feed()`` when a link is closed without any text
            having been seen for it.
    """

    def __init__(self) -> None:
        super().__init__()
        # Completed links: link text -> href value.
        self.sources: dict[str, str] = {}
        # href of the link currently being parsed; None when outside a link.
        self.url: Optional[str] = None
        # Text seen for the link currently being parsed.
        self.name: Optional[str] = None

    def handle_data(self, data: str) -> None:
        # Text only counts as a name while a link with an href is open.
        if self.url is not None:
            self.name = data

    def handle_starttag(self, tag: str, attrs: list[tuple[str, Optional[str]]]) -> None:
        if tag != "a":
            return
        for name, value in attrs:
            if name == "href":
                self.url = value
                # A new link starts here: text belonging to an earlier link
                # must never be attributed to this one.
                self.name = None

    def handle_endtag(self, tag: str) -> None:
        # The first end tag after the href closes the pending link.
        if self.url is None:
            return
        if not self.name:
            raise ValueError("Name not set")

        self.sources[self.name] = self.url
        # Reset both halves of the pair; leaving the name behind would let a
        # later, empty link silently overwrite this entry with its own href.
        self.url = None
        self.name = None
def _netrc_credentials(index_url: str) -> tuple[Optional[str], Optional[str]]:
    """Return the (username, password) the $NETRC file holds for the index host.

    Returns ``(None, None)`` when $NETRC is unset or empty, or when the file
    has no entry for the host.
    """
    netrc_path = os.environ.get("NETRC", "")
    if netrc_path == "":
        return None, None

    netrc_obj = netrc.netrc(netrc_path)
    # Drop a possible "user:password@" prefix first, otherwise the port
    # stripping below would leave the user name instead of the host.
    host = urlparse(index_url).netloc.rpartition("@")[-1]
    # Strip port number if present
    if ":" in host:
        host = host.split(":")[0]
    authenticators = netrc_obj.authenticators(host)
    if not authenticators:
        return None, None
    username, _, password = authenticators
    return username, password


def _resolve_package_url(index_url: str, href: str) -> str:
    """Turn the href found on the index page into the URL to download."""
    # Sometimes the href is a relative or absolute path within the index's domain.
    indicated_url = urlparse(href)
    if indicated_url.netloc == "":
        parsed_index = urlparse(index_url)
        if indicated_url.path.startswith("/"):
            # An absolute path within the index's domain.
            path = href
        else:
            # A relative path.
            path = parsed_index.path + "/" + href
        package_url = urlunparse(
            (parsed_index.scheme, parsed_index.netloc, path, None, None, None)
        )
    else:
        package_url = href

    # Handle urls containing "../"
    parsed_package = urlparse(package_url)
    return urlunparse(
        (
            parsed_package.scheme,
            parsed_package.netloc,
            normpath(parsed_package.path),
            parsed_package.params,
            parsed_package.query,
            parsed_package.fragment,
        )
    )


def try_fetch(url: str, package_name: str, package_filename: str) -> None:
    """Download one file of a package from a single legacy index.

    The index page ``<url>/<package_name>/`` is fetched and parsed, the link
    whose text equals ``package_filename`` is resolved, and its target is
    written to ``package_filename`` in the current directory.

    Credentials are taken from the URL itself (``user:password@host``) or,
    failing that, from the file named by the ``NETRC`` environment variable.

    Args:
        url: Base URL of the index, without the package name.
        package_name: Name of the package; appended to ``url``.
        package_filename: Text of the link to download, also used as the
            name of the output file.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` (this includes
            ``HTTPError``) when the index or the file cannot be retrieved.

    Exits the process with status 1 when the index does not list
    ``package_filename``.
    """
    index_url = url + "/" + package_name + "/"

    # Parse username and password for this host from the netrc file if given.
    username, password = _netrc_credentials(index_url)

    context = ssl.create_default_context()

    # Extract out username/password from index_url, if present.
    parsed_url = urlparse(index_url)
    username = parsed_url.username or username
    password = parsed_url.password or password
    index_url = parsed_url._replace(netloc=parsed_url.netloc.rpartition("@")[-1]).geturl()

    # Log only once the credentials are stripped so they never reach the output.
    print(f"Reading index {index_url}")

    req = urllib.request.Request(index_url)

    auth_header: Optional[str] = None
    if username and password:  # Add authentication
        token = base64.b64encode(f"{username}:{password}".encode()).decode("utf-8")
        auth_header = f"Basic {token}"
        req.add_header("Authorization", auth_header)
    else:
        # If we are not using authentication disable TLS verification for
        # long term reproducibility.
        # NOTE(review): presumably acceptable because the caller verifies the
        # downloaded file by hash - confirm.
        context.check_hostname = False
        context.verify_mode = ssl.CERT_NONE

    # Close the connection as soon as the page has been read.
    with urllib.request.urlopen(req, context=context) as response:
        index = response.read()

    parser = Pep503()
    parser.feed(str(index, "utf-8"))
    if package_filename not in parser.sources:
        print(
            "The file %s has not be found in the index %s" % (package_filename, index_url),
            file=sys.stderr,
        )
        sys.exit(1)

    real_package_url = _resolve_package_url(index_url, parser.sources[package_filename])
    print(f"Downloading {real_package_url}")

    req = urllib.request.Request(real_package_url)
    if auth_header is not None:
        # Unredirected: the credentials must not follow a redirect to another host.
        req.add_unredirected_header("Authorization", auth_header)

    # Create the output file only after the request succeeded, and make sure
    # both the connection and the file are closed (and flushed) on every path.
    with urllib.request.urlopen(req, context=context) as response, open(
        package_filename, "wb"
    ) as package_file:
        shutil.copyfileobj(response, package_file)
# Command line interface. Every option is mandatory; --url may be repeated to
# provide several indexes.
argparser = argparse.ArgumentParser(description="Fetch file from legacy pypi API")
argparser.add_argument(
    "--url",
    action="append",
    required=True,
    help="base URL of a package index; may be given several times, "
    "the indexes are tried in the order given",
)
argparser.add_argument(
    "--pname",
    action="store",
    required=True,
    help="name of the package, appended to the index URL",
)
argparser.add_argument(
    "--filename",
    action="store",
    required=True,
    help="name of the file to look up in the index; also the name of the output file",
)
if __name__ == "__main__":
    # Try every index given with --url, in order, and stop at the first one
    # that delivers the file.
    args = argparser.parse_args()
    for url in args.url:
        try:
            try_fetch(url, args.pname, args.filename)
        except urllib.error.URLError as e:
            # URLError is the parent of HTTPError, so an unreachable index
            # (DNS failure, refused connection) falls through to the next
            # one just like an HTTP error status does.
            print("Got exception'", e, "', trying next package index", file=sys.stderr)
            continue
        else:
            break
    else:
        # The loop ended without a break: no index delivered the file.
        print(
            f"Could not fetch package '{args.pname}' file '{args.filename}' from any mirrors: {args.url}",
            file=sys.stderr,
        )
        sys.exit(1)