# Some repositories (such as Devpi) expose the Pypi legacy API
# (https://warehouse.pypa.io/api-reference/legacy.html).
# Note it is not possible to use pip
# https://discuss.python.org/t/pip-download-just-the-source-packages-no-building-no-metadata-etc/4651/12
15 from html
.parser
import HTMLParser
16 from os
.path
import normpath
17 from typing
import Optional
18 from urllib
.parse
import urlparse
, urlunparse
# Parse the legacy index page to extract the href and package names
class Pep503(HTMLParser):
    """HTML parser that collects the links of a legacy index page.

    Feed the page with ``feed()``; afterwards ``sources`` maps the text of
    every anchor to the value of its ``href`` attribute.

    NOTE(review): the bodies of the ``if``/``for`` statements were missing
    from the reviewed text and have been reconstructed -- confirm.
    """

    def __init__(self) -> None:
        super().__init__()
        # Anchor text -> href, filled in as anchors are closed.
        self.sources: dict[str, str] = {}
        # href and text of the anchor currently being parsed, if any.
        self.url: Optional[str] = None
        self.name: Optional[str] = None

    def handle_data(self, data: str) -> None:
        """Remember text encountered while a link is open."""
        if self.url is not None:
            self.name = data

    def handle_starttag(
        self, tag: str, attrs: list[tuple[str, Optional[str]]]
    ) -> None:
        """Record the ``href`` of an opening ``<a>`` tag."""
        if tag == "a":
            for name, value in attrs:
                if name == "href":
                    self.url = value

    def handle_endtag(self, tag: str) -> None:
        """Store the pending link, if any, under its text.

        Raises:
            ValueError: if a link is pending but no text was seen for it.
        """
        if self.url is not None:
            if not self.name:
                raise ValueError("Name not set")

            self.sources[self.name] = self.url

        # Forget the finished link so that a later anchor without text
        # cannot silently reuse this name and overwrite its entry.
        self.url = None
        self.name = None
def try_fetch(url: str, package_name: str, package_filename: str) -> None:
    """Download one file of a package from a legacy index.

    The index page ``<url>/<package_name>/`` is read and parsed, the link
    whose text equals ``package_filename`` is resolved against the index URL,
    and the target is written to ``package_filename`` in the current
    directory.

    Credentials are taken from the netrc file named by the ``NETRC``
    environment variable (if set) and overridden by credentials embedded in
    ``url``. Without credentials, TLS verification is disabled.

    NOTE(review): several statements of this function were missing from the
    reviewed text (guards, ``else`` branches, the ``urlunparse`` arguments,
    the exit after the "not found" message) and have been reconstructed --
    confirm against the original.

    Args:
        url: Base URL of the index, optionally containing ``user:pass@``.
        package_name: Name of the package's index page.
        package_filename: Link text to look for; also the output file name.

    Raises:
        urllib.error.HTTPError: propagated from either HTTP request.
        SystemExit: with status 1 if the index does not list the file.
    """
    index_url = url + "/" + package_name + "/"
    parsed_index = urlparse(index_url)

    # Parse username and password for this host from the netrc file if given.
    username: Optional[str] = None
    password: Optional[str] = None
    netrc_path = os.environ.get("NETRC", "")
    if netrc_path != "":
        netrc_obj = netrc.netrc(netrc_path)
        # ``hostname`` drops the port as well as any ``user:pass@`` prefix;
        # splitting ``netloc`` on ":" would return the user name instead.
        host = parsed_index.hostname
        authenticators = netrc_obj.authenticators(host) if host else None
        if authenticators:
            username, _, password = authenticators

    context = ssl.create_default_context()

    # Extract out username/password from index_url, if present.
    username = parsed_index.username or username
    password = parsed_index.password or password
    index_url = parsed_index._replace(
        netloc=parsed_index.netloc.rpartition("@")[-1]
    ).geturl()

    # Printed only after the credentials were stripped from the URL.
    print("Reading index %s" % index_url)

    req = urllib.request.Request(index_url)

    auth_header: Optional[str] = None
    if username and password:  # Add authentication
        password_b64 = base64.b64encode(
            ":".join((username, password)).encode()
        ).decode("utf-8")
        auth_header = "Basic {}".format(password_b64)
        req.add_header("Authorization", auth_header)
    else:  # If we are not using authentication disable TLS verification for long term reproducibility
        context.check_hostname = False
        context.verify_mode = ssl.CERT_NONE

    with urllib.request.urlopen(req, context=context) as response:
        index = response.read()

    parser = Pep503()
    parser.feed(str(index, "utf-8"))
    if package_filename not in parser.sources:
        print("The file %s has not be found in the index %s" % (package_filename, index_url))
        raise SystemExit(1)

    href = parser.sources[package_filename]

    # Sometimes the href is a relative or absolute path within the index's domain.
    indicated_url = urlparse(href)
    if indicated_url.netloc == "":
        parsed_url = urlparse(index_url)

        if indicated_url.path.startswith("/"):
            # An absolute path within the index's domain.
            path = href
        else:
            # A path relative to the index page.
            path = parsed_url.path + "/" + href

        package_url = urlunparse(
            (parsed_url.scheme, parsed_url.netloc, path, "", "", "")
        )
    else:
        package_url = href

    # Handle urls containing "../"
    parsed_url = urlparse(package_url)
    real_package_url = urlunparse(
        (
            parsed_url.scheme,
            parsed_url.netloc,
            normpath(parsed_url.path),
            parsed_url.params,
            parsed_url.query,
            parsed_url.fragment,
        )
    )
    print("Downloading %s" % real_package_url)

    req = urllib.request.Request(real_package_url)
    if auth_header is not None:
        # Unredirected: the credentials must not follow a redirect elsewhere.
        req.add_unredirected_header("Authorization", auth_header)

    # The output file is created only once the request succeeded, and both
    # handles are closed even if copying fails.
    with urllib.request.urlopen(req, context=context) as r, open(
        package_filename, "wb"
    ) as package_file:
        shutil.copyfileobj(r, package_file)
# Command-line interface. ``--url`` may be repeated; ``--pname`` and
# ``--filename`` are handed to try_fetch() as the package name and file name.
argparser = argparse.ArgumentParser(description="Fetch file from legacy pypi API")
argparser.add_argument(
    "--url",
    action="append",
    required=True,
    help="base URL of a package index (may be given several times)",
)
argparser.add_argument(
    "--pname",
    action="store",
    required=True,
    help="name of the package whose index page is read",
)
argparser.add_argument(
    "--filename",
    action="store",
    required=True,
    help="name of the file to look up in the index and to write",
)
147 if __name__
== "__main__":
148 args
= argparser
.parse_args()
151 try_fetch(url
, args
.pname
, args
.filename
)
152 except urllib
.error
.HTTPError
as e
:
153 print("Got exception'", e
, "', trying next package index", file=sys
.stderr
)
159 f
"Could not fetch package '{args.pname}' file '{args.filename}' from any mirrors: {args.url}",