# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# Copyright (c) 2010, 2016, Oracle and/or its affiliates. All rights reserved.
# userland-fetch - a file download utility
#
# A simple program similar to wget(1), but it also handles local file copies,
# ignores directories, and verifies file hashes.
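#
# A minimal invocation sketch (hypothetical URL and digest; the full set of
# flags is summarized further below):
#
#   userland-fetch --url https://example.com/foo-1.0.tar.gz \
#       --hash sha256:5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03
#
# This downloads foo-1.0.tar.gz into the current directory and verifies it
# against the given hash before reporting success.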
# standard library modules used throughout this script
import bz2
import errno
import getopt
import gzip
import hashlib
import json
import os
import re
import shutil
import subprocess
import sys

from urllib.parse import urlparse
from urllib.request import urlopen, Request
from urllib.error import HTTPError, URLError
from pathlib import Path
from http.client import BadStatusLine
# Exit codes:
# 1 - unspecified error
# 2 - download uses insecure protocol
# 3 - unable to find a suitable download
# 4 - need-hash specified but no hash was found
# -n/--need-hash: Set this to tell userland-fetch to fail if it cannot find a
#                 correct hash. This also causes userland-fetch to search for
#                 and download hash files if they are not already present in
#                 HASH_DIR. If --hash is provided, this effectively does nothing.
#
# -N/--need-sig: Set this to tell userland-fetch to require a signature. This
#                also causes userland-fetch to search for signature files. If
#                the signature fails, then the download is considered corrupted
#                and will be deleted unless --keep is set.
#                This means that if the signature can't be checked, the file
#                cannot be used.
#
# -c/--clobber-hash: Set this to tell userland-fetch to clobber old hash files.
#                    userland-fetch will replace hash files in HASH_DIR with
#                    their remote counterparts.
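#
# For example (hypothetical mirror and component), a strict fetch that must
# find both a hash and a signature, refreshing any cached hash files first:
#
#   userland-fetch -n -N -c \
#       -s https://mirror.example.com/src \
#       -u https://upstream.example.com/src/foo-1.0.tar.gz
#
# With -n set, the run exits with code 4 if no usable hash can be found.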
# convert environment variables to global python variables

# This algorithm is assumed when one cannot be deduced from the hash or the
# hash file's name.
global DEFAULT_HASH_ALGO
DEFAULT_HASH_ALGO = os.getenv("DEFAULT_HASH_ALGO", "sha256")

global DEFAULT_HASH_FILES
try:
    DEFAULT_HASH_FILES = [x for x in os.environ["DEFAULT_HASH_FILES"].split(" ") if x]
except KeyError:
    DEFAULT_HASH_FILES = ["SHA256SUMS", "sha256sums.txt"]

global HASH_DIR
try:
    HASH_DIR = os.path.realpath(os.environ["HASH_DIR"])
except KeyError:
    # set after getting cmdline args
    HASH_DIR = None

global SECURE_PROTOCOLS
try:
    SECURE_PROTOCOLS = ["UNCHECKED"] + [x for x in os.environ["SECURE_PROTOCOLS"].split(" ") if x]
except KeyError:
    SECURE_PROTOCOLS = ["UNCHECKED", "https"]

global SIGNATURE_EXTENSIONS
try:
    SIGNATURE_EXTENSIONS = [x for x in os.environ["SIGNATURE_EXTENSIONS"].split(" ") if x]
except KeyError:
    SIGNATURE_EXTENSIONS = ["sig", "asc"]

global ALLOW_UNVERIFIED_DOWNLOADS
try:
    ALLOW_UNVERIFIED_DOWNLOADS = os.environ["ALLOW_UNVERIFIED_DOWNLOADS"] == 'yes'
except KeyError:
    ALLOW_UNVERIFIED_DOWNLOADS = False

LOCAL_SCHEMES = [None, 'file', '']
REMOTE_SCHEMES = ['https', 'http', 'ftp']
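# How urlparse() classifies the URLs handled above (values shown are what
# urllib.parse.urlparse actually returns for these inputs):
#
#   urlparse("file:///tmp/foo.tar.gz").scheme   -> 'file'   (local)
#   urlparse("/tmp/foo.tar.gz").scheme          -> ''       (local)
#   urlparse("https://example.com/foo").scheme  -> 'https'  (remote, secure)
#   urlparse("ftp://example.com/foo").scheme    -> 'ftp'    (remote, insecure)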
def printIOError(e, txt):
    """Decode and print an IOError-type exception."""
    print("I/O Error: " + txt + ": ")
    try:
        (code, message) = e.args
        print(str(message) + " (" + str(code) + ")")
    except ValueError:
        print(str(e))
# TODO: refactor this so there aren't any global variables
def validate_signature(path, signature):
    """Given paths to a file and a detached PGP signature, verify that
    the signature is valid for the file. The current configuration allows
    unrecognized keys to be downloaded as necessary."""

    # Find the root of the repo so that we can point GnuPG at the right
    # configuration and keyring.
    proc = subprocess.Popen(["git", "rev-parse", "--show-toplevel"],
                            stdout=subprocess.PIPE, universal_newlines=True)
    out, err = proc.communicate()
    if proc.returncode != 0:
        return False
    gpgdir = os.path.join(out.strip(), "tools", ".gnupg")

    # Skip the permissions warning: none of the information here is private,
    # so it is simplest not to worry about git keeping the directory
    # unreadable.
    try:
        proc = subprocess.Popen(["gpg2", "--verify",
                                 "--no-permission-warning", "--homedir", gpgdir, signature,
                                 path], stdin=open("/dev/null"),
                                stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                                universal_newlines=True)
    except OSError as e:
        # If the executable simply couldn't be found, just skip the
        # signature check.
        if e.errno == errno.ENOENT:
            return False
        raise

    proc.wait()
    global VALIDATE_CODE
    VALIDATE_CODE = proc.returncode
    if proc.returncode != 0:
        # Only print GnuPG's output when there was a problem.
        # Make this a global variable so we can output it somewhere tidy.
        global VALIDATE_ERROR
        VALIDATE_ERROR = proc.stdout.read()
        return False
    return True
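# A minimal usage sketch (hypothetical file names; assumes git and gpg2 are
# on PATH and that the repo carries a keyring under tools/.gnupg):
#
#   if validate_signature("foo.tar.gz", "foo.tar.gz.asc"):
#       print("signature ok")
#   else:
#       print("signature check failed (exit code %s)" % VALIDATE_CODE)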
def validate(file, hash):
    """Given a file-like object and a hash string, verify that the hash
    matches the file contents."""

    try:
        algorithm, hashvalue = hash.split(':')
    except ValueError:
        algorithm = DEFAULT_HASH_ALGO

    # force migration away from sha1
    if algorithm == "sha1":
        algorithm = DEFAULT_HASH_ALGO

    try:
        m = hashlib.new(algorithm)
    except ValueError:
        print("Unable to generate hashlib instance for", algorithm)
        return "%s:" % (algorithm)

    try:
        # read in blocks to keep memory usage bounded on large archives
        while True:
            block = file.read(8192)
            if not block:
                break
            m.update(block)
        return "%s:%s" % (algorithm, m.hexdigest())
    except IOError as err:
        print(str(err), end=' ')
    except EOFError as err:
        print(str(err), end=' ')
    return "%s:" % (algorithm)
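# For example, validating an in-memory buffer (io.BytesIO stands in for the
# usual file object; the digest below is the real sha256 of b"hello\n"):
#
#   import io
#   validate(io.BytesIO(b"hello\n"),
#            "sha256:5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03")
#   -> "sha256:5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03"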
def validate_container(filename, hash):
    """Given a file path and a hash string, verify that the hash matches the
    file contents."""
    try:
        file = open(filename, 'rb')
    except IOError as e:
        printIOError(e, "Can't open file " + filename)
        return False
    return validate(file, hash)
def validate_payload(filename, hash):
    """Given a file path and a hash string, verify that the hash matches the
    payload (uncompressed content) of the file."""

    expr_bz = re.compile(r'.+\.bz2?$', re.IGNORECASE)
    expr_gz = re.compile(r'.+\.gz$', re.IGNORECASE)
    expr_tgz = re.compile(r'.+\.tgz$', re.IGNORECASE)
    expr_tbz = re.compile(r'.+\.tbz2?$', re.IGNORECASE)

    try:
        if expr_bz.match(filename):
            file = bz2.BZ2File(filename, 'rb')
        elif expr_gz.match(filename):
            file = gzip.GzipFile(filename, 'rb')
        elif expr_tgz.match(filename):
            file = gzip.GzipFile(filename, 'rb')
        elif expr_tbz.match(filename):
            # bz2.GzipFile doesn't exist; .tbz archives are bzip2-compressed
            file = bz2.BZ2File(filename, 'rb')
        else:
            # not a recognized compressed format; there is no separate payload
            return False
    except IOError as e:
        printIOError(e, "Can't open archive " + filename)
        return False
    return validate(file, hash)
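# For instance (hypothetical digests), foo.tar.gz and a recompressed
# foo.tar.bz2 have different container hashes but the same payload hash, so
# either archive can satisfy a hash recorded against the uncompressed tar:
#
#   validate_container("foo.tar.gz", "sha256:...")  # hashes the .gz bytes
#   validate_payload("foo.tar.gz", "sha256:...")    # hashes the decompressed tar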
def download(url, filename=None, user_agent_arg=None, quiet=None, allow_partial=True):
    """Download the content at the given URL to the given filename
    (defaulting to the basename of the URL if not given). If 'quiet' is
    True, throw away any error messages. Returns the name of the file to
    which the content was downloaded, or None on failure."""
    try:
        # Issue a HEAD request first to learn the content length and whether
        # the server supports range requests (for resuming partial files).
        req = Request(url, method="HEAD")
        if user_agent_arg is not None:
            req.add_header("User-Agent", user_agent_arg)
        if filename is None:
            filename = req.get_full_url().split('/')[-1]
        length = 0
        with urlopen(req) as i:
            if 'transfer-encoding' in i.headers and i.headers['transfer-encoding'] == 'chunked':
                if not quiet:
                    print("length unknown (streamed/chunked)")
            else:
                try:
                    length = int(i.headers['content-length'])
                    if not quiet:
                        print("length %i bytes" % (length))
                except (KeyError, ValueError, TypeError):
                    if not quiet:
                        print("length unknown")
            if 'accept-ranges' not in i.headers or i.headers['accept-ranges'] != 'bytes':
                if not quiet:
                    print("No partial download support from server")
                allow_partial = False

        # Stream into a .part file and only rename once complete.
        # This might speed things up and keep memory usage down.
        req = Request(url)
        if user_agent_arg is not None:
            req.add_header("User-Agent", user_agent_arg)
        with open(filename + ".part", "ab" if allow_partial else "wb") as o:
            # seek to end of the file if applicable
            o.seek(0, os.SEEK_END)
            dl = o.tell()
            for retry in range(1, 4):
                try:
                    if not quiet:
                        print("(Attempt %i of 3%s)..." % (retry, "; %i bytes done" % (dl) if dl else ""), end=" ")
                    if dl:
                        req.add_header("Range", "bytes=%i-" % (dl))
                    with urlopen(req) as i:
                        shutil.copyfileobj(i, o)
                    if length > 0 and o.tell() < length:
                        if not quiet:
                            print("Download of %s stopped abruptly." % (str(url)))
                        dl = o.tell()
                        continue
                    break
                except HTTPError as e:
                    # HTTPError is a URLError subclass, so it must be caught first
                    if not quiet:
                        print("Error downloading %s: %s" % (str(url), str(e)))
                    return None
                except URLError as e:
                    if not quiet:
                        print("Error downloading %s at %i bytes: %s" % (str(url), dl, str(e)))
                    # if we haven't downloaded any bytes since the last
                    # URLError, cancel the download.
                    if dl > 0 and o.tell() > dl:
                        dl = o.tell()
                        req.add_header("Range", "bytes=%i-" % (o.tell()))
                    else:
                        return None
            else:
                # three attempts exhausted without completing the download
                return None

        # return the name of the file that we downloaded the data to.
        os.rename(filename + ".part", filename)
        return filename
    except IOError as e:
        if not quiet:
            printIOError(e, "Can't open url " + url)
    except BadStatusLine:
        if not quiet:
            print("Can't open url %s: server answered with a status line we couldn't understand" % (url))
    except KeyboardInterrupt:
        print("Cancelling download...")
    return None
def download_paths(search, filenames, url):
    """Returns a list of URLs where the file 'filename' might be found,
    using 'url', 'search', and $DOWNLOAD_SEARCH_PATH as places to look.

    If 'filename' is None, then the list will simply contain 'url'."""

    urls = list()
    if type(filenames) == str:
        filenames = [filenames]

    if filenames is not None:
        tmp = os.getenv('DOWNLOAD_SEARCH_PATH')
        if tmp:
            search += tmp.split(' ')
        for filename in filenames:
            file = os.path.basename(filename)
            urls += [base + '/' + file for base in search]

            # filename should always be first
            if filename in urls:
                urls.remove(filename)
                urls.insert(0, filename)

    # command line url is a fallback, so it's last
    if url is not None and url not in urls:
        parse_result = urlparse(url)
        scheme = parse_result.scheme
        path = parse_result.path
        if scheme == 'pypi':
            url = pypi_url(url, os.path.basename(filename))
        if url is not None and url not in urls:
            urls.append(url)

    if filenames is not None:
        tmp = os.getenv('DOWNLOAD_FALLBACK_PATH')
        if tmp:
            for filename in filenames:
                file = os.path.basename(filename)
                urls += [base + '/' + file for base in tmp.split(' ')]

    # sort entries by local first, then remote:
    local_urls = list()
    remote_urls = list()
    for entry in urls:
        if urlparse(entry).scheme in LOCAL_SCHEMES:
            local_urls.append(entry)
        else:
            remote_urls.append(entry)
    return local_urls + remote_urls
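# For example (hypothetical hosts, no DOWNLOAD_* environment overrides):
#
#   download_paths(["http://mirror.example.com"], "foo.tar.gz",
#                  "https://upstream.example.com/foo.tar.gz")
#   -> ['foo.tar.gz',                                # local candidate first
#       'http://mirror.example.com/foo.tar.gz',      # search path
#       'https://upstream.example.com/foo.tar.gz']   # command line url last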
def pypi_url(url, filename):
    """Given a pypi: URL, return the real URL for that component/version.

    The pypi scheme has a host (with an empty host defaulting to
    pypi.python.org), and a path that should be of the form
    "component==version". Other specs could be supported, but == is the
    only thing that makes sense in this context.

    The filename argument is the name of the expected file to download, so
    that when pypi gives us multiple archives to choose from, we can pick
    the right one."""

    parse_result = urlparse(url)
    host = parse_result.netloc
    path = parse_result.path

    # We have to use ==; anything fancier would require pkg_resources, but
    # really that's the only thing that makes sense in this context.
    try:
        name, version = re.match("/(.*)==(.*)$", path).groups()
    except AttributeError:
        print("PyPI URLs must be of the form 'pypi:///component==version'")
        return None

    if not host:
        jsurl = "https://pypi.python.org/pypi/%s/json" % name
    else:
        jsurl = "https://%s/pypi/%s/json" % (host, name)

    try:
        f = urlopen(jsurl, data=None)
    except HTTPError as e:
        if e.getcode() == 404:
            print("Unknown component '%s'" % name)
        else:
            printIOError(e, "Can't open PyPI JSON url %s" % url)
        return None
    except IOError as e:
        printIOError(e, "Can't open PyPI JSON url %s" % url)
        return None

    content = f.read().decode("utf-8")
    js = json.loads(content)
    try:
        verblock = js["releases"][version]
    except KeyError:
        print("Unknown version '%s'" % version)
        return None

    urls = [d["url"] for d in verblock]
    for archiveurl in urls:
        if archiveurl.endswith("/%s" % filename):
            return archiveurl

    if urls:
        print("None of the following URLs delivers '%s':" % filename)
        print(" " + "\n ".join(urls))
    else:
        print("Couldn't find any suitable URLs")
    return None
def download_from_paths(search_list, file_arg, url, link_arg, quiet=False, get_signature=False, download_dir=None):
    """Attempts to download a file from a number of possible locations.
    Generates a list of paths where the file ends up on the local
    filesystem. This is a generator because while a download might be
    successful, the signature or hash may not validate, and the caller may
    want to try again from the next location. The 'link_arg' argument is a
    boolean which, when True, specifies that if the source is not a remote
    URL and not already found where it should be, to make a symlink to the
    source rather than copying it."""
    for url in download_paths(search_list, file_arg, url):
        if not quiet:
            print("Source %s..." % url, end=' ')
        else:
            # quiet mode prints fixed-width table rows instead of prose
            p = url
            if len(p) > 53:
                p = url[:24] + ' ... ' + url[-24:]
            print(" {:54s}".format(p), end='')

        parse_result = urlparse(url)
        scheme = parse_result.scheme
        path = parse_result.path

        if scheme in LOCAL_SCHEMES:
            if type(file_arg) == str:
                n = file_arg
            else:
                # don't rename stuff - there shouldn't be a file list here anyway
                n = os.path.join(download_dir or os.curdir, os.path.basename(url))
            if os.path.basename(n) != os.path.basename(url):
                n = os.path.join(os.path.dirname(n), os.path.basename(url))
            if os.path.exists(path) is False:
                if not quiet:
                    print("not found, skipping file copy")
                else:
                    print("{:10s}".format("-"))
                continue
            elif n and n != path:
                if link_arg is False:
                    if not quiet:
                        print("\n copying...")
                    shutil.copy2(path, n)
                else:
                    if not quiet:
                        print("\n linking...")
                    os.symlink(path, n)
            if quiet:
                print("{:10s}".format("cached"), end="")
            yield n, url
        elif scheme in REMOTE_SCHEMES:
            if not quiet:
                print("\n downloading...", end=' ')
            if type(file_arg) == str:
                name = download(url, file_arg, quiet=quiet, allow_partial=(scheme != 'ftp'))
            else:
                if download_dir is None:
                    download_dir = os.curdir
                name = download(url, os.path.join(download_dir, os.path.basename(url)),
                                quiet=quiet, allow_partial=(scheme != 'ftp'))
            if get_signature and name:
                for ext in SIGNATURE_EXTENSIONS:
                    download(url + "." + ext, name + "." + ext,
                             quiet=quiet, allow_partial=(scheme != 'ftp'))
            if name is None:
                if quiet:
                    print("{:10s}".format("-"))
                continue
            if quiet:
                print("{:10s}".format("fetched"), end="")
            yield name, url
def find_hash_in_file(filename, hash_file):
    """Search a checksum file in HASH_DIR for an entry matching 'filename'.
    Returns an "algorithm:hexdigest" string, or None if nothing matched."""
    splits = hash_file.split('.')
    regex = re.compile(r'([0-9a-fA-F]+)( [ \*](.*/)?)(' + re.escape(os.path.basename(filename)) + '$)')
    match = re.match(r"(^[a-z0-9]+)(sums?(\.txt)?$)", hash_file.lower())
    if '.'.join(splits[:-1]) == filename:
        # e.g. "foo.tar.gz.sha256sum" - the algorithm is in the extension
        algo = re.match(r'([a-zA-Z0-9]+)(sums?)', hash_file.split('.')[-1]).group(1)
    elif match:
        # e.g. "SHA256SUMS" or "sha256sums.txt"
        algo = match.group(1)
    else:
        algo = DEFAULT_HASH_ALGO
    hash_value = None
    with open(os.path.join(HASH_DIR, hash_file), "r") as file:
        for line in file.readlines():
            hash_value = regex.match(line)
            if hash_value is not None:
                hash_value = hash_value.group(1)
                break
    if hash_value is not None:
        return "%s:%s" % (algo, hash_value)
    return None
def find_hash_in_hash_dir(filename):
    """Search every hash file in HASH_DIR for an entry matching 'filename'.
    Returns a (hash, hash_file) tuple, or (None, None) if nothing matched."""
    try:
        if not os.path.exists(HASH_DIR):
            return None, None
        for hash_file in sorted(os.listdir(HASH_DIR)):
            splits = hash_file.split('.')
            if splits[-1] in SIGNATURE_EXTENSIONS:
                # skip detached signatures; they aren't hash files
                continue
            hash_value = find_hash_in_file(filename, hash_file)
            if hash_value is not None:
                return hash_value, hash_file
    except NotADirectoryError:
        print(HASH_DIR, "should be a directory containing hashfiles in the", DEFAULT_HASH_ALGO + "sum", "format.")
    except IsADirectoryError:
        print(hash_file, "should be a file containing hashes, not a directory.")
    return None, None
567 print("Usage: %s [-a|--user-agent (user-agent)] [-f|--file (file)] [-l|--link] " \
568 "[-k|--keep] [-h|--hash (hash)] [-n|--need-hash] [-s|--search (search-dir)] " \
569 "[-g|--get-hashes] [-G|--get-sigs] " \
570 "[-S|--sigurl (signature-url)] [-N|--need-sig] --url (url)" % (sys.argv[0].split('/')[-1]))
def main():
    try:
        # note: -S takes an argument, so it needs "S:" in the short-option string
        opts, args = getopt.getopt(sys.argv[1:], "a:f:h:lks:u:S:GgNnc",
                                   ["file=", "link", "keep", "hash=", "search=", "url=",
                                    "get-sigs", "get-hashes", "sigurl=", "user-agent=",
                                    "need-sig", "need-hash", "clobber-hash"])
        sys.exit(realmain(opts, args))
    except getopt.GetoptError as err:
        print(str(err))
        usage()
def realmain(opts, args):
    # defaults; overridden by the options parsed below
    user_agent_arg = file_arg = hash_arg = url_arg = sig_arg = None
    link_arg = keep_arg = False
    need_hash = get_hash = need_sig = get_signature = clobber_hash = False
    search_list = []
    hash_file = None    # set when a hash for the file is found in a hash file
    valid_sig = False   # set when a sourced hash file carries a good signature

    for opt, arg in opts:
        if opt in ["-a", "--user-agent"]:
            user_agent_arg = arg
        elif opt in ["-f", "--file"]:
            file_arg = arg
        elif opt in ["-l", "--link"]:
            link_arg = True
        elif opt in ["-k", "--keep"]:
            keep_arg = True
        elif opt in ["-h", "--hash"]:
            hash_arg = arg
        elif opt in ["-n", "--need-hash"]:
            need_hash = True
        elif opt in ["-g", "--get-hashes"]:
            get_hash = True
        elif opt in ["-s", "--search"]:
            search_list.append(arg)
        elif opt in ["-S", "--sigurl"]:
            sig_arg = arg
        elif opt in ["-u", "--url"]:
            url_arg = arg
        elif opt in ["-N", "--need-sig"]:
            need_sig = True
        elif opt in ["-G", "--get-sigs"]:
            get_signature = True
        elif opt in ["-c", "--clobber-hash"]:
            clobber_hash = True
        else:
            assert False, "unknown option"

    if clobber_hash and len(search_list) == 0:
        print("WARN: -c/--clobber-hash is meaningless without --search or --url. Ignoring.")
        clobber_hash = False
    if url_arg is None:
        usage()

    parse_result = urlparse(url_arg)
    scheme = parse_result.scheme
    path = parse_result.path
    if file_arg is None:
        file_arg = os.path.realpath(os.path.join(os.curdir, os.path.basename(path)))
    else:
        file_arg = os.path.realpath(file_arg)
    filename = os.path.basename(file_arg)

    global HASH_DIR
    if HASH_DIR is None:
        HASH_DIR = os.path.realpath(os.path.join(os.path.dirname(file_arg), "hashes"))

    if clobber_hash or get_hash:
        print("Hash directory: %s [clobbering: %s]" % (HASH_DIR, str(clobber_hash)))
        # fetch fresh hash files into a temporary directory first, so the real
        # HASH_DIR is only updated with files that pass the checks below
        HASH_DIR_ORIG = HASH_DIR
        HASH_DIR = HASH_DIR + ".tmp"
        try:
            os.mkdir(HASH_DIR)
        except FileNotFoundError:
            print("Refusing to create %s recursively - is HASH_DIR set correctly?" % (HASH_DIR))
            return 1
        except FileExistsError:
            pass
        # We need to account for the following possibilities for hash files:
        # 1: .asc with embedded checksums (1 file, needs PGP stripping)
        # 2: .asc or .sig, detached from hash file (2 files)
        # 3: checksums without signature (need a secure protocol)
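        # Concretely (hypothetical names), for foo-1.0.tar.gz these look like:
        #   1: SHA256SUMS.asc                (clearsigned checksum list)
        #   2: SHA256SUMS + SHA256SUMS.asc   (checksums plus detached signature)
        #   3: SHA256SUMS                    (checksums only, fetched over https)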
673 print("Sourcing hash files... ",end="")
674 search_hash_files = DEFAULT_HASH_FILES + [
675 filename + '.' + DEFAULT_HASH_ALGO,
676 filename + '.' + DEFAULT_HASH_ALGO + 'sum',
677 filename + '.' + DEFAULT_HASH_ALGO + 'sums'
680 print("\n {:54s}{:10s}{:10s}".format("URL","LOCALITY","HAS HASH"))
684 search_list.append(os.path.dirname(url_arg))
685 search_hash_files = [ os.path.join(HASH_DIR,x) for x in search_hash_files ]
686 for hashname, hashurl in download_from_paths(search_list, search_hash_files , None, link_arg, quiet=2,get_signature=True,download_dir=HASH_DIR):
687 scheme = urlparse(hashurl).scheme
688 safe = scheme in SECURE_PROTOCOLS or scheme in LOCAL_SCHEMES
690 for sigext in SIGNATURE_EXTENSIONS:
691 signame = hashname + "." + sigext
692 if os.path.exists(signame):
693 valid_sig = validate_signature(hashname,signame)
694 if not valid_sig and (not safe or need_sig):
695 print("denied (hashfile download did not meet security criteria)")
697 Path(signame).unlink(missing_ok=True)
699 hash_arg = find_hash_in_file(filename, hashname)
        if hash_file is not None:
            print("INFO: hash found for", filename, "in", hash_file)

        # move freshly fetched hash files from the temporary directory into
        # the real HASH_DIR, then drop the temporary directory
        for file in os.listdir(HASH_DIR):
            try:
                orig_file = os.path.join(HASH_DIR_ORIG, os.path.basename(file))
                new_file = os.path.join(HASH_DIR, file)
                os.rename(new_file, orig_file)
            except IsADirectoryError as e:
                print("ERROR: moving hashfiles to HASH_DIR failed: %s" % (str(e)))
            except OSError as e:
                print("OSError: %s (%s -> %s)" % (str(e), new_file, orig_file))
        try:
            os.rmdir(HASH_DIR)
        except OSError as e:
            print("Couldn't remove %s: %s" % (HASH_DIR, str(e)))
        HASH_DIR = HASH_DIR_ORIG
    elif hash_arg is None:
        hash_arg, hash_file = find_hash_in_hash_dir(filename)
        if hash_file is not None:
            print("INFO: hash found for", filename, "in", hash_file)
    else:
        print("INFO: not using any hashes in %s for" % (HASH_DIR), filename, "(overridden with --hash)")
    if (hash_arg is None or hash_arg == 'none') and need_hash:
        print("-n/--need-hash and no hash found. Exiting.")
        return 4
    if ALLOW_UNVERIFIED_DOWNLOADS:
        print("WARN: ALLOW_UNVERIFIED_DOWNLOADS set.")
    if sig_arg is not None and get_signature:
        print("INFO: not searching with -G/--get-sigs (--sigurl provided)")
        get_signature = False
    for name, url in download_from_paths(search_list, file_arg, url_arg, link_arg,
                                         get_signature=get_signature):
        scheme = urlparse(url).scheme
        if name is None:
            print(" was not downloaded")
            continue

        print(" validating signature...", end=' ')
        sig_file = None
        bad_signature = False
        errors = []
        if valid_sig:
            # the hash file itself carried a good signature when it was sourced
            print("hashfile had valid signature")
        elif sig_arg == 'none':
            print("skipping (--sigurl none)")
        elif sig_arg is not None:
            print("using %s..." % sig_arg, end=' ')
            if urlparse(sig_arg).scheme in REMOTE_SCHEMES:
                sig_file = download(sig_arg,
                                    filename=os.path.join(os.path.dirname(name), os.path.basename(sig_arg)),
                                    quiet=True, allow_partial=False)
                print("checking remote signature...", end=' ')
            else:
                sig_file = sig_arg
                print("checking local signature...", end=' ')
        else:
            for ext in SIGNATURE_EXTENSIONS:
                if os.path.exists(name + '.' + ext):
                    sig_file = name + '.' + ext
                    break
        if sig_file is not None:
            if validate_signature(name, sig_file):
                print("valid")
            else:
                errors.append((sig_file, VALIDATE_CODE, VALIDATE_ERROR))
                bad_signature = True
        if bad_signature:
            print(" signature validation failed\n")
            bad_signature = False
            for error in errors:
                print("---%s output(exit code %d):\n%s---" % error)
            if not keep_arg:
                print("WARN: Deleting corrupt file.")
                os.remove(name)
            if need_sig:
                print("-N/--need-sig is set. This download cannot be used.")
            continue
804 print(" validating hash...", end=' ')
805 if hash_arg and hash_arg != 'none':
806 realhash = validate_container(name, hash_arg)
808 realhash = "skipped calculation (--hash none)"
811 if realhash == hash_arg:
815 if hash_arg and hash_arg != 'none':
816 payloadhash = validate_payload(name, hash_arg)
818 payloadhash = "skipped calculation (--hash none)"
819 if payloadhash == hash_arg:
823 if not hash_arg or hash_arg == 'none':
824 scheme = urlparse(url).scheme
825 if not ALLOW_UNVERIFIED_DOWNLOADS:
826 print("ERROR: Cannot validate download (no hash or signature).")
827 if keep_arg == False:
829 print("\nWARN: Removing the downloaded file")
835 elif scheme not in SECURE_PROTOCOLS and scheme not in LOCAL_SCHEMES:
836 print("ERROR: This download uses an insecure protocol: '%s'." % (str(scheme),))
837 if keep_arg == False:
839 print("\nWARN: Removing the downloaded file")
845 print("ignoring errors")
848 print("invalid hash!")
849 print(" expected: %s" % hash_arg)
850 print(" actual: %s" % realhash)
851 print(" payload: %s" % payloadhash)
855 print("-n/--need-hash is set. This download cannot be used.")
856 if keep_arg == False:
858 print("\nWARN: Removing the downloaded file")
865 # If the signature validated, then we assume
866 # that the expected hash is just a typo.
868 # An invalid hash shouldn't cause us to remove
869 # the target file if the signature was valid.
870 # Also, if the developer is in progress of upgrading
871 # some package version or introduces a new one, and
872 # explicitly ran "gmake fetch", keep the downloaded
873 # file (Makefile is not in position to have a valid
874 # checksum entry just yet) so it does not have to be
878 print("ERROR: This download failed to validate.")
879 if keep_arg == False:
881 print("\nWARN: Removing the corrupt downloaded file")
if __name__ == "__main__":
    main()