# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#

#
# Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
#
#
# fetch.py - a file download utility
#
#  A simple program similar to wget(1), but handles local file copy, ignores
#  directories, and verifies file hashes.
#
import bz2
import errno
import getopt
import gzip
import hashlib
import os
import re
import shutil
import subprocess
import sys

from urllib import splittype
from urllib2 import urlopen
from urllib2 import Request

def printIOError(e, txt):
    """ Function to decode and print IOError type exception """
    print "I/O Error: " + txt + ": "
    if e.args:
        (code, message) = e.args
        print str(message) + " (" + str(code) + ")"

def validate_signature(path, signature):
    """Given paths to a file and a detached PGP signature, verify that
    the signature is valid for the file.  Current configuration allows for
    unrecognized keys to be downloaded as necessary."""

    # Find the root of the repo so that we can point GnuPG at the right
    # configuration and keyring.
    proc = subprocess.Popen(["git", "rev-parse", "--show-toplevel"],
        stdout=subprocess.PIPE)
    proc.wait()
    if proc.returncode != 0:
        return False
    out, err = proc.communicate()
    gpgdir = os.path.join(out.strip(), "tools", ".gnupg")
    # Skip the permissions warning: none of the information here is private,
    # so it is simplest not to worry about making git keep the directory
    # unreadable.
    try:
        proc = subprocess.Popen(["gpg", "--verify",
            "--no-permission-warning", "--homedir", gpgdir, signature,
            path], stdin=open("/dev/null"),
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    except OSError, e:
        # If the executable simply couldn't be found, just skip the
        # validation.
        if e.errno == errno.ENOENT:
            return False
        raise
    proc.wait()
    if proc.returncode != 0:
        # Only print GnuPG's output when there was a problem.
        print proc.stdout.read()
        return False
    return True

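# Illustrative usage of validate_signature() (a sketch only; the tarball and
# the detached ".asc" signature named below are hypothetical, and the signing
# key is assumed to be present in tools/.gnupg under the repo root):
#
#   if validate_signature("foo-1.0.tar.gz", "foo-1.0.tar.gz.asc"):
#       print "signature verified"
#   else:
#       print "signature check failed"
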
def validate(file, hash):
    """Given a file-like object and a hash string, compute the hash of the
    file contents and return it as an "algorithm:hexdigest" string for the
    caller to compare against the expected value."""

    try:
        algorithm, hashvalue = hash.split(':')
    except (AttributeError, ValueError):
        # No hash (or no "algorithm:" prefix) was supplied; default to
        # sha256 so a usable value can still be reported to the caller.
        algorithm = "sha256"

    # force migration away from sha1
    if algorithm == "sha1":
        algorithm = "sha256"

    m = hashlib.new(algorithm)
    while True:
        block = file.read(65536)
        if not block:
            break
        m.update(block)

    return "%s:%s" % (algorithm, m.hexdigest())

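# Illustrative sketch of the hash-string convention used by validate() (file
# name and digest are hypothetical; any algorithm known to hashlib may appear
# before the colon):
#
#   validate(open("foo-1.0.tar.gz"), "sha256:<expected-digest>")
#
# returns "sha256:<hexdigest of the file contents>", which the caller then
# compares against the expected string.
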
def validate_container(filename, hash):
    """Given a file path and a hash string, verify that the hash matches the
    file contents."""

    try:
        file = open(filename, 'r')
    except IOError, e:
        printIOError(e, "Can't open file " + filename)
        return False
    return validate(file, hash)

def validate_payload(filename, hash):
    """Given a file path and a hash string, verify that the hash matches the
    payload (uncompressed content) of the file."""

    expr_bz = re.compile('.+\.bz2$', re.IGNORECASE)
    expr_gz = re.compile('.+\.gz$', re.IGNORECASE)
    expr_tgz = re.compile('.+\.tgz$', re.IGNORECASE)

    try:
        if expr_bz.match(filename):
            file = bz2.BZ2File(filename, 'r')
        elif expr_gz.match(filename):
            file = gzip.GzipFile(filename, 'r')
        elif expr_tgz.match(filename):
            file = gzip.GzipFile(filename, 'r')
        else:
            return None
    except IOError, e:
        printIOError(e, "Can't open archive " + filename)
        return False
    return validate(file, hash)

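# Illustrative distinction between the two helpers above (file names are
# hypothetical): validate_container() hashes the archive bytes as stored on
# disk, while validate_payload() hashes the uncompressed payload, so a
# checksum recorded for the unpacked tarball can still be matched:
#
#   validate_container("foo-1.0.tar.gz", hash)   # hash of the .gz bytes
#   validate_payload("foo-1.0.tar.gz", hash)     # hash of the tar payload
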
def download(url, filename=None, user_agent_arg=None, quiet=None):
    """Download the content at the given URL to the given filename
    (defaulting to the basename of the URL if not given).  If 'quiet' is
    True, throw away any error messages.  Returns the name of the file to
    which the content was downloaded, or None on failure."""

    try:
        req = Request(url)
        if user_agent_arg is not None:
            req.add_header("User-Agent", user_agent_arg)
        src = urlopen(req)
    except IOError, e:
        if not quiet:
            printIOError(e, "Can't open url " + url)
        return None

    # 3xx, 4xx and 5xx (f|ht)tp codes designate unsuccessful actions
    if src.getcode() and (3 <= int(src.getcode() / 100) <= 5):
        if not quiet:
            print "Error code: " + str(src.getcode())
        return None

    if filename is None:
        filename = src.geturl().split('/')[-1]

    try:
        dst = open(filename, 'wb')
    except IOError, e:
        if not quiet:
            printIOError(e, "Can't open file " + filename + " for writing")
        return None

    shutil.copyfileobj(src, dst)
    src.close()
    dst.close()

    # return the name of the file that we downloaded the data to.
    return filename

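# Illustrative usage of download() (a sketch; the URL below is hypothetical):
#
#   name = download("http://example.com/foo-1.0.tar.gz", quiet=False)
#   if name is None:
#       print "download failed"
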
def download_paths(search, filename, url):
    """Returns a list of URLs where the file 'filename' might be found,
    using 'url', 'search', and $DOWNLOAD_SEARCH_PATH as places to look.

    If 'filename' is None, then the list will simply contain 'url'."""

    urls = list()

    if filename is not None:
        tmp = os.getenv('DOWNLOAD_SEARCH_PATH')
        if tmp:
            search += tmp.split(' ')

        file = os.path.basename(filename)
        urls = [base + '/' + file for base in search]

        # filename should always be first
        if filename in urls:
            urls.remove(filename)
        urls.insert(0, filename)

    # command line url is a fallback, so it's last
    if url is not None and url not in urls:
        urls.append(url)

    if filename is not None:
        tmp = os.getenv('DOWNLOAD_FALLBACK_PATH')
        if tmp:
            file = os.path.basename(filename)
            urls += [base + '/' + file for base in tmp.split(' ')]

    return urls

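# Illustrative sketch of the resulting search order (all values hypothetical).
# With DOWNLOAD_SEARCH_PATH="/net/cache/a /net/cache/b",
#
#   download_paths([], "archives/foo-1.0.tar.gz",
#                  "http://example.com/foo-1.0.tar.gz")
#
# would yield roughly:
#
#   ["archives/foo-1.0.tar.gz",
#    "/net/cache/a/foo-1.0.tar.gz",
#    "/net/cache/b/foo-1.0.tar.gz",
#    "http://example.com/foo-1.0.tar.gz"]
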
def download_from_paths(search_list, file_arg, url, link_arg, quiet=False):
    """Attempts to download a file from a number of possible locations.
    Generates a list of paths where the file ends up on the local
    filesystem.  This is a generator because while a download might be
    successful, the signature or hash may not validate, and the caller may
    want to try again from the next location.  The 'link_arg' argument is a
    boolean which, when True, specifies that if the source is not a remote
    URL and not already found where it should be, a symlink is made to the
    source rather than a copy."""

    for url in download_paths(search_list, file_arg, url):
        if not quiet:
            print "Source %s..." % url,

        scheme, path = splittype(url)
        name = file_arg

        if scheme in [None, 'file']:
            if os.path.exists(path) is False:
                if not quiet:
                    print "not found, skipping file copy"
                continue
            elif name and name != path:
                if link_arg is False:
                    print "\n    copying..."
                    shutil.copy2(path, name)
                else:
                    print "\n    linking..."
                    os.symlink(path, name)
            else:
                name = path
        elif scheme in ['http', 'https', 'ftp']:
            if not quiet:
                print "\n    downloading...",
            name = download(url, file_arg, quiet=quiet)
            if name is None:
                if not quiet:
                    print "failed"
                continue

        yield name

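# Illustrative use of the generator above (a sketch; the names and the hash
# value are hypothetical):
#
#   expected = "sha256:<expected-digest>"
#   for name in download_from_paths(["/net/cache"], "foo-1.0.tar.gz",
#                                   "http://example.com/foo-1.0.tar.gz", False):
#       if validate_container(name, expected) == expected:
#           break   # keep the first copy whose hash checks out
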
def usage():
    print "Usage: %s [-a|--user-agent (user-agent)] [-f|--file (file)] " \
        "[-l|--link] [-k|--keep] [-h|--hash (hash)] " \
        "[-s|--search (search-dir)] [-S|--sigurl (signature-url)] " \
        "--url (url)" % (sys.argv[0].split('/')[-1])
    sys.exit(1)

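# Example invocation (values are hypothetical), roughly as a makefile's
# "gmake fetch" target might run it:
#
#   python fetch.py --file foo-1.0.tar.gz \
#       --url http://example.com/foo-1.0.tar.gz \
#       --hash sha256:<expected-digest> \
#       --search /net/download-cache --keep
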
def main():
    # Make stdout unbuffered so progress messages appear immediately.
    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)

    user_agent_arg = file_arg = hash_arg = url_arg = sig_arg = None
    link_arg = keep_arg = False
    search_list = list()

    try:
        opts, args = getopt.getopt(sys.argv[1:], "a:f:h:lks:S:u:",
            ["file=", "link", "keep", "hash=", "search=", "url=",
             "sigurl=", "user-agent="])
    except getopt.GetoptError, err:
        print str(err)
        usage()
    for opt, arg in opts:
        if opt in ["-a", "--user-agent"]:
            user_agent_arg = arg
        elif opt in ["-f", "--file"]:
            file_arg = arg
        elif opt in ["-l", "--link"]:
            link_arg = True
        elif opt in ["-k", "--keep"]:
            keep_arg = True
        elif opt in ["-h", "--hash"]:
            hash_arg = arg
        elif opt in ["-s", "--search"]:
            search_list.append(arg)
        elif opt in ["-S", "--sigurl"]:
            sig_arg = arg
        elif opt in ["-u", "--url"]:
            url_arg = arg
        else:
            assert False, "unknown option"

    if url_arg is None:
        usage()
    for name in download_from_paths(search_list, file_arg, url_arg, link_arg):
        print "\n    validating signature...",

        sig_valid = False
        if sig_arg is None:
            print "skipping (no signature URL)"
        else:
            # Put the signature file in the same directory as the
            # file we're downloading.
            sig_file = os.path.join(
                os.path.dirname(file_arg),
                os.path.basename(sig_arg))
            # Validate with the first signature we find.
            for sig_file in download_from_paths(search_list, sig_file,
                    sig_arg, link_arg, True):
                if validate_signature(name, sig_file):
                    print "ok"
                    sig_valid = True
                    break
            else:
                print "failed (couldn't fetch signature)"
        print "    validating hash...",
        realhash = validate_container(name, hash_arg)

        if hash_arg is None:
            print "skipping (no hash)"
            print "hash is: %s" % realhash
        elif realhash == hash_arg:
            print "ok"
        else:
            payloadhash = validate_payload(name, hash_arg)
            if payloadhash == hash_arg:
                print "ok"
            else:
                # If the signature validated, then we assume
                # that the expected hash is just a typo, but we
                # report the mismatch so it can be corrected.
                if sig_valid:
                    print "invalid hash!"
                else:
                    print "corruption detected"
                print "    expected: %s" % hash_arg
                print "    actual:   %s" % realhash
                print "    payload:  %s" % payloadhash
                # An invalid hash shouldn't cause us to remove
                # the target file if the signature was valid.
                # Also, if the developer is in the process of upgrading
                # a package version or introducing a new one, and
                # explicitly ran "gmake fetch", keep the downloaded
                # file (the Makefile cannot have a valid checksum
                # entry just yet) so it does not have to be
                # downloaded all over again.
                if keep_arg == False:
                    if not sig_valid:
                        print "\nWARN: Removing the corrupt downloaded file"
                        os.remove(name)
                        continue
                else:
                    print "\nINFO: Keeping the downloaded file because asked to"

        # Stop at the first source that produced a usable (or explicitly
        # kept) file; report success to the caller.
        sys.exit(0)

    # Nothing could be downloaded from any of the candidate locations.
    sys.exit(1)

if __name__ == "__main__":
    main()