Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / tools / telemetry / third_party / gsutilz / gslib / util.py
blobece5112c34252b958b1bb5020541869c26401ff5
1 # -*- coding: utf-8 -*-
2 # Copyright 2010 Google Inc. All Rights Reserved.
4 # Licensed under the Apache License, Version 2.0 (the "License");
5 # you may not use this file except in compliance with the License.
6 # You may obtain a copy of the License at
8 # http://www.apache.org/licenses/LICENSE-2.0
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15 """Static data and helper functions."""
17 from __future__ import absolute_import
19 import errno
20 import logging
21 import math
22 import multiprocessing
23 import os
24 import pkgutil
25 import re
26 import struct
27 import sys
28 import tempfile
29 import textwrap
30 import threading
31 import traceback
32 import xml.etree.ElementTree as ElementTree
34 import boto
35 from boto import config
36 import boto.auth
37 from boto.exception import NoAuthHandlerFound
38 from boto.gs.connection import GSConnection
39 from boto.provider import Provider
40 from boto.pyami.config import BotoConfigLocations
41 import httplib2
42 from oauth2client.client import HAS_CRYPTO
43 from retry_decorator import retry_decorator
45 import gslib
46 from gslib.exception import CommandException
47 from gslib.storage_url import StorageUrlFromString
48 from gslib.translation_helper import AclTranslation
49 from gslib.translation_helper import GenerationFromUrlAndString
50 from gslib.translation_helper import S3_ACL_MARKER_GUID
51 from gslib.translation_helper import S3_DELETE_MARKER_GUID
52 from gslib.translation_helper import S3_MARKER_GUIDS
# pylint: disable=g-import-not-at-top
try:
  # This module doesn't necessarily exist on Windows.
  import resource
  HAS_RESOURCE_MODULE = True
except ImportError:
  # The exception object itself is not needed; only record that the
  # "resource" module is unavailable.
  HAS_RESOURCE_MODULE = False
# Binary size units, expressed in bytes.
ONE_KIB = 2 ** 10
ONE_MIB = ONE_KIB * 1024
TWO_MIB = ONE_MIB * 2
EIGHT_MIB = ONE_MIB * 8
TEN_MIB = ONE_MIB * 10
DEFAULT_FILE_BUFFER_SIZE = ONE_KIB * 8
_DEFAULT_LINES = 25
# The default timeout for SSL read errors is infinite, which could make gsutil
# hang on a network disconnect; use a finite number of seconds instead.
SSL_TIMEOUT = 60

# Progress callbacks during uploads/downloads (JSON API) start at one per
# 64 KiB. The callback implementation should back off until it reaches the
# maximum so that callbacks do not create huge amounts of log output.
START_CALLBACK_PER_BYTES = 64 * 1024
MAX_CALLBACK_PER_BYTES = 100 * 1024 * 1024

# Upload/download files in 8 KiB chunks over the HTTP connection.
TRANSFER_BUFFER_SIZE = 8 * 1024

# Default number of progress callbacks during transfer (XML API).
XML_PROGRESS_CALLBACKS = 10

# For files at least this large, output a message saying an operation (like
# hashing or gzipping) is running on the file, so the command does not appear
# to the user to be hanging.
MIN_SIZE_COMPUTE_LOGGING = 100 * 1024 * 1024  # 100 MiB
# Sentinel meaning "no upper bound" (largest plain int on this interpreter).
NO_MAX = sys.maxint

# Encoding name used when encoding text for output.
UTF8 = 'utf-8'

# Matches "<maj>[.<min>[<suffix>]]" at the start of a version string; used by
# CompareVersions().
VERSION_MATCHER = re.compile(r'^(?P<maj>\d+)(\.(?P<min>\d+)(?P<suffix>.*))?')

RELEASE_NOTES_URL = 'https://pub.storage.googleapis.com/gsutil_ReleaseNotes.txt'
# Binary exponentiation strings.
# Each entry is (power of two, byte suffix, bit suffix[, short suffix]); the
# position of an entry in this list is the index stored in SUFFIX_TO_SI.
_EXP_STRINGS = [
    (0, 'B', 'bit'),
    (10, 'KiB', 'Kibit', 'K'),
    (20, 'MiB', 'Mibit', 'M'),
    (30, 'GiB', 'Gibit', 'G'),
    (40, 'TiB', 'Tibit', 'T'),
    (50, 'PiB', 'Pibit', 'P'),
    (60, 'EiB', 'Eibit', 'E'),
]
# A module-level "global" statement has no effect; it only flags that
# `manager` is a module global, assigned by
# InitializeMultiprocessingVariables().
global manager  # pylint: disable=global-at-module-level
# Held by GetCertsFile() while it selects or creates the on-disk certs file.
certs_file_lock = threading.Lock()
# Temp certs files created by GetCertsFile(); reported by GetCleanupFiles().
configured_certs_files = []
def InitializeMultiprocessingVariables():
  """Creates the module-level multiprocessing manager.

  Assigns a new multiprocessing.Manager() to the module global `manager`.
  See gslib.command.InitializeMultiprocessingVariables for an explanation
  of why this is necessary.
  """
  global manager  # pylint: disable=global-variable-undefined
  manager = multiprocessing.Manager()
def _GenerateSuffixRegex():
  """Builds the lookup table and regex for human-readable byte counts.

  Returns:
    (suffix_to_si, matcher):
      suffix_to_si: dict mapping each lower-cased suffix in _EXP_STRINGS to the
          index of its entry in _EXP_STRINGS.
      matcher: compiled regex with named groups 'num' and (optional) 'suffix'.
  """
  suffix_to_si = {}
  all_suffixes = []
  for index, entry in enumerate(_EXP_STRINGS):
    # The first element of each entry is the exponent; the rest are suffixes.
    for raw_suffix in entry[1:]:
      lowered = raw_suffix.lower()
      suffix_to_si[lowered] = index
      all_suffixes.append(lowered)
  pattern = (r'(?P<num>\d*\.\d+|\d+)\s*(?P<suffix>%s)?'
             % '|'.join(all_suffixes))
  return suffix_to_si, re.compile(pattern)
# Suffix lookup table and compiled matcher used by HumanReadableToBytes().
SUFFIX_TO_SI, MATCH_HUMAN_BYTES = _GenerateSuffixRegex()

SECONDS_PER_DAY = 3600 * 24

# Detect platform types.
PLATFORM = str(sys.platform).lower()
IS_WINDOWS = 'win32' in PLATFORM
IS_CYGWIN = 'cygwin' in PLATFORM
IS_LINUX = 'linux' in PLATFORM
IS_OSX = 'darwin' in PLATFORM

# On Unix-like systems, we will set the maximum number of open files to avoid
# hitting the limit imposed by the OS. This number was obtained experimentally.
MIN_ACCEPTABLE_OPEN_FILES_LIMIT = 1000

GSUTIL_PUB_TARBALL = 'gs://pub/gsutil.tar.gz'

# Short alias for the retry decorator.
Retry = retry_decorator.retry  # pylint: disable=invalid-name
# Cache the values from this check such that they're available to all callers
# without needing to run all the checks again (some of these, such as calling
# multiprocessing.Manager(), are expensive operations).
cached_multiprocessing_is_available = None
cached_multiprocessing_is_available_stack_trace = None
# MultiprocessingIsAvailable() declares and reads the stack trace under this
# name rather than the "_is_available_stack_trace" spelling above, so
# initialize it too. The older name is kept in case anything else reads it.
cached_multiprocessing_check_stack_trace = None
cached_multiprocessing_is_available_message = None
class ListingStyle(object):
  """Enum-like holder for the available listing styles."""
  SHORT, LONG, LONG_LONG = 'SHORT', 'LONG', 'LONG_LONG'
def UsingCrcmodExtension(crcmod):
  """Reports whether the given crcmod module uses its extension.

  Args:
    crcmod: Module object to inspect.

  Returns:
    The value of crcmod.crcmod._usingExtension when both attributes exist;
    otherwise a falsy value (None when an attribute is missing).
  """
  inner_module = getattr(crcmod, 'crcmod', None)
  if not inner_module:
    return inner_module
  return getattr(inner_module, '_usingExtension', None)
def CreateDirIfNeeded(dir_path, mode=0o777):
  """Creates dir_path (and parents) unless it already exists.

  Args:
    dir_path: Directory to create.
    mode: Permission bits passed to os.makedirs.

  Raises:
    OSError: if creation fails for a reason other than the directory already
        existing.
  """
  if os.path.exists(dir_path):
    return
  try:
    # Unfortunately, even though we catch and ignore EEXIST, this call will
    # output a (needless) error message (no way to avoid that in Python).
    os.makedirs(dir_path, mode)
  except OSError as e:
    # Ignore 'already exists' in case user tried to start up several
    # resumable uploads concurrently from a machine where no tracker dir had
    # yet been created.
    if e.errno != errno.EEXIST:
      raise
def GetGsutilStateDir():
  """Returns the directory used for gsutil state files, creating it if needed.

  Certain operations, such as cross-process credential sharing and
  resumable transfer tracking, need a known location for state files which
  are created by gsutil as-needed.

  This location should only be used for storing data that is required to be in
  a static location.

  Returns:
    Path to directory for gsutil static state files: the 'state_dir' option of
    the 'GSUtil' config section, defaulting to ~/.gsutil.
  """
  default_dir = os.path.expanduser(os.path.join('~', '.gsutil'))
  state_dir = config.get('GSUtil', 'state_dir', default_dir)
  CreateDirIfNeeded(state_dir)
  return state_dir
def GetCredentialStoreFilename():
  """Returns the path of the 'credstore' file in the gsutil state dir."""
  state_dir = GetGsutilStateDir()
  return os.path.join(state_dir, 'credstore')
def GetGceCredentialCacheFilename():
  """Returns the path of the 'gcecredcache' file in the gsutil state dir."""
  state_dir = GetGsutilStateDir()
  return os.path.join(state_dir, 'gcecredcache')
def GetTabCompletionLogFilename():
  """Returns the path of 'tab-completion-logs' in the gsutil state dir."""
  state_dir = GetGsutilStateDir()
  return os.path.join(state_dir, 'tab-completion-logs')
def GetTabCompletionCacheFilename():
  """Returns the tab-completion cache path, creating its directory if needed."""
  cache_dir = os.path.join(GetGsutilStateDir(), 'tab-completion')
  # Limit read permissions on the directory to owner for privacy.
  CreateDirIfNeeded(cache_dir, mode=0o700)
  return os.path.join(cache_dir, 'cache')
def PrintTrackerDirDeprecationWarningIfNeeded():
  """Warns on stderr if the deprecated resumable_tracker_dir option is set."""
  # TODO: Remove this along with the tracker_dir config value 1 year after
  # 4.6 release date. Use state_dir instead.
  if not config.has_option('GSUtil', 'resumable_tracker_dir'):
    return
  sys.stderr.write(
      'Warning: you have set resumable_tracker_dir in your .boto '
      'configuration file. This configuration option is deprecated; please '
      'use the state_dir configuration option instead.\n')
# Import-time setup: emit the deprecation warning (if applicable) and make
# sure the state directory exists before any state file path is used.
PrintTrackerDirDeprecationWarningIfNeeded()
CreateDirIfNeeded(GetGsutilStateDir())
# Name of file where we keep the timestamp for the last time we checked whether
# a new version of gsutil is available.
LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE = (
    os.path.join(GetGsutilStateDir(), '.last_software_update_check'))
def HasConfiguredCredentials():
  """Determines whether any boto credentials are configured.

  Returns:
    True if the 'Credentials' config section holds a complete set of Google
    HMAC, Amazon HMAC, OAuth2 refresh token, or (when HAS_CRYPTO) service
    account options. Otherwise the auth handler boto finds for the Google
    provider, or None if there is none or it is the no-op handler.
  """
  def _HasAllOptions(*option_names):
    return all(config.has_option('Credentials', option_name)
               for option_name in option_names)

  if (_HasAllOptions('gs_access_key_id', 'gs_secret_access_key') or
      _HasAllOptions('aws_access_key_id', 'aws_secret_access_key') or
      _HasAllOptions('gs_oauth2_refresh_token') or
      (HAS_CRYPTO and
       _HasAllOptions('gs_service_client_id', 'gs_service_key_file'))):
    return True

  try:
    auth_handler = boto.auth.get_auth_handler(
        GSConnection.DefaultHost, config, Provider('google'),
        requested_capability=['s3'])
  except NoAuthHandlerFound:
    return None

  # Exclude the no-op auth handler as indicating credentials are configured.
  # Note we can't use isinstance() here because the no-op module may not be
  # imported so we can't get a reference to the class type.
  handler_class = getattr(auth_handler, '__class__', None)
  if getattr(handler_class, '__name__', None) == 'NoOpAuth':
    return None
  return auth_handler
def ConfigureNoOpAuthIfNeeded():
  """Sets up no-op auth handler if no boto credentials are configured.

  Raises:
    CommandException: if a service account client id is configured but
        HAS_CRYPTO is false.
  """
  if HasConfiguredCredentials():
    return

  if (not config.has_option('Credentials', 'gs_service_client_id')
      or HAS_CRYPTO):
    # With no boto config file the user can still access publicly readable
    # buckets and objects.
    from gslib import no_op_auth_plugin  # pylint: disable=unused-variable
    return

  problem = (
      'Your gsutil is configured with an OAuth2 service account, but '
      'you do not have PyOpenSSL or PyCrypto 2.6 or later installed. '
      'Service account authentication requires one of these libraries; ')
  # The suggested remedy depends on whether CLOUDSDK_WRAPPER is set to '1'.
  if os.environ.get('CLOUDSDK_WRAPPER') == '1':
    remedy = (
        'please reactivate your service account via the gcloud auth '
        'command and ensure any gcloud packages necessary for '
        'service accounts are present.')
  else:
    remedy = (
        'please install either of them to proceed, or configure a '
        'different type of credentials with "gsutil config".')
  raise CommandException('\n'.join(textwrap.wrap(problem + remedy)))
def GetConfigFilePath():
  """Returns the first boto config location that can be opened for reading.

  Returns:
    The first path in BotoConfigLocations that opens successfully, or the
    string 'no config found' if none does.
  """
  for candidate in BotoConfigLocations:
    try:
      with open(candidate, 'r'):
        return candidate
    except IOError:
      continue
  return 'no config found'
def GetBotoConfigFileList():
  """Returns list of boto config files that exist.

  The candidates are boto's config locations plus, when set, the path in the
  AWS_CREDENTIAL_FILE environment variable.

  Returns:
    List of existing config file paths, without duplicates, in candidate order.
  """
  # Work on a copy: appending to boto's own BotoConfigLocations list would
  # change it for every other user and grow it on each call.
  config_paths = list(boto.pyami.config.BotoConfigLocations)
  if 'AWS_CREDENTIAL_FILE' in os.environ:
    config_paths.append(os.environ['AWS_CREDENTIAL_FILE'])
  existing_files = []
  for config_path in config_paths:
    if config_path not in existing_files and os.path.exists(config_path):
      existing_files.append(config_path)
  return existing_files
def GetCertsFile():
  """Configures and returns the CA Certificates file.

  If one is already configured, use it. Otherwise, use the cert roots
  distributed with gsutil, copying them to a temp file when they are not
  available as a file on disk.

  Returns:
    string filename of the certs file to use.

  Raises:
    CommandException: if the bundled certificates data cannot be found.
  """
  certs_file = boto.config.get('Boto', 'ca_certificates_file', None)
  if not certs_file:
    with certs_file_lock:
      if configured_certs_files:
        # Reuse the temp file written by an earlier call.
        disk_certs_file = configured_certs_files[0]
      else:
        disk_certs_file = os.path.abspath(
            os.path.join(gslib.GSLIB_DIR, 'data', 'cacerts.txt'))
        if not os.path.exists(disk_certs_file):
          # If the file is not present on disk, this means the gslib module
          # doesn't actually exist on disk anywhere. This can happen if it's
          # being imported from a zip file. Unfortunately, we have to copy the
          # certs file to a local temp file on disk because the underlying SSL
          # socket requires it to be a filesystem path.
          certs_data = pkgutil.get_data('gslib', 'data/cacerts.txt')
          if not certs_data:
            raise CommandException('Certificates file not found. Please '
                                   'reinstall gsutil from scratch')
          fd, fname = tempfile.mkstemp(suffix='.txt', prefix='gsutil-cacerts')
          # The context manager closes the handle even if the write fails.
          with os.fdopen(fd, 'w') as f:
            f.write(certs_data)
          configured_certs_files.append(fname)
          disk_certs_file = fname
      certs_file = disk_certs_file
  return certs_file
def GetCleanupFiles():
  """Returns a list of temp files to delete (if possible) when program exits."""
  # Return a new list so callers cannot alter configured_certs_files.
  return list(configured_certs_files)
def ProxyInfoFromEnvironmentVar(proxy_env_var):
  """Reads proxy info from the environment and converts to httplib2.ProxyInfo.

  Args:
    proxy_env_var: Environment variable string to read, such as http_proxy or
                   https_proxy.

  Returns:
    httplib2.ProxyInfo constructed from the environment string. If the
    variable is unset/empty or its name does not start with 'http', a
    ProxyInfo with no host and port 0 is returned.
  """
  proxy_url = os.environ.get(proxy_env_var)
  env_name = proxy_env_var.lower() if proxy_url else ''
  if not env_name.startswith('http'):
    return httplib2.ProxyInfo(httplib2.socks.PROXY_TYPE_HTTP, None, 0)

  # The protocol is the part of the variable name before the underscore.
  protocol = env_name.split('_')[0]
  if not proxy_url.lower().startswith('http'):
    # proxy_info_from_url requires a protocol, which is always http or https.
    proxy_url = protocol + '://' + proxy_url
  return httplib2.proxy_info_from_url(proxy_url, method=protocol)
def GetNewHttp(http_class=httplib2.Http, **kwargs):
  """Creates and returns a new httplib2.Http instance.

  Proxy settings come from the 'Boto' config section; if no proxy host and
  port are configured there, the http_proxy, https_proxy and HTTPS_PROXY
  environment variables are tried in that order.

  Args:
    http_class: Optional custom Http class to use.
    **kwargs: Arguments to pass to http_class constructor. 'ca_certs' and
        'timeout' are always overwritten.

  Returns:
    An initialized httplib2.Http instance.
  """
  proxy_info = httplib2.ProxyInfo(
      # NOTE(review): 3 is presumably httplib2.socks.PROXY_TYPE_HTTP - confirm.
      proxy_type=3,
      proxy_host=boto.config.get('Boto', 'proxy', None),
      proxy_port=boto.config.getint('Boto', 'proxy_port', 0),
      proxy_user=boto.config.get('Boto', 'proxy_user', None),
      proxy_pass=boto.config.get('Boto', 'proxy_pass', None),
      # getbool, not get: get() returns the raw string, so a configured
      # 'False' would be truthy.
      proxy_rdns=boto.config.getbool('Boto', 'proxy_rdns', False))

  if not (proxy_info.proxy_host and proxy_info.proxy_port):
    # Fall back to using the environment variable.
    for proxy_env_var in ['http_proxy', 'https_proxy', 'HTTPS_PROXY']:
      if proxy_env_var in os.environ and os.environ[proxy_env_var]:
        proxy_info = ProxyInfoFromEnvironmentVar(proxy_env_var)
        # Assume proxy_rdns is True if a proxy environment variable exists.
        proxy_info.proxy_rdns = boto.config.getbool(
            'Boto', 'proxy_rdns', True)
        break

  # Some installers don't package a certs file with httplib2, so use the
  # one included with gsutil.
  kwargs['ca_certs'] = GetCertsFile()
  # Use a non-infinite SSL timeout to avoid hangs during network flakiness.
  kwargs['timeout'] = SSL_TIMEOUT
  http = http_class(proxy_info=proxy_info, **kwargs)
  # Default to validating certificates when the option is not set, matching
  # the default used for CERTIFICATE_VALIDATION_ENABLED.
  http.disable_ssl_certificate_validation = (not config.getbool(
      'Boto', 'https_validate_certificates', True))
  return http
def GetNumRetries():
  """Returns the 'num_retries' option of the 'Boto' section (default 6)."""
  default_retries = 6
  return config.getint('Boto', 'num_retries', default_retries)
def GetMaxRetryDelay():
  """Returns the 'max_retry_delay' option of the 'Boto' section (default 60)."""
  default_delay = 60
  return config.getint('Boto', 'max_retry_delay', default_delay)
# Resumable downloads and uploads make one HTTP call per chunk (and must be
# in multiples of 256KiB). Overridable for testing.
def GetJsonResumableChunkSize():
  """Returns the JSON API resumable chunk size, as a multiple of 256 KiB.

  Reads 'json_resumable_chunk_size' from the 'GSUtil' config section
  (default 100 MiB). A value of 0 becomes 256 KiB; any other value that is
  not a multiple of 256 KiB is rounded up to the next multiple.

  Returns:
    Chunk size in bytes.
  """
  chunk_multiple = 256 * 1024
  chunk_size = config.getint('GSUtil', 'json_resumable_chunk_size',
                             100 * 1024 * 1024)
  if chunk_size == 0:
    chunk_size = chunk_multiple
  else:
    # The modulus must be taken against the whole multiple. The unparenthesized
    # form "chunk_size % 1024*256" computes (chunk_size % 1024) * 256 and so
    # skips the rounding for any multiple of 1024.
    remainder = chunk_size % chunk_multiple
    if remainder != 0:
      chunk_size += chunk_multiple - remainder
  return chunk_size
def _RoundToNearestExponent(num):
  """Picks the largest unit in _EXP_STRINGS that does not exceed num.

  Args:
    num: The number to scale.

  Returns:
    (index, value): index of the chosen _EXP_STRINGS entry, and num divided by
    that entry's power of two, rounded to 2 decimal places.
  """
  index = 0
  for candidate in range(1, len(_EXP_STRINGS)):
    if num < 2 ** _EXP_STRINGS[candidate][0]:
      break
    index = candidate
  scaled = float(num) / 2 ** _EXP_STRINGS[index][0]
  return index, round(scaled, 2)
def MakeHumanReadable(num):
  """Generates human readable string for a number of bytes.

  Args:
    num: The number, in bytes.

  Returns:
    A string form of the number using size abbreviations (KiB, MiB, etc.).
  """
  index, scaled_value = _RoundToNearestExponent(num)
  byte_suffix = _EXP_STRINGS[index][1]
  return '%g %s' % (scaled_value, byte_suffix)
def MakeBitsHumanReadable(num):
  """Generates human readable string for a number of bits.

  Args:
    num: The number, in bits.

  Returns:
    A string form of the number using bit size abbreviations (Kibit, Mibit,
    etc.).
  """
  index, scaled_value = _RoundToNearestExponent(num)
  bit_suffix = _EXP_STRINGS[index][2]
  return '%g %s' % (scaled_value, bit_suffix)
def HumanReadableToBytes(human_string):
  """Tries to convert a human-readable string to a number of bytes.

  Args:
    human_string: A string supplied by user, e.g. '1M', '3 GiB'. Matching is
        case-insensitive.
  Returns:
    An integer containing the number of bytes.
  Raises:
    ValueError: on an invalid string.
  """
  human_string = human_string.lower()
  match = MATCH_HUMAN_BYTES.match(human_string)
  if not match:
    raise ValueError('Invalid byte string specified: %s' % human_string)

  value = float(match.group('num'))
  suffix = match.group('suffix')
  if suffix:
    exponent = _EXP_STRINGS[SUFFIX_TO_SI[suffix]][0]
    value *= (2.0 ** exponent)
  return int(round(value))
def Percentile(values, percent, key=lambda x: x):
  """Find the percentile of a list of values.

  Taken from: http://code.activestate.com/recipes/511478/

  Args:
    values: a list of numeric values. Note that the values MUST BE already
            sorted.
    percent: a float value from 0.0 to 1.0.
    key: optional key function to compute value from each element of the list
         of values.

  Returns:
    The percentile of the values, linearly interpolated between the two
    nearest elements when it falls between them; None if values is empty.
  """
  if not values:
    return None
  position = (len(values) - 1) * percent
  lower = math.floor(position)
  upper = math.ceil(position)
  if lower == upper:
    # The position lands exactly on an element.
    return key(values[int(position)])
  lower_part = key(values[int(lower)]) * (upper - position)
  upper_part = key(values[int(upper)]) * (position - lower)
  return lower_part + upper_part
def RemoveCRLFFromString(input_str):
  """Returns the input string with all \\n and \\r removed."""
  without_cr = input_str.replace('\r', '')
  return without_cr.replace('\n', '')
def UnaryDictToXml(message):
  """Generates XML representation of a nested dict.

  This dict contains exactly one top-level entry and an arbitrary number of
  2nd-level entries, e.g. capturing a WebsiteConfiguration message.

  Args:
    message: The dict encoding the message.

  Returns:
    XML string representation of the input dict, with the 2nd-level entries
    emitted in sorted order.

  Raises:
    Exception: if dict does not contain exactly one top-level entry.
  """
  entry_count = len(message)
  if entry_count != 1:
    raise Exception('Expected dict of size 1, got size %d' % entry_count)

  root_name, children = list(message.items())[0]
  root = ElementTree.Element(root_name)
  for child_name, child_text in sorted(children.items()):
    ElementTree.SubElement(root, child_name).text = child_text
  return ElementTree.tostring(root)
def LookUpGsutilVersion(gsutil_api, url_str):
  """Looks up the gsutil version of the specified gsutil tarball URL.

  Version is specified in the metadata field set on that object.

  Args:
    gsutil_api: gsutil Cloud API to use when retrieving gsutil tarball.
    url_str: tarball URL to retrieve (such as 'gs://pub/gsutil.tar.gz').

  Returns:
    Value of the object's 'gsutil_version' metadata property if URL is a cloud
    URL whose object has that property, else None.
  """
  url = StorageUrlFromString(url_str)
  if not url.IsCloudUrl():
    return None

  obj = gsutil_api.GetObjectMetadata(url.bucket_name, url.object_name,
                                     provider=url.scheme,
                                     fields=['metadata'])
  if not (obj.metadata and obj.metadata.additionalProperties):
    return None
  for prop in obj.metadata.additionalProperties:
    if prop.key == 'gsutil_version':
      return prop.value
  return None
def GetGsutilVersionModifiedTime():
  """Returns unix timestamp of when the VERSION file was last modified.

  Returns 0 when gslib.VERSION_FILE is not set.
  """
  version_file = gslib.VERSION_FILE
  if version_file:
    return int(os.path.getmtime(version_file))
  return 0
def IsRunningInteractively():
  """Returns True if stdout, stderr and stdin are all attached to a TTY."""
  # Checked in this order; evaluation stops at the first non-TTY stream.
  return all(stream.isatty()
             for stream in (sys.stdout, sys.stderr, sys.stdin))
def _HttpsValidateCertifcatesEnabled():
  """Returns whether HTTPS certificate validation is enabled in boto config.

  Returns:
    Boolean value of 'https_validate_certificates' in the 'Boto' section;
    True when the option is not set.
  """
  # getbool, not get: get() returns the raw config string, so a configured
  # value of 'False' would be truthy.
  return config.getbool('Boto', 'https_validate_certificates', True)

CERTIFICATE_VALIDATION_ENABLED = _HttpsValidateCertifcatesEnabled()
def _BotoIsSecure():
  """Returns whether boto is configured to use secure connections.

  Returns:
    Boolean value of 'is_secure' in the 'Boto' section; True when the option
    is not set.
  """
  # getbool, not get: get() returns the raw config string, so a configured
  # value of 'False' would be truthy.
  return config.getbool('Boto', 'is_secure', True)

BOTO_IS_SECURE = _BotoIsSecure()
def ResumableThreshold():
  """Returns 'resumable_threshold' from the 'GSUtil' section (default 8 MiB)."""
  default_threshold = EIGHT_MIB
  return config.getint('GSUtil', 'resumable_threshold', default_threshold)
def AddAcceptEncoding(headers):
  """Adds accept-encoding:gzip to the dictionary of headers.

  Args:
    headers: Dict of headers, modified in place. An existing
        'accept-encoding' entry is left untouched.
  """
  # Only enable gzip when the caller has not already chosen an encoding.
  headers.setdefault('accept-encoding', 'gzip')
# pylint: disable=too-many-statements
def PrintFullInfoAboutObject(bucket_listing_ref, incl_acl=True):
  """Print full info for given object (like what displays for gsutil ls -L).

  Args:
    bucket_listing_ref: BucketListingRef being listed.
                        Must have ref_type OBJECT and a populated root_object
                        with the desired fields.
    incl_acl: True if ACL info should be output.

  Returns:
    Tuple (number of objects, object_length). Both are 0 when the object is
    treated as an S3 delete marker.
  """
  url_str = bucket_listing_ref.url_string
  storage_url = StorageUrlFromString(url_str)
  obj = bucket_listing_ref.root_object

  is_delete_marker = (
      obj.metadata and
      S3_DELETE_MARKER_GUID in obj.metadata.additionalProperties)
  if is_delete_marker:
    num_objs, num_bytes = 0, 0
    url_str += '<DeleteMarker>'
  else:
    num_objs, num_bytes = 1, obj.size

  print('%s:' % url_str.encode(UTF8))
  if obj.updated:
    creation_time = obj.updated.strftime('%a, %d %b %Y %H:%M:%S GMT')
    print('\tCreation time:\t\t%s' % creation_time)
  # Optional headers, printed only when set, in this order.
  optional_headers = (
      ('cacheControl', '\tCache-Control:\t\t%s'),
      ('contentDisposition', '\tContent-Disposition:\t\t%s'),
      ('contentEncoding', '\tContent-Encoding:\t\t%s'),
      ('contentLanguage', '\tContent-Language:\t%s'),
  )
  for attr_name, line_format in optional_headers:
    header_value = getattr(obj, attr_name)
    if header_value:
      print(line_format % header_value)
  print('\tContent-Length:\t\t%s' % obj.size)
  print('\tContent-Type:\t\t%s' % obj.contentType)
  if obj.componentCount:
    print('\tComponent-Count:\t%d' % obj.componentCount)

  # Split metadata into S3 marker properties (kept for the ACL output below)
  # and the remaining properties, which are printed.
  marker_props = {}
  if obj.metadata and obj.metadata.additionalProperties:
    printable_props = []
    for prop in obj.metadata.additionalProperties:
      if prop.key in S3_MARKER_GUIDS:
        marker_props[prop.key] = prop.value
      else:
        printable_props.append(prop)
    if printable_props:
      print('\tMetadata:')
      for prop in printable_props:
        print(('\t\t%s:\t\t%s' % (prop.key, prop.value)).encode(UTF8))

  if obj.crc32c:
    print('\tHash (crc32c):\t\t%s' % obj.crc32c)
  if obj.md5Hash:
    print('\tHash (md5):\t\t%s' % obj.md5Hash)
  print('\tETag:\t\t\t%s' % obj.etag.strip('"\''))
  if obj.generation:
    generation_str = GenerationFromUrlAndString(storage_url, obj.generation)
    print('\tGeneration:\t\t%s' % generation_str)
  if obj.metageneration:
    print('\tMetageneration:\t\t%s' % obj.metageneration)

  if incl_acl:
    # JSON API won't return acls as part of the response unless we have
    # full control scope
    if obj.acl:
      print('\tACL:\t\t%s' % AclTranslation.JsonFromMessage(obj.acl))
    elif S3_ACL_MARKER_GUID in marker_props:
      print('\tACL:\t\t%s' % marker_props[S3_ACL_MARKER_GUID])
    else:
      print('\tACL:\t\t\tACCESS DENIED. Note: you need OWNER '
            'permission\n\t\t\t\ton the object to read its ACL.')

  return (num_objs, num_bytes)
def CompareVersions(first, second):
  """Compares the first and second gsutil version strings.

  For example, 3.33 > 3.7, and 4.1 is a greater major version than 3.33.
  Does not handle multiple periods (e.g. 3.3.4) or complicated suffixes
  (e.g., 3.3RC4 vs. 3.3RC5). A version string with a suffix is treated as
  less than its non-suffix counterpart (e.g. 3.32 > 3.32pre).

  Args:
    first: First gsutil version string.
    second: Second gsutil version string.

  Returns:
    (g, m):
       g is True if first known to be greater than second, else False.
       m is True if first known to be greater by at least 1 major version,
         else False.
  """
  first_match = VERSION_MATCHER.match(str(first))
  second_match = VERSION_MATCHER.match(str(second))

  # If passed strings we don't know how to handle, be conservative.
  if not first_match or not second_match:
    return (False, False)

  def _Parts(match):
    # A missing minor component counts as 0.
    minor = match.group('min')
    return (int(match.group('maj')), int(minor) if minor else 0,
            match.group('suffix'))

  first_major, first_minor, first_suffix = _Parts(first_match)
  second_major, second_minor, second_suffix = _Parts(second_match)

  if first_major > second_major:
    return (True, True)
  if first_major < second_major:
    return (False, False)
  if first_minor != second_minor:
    return (first_minor > second_minor, False)
  # Same major and minor: first is greater only if it alone has no suffix.
  return (bool(second_suffix) and not first_suffix, False)
def _IncreaseSoftLimitForResource(resource_name, fallback_value):
  """Tries to raise the soft limit of the given resource.

  The soft limit is used for this process (and its children), but the
  hard limit is set by the system and cannot be exceeded.

  We will first try to set the soft limit to the hard limit's value; if that
  fails, we will try to set the soft limit to the fallback_value iff this would
  increase the soft limit.

  Args:
    resource_name: Name of the resource to increase the soft limit for.
    fallback_value: Fallback value to be used if we couldn't set the
                    soft value to the hard value (e.g., if the hard value
                    is "unlimited").

  Returns:
    Current soft limit for the resource (after any changes we were able to
    make), or -1 if the resource doesn't exist.
  """
  rlimit_errors = (resource.error, ValueError)

  # Get the value of the resource.
  try:
    soft_limit, hard_limit = resource.getrlimit(resource_name)
  except rlimit_errors:
    # The resource wasn't present, so we can't do anything here.
    return -1

  # Try to set the value of the soft limit to the value of the hard limit.
  if hard_limit > soft_limit:  # Some OS's report 0 for "unlimited".
    try:
      resource.setrlimit(resource_name, (hard_limit, hard_limit))
    except rlimit_errors:
      # We'll ignore this and try the fallback value.
      pass
    else:
      return hard_limit

  # The fallback is only worth trying if it would raise the soft limit.
  if soft_limit >= fallback_value:
    return soft_limit
  try:
    resource.setrlimit(resource_name, (fallback_value, hard_limit))
  except rlimit_errors:
    # We couldn't change the soft limit, so just report the current
    # value of the soft limit.
    return soft_limit
  return fallback_value
def GetCloudApiInstance(cls, thread_state=None):
  """Gets a gsutil Cloud API instance.

  Since Cloud API implementations are not guaranteed to be thread-safe, each
  thread needs its own instance. These instances are passed to each thread
  via the thread pool logic in command.

  Args:
    cls: Command class to be used for single-threaded case.
    thread_state: Per thread state from this thread containing a gsutil
                  Cloud API instance.

  Returns:
    thread_state if it is truthy, otherwise cls.gsutil_api.
  """
  if thread_state:
    return thread_state
  return cls.gsutil_api
def GetFileSize(fp, position_to_eof=False):
  """Returns size of file, optionally leaving fp positioned at EOF.

  Args:
    fp: Seekable file object.
    position_to_eof: If True, leave fp at end of file; otherwise restore the
        position fp had on entry.

  Returns:
    The offset of the end of the file.
  """
  if position_to_eof:
    fp.seek(0, os.SEEK_END)
    return fp.tell()

  saved_pos = fp.tell()
  fp.seek(0, os.SEEK_END)
  file_size = fp.tell()
  fp.seek(saved_pos)
  return file_size
def GetStreamFromFileUrl(storage_url):
  """Returns a readable stream for the given file URL.

  Args:
    storage_url: URL object providing IsStream() and object_name.

  Returns:
    sys.stdin if the URL is a stream, else the file named by
    storage_url.object_name opened in binary read mode.
  """
  if not storage_url.IsStream():
    return open(storage_url.object_name, 'rb')
  return sys.stdin
def UrlsAreForSingleProvider(url_args):
  """Tests whether the URLs are all for a single provider.

  Args:
    url_args: Strings to check.

  Returns:
    True if URLs are for single provider, False otherwise (including when
    url_args is empty).
  """
  provider = None
  for url_str in url_args:
    scheme = StorageUrlFromString(url_str).scheme
    if not provider:
      # First scheme seen; all later URLs must match it.
      provider = scheme
    elif scheme != provider:
      return False
  return provider is not None
def HaveFileUrls(args_to_check):
  """Checks whether args_to_check contain any file URLs.

  Args:
    args_to_check: Command-line argument subset to check.

  Returns:
    True if args_to_check contains any file URLs.
  """
  return any(StorageUrlFromString(url_str).IsFileUrl()
             for url_str in args_to_check)
def HaveProviderUrls(args_to_check):
  """Checks whether args_to_check contains any provider URLs (like 'gs://').

  Args:
    args_to_check: Command-line argument subset to check.

  Returns:
    True if args_to_check contains any provider URLs.
  """
  parsed_urls = (StorageUrlFromString(url_str) for url_str in args_to_check)
  return any(url.IsCloudUrl() and url.IsProvider() for url in parsed_urls)
def MultiprocessingIsAvailable(logger=None):
  """Checks if multiprocessing is available.

  There are some environments in which there is no way to use multiprocessing
  logic that's built into Python (e.g., if /dev/shm is not available, then
  we can't create semaphores). This simply tries out a few things that will be
  needed to make sure the environment can support the pieces of the
  multiprocessing module that we need.

  The outcome of the first call is stored in module-level globals; later calls
  return the stored result without repeating the probes.

  Args:
    logger: logging.logger to use for debug output. When provided and
            multiprocessing is unavailable, the stack trace of the failed
            probe is logged at debug level and an explanatory message is
            logged as a warning. Nothing is logged when multiprocessing is
            available.

  Returns:
    (multiprocessing_is_available, stack_trace):
      multiprocessing_is_available: True iff the multiprocessing module is
                                    available for use.
      stack_trace: The stack trace generated by the call we tried that failed,
                   or None if every probe succeeded.
  """
  # pylint: disable=global-variable-undefined
  global cached_multiprocessing_is_available
  global cached_multiprocessing_check_stack_trace
  global cached_multiprocessing_is_available_message
  if cached_multiprocessing_is_available is not None:
    # Repeat the diagnostics only for a cached failure. The cached message
    # always states that multiprocessing is unavailable, so emitting it after
    # a successful check would be a false warning.
    if logger and not cached_multiprocessing_is_available:
      logger.debug(cached_multiprocessing_check_stack_trace)
      logger.warn(cached_multiprocessing_is_available_message)
    return (cached_multiprocessing_is_available,
            cached_multiprocessing_check_stack_trace)

  stack_trace = None
  multiprocessing_is_available = True
  message = """
You have requested multiple threads or processes for an operation, but the
required functionality of Python\'s multiprocessing module is not available.
Your operations will be performed sequentially, and any requests for
parallelism will be ignored.
"""
  try:
    # Fails if /dev/shm (or some equivalent thereof) is not available for use
    # (e.g., there's no implementation, or we can't write to it, etc.).
    try:
      multiprocessing.Value('i', 0)
    except:
      if not IS_WINDOWS:
        message += """
Please ensure that you have write access to both /dev/shm and /run/shm.
"""
      raise  # We'll handle this in one place below.

    # Manager objects and Windows are generally a pain to work with, so try it
    # out as a sanity check. This definitely works on some versions of Windows,
    # but it's certainly possible that there is some unknown configuration for
    # which it won't.
    multiprocessing.Manager()

    # Check that the max number of open files is reasonable. Always check this
    # after we're sure that the basic multiprocessing functionality is
    # available, since this won't matter unless that's true.
    limit = -1
    if HAS_RESOURCE_MODULE:
      # Try to set this with both resource names - RLIMIT_NOFILE for most Unix
      # platforms, and RLIMIT_OFILE for BSD. Ignore AttributeError because the
      # "resource" module is not guaranteed to know about these names.
      try:
        limit = max(limit,
                    _IncreaseSoftLimitForResource(
                        resource.RLIMIT_NOFILE,
                        MIN_ACCEPTABLE_OPEN_FILES_LIMIT))
      except AttributeError:
        pass
      try:
        limit = max(limit,
                    _IncreaseSoftLimitForResource(
                        resource.RLIMIT_OFILE, MIN_ACCEPTABLE_OPEN_FILES_LIMIT))
      except AttributeError:
        pass

    if limit < MIN_ACCEPTABLE_OPEN_FILES_LIMIT and not IS_WINDOWS:
      message += ("""
Your max number of open files, %s, is too low to allow safe multiprocessing.
On Linux you can fix this by adding something like "ulimit -n 10000" to your
~/.bashrc or equivalent file and opening a new terminal.

On MacOS, you may also need to run a command like this once (in addition to the
above instructions), which might require a restart of your system to take
effect:
  launchctl limit maxfiles 10000

Alternatively, edit /etc/launchd.conf with something like:
  limit maxfiles 10000 10000

""" % limit)
      raise Exception('Max number of open files, %s, is too low.' % limit)
  except:  # pylint: disable=bare-except
    # Any failure above (including the deliberate raise for a low file limit)
    # means parallelism must be disabled; keep the trace for diagnostics.
    stack_trace = traceback.format_exc()
    multiprocessing_is_available = False
    if logger is not None:
      logger.debug(stack_trace)
      logger.warn(message)

  # Set the cached values so that we never need to do this check again.
  cached_multiprocessing_is_available = multiprocessing_is_available
  cached_multiprocessing_check_stack_trace = stack_trace
  cached_multiprocessing_is_available_message = message
  return (multiprocessing_is_available, stack_trace)
def CreateLock():
  """Creates a lock suited to the parallelism this environment supports.

  A lock obtained from `manager` is returned iff MultiprocessingIsAvailable()
  reports that the parts of the multiprocessing module needed for parallel
  operations can be used; otherwise a plain threading lock is returned.

  Returns:
    Multiprocessing or threading lock.
  """
  can_use_multiprocessing = MultiprocessingIsAvailable()[0]
  return manager.Lock() if can_use_multiprocessing else threading.Lock()
def IsCloudSubdirPlaceholder(url, blr=None):
  """Reports whether a URL names a cloud subdirectory placeholder object.

  GUI tools (like the GCS cloud console) let users create empty "folders" by
  writing a placeholder object, and parts of gsutil must treat such objects
  specially. For example, gsutil rsync needs to avoid downloading them because
  they can cause conflicts (see comments in rsync command for details).

  Two kinds of placeholder are recognized:
    - Cloud objects whose name ends with '_$folder$'
    - Cloud objects whose name ends with '/' (and, when blr refers to an
      object, whose size is 0)

  Args:
    url: The URL to be checked.
    blr: BucketListingRef to check, or None if not available.
         If None, size won't be checked.

  Returns:
    True/False.
  """
  if not url.IsCloudUrl():
    return False

  name = url.url_string
  if name.endswith('_$folder$'):
    return True

  # With no listing ref, or one that is not an object, the size is taken as 0
  # so that only the trailing '/' decides the outcome.
  object_size = blr.root_object.size if blr and blr.IsObject() else 0
  return object_size == 0 and name.endswith('/')
def GetTermLines():
  """Returns number of terminal lines.

  The height is looked up, in order, from the terminal attached to stdin,
  stdout or stderr, from the controlling terminal, and finally from the LINES
  environment variable. _DEFAULT_LINES is returned when fcntl/termios cannot
  be imported or when no source yields a value that converts to an integer.

  Returns:
    The number of terminal lines.
  """
  # fcntl isn't supported in Windows.
  try:
    import fcntl  # pylint: disable=g-import-not-at-top
    import termios  # pylint: disable=g-import-not-at-top
  except ImportError:
    return _DEFAULT_LINES

  def ioctl_GWINSZ(fd):  # pylint: disable=invalid-name
    """Returns the row count the terminal reports for fd, or 0 on failure."""
    try:
      return struct.unpack(
          'hh', fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234'))[0]
    except Exception:  # pylint: disable=broad-except
      return 0  # Failure (so will retry on different file descriptor below).

  # Try to find a valid number of lines from termio for stdin, stdout,
  # or stderr, in that order.
  ioc = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2)
  if not ioc:
    try:
      fd = os.open(os.ctermid(), os.O_RDONLY)
      try:
        ioc = ioctl_GWINSZ(fd)
      finally:
        # Always release the descriptor, even if the query is interrupted.
        os.close(fd)
    except Exception:  # pylint: disable=broad-except
      # No controlling terminal (or no os.ctermid); fall through to LINES.
      pass
  if not ioc:
    ioc = os.environ.get('LINES', _DEFAULT_LINES)
  try:
    return int(ioc)
  except (TypeError, ValueError):
    # LINES is user-controlled and may be empty or non-numeric; don't crash.
    return _DEFAULT_LINES
class GsutilStreamHandler(logging.StreamHandler):
  """A subclass of StreamHandler for use in gsutil."""

  def flush(self):
    """Flushes the stream, ignoring a ValueError raised by the base flush.

    This override works around a python 2.6 bug. The python logging module
    tries to flush all stream handlers at exit, and if a StreamHandler points
    to a file that is already closed, the method throws an exception. Our unit
    tests temporarily redirect stderr, which causes the default StreamHandler
    to open its stream against a temporary file; by the time the process shuts
    down that file is closed. This was fixed in Python 2.7, but the override
    is kept to remove the flake from Python 2.6.
    """
    try:
      logging.StreamHandler.flush(self)
    except ValueError:
      # Raised when the underlying stream is already closed; nothing to flush.
      return
def StdinIterator():
  """A generator function that yields the lines of stdin one at a time.

  Yields:
    Each line read from sys.stdin with its trailing whitespace removed; this
    covers the CR/LF terminator as well as any other trailing whitespace.
  """
  for raw_line in sys.stdin:
    # rstrip() with no argument drops CRLF along with other trailing
    # whitespace.
    stripped_line = raw_line.rstrip()
    yield stripped_line