# -*- coding: utf-8 -*-
# Copyright 2010 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Static data and helper functions."""

from __future__ import absolute_import

import errno
import logging
import math
import multiprocessing
import os
import pkgutil
import re
import struct
import sys
import tempfile
import textwrap
import threading
import traceback
import xml.etree.ElementTree as ElementTree

import boto
import boto.auth
from boto import config
from boto.exception import NoAuthHandlerFound
from boto.gs.connection import GSConnection
from boto.provider import Provider
from boto.pyami.config import BotoConfigLocations
import httplib2
from oauth2client.client import HAS_CRYPTO
from retry_decorator import retry_decorator

import gslib
from gslib.exception import CommandException
from gslib.storage_url import StorageUrlFromString
from gslib.translation_helper import AclTranslation
from gslib.translation_helper import GenerationFromUrlAndString
from gslib.translation_helper import S3_ACL_MARKER_GUID
from gslib.translation_helper import S3_DELETE_MARKER_GUID
from gslib.translation_helper import S3_MARKER_GUIDS

# pylint: disable=g-import-not-at-top
try:
  # This module doesn't necessarily exist on Windows.
  import resource
  HAS_RESOURCE_MODULE = True
except ImportError:
  HAS_RESOURCE_MODULE = False

ONE_KIB = 1024
ONE_MIB = 1024 * 1024
EIGHT_MIB = 8 * ONE_MIB
TEN_MIB = 10 * ONE_MIB
DEFAULT_FILE_BUFFER_SIZE = 8 * ONE_KIB
UTF8 = 'utf-8'

# By default, the timeout for SSL read errors is infinite. This could
# cause gsutil to hang on network disconnect, so pick a more reasonable
# timeout (in seconds).
SSL_TIMEOUT = 60

# Start with a progress callback every 64 KiB during uploads/downloads (JSON
# API). Callback implementation should back off until it hits the maximum size
# so that callbacks do not create huge amounts of log output.
START_CALLBACK_PER_BYTES = 1024*64
MAX_CALLBACK_PER_BYTES = 1024*1024*100

# Upload/download files in 8 KiB chunks over the HTTP connection.
TRANSFER_BUFFER_SIZE = 1024*8

# Default number of progress callbacks during transfer (XML API).
XML_PROGRESS_CALLBACKS = 10

# For files >= this size, output a message indicating that we're running an
# operation on the file (like hashing or gzipping) so it does not appear to
# the user that the command is hanging.
MIN_SIZE_COMPUTE_LOGGING = 100*1024*1024  # 100 MiB

VERSION_MATCHER = re.compile(r'^(?P<maj>\d+)(\.(?P<min>\d+)(?P<suffix>.*))?')

RELEASE_NOTES_URL = 'https://pub.storage.googleapis.com/gsutil_ReleaseNotes.txt'

# Binary exponentiation strings.
_EXP_STRINGS = [
    (0, 'B', 'bit'),
    (10, 'KiB', 'Kibit', 'K'),
    (20, 'MiB', 'Mibit', 'M'),
    (30, 'GiB', 'Gibit', 'G'),
    (40, 'TiB', 'Tibit', 'T'),
    (50, 'PiB', 'Pibit', 'P'),
    (60, 'EiB', 'Eibit', 'E'),
]

global manager  # pylint: disable=global-at-module-level
certs_file_lock = threading.Lock()
configured_certs_files = []


def InitializeMultiprocessingVariables():
  """Perform necessary initialization for multiprocessing.

  See gslib.command.InitializeMultiprocessingVariables for an explanation
  of why this is necessary.
  """
  global manager  # pylint: disable=global-variable-undefined
  manager = multiprocessing.Manager()


def _GenerateSuffixRegex():
  """Creates a suffix regex for human-readable byte counts."""
  human_bytes_re = r'(?P<num>\d*\.\d+|\d+)\s*(?P<suffix>%s)?'
  suffixes = []
  suffix_to_si = {}
  for i, si in enumerate(_EXP_STRINGS):
    si_suffixes = [s.lower() for s in list(si)[1:]]
    for suffix in si_suffixes:
      suffix_to_si[suffix] = i
    suffixes.extend(si_suffixes)
  human_bytes_re %= '|'.join(suffixes)
  matcher = re.compile(human_bytes_re)
  return suffix_to_si, matcher

SUFFIX_TO_SI, MATCH_HUMAN_BYTES = _GenerateSuffixRegex()
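
# Editorial examples (not from the original source) of strings the generated
# MATCH_HUMAN_BYTES regex accepts, assuming the _EXP_STRINGS table above
# (callers lowercase input before matching):
#   '1024'   -> num='1024', no suffix
#   '2.5m'   -> num='2.5', suffix='m'
#   '3 gib'  -> num='3', suffix='gib'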

SECONDS_PER_DAY = 3600 * 24

# Detect platform types.
PLATFORM = str(sys.platform).lower()
IS_WINDOWS = 'win32' in PLATFORM
IS_CYGWIN = 'cygwin' in PLATFORM
IS_LINUX = 'linux' in PLATFORM
IS_OSX = 'darwin' in PLATFORM

# On Unix-like systems, we will set the maximum number of open files to avoid
# hitting the limit imposed by the OS. This number was obtained experimentally.
MIN_ACCEPTABLE_OPEN_FILES_LIMIT = 1000

GSUTIL_PUB_TARBALL = 'gs://pub/gsutil.tar.gz'

Retry = retry_decorator.retry  # pylint: disable=invalid-name

# Cache the values from this check such that they're available to all callers
# without needing to run all the checks again (some of these, such as calling
# multiprocessing.Manager(), are expensive operations).
cached_multiprocessing_is_available = None
cached_multiprocessing_check_stack_trace = None
cached_multiprocessing_is_available_message = None


# Enum class for specifying listing style.
class ListingStyle(object):
  SHORT = 'SHORT'
  LONG = 'LONG'
  LONG_LONG = 'LONG_LONG'


def UsingCrcmodExtension(crcmod):
  return (getattr(crcmod, 'crcmod', None) and
          getattr(crcmod.crcmod, '_usingExtension', None))
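
# Editorial usage note (not from the original source): pass the imported
# crcmod package itself, e.g. UsingCrcmodExtension(crcmod); the result is
# truthy only when crcmod's compiled C extension is loaded.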


def CreateDirIfNeeded(dir_path, mode=0777):
  """Creates a directory, suppressing already-exists errors."""
  if not os.path.exists(dir_path):
    try:
      # Unfortunately, even though we catch and ignore EEXIST, this call will
      # output a (needless) error message (no way to avoid that in Python).
      os.makedirs(dir_path, mode)
    # Ignore 'already exists' in case user tried to start up several
    # resumable uploads concurrently from a machine where no tracker dir had
    # yet been created.
    except OSError, e:
      if e.errno != errno.EEXIST:
        raise


def GetGsutilStateDir():
  """Returns the location of the directory for gsutil state files.

  Certain operations, such as cross-process credential sharing and
  resumable transfer tracking, need a known location for state files which
  are created by gsutil as-needed.

  This location should only be used for storing data that is required to be in
  a static location.

  Returns:
    Path to directory for gsutil static state files.
  """
  config_file_dir = config.get(
      'GSUtil', 'state_dir',
      os.path.expanduser(os.path.join('~', '.gsutil')))
  CreateDirIfNeeded(config_file_dir)
  return config_file_dir


def GetCredentialStoreFilename():
  return os.path.join(GetGsutilStateDir(), 'credstore')


def GetGceCredentialCacheFilename():
  return os.path.join(GetGsutilStateDir(), 'gcecredcache')


def GetTabCompletionLogFilename():
  return os.path.join(GetGsutilStateDir(), 'tab-completion-logs')


def GetTabCompletionCacheFilename():
  tab_completion_dir = os.path.join(GetGsutilStateDir(), 'tab-completion')
  # Limit read permissions on the directory to owner for privacy.
  CreateDirIfNeeded(tab_completion_dir, mode=0700)
  return os.path.join(tab_completion_dir, 'cache')


def PrintTrackerDirDeprecationWarningIfNeeded():
  # TODO: Remove this along with the tracker_dir config value 1 year after
  # 4.6 release date. Use state_dir instead.
  if config.has_option('GSUtil', 'resumable_tracker_dir'):
    sys.stderr.write('Warning: you have set resumable_tracker_dir in your '
                     '.boto configuration file. This configuration option is '
                     'deprecated; please use the state_dir configuration '
                     'option instead.\n')


PrintTrackerDirDeprecationWarningIfNeeded()
CreateDirIfNeeded(GetGsutilStateDir())

# Name of file where we keep the timestamp for the last time we checked whether
# a new version of gsutil is available.
LAST_CHECKED_FOR_GSUTIL_UPDATE_TIMESTAMP_FILE = (
    os.path.join(GetGsutilStateDir(), '.last_software_update_check'))


def HasConfiguredCredentials():
  """Determines if boto credential/config file exists."""
  has_goog_creds = (config.has_option('Credentials', 'gs_access_key_id') and
                    config.has_option('Credentials', 'gs_secret_access_key'))
  has_amzn_creds = (config.has_option('Credentials', 'aws_access_key_id') and
                    config.has_option('Credentials', 'aws_secret_access_key'))
  has_oauth_creds = (
      config.has_option('Credentials', 'gs_oauth2_refresh_token'))
  has_service_account_creds = (
      HAS_CRYPTO and
      config.has_option('Credentials', 'gs_service_client_id') and
      config.has_option('Credentials', 'gs_service_key_file'))

  if (has_goog_creds or has_amzn_creds or has_oauth_creds or
      has_service_account_creds):
    return True

  valid_auth_handler = None
  try:
    valid_auth_handler = boto.auth.get_auth_handler(
        GSConnection.DefaultHost, config, Provider('google'),
        requested_capability=['s3'])
    # Exclude the no-op auth handler as indicating credentials are configured.
    # Note we can't use isinstance() here because the no-op module may not be
    # imported so we can't get a reference to the class type.
    if getattr(getattr(valid_auth_handler, '__class__', None),
               '__name__', None) == 'NoOpAuth':
      valid_auth_handler = None
  except NoAuthHandlerFound:
    pass

  return valid_auth_handler


def ConfigureNoOpAuthIfNeeded():
  """Sets up no-op auth handler if no boto credentials are configured."""
  if not HasConfiguredCredentials():
    if (config.has_option('Credentials', 'gs_service_client_id')
        and not HAS_CRYPTO):
      if os.environ.get('CLOUDSDK_WRAPPER') == '1':
        raise CommandException('\n'.join(textwrap.wrap(
            'Your gsutil is configured with an OAuth2 service account, but '
            'you do not have PyOpenSSL or PyCrypto 2.6 or later installed. '
            'Service account authentication requires one of these libraries; '
            'please reactivate your service account via the gcloud auth '
            'command and ensure any gcloud packages necessary for '
            'service accounts are present.')))
      else:
        raise CommandException('\n'.join(textwrap.wrap(
            'Your gsutil is configured with an OAuth2 service account, but '
            'you do not have PyOpenSSL or PyCrypto 2.6 or later installed. '
            'Service account authentication requires one of these libraries; '
            'please install either of them to proceed, or configure a '
            'different type of credentials with "gsutil config".')))
    else:
      # With no boto config file the user can still access publicly readable
      # buckets and objects.
      from gslib import no_op_auth_plugin  # pylint: disable=unused-variable


def GetConfigFilePath():
  config_path = 'no config found'
  for path in BotoConfigLocations:
    try:
      with open(path, 'r'):
        config_path = path
      break
    except IOError:
      pass
  return config_path


def GetBotoConfigFileList():
  """Returns list of boto config files that exist."""
  config_paths = boto.pyami.config.BotoConfigLocations
  if 'AWS_CREDENTIAL_FILE' in os.environ:
    config_paths.append(os.environ['AWS_CREDENTIAL_FILE'])
  config_files = {}
  for config_path in config_paths:
    if os.path.exists(config_path):
      config_files[config_path] = 1
  cf_list = []
  for config_file in config_files:
    cf_list.append(config_file)
  return cf_list


def GetCertsFile():
  """Configures and returns the CA Certificates file.

  If one is already configured, use it. Otherwise, amend the configuration
  (in boto.config) to use the cert roots distributed with gsutil.

  Returns:
    string filename of the certs file to use.
  """
  certs_file = boto.config.get('Boto', 'ca_certificates_file', None)
  if not certs_file:
    with certs_file_lock:
      if configured_certs_files:
        disk_certs_file = configured_certs_files[0]
      else:
        disk_certs_file = os.path.abspath(
            os.path.join(gslib.GSLIB_DIR, 'data', 'cacerts.txt'))
        if not os.path.exists(disk_certs_file):
          # If the file is not present on disk, this means the gslib module
          # doesn't actually exist on disk anywhere. This can happen if it's
          # being imported from a zip file. Unfortunately, we have to copy the
          # certs file to a local temp file on disk because the underlying SSL
          # socket requires it to be a filesystem path.
          certs_data = pkgutil.get_data('gslib', 'data/cacerts.txt')
          if not certs_data:
            raise CommandException('Certificates file not found. Please '
                                   'reinstall gsutil from scratch')
          fd, fname = tempfile.mkstemp(suffix='.txt', prefix='gsutil-cacerts')
          f = os.fdopen(fd, 'w')
          f.write(certs_data)
          f.close()
          configured_certs_files.append(fname)
          disk_certs_file = fname
      certs_file = disk_certs_file
  return certs_file


def GetCleanupFiles():
  """Returns a list of temp files to delete (if possible) when program exits."""
  cleanup_files = []
  if configured_certs_files:
    cleanup_files += configured_certs_files
  return cleanup_files


def ProxyInfoFromEnvironmentVar(proxy_env_var):
  """Reads proxy info from the environment and converts to httplib2.ProxyInfo.

  Args:
    proxy_env_var: Environment variable string to read, such as http_proxy or
                   https_proxy.

  Returns:
    httplib2.ProxyInfo constructed from the environment string.
  """
  proxy_url = os.environ.get(proxy_env_var)
  if not proxy_url or not proxy_env_var.lower().startswith('http'):
    return httplib2.ProxyInfo(httplib2.socks.PROXY_TYPE_HTTP, None, 0)
  proxy_protocol = proxy_env_var.lower().split('_')[0]
  if not proxy_url.lower().startswith('http'):
    # proxy_info_from_url requires a protocol, which is always http or https.
    proxy_url = proxy_protocol + '://' + proxy_url
  return httplib2.proxy_info_from_url(proxy_url, method=proxy_protocol)
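
# Editorial example (not from the original source): with the environment
# variable https_proxy set to 'proxy.example.com:3128' (a hypothetical host),
# the value lacks a protocol, so 'https://' is prepended before calling
# httplib2.proxy_info_from_url.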


def GetNewHttp(http_class=httplib2.Http, **kwargs):
  """Creates and returns a new httplib2.Http instance.

  Args:
    http_class: Optional custom Http class to use.
    **kwargs: Arguments to pass to http_class constructor.

  Returns:
    An initialized httplib2.Http instance.
  """
  proxy_info = httplib2.ProxyInfo(
      proxy_type=3,  # 3 == httplib2.socks.PROXY_TYPE_HTTP
      proxy_host=boto.config.get('Boto', 'proxy', None),
      proxy_port=boto.config.getint('Boto', 'proxy_port', 0),
      proxy_user=boto.config.get('Boto', 'proxy_user', None),
      proxy_pass=boto.config.get('Boto', 'proxy_pass', None),
      proxy_rdns=boto.config.get('Boto', 'proxy_rdns', False))

  if not (proxy_info.proxy_host and proxy_info.proxy_port):
    # Fall back to using the environment variable.
    for proxy_env_var in ['http_proxy', 'https_proxy', 'HTTPS_PROXY']:
      if proxy_env_var in os.environ and os.environ[proxy_env_var]:
        proxy_info = ProxyInfoFromEnvironmentVar(proxy_env_var)
        # Assume proxy_rdns is True if a proxy environment variable exists.
        proxy_info.proxy_rdns = boto.config.get('Boto', 'proxy_rdns', True)
        break

  # Some installers don't package a certs file with httplib2, so use the
  # one included with gsutil.
  kwargs['ca_certs'] = GetCertsFile()
  # Use a non-infinite SSL timeout to avoid hangs during network flakiness.
  kwargs['timeout'] = SSL_TIMEOUT
  http = http_class(proxy_info=proxy_info, **kwargs)
  http.disable_ssl_certificate_validation = (not config.getbool(
      'Boto', 'https_validate_certificates'))
  return http


def GetNumRetries():
  return config.getint('Boto', 'num_retries', 6)


def GetMaxRetryDelay():
  return config.getint('Boto', 'max_retry_delay', 60)


# Resumable downloads and uploads make one HTTP call per chunk (and must be
# in multiples of 256 KiB). Overridable for testing.
def GetJsonResumableChunkSize():
  chunk_size = config.getint('GSUtil', 'json_resumable_chunk_size',
                             1024*1024*100L)
  if chunk_size == 0:
    chunk_size = 1024*256L
  elif chunk_size % (1024*256L) != 0:
    # Round up to the nearest multiple of 256 KiB.
    chunk_size += (1024*256L - (chunk_size % (1024*256L)))
  return chunk_size
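
# Editorial example: a configured json_resumable_chunk_size of 300000 bytes
# is not a multiple of 256 KiB (262144), so it is rounded up to 524288
# (2 * 256 KiB).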


def _RoundToNearestExponent(num):
  i = 0
  while i+1 < len(_EXP_STRINGS) and num >= (2 ** _EXP_STRINGS[i+1][0]):
    i += 1
  return i, round(float(num) / 2 ** _EXP_STRINGS[i][0], 2)


def MakeHumanReadable(num):
  """Generates human readable string for a number of bytes.

  Args:
    num: The number, in bytes.

  Returns:
    A string form of the number using size abbreviations (KiB, MiB, etc.).
  """
  i, rounded_val = _RoundToNearestExponent(num)
  return '%g %s' % (rounded_val, _EXP_STRINGS[i][1])
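
# Editorial examples: MakeHumanReadable(1536) returns '1.5 KiB';
# MakeHumanReadable(1024**3) returns '1 GiB'.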


def MakeBitsHumanReadable(num):
  """Generates human readable string for a number of bits.

  Args:
    num: The number, in bits.

  Returns:
    A string form of the number using bit size abbreviations (kbit, Mbit, etc.)
  """
  i, rounded_val = _RoundToNearestExponent(num)
  return '%g %s' % (rounded_val, _EXP_STRINGS[i][2])


def HumanReadableToBytes(human_string):
  """Tries to convert a human-readable string to a number of bytes.

  Args:
    human_string: A string supplied by user, e.g. '1M', '3 GiB'.

  Returns:
    An integer containing the number of bytes.

  Raises:
    ValueError: on an invalid string.
  """
  human_string = human_string.lower()
  m = MATCH_HUMAN_BYTES.match(human_string)
  if m:
    num = float(m.group('num'))
    if m.group('suffix'):
      power = _EXP_STRINGS[SUFFIX_TO_SI[m.group('suffix')]][0]
      num *= (2.0 ** power)
    num = int(round(num))
    return num
  raise ValueError('Invalid byte string specified: %s' % human_string)
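
# Editorial examples: HumanReadableToBytes('1M') == 1048576 and
# HumanReadableToBytes('3 GiB') == 3221225472; suffixes are interpreted as
# binary (1024-based) powers per _EXP_STRINGS.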


def Percentile(values, percent, key=lambda x: x):
  """Find the percentile of a list of values.

  Taken from: http://code.activestate.com/recipes/511478/

  Args:
    values: a list of numeric values. Note that the values MUST BE already
            sorted.
    percent: a float value from 0.0 to 1.0.
    key: optional key function to compute value from each element of the list
         of values.

  Returns:
    The percentile of the values.
  """
  if not values:
    return None
  k = (len(values) - 1) * percent
  f = math.floor(k)
  c = math.ceil(k)
  if f == c:
    return key(values[int(k)])
  d0 = key(values[int(f)]) * (c-k)
  d1 = key(values[int(c)]) * (k-f)
  return d0 + d1
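
# Editorial example: Percentile([1, 2, 3, 4], 0.5) gives k = 1.5 and
# interpolates between the two middle values: 2*0.5 + 3*0.5 = 2.5.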


def RemoveCRLFFromString(input_str):
  """Returns the input string with all \\n and \\r removed."""
  return re.sub(r'[\r\n]', '', input_str)


def UnaryDictToXml(message):
  """Generates XML representation of a nested dict.

  This dict contains exactly one top-level entry and an arbitrary number of
  2nd-level entries, e.g. capturing a WebsiteConfiguration message.

  Args:
    message: The dict encoding the message.

  Returns:
    XML string representation of the input dict.

  Raises:
    Exception: if dict contains more than one top-level entry.
  """
  if len(message) != 1:
    raise Exception('Expected dict of size 1, got size %d' % len(message))

  name, content = message.items()[0]
  element_type = ElementTree.Element(name)
  for element_property, value in sorted(content.items()):
    node = ElementTree.SubElement(element_type, element_property)
    node.text = value
  return ElementTree.tostring(element_type)
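
# Editorial example:
#   UnaryDictToXml({'WebsiteConfiguration': {'MainPageSuffix': 'index.html'}})
# returns
#   '<WebsiteConfiguration><MainPageSuffix>index.html</MainPageSuffix>'
#   '</WebsiteConfiguration>'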


def LookUpGsutilVersion(gsutil_api, url_str):
  """Looks up the gsutil version of the specified gsutil tarball URL.

  Version is specified in the metadata field set on that object.

  Args:
    gsutil_api: gsutil Cloud API to use when retrieving gsutil tarball.
    url_str: tarball URL to retrieve (such as 'gs://pub/gsutil.tar.gz').

  Returns:
    Version string if URL is a cloud URL containing x-goog-meta-gsutil-version
    metadata, else None.
  """
  url = StorageUrlFromString(url_str)
  if url.IsCloudUrl():
    obj = gsutil_api.GetObjectMetadata(url.bucket_name, url.object_name,
                                       provider=url.scheme,
                                       fields=['metadata'])
    if obj.metadata and obj.metadata.additionalProperties:
      for prop in obj.metadata.additionalProperties:
        if prop.key == 'gsutil_version':
          return prop.value


def GetGsutilVersionModifiedTime():
  """Returns unix timestamp of when the VERSION file was last modified."""
  if not gslib.VERSION_FILE:
    return 0
  return int(os.path.getmtime(gslib.VERSION_FILE))


def IsRunningInteractively():
  """Returns True if currently running interactively on a TTY."""
  return sys.stdout.isatty() and sys.stderr.isatty() and sys.stdin.isatty()


def _HttpsValidateCertificatesEnabled():
  return config.get('Boto', 'https_validate_certificates', True)

CERTIFICATE_VALIDATION_ENABLED = _HttpsValidateCertificatesEnabled()


def _BotoIsSecure():
  return config.get('Boto', 'is_secure', True)

BOTO_IS_SECURE = _BotoIsSecure()


def ResumableThreshold():
  return config.getint('GSUtil', 'resumable_threshold', EIGHT_MIB)


def AddAcceptEncoding(headers):
  """Adds accept-encoding:gzip to the dictionary of headers."""
  # If Accept-Encoding is not already set, set it to enable gzip.
  if 'accept-encoding' not in headers:
    headers['accept-encoding'] = 'gzip'
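
# Editorial example: AddAcceptEncoding mutates the dict in place and leaves
# any existing value alone:
#   headers = {}
#   AddAcceptEncoding(headers)  # headers == {'accept-encoding': 'gzip'}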


# pylint: disable=too-many-statements
def PrintFullInfoAboutObject(bucket_listing_ref, incl_acl=True):
  """Print full info for given object (like what displays for gsutil ls -L).

  Args:
    bucket_listing_ref: BucketListingRef being listed.
                        Must have ref_type OBJECT and a populated root_object
                        with the desired fields.
    incl_acl: True if ACL info should be output.

  Returns:
    Tuple (number of objects, object_length)

  Raises:
    Exception: if calling bug encountered.
  """
  url_str = bucket_listing_ref.url_string
  storage_url = StorageUrlFromString(url_str)
  obj = bucket_listing_ref.root_object

  if (obj.metadata and S3_DELETE_MARKER_GUID in
      obj.metadata.additionalProperties):
    num_bytes = 0
    num_objs = 0
    url_str += '<DeleteMarker>'
  else:
    num_bytes = obj.size
    num_objs = 1

  print '%s:' % url_str.encode(UTF8)
  if obj.updated:
    print '\tCreation time:\t\t%s' % obj.updated.strftime(
        '%a, %d %b %Y %H:%M:%S GMT')
  if obj.cacheControl:
    print '\tCache-Control:\t\t%s' % obj.cacheControl
  if obj.contentDisposition:
    print '\tContent-Disposition:\t\t%s' % obj.contentDisposition
  if obj.contentEncoding:
    print '\tContent-Encoding:\t\t%s' % obj.contentEncoding
  if obj.contentLanguage:
    print '\tContent-Language:\t%s' % obj.contentLanguage
  print '\tContent-Length:\t\t%s' % obj.size
  print '\tContent-Type:\t\t%s' % obj.contentType
  if obj.componentCount:
    print '\tComponent-Count:\t%d' % obj.componentCount
  marker_props = {}
  if obj.metadata and obj.metadata.additionalProperties:
    non_marker_props = []
    for add_prop in obj.metadata.additionalProperties:
      if add_prop.key not in S3_MARKER_GUIDS:
        non_marker_props.append(add_prop)
      else:
        marker_props[add_prop.key] = add_prop.value
    if non_marker_props:
      print '\tMetadata:'
      for ap in non_marker_props:
        meta_string = '\t\t%s:\t\t%s' % (ap.key, ap.value)
        print meta_string.encode(UTF8)
  if obj.crc32c: print '\tHash (crc32c):\t\t%s' % obj.crc32c
  if obj.md5Hash: print '\tHash (md5):\t\t%s' % obj.md5Hash
  print '\tETag:\t\t\t%s' % obj.etag.strip('"\'')
  if obj.generation:
    generation_str = GenerationFromUrlAndString(storage_url, obj.generation)
    print '\tGeneration:\t\t%s' % generation_str
  if obj.metageneration:
    print '\tMetageneration:\t\t%s' % obj.metageneration

  if incl_acl:
    # JSON API won't return acls as part of the response unless we have
    # full control scope.
    if obj.acl:
      print '\tACL:\t\t%s' % AclTranslation.JsonFromMessage(obj.acl)
    elif S3_ACL_MARKER_GUID in marker_props:
      print '\tACL:\t\t%s' % marker_props[S3_ACL_MARKER_GUID]
    else:
      print ('\tACL:\t\t\tACCESS DENIED. Note: you need OWNER '
             'permission\n\t\t\t\ton the object to read its ACL.')

  return (num_objs, num_bytes)


def CompareVersions(first, second):
  """Compares the first and second gsutil version strings.

  For example, 3.33 > 3.7, and 4.1 is a greater major version than 3.33.
  Does not handle multiple periods (e.g. 3.3.4) or complicated suffixes
  (e.g., 3.3RC4 vs. 3.3RC5). A version string with a suffix is treated as
  less than its non-suffix counterpart (e.g. 3.32 > 3.32pre).

  Args:
    first: First gsutil version string.
    second: Second gsutil version string.

  Returns:
    (g, m):
      g is True if first known to be greater than second, else False.
      m is True if first known to be greater by at least 1 major version,
        else False.
  """
  m1 = VERSION_MATCHER.match(str(first))
  m2 = VERSION_MATCHER.match(str(second))

  # If passed strings we don't know how to handle, be conservative.
  if m1 is None or m2 is None:
    return (False, False)

  major_ver1 = int(m1.group('maj'))
  minor_ver1 = int(m1.group('min')) if m1.group('min') else 0
  suffix_ver1 = m1.group('suffix')
  major_ver2 = int(m2.group('maj'))
  minor_ver2 = int(m2.group('min')) if m2.group('min') else 0
  suffix_ver2 = m2.group('suffix')

  if major_ver1 > major_ver2:
    return (True, True)
  elif major_ver1 == major_ver2:
    if minor_ver1 > minor_ver2:
      return (True, False)
    elif minor_ver1 == minor_ver2:
      return (bool(suffix_ver2) and not suffix_ver1, False)
  return (False, False)
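
# Editorial examples: CompareVersions('4.1', '3.33') returns (True, True);
# CompareVersions('3.33', '3.7') returns (True, False);
# CompareVersions('3.32', '3.32pre') returns (True, False).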


def _IncreaseSoftLimitForResource(resource_name, fallback_value):
  """Sets a new soft limit for the maximum number of open files.

  The soft limit is used for this process (and its children), but the
  hard limit is set by the system and cannot be exceeded.

  We will first try to set the soft limit to the hard limit's value; if that
  fails, we will try to set the soft limit to the fallback_value iff this would
  increase the soft limit.

  Args:
    resource_name: Name of the resource to increase the soft limit for.
    fallback_value: Fallback value to be used if we couldn't set the
                    soft value to the hard value (e.g., if the hard value
                    is "unlimited").

  Returns:
    Current soft limit for the resource (after any changes we were able to
    make), or -1 if the resource doesn't exist.
  """
  # Get the value of the resource.
  try:
    (soft_limit, hard_limit) = resource.getrlimit(resource_name)
  except (resource.error, ValueError):
    # The resource wasn't present, so we can't do anything here.
    return -1

  # Try to set the value of the soft limit to the value of the hard limit.
  if hard_limit > soft_limit:  # Some OS's report 0 for "unlimited".
    try:
      resource.setrlimit(resource_name, (hard_limit, hard_limit))
      return hard_limit
    except (resource.error, ValueError):
      # We'll ignore this and try the fallback value.
      pass

  # Try to set the value of the soft limit to the fallback value.
  if soft_limit < fallback_value:
    try:
      resource.setrlimit(resource_name, (fallback_value, hard_limit))
      return fallback_value
    except (resource.error, ValueError):
      # We couldn't change the soft limit, so just report the current
      # value of the soft limit.
      return soft_limit
  return soft_limit
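
# Editorial example (Unix-like systems only, assuming the resource module):
#   _IncreaseSoftLimitForResource(resource.RLIMIT_NOFILE,
#                                 MIN_ACCEPTABLE_OPEN_FILES_LIMIT)
# returns the post-adjustment soft limit, or -1 if RLIMIT_NOFILE is unknown.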


def GetCloudApiInstance(cls, thread_state=None):
  """Gets a gsutil Cloud API instance.

  Since Cloud API implementations are not guaranteed to be thread-safe, each
  thread needs its own instance. These instances are passed to each thread
  via the thread pool logic in command.

  Args:
    cls: Command class to be used for single-threaded case.
    thread_state: Per thread state from this thread containing a gsutil
                  Cloud API instance.

  Returns:
    gsutil Cloud API instance.
  """
  return thread_state or cls.gsutil_api


def GetFileSize(fp, position_to_eof=False):
  """Returns size of file, optionally leaving fp positioned at EOF."""
  if not position_to_eof:
    cur_pos = fp.tell()
  fp.seek(0, os.SEEK_END)
  cur_file_size = fp.tell()
  if not position_to_eof:
    fp.seek(cur_pos, os.SEEK_SET)
  return cur_file_size
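
# Editorial example: for an fp opened anywhere in a file, GetFileSize(fp)
# returns the total size and restores the original offset, while
# GetFileSize(fp, position_to_eof=True) leaves fp at EOF.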


def GetStreamFromFileUrl(storage_url):
  if storage_url.IsStream():
    return sys.stdin
  else:
    return open(storage_url.object_name, 'rb')


def UrlsAreForSingleProvider(url_args):
  """Tests whether the URLs are all for a single provider.

  Args:
    url_args: Strings to check.

  Returns:
    True if URLs are for single provider, False otherwise.
  """
  provider = None
  for url_str in url_args:
    url = StorageUrlFromString(url_str)
    if not provider:
      provider = url.scheme
    elif url.scheme != provider:
      return False
  return provider is not None


def HaveFileUrls(args_to_check):
  """Checks whether args_to_check contain any file URLs.

  Args:
    args_to_check: Command-line argument subset to check.

  Returns:
    True if args_to_check contains any file URLs.
  """
  for url_str in args_to_check:
    storage_url = StorageUrlFromString(url_str)
    if storage_url.IsFileUrl():
      return True
  return False


def HaveProviderUrls(args_to_check):
  """Checks whether args_to_check contains any provider URLs (like 'gs://').

  Args:
    args_to_check: Command-line argument subset to check.

  Returns:
    True if args_to_check contains any provider URLs.
  """
  for url_str in args_to_check:
    storage_url = StorageUrlFromString(url_str)
    if storage_url.IsCloudUrl() and storage_url.IsProvider():
      return True
  return False


def MultiprocessingIsAvailable(logger=None):
  """Checks if multiprocessing is available.

  There are some environments in which there is no way to use multiprocessing
  logic that's built into Python (e.g., if /dev/shm is not available, then
  we can't create semaphores). This simply tries out a few things that will be
  needed to make sure the environment can support the pieces of the
  multiprocessing module that we need.

  Args:
    logger: logging.logger to use for debug output.

  Returns:
    (multiprocessing_is_available, stack_trace):
      multiprocessing_is_available: True iff the multiprocessing module is
                                    available for use.
      stack_trace: The stack trace generated by the call we tried that failed.
  """
  # pylint: disable=global-variable-undefined
  global cached_multiprocessing_is_available
  global cached_multiprocessing_check_stack_trace
  global cached_multiprocessing_is_available_message
  if cached_multiprocessing_is_available is not None:
    if logger:
      logger.debug(cached_multiprocessing_check_stack_trace)
      logger.warn(cached_multiprocessing_is_available_message)
    return (cached_multiprocessing_is_available,
            cached_multiprocessing_check_stack_trace)

  stack_trace = None
  multiprocessing_is_available = True
  message = """
You have requested multiple threads or processes for an operation, but the
required functionality of Python\'s multiprocessing module is not available.
Your operations will be performed sequentially, and any requests for
parallelism will be ignored.
"""
  try:
    # Fails if /dev/shm (or some equivalent thereof) is not available for use
    # (e.g., there's no implementation, or we can't write to it, etc.).
    try:
      multiprocessing.Value('i', 0)
    except:  # pylint: disable=bare-except
      message += """
Please ensure that you have write access to both /dev/shm and /run/shm.
"""
      raise  # We'll handle this in one place below.

    # Manager objects and Windows are generally a pain to work with, so try it
    # out as a sanity check. This definitely works on some versions of Windows,
    # but it's certainly possible that there is some unknown configuration for
    # which it won't.
    multiprocessing.Manager()

    # Check that the max number of open files is reasonable. Always check this
    # after we're sure that the basic multiprocessing functionality is
    # available, since this won't matter unless that's true.
    limit = -1
    if HAS_RESOURCE_MODULE:
      # Try to set this with both resource names - RLIMIT_NOFILE for most Unix
      # platforms, and RLIMIT_OFILE for BSD. Ignore AttributeError because the
      # "resource" module is not guaranteed to know about these names.
      try:
        limit = max(limit,
                    _IncreaseSoftLimitForResource(
                        resource.RLIMIT_NOFILE,
                        MIN_ACCEPTABLE_OPEN_FILES_LIMIT))
      except AttributeError:
        pass
      try:
        limit = max(limit,
                    _IncreaseSoftLimitForResource(
                        resource.RLIMIT_OFILE,
                        MIN_ACCEPTABLE_OPEN_FILES_LIMIT))
      except AttributeError:
        pass

    if limit < MIN_ACCEPTABLE_OPEN_FILES_LIMIT and not IS_WINDOWS:
      message += ("""
Your max number of open files, %s, is too low to allow safe multiprocessing.
On Linux you can fix this by adding something like "ulimit -n 10000" to your
~/.bashrc or equivalent file and opening a new terminal.

On MacOS, you may also need to run a command like this once (in addition to the
above instructions), which might require a restart of your system to take
effect:
  launchctl limit maxfiles 10000

Alternatively, edit /etc/launchd.conf with something like:
  limit maxfiles 10000 10000
""" % limit)
      raise Exception('Max number of open files, %s, is too low.' % limit)
  except:  # pylint: disable=bare-except
    stack_trace = traceback.format_exc()
    multiprocessing_is_available = False
    if logger is not None:
      logger.debug(stack_trace)
      logger.warn(message)

  # Set the cached values so that we never need to do this check again.
  cached_multiprocessing_is_available = multiprocessing_is_available
  cached_multiprocessing_check_stack_trace = stack_trace
  cached_multiprocessing_is_available_message = message
  return (multiprocessing_is_available, stack_trace)


def CreateLock():
  """Returns either a multiprocessing lock or a threading lock.

  Use Multiprocessing lock iff we have access to the parts of the
  multiprocessing module that are necessary to enable parallelism in
  operations.

  Returns:
    Multiprocessing or threading lock.
  """
  if MultiprocessingIsAvailable()[0]:
    return manager.Lock()
  else:
    return threading.Lock()


def IsCloudSubdirPlaceholder(url, blr=None):
  """Determines if URL is a cloud subdir placeholder.

  This function is needed because GUI tools (like the GCS cloud console) allow
  users to create empty "folders" by creating a placeholder object; and parts
  of gsutil need to treat those placeholder objects specially. For example,
  gsutil rsync needs to avoid downloading those objects because they can cause
  conflicts (see comments in rsync command for details).

  We currently detect two cases:
    - Cloud objects whose name ends with '_$folder$'
    - Cloud objects whose name ends with '/'

  Args:
    url: The URL to be checked.
    blr: BucketListingRef to check, or None if not available.
         If None, size won't be checked.

  Returns:
    True if the URL is a cloud subdir placeholder, otherwise False.
  """
  if not url.IsCloudUrl():
    return False
  url_str = url.url_string
  if url_str.endswith('_$folder$'):
    return True
  if blr and blr.IsObject():
    size = blr.root_object.size
  else:
    size = 0
  return size == 0 and url_str.endswith('/')
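
# Editorial examples: a cloud object named 'dir_$folder$' is a placeholder;
# a zero-length object named 'dir/' is a placeholder; a non-empty object
# without a trailing slash is not.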


# Fallback number of terminal lines when the height can't be determined.
_DEFAULT_LINES = 25


def GetTermLines():
  """Returns number of terminal lines."""
  # fcntl isn't supported in Windows.
  try:
    import fcntl     # pylint: disable=g-import-not-at-top
    import termios   # pylint: disable=g-import-not-at-top
  except ImportError:
    return _DEFAULT_LINES

  def ioctl_GWINSZ(fd):  # pylint: disable=invalid-name
    try:
      return struct.unpack(
          'hh', fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234'))[0]
    except:  # pylint: disable=bare-except
      return 0  # Failure (so will retry on different file descriptor below).

  # Try to find a valid number of lines from termio for stdin, stdout,
  # or stderr, in that order.
  ioc = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2)
  if not ioc:
    try:
      fd = os.open(os.ctermid(), os.O_RDONLY)
      ioc = ioctl_GWINSZ(fd)
      os.close(fd)
    except:  # pylint: disable=bare-except
      pass
  if not ioc:
    ioc = os.environ.get('LINES', _DEFAULT_LINES)
  return int(ioc)


class GsutilStreamHandler(logging.StreamHandler):
  """A subclass of StreamHandler for use in gsutil."""

  def flush(self):
    # Note: we override the flush method here due to a python 2.6 bug. The
    # python logging module will try to flush all stream handlers at exit.
    # If the StreamHandler is pointing to a file that is already closed, the
    # method throws an exception. Our unit tests temporarily redirect stderr,
    # which causes the default StreamHandler to open its stream against a
    # temporary file. By the time the process shuts down, the underlying file
    # is closed, causing an exception. This was fixed in Python 2.7, but to
    # remove the flake from Python 2.6, we maintain this here.
    try:
      logging.StreamHandler.flush(self)
    except ValueError:
      pass


def StdinIterator():
  """A generator function that returns lines from stdin."""
  for line in sys.stdin:
    # Strip CRLF.
    yield line.rstrip()