# -*- coding: utf-8 -*-
# Copyright 2013 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""XML/boto gsutil Cloud API implementation for GCS and Amazon S3."""

from __future__ import absolute_import

import base64
import binascii
import datetime
import errno
import httplib
import json
import multiprocessing
import os
import pickle
import random
import re
import socket
import tempfile
import textwrap
import time
import xml.sax

from xml.dom.minidom import parseString as XmlParseString
from xml.sax import _exceptions as SaxExceptions

import boto
from boto import handler
from boto.exception import ResumableDownloadException as BotoResumableDownloadException
from boto.exception import ResumableTransferDisposition
from boto.gs.cors import Cors
from boto.gs.lifecycle import LifecycleConfig
from boto.s3.cors import CORSConfiguration as S3Cors
from boto.s3.deletemarker import DeleteMarker
from boto.s3.lifecycle import Lifecycle as S3Lifecycle
from boto.s3.prefix import Prefix

from gslib.boto_resumable_upload import BotoResumableUpload
from gslib.cloud_api import AccessDeniedException
from gslib.cloud_api import ArgumentException
from gslib.cloud_api import BadRequestException
from gslib.cloud_api import CloudApi
from gslib.cloud_api import NotEmptyException
from gslib.cloud_api import NotFoundException
from gslib.cloud_api import PreconditionException
from gslib.cloud_api import ResumableDownloadException
from gslib.cloud_api import ResumableUploadAbortException
from gslib.cloud_api import ResumableUploadException
from gslib.cloud_api import ResumableUploadStartOverException
from gslib.cloud_api import ServiceException
from gslib.cloud_api_helper import ValidateDstObjectMetadata
# Imported for boto AuthHandler purposes.
import gslib.devshell_auth_plugin  # pylint: disable=unused-import
from gslib.exception import CommandException
from gslib.exception import InvalidUrlError
from gslib.hashing_helper import Base64EncodeHash
from gslib.hashing_helper import Base64ToHexHash
from gslib.project_id import GOOG_PROJ_ID_HDR
from gslib.project_id import PopulateProjectId
from gslib.storage_url import StorageUrlFromString
from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages
from gslib.translation_helper import AclTranslation
from gslib.translation_helper import AddS3MarkerAclToObjectMetadata
from gslib.translation_helper import CorsTranslation
from gslib.translation_helper import CreateBucketNotFoundException
from gslib.translation_helper import CreateObjectNotFoundException
from gslib.translation_helper import DEFAULT_CONTENT_TYPE
from gslib.translation_helper import EncodeStringAsLong
from gslib.translation_helper import GenerationFromUrlAndString
from gslib.translation_helper import HeadersFromObjectMetadata
from gslib.translation_helper import LifecycleTranslation
from gslib.translation_helper import REMOVE_CORS_CONFIG
from gslib.translation_helper import S3MarkerAclFromObjectMetadata
from gslib.util import ConfigureNoOpAuthIfNeeded
from gslib.util import DEFAULT_FILE_BUFFER_SIZE
from gslib.util import GetFileSize
from gslib.util import GetMaxRetryDelay
from gslib.util import GetNumRetries
from gslib.util import MultiprocessingIsAvailable
from gslib.util import S3_DELETE_MARKER_GUID
from gslib.util import TWO_MIB
from gslib.util import UnaryDictToXml
from gslib.util import UTF8
from gslib.util import XML_PROGRESS_CALLBACKS

TRANSLATABLE_BOTO_EXCEPTIONS = (boto.exception.BotoServerError,
                                boto.exception.InvalidUriError,
                                boto.exception.ResumableDownloadException,
                                boto.exception.ResumableUploadException,
                                boto.exception.StorageCreateError,
                                boto.exception.StorageResponseError)
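
# Catching this tuple converts raw boto failures into the gsutil Cloud API
# exception hierarchy via _TranslateExceptionAndRaise below.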

# If multiprocessing is available, this will be overridden to a (thread-safe)
# multiprocessing.Value in a call to InitializeMultiprocessingVariables.
boto_auth_initialized = False

NON_EXISTENT_OBJECT_REGEX = re.compile(r'.*non-\s*existent\s*object',
                                       flags=re.DOTALL)
# Determines whether an etag is a valid MD5.
MD5_REGEX = re.compile(r'^"*[a-fA-F0-9]{32}"*$')
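# For example, MD5_REGEX matches the etag '"d41d8cd98f00b204e9800998ecf8427e"'
# (with or without surrounding quotes) but not a multi-part-style S3 etag such
# as '"d41d8cd98f00b204e9800998ecf8427e-2"'.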


def InitializeMultiprocessingVariables():
  """Perform necessary initialization for multiprocessing.

  See gslib.command.InitializeMultiprocessingVariables for an explanation
  of why this is necessary.
  """
  global boto_auth_initialized  # pylint: disable=global-variable-undefined
  boto_auth_initialized = multiprocessing.Value('i', 0)


class BotoTranslation(CloudApi):
  """Boto-based XML translation implementation of gsutil Cloud API.

  This class takes gsutil Cloud API objects, translates them to XML service
  calls, and translates the results back into gsutil Cloud API objects for
  use by the caller.
  """

  def __init__(self, bucket_storage_uri_class, logger, provider=None,
               credentials=None, debug=0):
    """Performs necessary setup for interacting with the cloud storage provider.

    Args:
      bucket_storage_uri_class: boto storage_uri class, used by APIs that
                                provide boto translation or mocking.
      logger: logging.logger for outputting log messages.
      provider: Provider prefix describing cloud storage provider to connect to.
                'gs' and 's3' are supported. Function implementations ignore
                the provider argument and use this one instead.
      credentials: Unused.
      debug: Debug level for the API implementation (0..3).
    """
    super(BotoTranslation, self).__init__(bucket_storage_uri_class, logger,
                                          provider=provider, debug=debug)
    _ = credentials
    global boto_auth_initialized  # pylint: disable=global-variable-undefined
    if MultiprocessingIsAvailable()[0] and not boto_auth_initialized.value:
      ConfigureNoOpAuthIfNeeded()
      boto_auth_initialized.value = 1
    elif not boto_auth_initialized:
      ConfigureNoOpAuthIfNeeded()
      boto_auth_initialized = True
    self.api_version = boto.config.get_value(
        'GSUtil', 'default_api_version', '1')

  def GetBucket(self, bucket_name, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    bucket_uri = self._StorageUriForBucket(bucket_name)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    try:
      return self._BotoBucketToBucket(bucket_uri.get_bucket(validate=True,
                                                            headers=headers),
                                      fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)

  def ListBuckets(self, project_id=None, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    get_fields = self._ListToGetFields(list_fields=fields)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    if self.provider == 'gs':
      headers[GOOG_PROJ_ID_HDR] = PopulateProjectId(project_id)
    try:
      provider_uri = boto.storage_uri(
          '%s://' % self.provider,
          suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)

      buckets_iter = provider_uri.get_all_buckets(headers=headers)
      for bucket in buckets_iter:
        if self.provider == 's3' and bucket.name.lower() != bucket.name:
          # S3 listings can return buckets with upper-case names, but boto
          # can't successfully call them.
          continue
        yield self._BotoBucketToBucket(bucket, fields=get_fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

  def PatchBucket(self, bucket_name, metadata, canned_acl=None,
                  canned_def_acl=None, preconditions=None, provider=None,
                  fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    bucket_uri = self._StorageUriForBucket(bucket_name)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    try:
      self._AddPreconditionsToHeaders(preconditions, headers)
      if metadata.acl:
        boto_acl = AclTranslation.BotoAclFromMessage(metadata.acl)
        bucket_uri.set_xml_acl(boto_acl.to_xml(), headers=headers)
      if canned_acl:
        canned_acls = bucket_uri.canned_acls()
        if canned_acl not in canned_acls:
          raise CommandException('Invalid canned ACL "%s".' % canned_acl)
        bucket_uri.set_acl(canned_acl, bucket_uri.object_name)
      if canned_def_acl:
        canned_acls = bucket_uri.canned_acls()
        if canned_def_acl not in canned_acls:
          raise CommandException('Invalid canned ACL "%s".' % canned_def_acl)
        bucket_uri.set_def_acl(canned_def_acl, bucket_uri.object_name)
      if metadata.cors:
        if metadata.cors == REMOVE_CORS_CONFIG:
          metadata.cors = []
        boto_cors = CorsTranslation.BotoCorsFromMessage(metadata.cors)
        bucket_uri.set_cors(boto_cors, False)
      if metadata.defaultObjectAcl:
        boto_acl = AclTranslation.BotoAclFromMessage(
            metadata.defaultObjectAcl)
        bucket_uri.set_def_xml_acl(boto_acl.to_xml(), headers=headers)
      if metadata.lifecycle:
        boto_lifecycle = LifecycleTranslation.BotoLifecycleFromMessage(
            metadata.lifecycle)
        bucket_uri.configure_lifecycle(boto_lifecycle, False)
      if metadata.logging:
        if self.provider == 'gs':
          headers[GOOG_PROJ_ID_HDR] = PopulateProjectId(None)
        if metadata.logging.logBucket and metadata.logging.logObjectPrefix:
          bucket_uri.enable_logging(metadata.logging.logBucket,
                                    metadata.logging.logObjectPrefix,
                                    False, headers)
        else:  # Logging field is present and empty.  Disable logging.
          bucket_uri.disable_logging(False, headers)
      if metadata.versioning:
        bucket_uri.configure_versioning(metadata.versioning.enabled,
                                        headers=headers)
      if metadata.website:
        main_page_suffix = metadata.website.mainPageSuffix
        error_page = metadata.website.notFoundPage
        bucket_uri.set_website_config(main_page_suffix, error_page)
      return self.GetBucket(bucket_name, fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)

  def CreateBucket(self, bucket_name, project_id=None, metadata=None,
                   provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    bucket_uri = self._StorageUriForBucket(bucket_name)
    location = ''
    if metadata and metadata.location:
      location = metadata.location
    # Pass storage_class param only if this is a GCS bucket. (In S3 the
    # storage class is specified on the key object.)
    headers = {}
    if bucket_uri.scheme == 'gs':
      self._AddApiVersionToHeaders(headers)
      headers[GOOG_PROJ_ID_HDR] = PopulateProjectId(project_id)
      storage_class = ''
      if metadata and metadata.storageClass:
        storage_class = metadata.storageClass
      try:
        bucket_uri.create_bucket(headers=headers, location=location,
                                 storage_class=storage_class)
      except TRANSLATABLE_BOTO_EXCEPTIONS, e:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
    else:
      try:
        bucket_uri.create_bucket(headers=headers, location=location)
      except TRANSLATABLE_BOTO_EXCEPTIONS, e:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
    return self.GetBucket(bucket_name, fields=fields)

  def DeleteBucket(self, bucket_name, preconditions=None, provider=None):
    """See CloudApi class for function doc strings."""
    _ = provider, preconditions
    bucket_uri = self._StorageUriForBucket(bucket_name)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    try:
      bucket_uri.delete_bucket(headers=headers)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      translated_exception = self._TranslateBotoException(
          e, bucket_name=bucket_name)
      if (translated_exception and
          'BucketNotEmpty' in translated_exception.reason):
        try:
          if bucket_uri.get_versioning_config():
            if self.provider == 's3':
              raise NotEmptyException(
                  'VersionedBucketNotEmpty (%s). Currently, gsutil does not '
                  'support listing or removing S3 DeleteMarkers, so you may '
                  'need to delete these using another tool to successfully '
                  'delete this bucket.' % bucket_name, status=e.status)
            raise NotEmptyException(
                'VersionedBucketNotEmpty (%s)' % bucket_name, status=e.status)
          else:
            raise NotEmptyException('BucketNotEmpty (%s)' % bucket_name,
                                    status=e.status)
        except TRANSLATABLE_BOTO_EXCEPTIONS, e2:
          self._TranslateExceptionAndRaise(e2, bucket_name=bucket_name)
      elif translated_exception and translated_exception.status == 404:
        raise NotFoundException('Bucket %s does not exist.' % bucket_name)
      else:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)

  def ListObjects(self, bucket_name, prefix=None, delimiter=None,
                  all_versions=None, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    get_fields = self._ListToGetFields(list_fields=fields)
    bucket_uri = self._StorageUriForBucket(bucket_name)
    prefix_list = []
    headers = {}
    self._AddApiVersionToHeaders(headers)
    try:
      objects_iter = bucket_uri.list_bucket(prefix=prefix or '',
                                            delimiter=delimiter or '',
                                            all_versions=all_versions,
                                            headers=headers)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)

    try:
      for key in objects_iter:
        if isinstance(key, Prefix):
          prefix_list.append(key.name)
          yield CloudApi.CsObjectOrPrefix(key.name,
                                          CloudApi.CsObjectOrPrefixType.PREFIX)
        else:
          key_to_convert = key

          # Listed keys are populated with these fields during bucket listing.
          key_http_fields = set(['bucket', 'etag', 'name', 'updated',
                                 'generation', 'metageneration', 'size'])

          # When fields == None, the caller is requesting all possible fields.
          # If the caller requested any fields that are not populated by bucket
          # listing, we'll need to make a separate HTTP call for each object to
          # get its metadata and populate the remaining fields with the result.
          if not get_fields or (get_fields and not
                                get_fields.issubset(key_http_fields)):

            generation = None
            if getattr(key, 'generation', None):
              generation = key.generation
            if getattr(key, 'version_id', None):
              generation = key.version_id
            key_to_convert = self._GetBotoKey(bucket_name, key.name,
                                              generation=generation)
          return_object = self._BotoKeyToObject(key_to_convert,
                                                fields=get_fields)

          yield CloudApi.CsObjectOrPrefix(return_object,
                                          CloudApi.CsObjectOrPrefixType.OBJECT)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)

  def GetObjectMetadata(self, bucket_name, object_name, generation=None,
                        provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    try:
      return self._BotoKeyToObject(self._GetBotoKey(bucket_name, object_name,
                                                    generation=generation),
                                   fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                       object_name=object_name,
                                       generation=generation)

  def _CurryDigester(self, digester_object):
    """Curries a digester object into a form consumable by boto.

    Key instantiates its own digesters by calling hash_algs[alg]() [note there
    are no arguments to this function].  So in order to pass in our caught-up
    digesters during a resumable download, we need to pass the digester
    object but don't get to look it up based on the algorithm name.  Here we
    use a lambda to make lookup implicit.

    Args:
      digester_object: Input object to be returned by the created function.

    Returns:
      A function which when called will return the input object.
    """
    return lambda: digester_object
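    # Illustrative use (not executed): boto instantiates digesters via
    # hash_algs[alg](), so the curried zero-argument function hands back the
    # caught-up digester, e.g.:
    #   hash_algs = {'md5': self._CurryDigester(md5_digester)}
    #   hash_algs['md5']() is md5_digester  # True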

  def GetObjectMedia(
      self, bucket_name, object_name, download_stream, provider=None,
      generation=None, object_size=None,
      download_strategy=CloudApi.DownloadStrategy.ONE_SHOT,
      start_byte=0, end_byte=None, progress_callback=None,
      serialization_data=None, digesters=None):
    """See CloudApi class for function doc strings."""
    # This implementation will get the object metadata first if we don't pass
    # it in via serialization_data.
    headers = {}
    self._AddApiVersionToHeaders(headers)
    if 'accept-encoding' not in headers:
      headers['accept-encoding'] = 'gzip'
    if end_byte:
      headers['range'] = 'bytes=%s-%s' % (start_byte, end_byte)
    elif start_byte > 0:
      headers['range'] = 'bytes=%s-' % (start_byte)
    else:
      headers['range'] = 'bytes=%s' % (start_byte)
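    # E.g. (illustrative): start_byte=1024, end_byte=2047 yields
    # 'bytes=1024-2047'; start_byte=1024 alone yields the open-ended
    # 'bytes=1024-'; the default start_byte=0 yields 'bytes=0'.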

    # Since in most cases we already made a call to get the object metadata,
    # here we avoid an extra HTTP call by unpickling the key.  This is coupled
    # with the implementation in _BotoKeyToObject.
    if serialization_data:
      serialization_dict = json.loads(serialization_data)
      key = pickle.loads(binascii.a2b_base64(serialization_dict['url']))
    else:
      key = self._GetBotoKey(bucket_name, object_name, generation=generation)
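    # As used here, serialization_data is a JSON document carrying at least a
    # 'url' field (base64-encoded pickled boto key) and a 'total_size' field
    # (consumed below).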

    if digesters and self.provider == 'gs':
      hash_algs = {}
      for alg in digesters:
        hash_algs[alg] = self._CurryDigester(digesters[alg])
    else:
      hash_algs = {}

    total_size = object_size or 0
    if serialization_data:
      total_size = json.loads(serialization_data)['total_size']

    if total_size:
      num_progress_callbacks = max(int(total_size) / TWO_MIB,
                                   XML_PROGRESS_CALLBACKS)
    else:
      num_progress_callbacks = XML_PROGRESS_CALLBACKS
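    # E.g. (illustrative): a 100 MiB download yields
    # max(50, XML_PROGRESS_CALLBACKS) callbacks, roughly one per two MiB.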

    try:
      if download_strategy is CloudApi.DownloadStrategy.RESUMABLE:
        self._PerformResumableDownload(
            download_stream, key, headers=headers, callback=progress_callback,
            num_callbacks=num_progress_callbacks, hash_algs=hash_algs)
      elif download_strategy is CloudApi.DownloadStrategy.ONE_SHOT:
        self._PerformSimpleDownload(
            download_stream, key, progress_callback=progress_callback,
            num_progress_callbacks=num_progress_callbacks, headers=headers,
            hash_algs=hash_algs)
      else:
        raise ArgumentException('Unsupported DownloadStrategy: %s' %
                                download_strategy)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                       object_name=object_name,
                                       generation=generation)

    if self.provider == 's3':
      if digesters:

        class HashToDigester(object):
          """Wrapper class to expose hash digests.

          boto creates its own digesters in s3's get_file, returning on-the-fly
          hashes only by way of key.local_hashes.  To propagate the digest back
          to the caller, this stub class implements the digest() function.
          """

          def __init__(self, hash_val):
            self.hash_val = hash_val

          def digest(self):  # pylint: disable=invalid-name
            return self.hash_val

        for alg_name in digesters:
          if ((download_strategy == CloudApi.DownloadStrategy.RESUMABLE and
               start_byte != 0) or
              not ((getattr(key, 'local_hashes', None) and
                    alg_name in key.local_hashes))):
            # For resumable downloads, boto does not provide a mechanism to
            # catch up the hash in the case of a partially complete download.
            # In this case or in the case where no digest was successfully
            # calculated, set the digester to None, which indicates that we'll
            # need to manually calculate the hash from the local file once it
            # is complete.
            digesters[alg_name] = None
          else:
            # Use the on-the-fly hash.
            digesters[alg_name] = HashToDigester(key.local_hashes[alg_name])

  def _PerformSimpleDownload(self, download_stream, key, progress_callback=None,
                             num_progress_callbacks=XML_PROGRESS_CALLBACKS,
                             headers=None, hash_algs=None):
    if not headers:
      headers = {}
      self._AddApiVersionToHeaders(headers)
    try:
      key.get_contents_to_file(download_stream, cb=progress_callback,
                               num_cb=num_progress_callbacks, headers=headers,
                               hash_algs=hash_algs)
    except TypeError:  # s3 and mocks do not support hash_algs
      key.get_contents_to_file(download_stream, cb=progress_callback,
                               num_cb=num_progress_callbacks, headers=headers)

  def _PerformResumableDownload(self, fp, key, headers=None, callback=None,
                                num_callbacks=XML_PROGRESS_CALLBACKS,
                                hash_algs=None):
    """Downloads bytes from key to fp, resuming as needed.

    Args:
      fp: File pointer into which data should be downloaded.
      key: Key object from which data is to be downloaded.
      headers: Headers to send when retrieving the file.
      callback: (optional) a callback function that will be called to report
           progress on the download.  The callback should accept two integer
           parameters.  The first integer represents the number of
           bytes that have been successfully transmitted from the service.  The
           second represents the total number of bytes that need to be
           transmitted.
      num_callbacks: (optional) If a callback is specified with the callback
           parameter, this determines the granularity of the callback
           by defining the maximum number of times the callback will be
           called during the file transfer.
      hash_algs: Dict of hash algorithms to apply to downloaded bytes.

    Raises:
      ResumableDownloadException on error.
    """
    if not headers:
      headers = {}
      self._AddApiVersionToHeaders(headers)

    retryable_exceptions = (httplib.HTTPException, IOError, socket.error,
                            socket.gaierror)

    debug = key.bucket.connection.debug

    num_retries = GetNumRetries()
    progress_less_iterations = 0

    while True:  # Retry as long as we're making progress.
      had_file_bytes_before_attempt = GetFileSize(fp)
      try:
        cur_file_size = GetFileSize(fp, position_to_eof=True)

        def DownloadProxyCallback(total_bytes_downloaded, total_size):
          """Translates a boto callback into a gsutil Cloud API callback.

          Callbacks are originally made by boto.s3.Key.get_file(); here we take
          into account that we're resuming a download.

          Args:
            total_bytes_downloaded: Actual bytes downloaded so far, not
                including the point we resumed from.
            total_size: Total size of the download.
          """
          if callback:
            callback(cur_file_size + total_bytes_downloaded, total_size)

        headers = headers.copy()
        headers['Range'] = 'bytes=%d-%d' % (cur_file_size, key.size - 1)
        cb = DownloadProxyCallback

        # Disable AWSAuthConnection-level retry behavior, since that would
        # cause downloads to restart from scratch.
        if self.provider == 'gs':
          key.get_file(fp, headers, cb, num_callbacks, override_num_retries=0,
                       hash_algs=hash_algs)
        else:
          key.get_file(fp, headers, cb, num_callbacks, override_num_retries=0)
        fp.flush()
        # Download succeeded.
        return
, e
:
579 self
.logger
.info('Caught exception (%s)', repr(e
))
580 if isinstance(e
, IOError) and e
.errno
== errno
.EPIPE
:
581 # Broken pipe error causes httplib to immediately
582 # close the socket (http://bugs.python.org/issue5542),
583 # so we need to close and reopen the key before resuming
585 if self
.provider
== 's3':
586 key
.get_file(fp
, headers
, cb
, num_callbacks
, override_num_retries
=0)
587 else: # self.provider == 'gs'
588 key
.get_file(fp
, headers
, cb
, num_callbacks
,
589 override_num_retries
=0, hash_algs
=hash_algs
)
590 except BotoResumableDownloadException
, e
:
592 ResumableTransferDisposition
.ABORT_CUR_PROCESS
):
593 raise ResumableDownloadException(e
.message
)
596 self
.logger
.info('Caught ResumableDownloadException (%s) - will '

      # At this point we had a re-tryable failure; see if made progress.
      if GetFileSize(fp) > had_file_bytes_before_attempt:
        progress_less_iterations = 0
      else:
        progress_less_iterations += 1

      if progress_less_iterations > num_retries:
        # Don't retry any longer in the current process.
        raise ResumableDownloadException(
            'Too many resumable download attempts failed without '
            'progress. You might try this download again later')

      # Close the key, in case a previous download died partway
      # through and left data in the underlying key HTTP buffer.
      # Do this within a try/except block in case the connection is
      # closed (since key.close() attempts to do a final read, in which
      # case this read attempt would get an IncompleteRead exception,
      # which we can safely ignore).
      try:
        key.close()
      except httplib.IncompleteRead:
        pass

      sleep_time_secs = min(random.random() * (2 ** progress_less_iterations),
                            GetMaxRetryDelay())
      self.logger.info(
          'Got retryable failure (%d progress-less in a row).\nSleeping %d '
          'seconds before re-trying', progress_less_iterations,
          sleep_time_secs)
      time.sleep(sleep_time_secs)
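      # Illustrative backoff progression: after n progress-less attempts the
      # sleep is uniform in [0, 2**n) seconds (at most 2, 4, 8, ...), capped
      # by GetMaxRetryDelay().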

  def PatchObjectMetadata(self, bucket_name, object_name, metadata,
                          canned_acl=None, generation=None, preconditions=None,
                          provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    object_uri = self._StorageUriForObject(bucket_name, object_name,
                                           generation=generation)

    headers = {}
    self._AddApiVersionToHeaders(headers)
    meta_headers = HeadersFromObjectMetadata(metadata, self.provider)

    metadata_plus = {}
    metadata_minus = set()
    metadata_changed = False
    for k, v in meta_headers.iteritems():
      metadata_changed = True
      if v is None:
        metadata_minus.add(k)
      else:
        metadata_plus[k] = v

    self._AddPreconditionsToHeaders(preconditions, headers)

    if metadata_changed:
      try:
        object_uri.set_metadata(metadata_plus, metadata_minus, False,
                                headers=headers)
      except TRANSLATABLE_BOTO_EXCEPTIONS, e:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                         object_name=object_name,
                                         generation=generation)

    if metadata.acl:
      boto_acl = AclTranslation.BotoAclFromMessage(metadata.acl)
      try:
        object_uri.set_xml_acl(boto_acl.to_xml(), key_name=object_name)
      except TRANSLATABLE_BOTO_EXCEPTIONS, e:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                         object_name=object_name,
                                         generation=generation)
    if canned_acl:
      canned_acls = object_uri.canned_acls()
      if canned_acl not in canned_acls:
        raise CommandException('Invalid canned ACL "%s".' % canned_acl)
      object_uri.set_acl(canned_acl, object_uri.object_name)

    return self.GetObjectMetadata(bucket_name, object_name,
                                  generation=generation, fields=fields)

  def _PerformSimpleUpload(self, dst_uri, upload_stream, md5=None,
                           canned_acl=None, progress_callback=None,
                           headers=None):
    dst_uri.set_contents_from_file(upload_stream, md5=md5, policy=canned_acl,
                                   cb=progress_callback, headers=headers)

  def _PerformStreamingUpload(self, dst_uri, upload_stream, canned_acl=None,
                              progress_callback=None, headers=None):
    if dst_uri.get_provider().supports_chunked_transfer():
      dst_uri.set_contents_from_stream(upload_stream, policy=canned_acl,
                                       cb=progress_callback, headers=headers)
    else:
      # Provider doesn't support chunked transfer, so copy to a temporary
      # file.
      (temp_fh, temp_path) = tempfile.mkstemp()
      try:
        with open(temp_path, 'wb') as out_fp:
          stream_bytes = upload_stream.read(DEFAULT_FILE_BUFFER_SIZE)
          while stream_bytes:
            out_fp.write(stream_bytes)
            stream_bytes = upload_stream.read(DEFAULT_FILE_BUFFER_SIZE)
        with open(temp_path, 'rb') as in_fp:
          dst_uri.set_contents_from_file(in_fp, policy=canned_acl,
                                         headers=headers)
      finally:
        os.close(temp_fh)
        os.remove(temp_path)

  def _PerformResumableUpload(self, key, upload_stream, upload_size,
                              tracker_callback, canned_acl=None,
                              serialization_data=None, progress_callback=None,
                              headers=None):
    resumable_upload = BotoResumableUpload(
        tracker_callback, self.logger, resume_url=serialization_data)
    resumable_upload.SendFile(key, upload_stream, upload_size,
                              canned_acl=canned_acl, cb=progress_callback,
                              headers=headers)

  def _UploadSetup(self, object_metadata, preconditions=None):
    """Shared upload implementation.

    Args:
      object_metadata: Object metadata describing destination object.
      preconditions: Optional gsutil Cloud API preconditions.

    Returns:
      Headers dictionary, StorageUri for upload (based on inputs)
    """
    ValidateDstObjectMetadata(object_metadata)

    headers = HeadersFromObjectMetadata(object_metadata, self.provider)
    self._AddApiVersionToHeaders(headers)

    if object_metadata.crc32c:
      if 'x-goog-hash' in headers:
        headers['x-goog-hash'] += (
            ',crc32c=%s' % object_metadata.crc32c.rstrip('\n'))
      else:
        headers['x-goog-hash'] = (
            'crc32c=%s' % object_metadata.crc32c.rstrip('\n'))
    if object_metadata.md5Hash:
      if 'x-goog-hash' in headers:
        headers['x-goog-hash'] += (
            ',md5=%s' % object_metadata.md5Hash.rstrip('\n'))
      else:
        headers['x-goog-hash'] = (
            'md5=%s' % object_metadata.md5Hash.rstrip('\n'))
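    # E.g. (illustrative), with both hashes set the combined header is:
    #   x-goog-hash: crc32c=<base64-crc32c>,md5=<base64-md5>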

    if 'content-type' in headers and not headers['content-type']:
      headers['content-type'] = 'application/octet-stream'

    self._AddPreconditionsToHeaders(preconditions, headers)

    dst_uri = self._StorageUriForObject(object_metadata.bucket,
                                        object_metadata.name)
    return headers, dst_uri

  def _HandleSuccessfulUpload(self, dst_uri, object_metadata, fields=None):
    """Set ACLs on an uploaded object and return its metadata.

    Args:
      dst_uri: Generation-specific StorageUri describing the object.
      object_metadata: Metadata for the object, including an ACL if applicable.
      fields: If present, return only these Object metadata fields.

    Returns:
      gsutil Cloud API Object metadata.

    Raises:
      CommandException if the object was overwritten / deleted concurrently.
    """
    try:
      # The XML API does not support if-generation-match for GET requests.
      # Therefore, if the object gets overwritten before the ACL and get_key
      # operations, the best we can do is warn that it happened.
      self._SetObjectAcl(object_metadata, dst_uri)
      return self._BotoKeyToObject(dst_uri.get_key(), fields=fields)
    except boto.exception.InvalidUriError as e:
      if e.message and NON_EXISTENT_OBJECT_REGEX.match(e.message.encode(UTF8)):
        raise CommandException('\n'.join(textwrap.wrap(
            'Uploaded object (%s) was deleted or overwritten immediately '
            'after it was uploaded. This can happen if you attempt to upload '
            'to the same object multiple times concurrently.' % dst_uri.uri)))
      else:
        raise

  def _SetObjectAcl(self, object_metadata, dst_uri):
    """Sets the ACL (if present in object_metadata) on an uploaded object."""
    if object_metadata.acl:
      boto_acl = AclTranslation.BotoAclFromMessage(object_metadata.acl)
      dst_uri.set_xml_acl(boto_acl.to_xml())
    elif self.provider == 's3':
      s3_acl = S3MarkerAclFromObjectMetadata(object_metadata)
      if s3_acl:
        dst_uri.set_xml_acl(s3_acl)

  def UploadObjectResumable(
      self, upload_stream, object_metadata, canned_acl=None, preconditions=None,
      provider=None, fields=None, size=None, serialization_data=None,
      tracker_callback=None, progress_callback=None):
    """See CloudApi class for function doc strings."""
    if self.provider == 's3':
      # Resumable uploads are not supported for s3.
      return self.UploadObject(
          upload_stream, object_metadata, canned_acl=canned_acl,
          preconditions=preconditions, fields=fields, size=size)
    headers, dst_uri = self._UploadSetup(object_metadata,
                                         preconditions=preconditions)
    if not tracker_callback:
      raise ArgumentException('No tracker callback function set for '
                              'resumable upload of %s' % dst_uri)
    try:
      self._PerformResumableUpload(dst_uri.new_key(headers=headers),
                                   upload_stream, size, tracker_callback,
                                   canned_acl=canned_acl,
                                   serialization_data=serialization_data,
                                   progress_callback=progress_callback,
                                   headers=headers)
      return self._HandleSuccessfulUpload(dst_uri, object_metadata,
                                          fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket,
                                       object_name=object_metadata.name)

  def UploadObjectStreaming(self, upload_stream, object_metadata,
                            canned_acl=None, progress_callback=None,
                            preconditions=None, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    headers, dst_uri = self._UploadSetup(object_metadata,
                                         preconditions=preconditions)

    try:
      self._PerformStreamingUpload(
          dst_uri, upload_stream, canned_acl=canned_acl,
          progress_callback=progress_callback, headers=headers)
      return self._HandleSuccessfulUpload(dst_uri, object_metadata,
                                          fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket,
                                       object_name=object_metadata.name)

  def UploadObject(self, upload_stream, object_metadata, canned_acl=None,
                   preconditions=None, size=None, progress_callback=None,
                   provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    headers, dst_uri = self._UploadSetup(object_metadata,
                                         preconditions=preconditions)

    try:
      md5 = None
      if object_metadata.md5Hash:
        md5 = []
        # boto expects hex at index 0, base64 at index 1
        md5.append(Base64ToHexHash(object_metadata.md5Hash))
        md5.append(object_metadata.md5Hash.strip('\n"\''))
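        # E.g. (illustrative) for an empty object this pair is
        # ['d41d8cd98f00b204e9800998ecf8427e', '1B2M2Y8AsgTpgAmY7PhCfg=='].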
      self._PerformSimpleUpload(dst_uri, upload_stream, md5=md5,
                                canned_acl=canned_acl,
                                progress_callback=progress_callback,
                                headers=headers)
      return self._HandleSuccessfulUpload(dst_uri, object_metadata,
                                          fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket,
                                       object_name=object_metadata.name)

  def DeleteObject(self, bucket_name, object_name, preconditions=None,
                   generation=None, provider=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    headers = {}
    self._AddApiVersionToHeaders(headers)
    self._AddPreconditionsToHeaders(preconditions, headers)

    uri = self._StorageUriForObject(bucket_name, object_name,
                                    generation=generation)
    try:
      uri.delete_key(validate=False, headers=headers)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                       object_name=object_name,
                                       generation=generation)

  def CopyObject(self, src_obj_metadata, dst_obj_metadata, src_generation=None,
                 canned_acl=None, preconditions=None, progress_callback=None,
                 max_bytes_per_call=None, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider

    if max_bytes_per_call is not None:
      raise NotImplementedError('XML API does not support max_bytes_per_call')
    dst_uri = self._StorageUriForObject(dst_obj_metadata.bucket,
                                        dst_obj_metadata.name)

    # Usually it's okay to treat version_id and generation as
    # the same, but in this case the underlying boto call determines the
    # provider based on the presence of one or the other.
    src_version_id = None
    if self.provider == 's3':
      src_version_id = src_generation
      src_generation = None

    headers = HeadersFromObjectMetadata(dst_obj_metadata, self.provider)
    self._AddApiVersionToHeaders(headers)
    self._AddPreconditionsToHeaders(preconditions, headers)

    if canned_acl:
      headers[dst_uri.get_provider().acl_header] = canned_acl

    preserve_acl = True if dst_obj_metadata.acl else False
    if self.provider == 's3':
      s3_acl = S3MarkerAclFromObjectMetadata(dst_obj_metadata)
      if s3_acl:
        preserve_acl = True

    try:
      new_key = dst_uri.copy_key(
          src_obj_metadata.bucket, src_obj_metadata.name,
          preserve_acl=preserve_acl, headers=headers,
          src_version_id=src_version_id, src_generation=src_generation)

      return self._BotoKeyToObject(new_key, fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, dst_obj_metadata.bucket,
                                       dst_obj_metadata.name)

  def ComposeObject(self, src_objs_metadata, dst_obj_metadata,
                    preconditions=None, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    ValidateDstObjectMetadata(dst_obj_metadata)

    dst_obj_name = dst_obj_metadata.name
    dst_obj_metadata.name = None
    dst_bucket_name = dst_obj_metadata.bucket
    dst_obj_metadata.bucket = None
    headers = HeadersFromObjectMetadata(dst_obj_metadata, self.provider)
    if not dst_obj_metadata.contentType:
      dst_obj_metadata.contentType = DEFAULT_CONTENT_TYPE
      headers['content-type'] = dst_obj_metadata.contentType
    self._AddApiVersionToHeaders(headers)
    self._AddPreconditionsToHeaders(preconditions, headers)

    dst_uri = self._StorageUriForObject(dst_bucket_name, dst_obj_name)

    src_components = []
    for src_obj in src_objs_metadata:
      src_uri = self._StorageUriForObject(dst_bucket_name, src_obj.name,
                                          generation=src_obj.generation)
      src_components.append(src_uri)

    try:
      dst_uri.compose(src_components, headers=headers)

      return self.GetObjectMetadata(dst_bucket_name, dst_obj_name,
                                    fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, dst_obj_metadata.bucket,
                                       dst_obj_metadata.name)

  def _AddPreconditionsToHeaders(self, preconditions, headers):
    """Adds preconditions (if any) to headers."""
    if preconditions and self.provider == 'gs':
      if preconditions.gen_match is not None:
        headers['x-goog-if-generation-match'] = str(preconditions.gen_match)
      if preconditions.meta_gen_match is not None:
        headers['x-goog-if-metageneration-match'] = str(
            preconditions.meta_gen_match)
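    # E.g. (illustrative): gen_match=1234 adds
    # 'x-goog-if-generation-match: 1234'; meta_gen_match=5 adds
    # 'x-goog-if-metageneration-match: 5'.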

  def _AddApiVersionToHeaders(self, headers):
    if self.provider == 'gs':
      headers['x-goog-api-version'] = self.api_version

  def _GetMD5FromETag(self, src_etag):
    """Returns an MD5 from the etag iff the etag is a valid MD5 hash.

    Args:
      src_etag: Object etag for which to return the MD5.

    Returns:
      MD5 in hex string format, or None.
    """
    if src_etag and MD5_REGEX.search(src_etag):
      return src_etag.strip('"\'').lower()
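    # E.g. (illustrative): '"D41D8CD98F00B204E9800998ECF8427E"' ->
    # 'd41d8cd98f00b204e9800998ecf8427e'; non-MD5 etags fall through and
    # return None implicitly.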

  def _StorageUriForBucket(self, bucket):
    """Returns a boto storage_uri for the given bucket name.

    Args:
      bucket: Bucket name (string).

    Returns:
      Boto storage_uri for the bucket.
    """
    return boto.storage_uri(
        '%s://%s' % (self.provider, bucket),
        suppress_consec_slashes=False,
        bucket_storage_uri_class=self.bucket_storage_uri_class,
        debug=self.debug)

  def _StorageUriForObject(self, bucket, object_name, generation=None):
    """Returns a boto storage_uri for the given object.

    Args:
      bucket: Bucket name (string).
      object_name: Object name (string).
      generation: Generation or version_id of object.  If None, live version
                  of the object is used.

    Returns:
      Boto storage_uri for the object.
    """
    uri_string = '%s://%s/%s' % (self.provider, bucket, object_name)
    if generation:
      uri_string += '#%s' % generation
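      # E.g. (illustrative): 'gs://some-bucket/some-object#1361471000000000'.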
    return boto.storage_uri(
        uri_string, suppress_consec_slashes=False,
        bucket_storage_uri_class=self.bucket_storage_uri_class,
        debug=self.debug)

  def _GetBotoKey(self, bucket_name, object_name, generation=None):
    """Gets the boto key for an object.

    Args:
      bucket_name: Bucket containing the object.
      object_name: Object name.
      generation: Generation or version of the object to retrieve.

    Returns:
      Boto key for the object.
    """
    object_uri = self._StorageUriForObject(bucket_name, object_name,
                                           generation=generation)
    try:
      key = object_uri.get_key()
      if not key:
        raise CreateObjectNotFoundException('404', self.provider,
                                            bucket_name, object_name,
                                            generation=generation)
      return key
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                       object_name=object_name,
                                       generation=generation)

  def _ListToGetFields(self, list_fields=None):
    """Removes 'items/' from the input fields and converts it to a set.

    This way field sets requested for ListBucket/ListObject can be used in
    _BotoBucketToBucket and _BotoKeyToObject calls.

    Args:
      list_fields: Iterable fields usable in ListBucket/ListObject calls.

    Returns:
      Set of fields usable in GetBucket/GetObject or
      _BotoBucketToBucket/_BotoKeyToObject calls.
    """
    if list_fields:
      get_fields = set()
      for field in list_fields:
        if field in ['kind', 'nextPageToken', 'prefixes']:
          # These are not actually object / bucket metadata fields.
          # They are fields specific to listing, so we don't consider them.
          continue
        get_fields.add(re.sub(r'items/', '', field))
      return get_fields
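    # E.g. (illustrative): ['items/name', 'items/size', 'prefixes'] ->
    # set(['name', 'size']).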

  # pylint: disable=too-many-statements
  def _BotoBucketToBucket(self, bucket, fields=None):
    """Constructs an apitools Bucket from a boto bucket.

    Args:
      bucket: Boto bucket.
      fields: If present, construct the apitools Bucket with only this set of
              metadata fields.

    Returns:
      apitools Bucket.
    """
    bucket_uri = self._StorageUriForBucket(bucket.name)

    cloud_api_bucket = apitools_messages.Bucket(name=bucket.name,
                                                id=bucket.name)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    if self.provider == 'gs':
      if not fields or 'storageClass' in fields:
        if hasattr(bucket, 'get_storage_class'):
          cloud_api_bucket.storageClass = bucket.get_storage_class()
      if not fields or 'acl' in fields:
        for acl in AclTranslation.BotoBucketAclToMessage(
            bucket.get_acl(headers=headers)):
          try:
            cloud_api_bucket.acl.append(acl)
          except TRANSLATABLE_BOTO_EXCEPTIONS, e:
            translated_exception = self._TranslateBotoException(
                e, bucket_name=bucket.name)
            if (translated_exception and
                isinstance(translated_exception,
                           AccessDeniedException)):
              # JSON API doesn't differentiate between a blank ACL list
              # and an access denied, so this is intentionally left blank.
              pass
            else:
              self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'cors' in fields:
        try:
          boto_cors = bucket_uri.get_cors()
          cloud_api_bucket.cors = CorsTranslation.BotoCorsToMessage(boto_cors)
        except TRANSLATABLE_BOTO_EXCEPTIONS, e:
          self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'defaultObjectAcl' in fields:
        for acl in AclTranslation.BotoObjectAclToMessage(
            bucket.get_def_acl(headers=headers)):
          try:
            cloud_api_bucket.defaultObjectAcl.append(acl)
          except TRANSLATABLE_BOTO_EXCEPTIONS, e:
            translated_exception = self._TranslateBotoException(
                e, bucket_name=bucket.name)
            if (translated_exception and
                isinstance(translated_exception,
                           AccessDeniedException)):
              # JSON API doesn't differentiate between a blank ACL list
              # and an access denied, so this is intentionally left blank.
              pass
            else:
              self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'lifecycle' in fields:
        try:
          boto_lifecycle = bucket_uri.get_lifecycle_config()
          cloud_api_bucket.lifecycle = (
              LifecycleTranslation.BotoLifecycleToMessage(boto_lifecycle))
        except TRANSLATABLE_BOTO_EXCEPTIONS, e:
          self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'logging' in fields:
        try:
          boto_logging = bucket_uri.get_logging_config()
          if boto_logging and 'Logging' in boto_logging:
            logging_config = boto_logging['Logging']
            log_object_prefix_present = 'LogObjectPrefix' in logging_config
            log_bucket_present = 'LogBucket' in logging_config
            if log_object_prefix_present or log_bucket_present:
              cloud_api_bucket.logging = apitools_messages.Bucket.LoggingValue()
              if log_object_prefix_present:
                cloud_api_bucket.logging.logObjectPrefix = (
                    logging_config['LogObjectPrefix'])
              if log_bucket_present:
                cloud_api_bucket.logging.logBucket = logging_config['LogBucket']
        except TRANSLATABLE_BOTO_EXCEPTIONS, e:
          self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'website' in fields:
        try:
          boto_website = bucket_uri.get_website_config()
          if boto_website and 'WebsiteConfiguration' in boto_website:
            website_config = boto_website['WebsiteConfiguration']
            main_page_suffix_present = 'MainPageSuffix' in website_config
            not_found_page_present = 'NotFoundPage' in website_config
            if main_page_suffix_present or not_found_page_present:
              cloud_api_bucket.website = apitools_messages.Bucket.WebsiteValue()
              if main_page_suffix_present:
                cloud_api_bucket.website.mainPageSuffix = (
                    website_config['MainPageSuffix'])
              if not_found_page_present:
                cloud_api_bucket.website.notFoundPage = (
                    website_config['NotFoundPage'])
        except TRANSLATABLE_BOTO_EXCEPTIONS, e:
          self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
    if not fields or 'location' in fields:
      cloud_api_bucket.location = bucket_uri.get_location()
    if not fields or 'versioning' in fields:
      versioning = bucket_uri.get_versioning_config(headers=headers)
      if versioning:
        if (self.provider == 's3' and 'Versioning' in versioning and
            versioning['Versioning'] == 'Enabled'):
          cloud_api_bucket.versioning = (
              apitools_messages.Bucket.VersioningValue(enabled=True))
        elif self.provider == 'gs':
          cloud_api_bucket.versioning = (
              apitools_messages.Bucket.VersioningValue(enabled=True))

    # For S3 long bucket listing we do not support CORS, lifecycle, website,
    # and logging translation. The individual commands can be used to get
    # the XML equivalents for S3.
    return cloud_api_bucket

  def _BotoKeyToObject(self, key, fields=None):
    """Constructs an apitools Object from a boto key.

    Args:
      key: Boto key to construct Object from.
      fields: If present, construct the apitools Object with only this set of
              metadata fields.

    Returns:
      apitools Object corresponding to key.
    """
    custom_metadata = None
    if not fields or 'metadata' in fields:
      custom_metadata = self._TranslateBotoKeyCustomMetadata(key)
    cache_control = None
    if not fields or 'cacheControl' in fields:
      cache_control = getattr(key, 'cache_control', None)
    component_count = None
    if not fields or 'componentCount' in fields:
      component_count = getattr(key, 'component_count', None)
    content_disposition = None
    if not fields or 'contentDisposition' in fields:
      content_disposition = getattr(key, 'content_disposition', None)
    # Other fields like updated and ACL depend on the generation
    # of the object, so populate that regardless of whether it was requested.
    generation = self._TranslateBotoKeyGeneration(key)
    metageneration = None
    if not fields or 'metageneration' in fields:
      metageneration = self._TranslateBotoKeyMetageneration(key)
    updated = None
    # Translation code to avoid a dependency on dateutil.
    if not fields or 'updated' in fields:
      updated = self._TranslateBotoKeyTimestamp(key)
    etag = None
    if not fields or 'etag' in fields:
      etag = getattr(key, 'etag', None)
      if etag:
        etag = etag.strip('"\'')
    crc32c = None
    if not fields or 'crc32c' in fields:
      if hasattr(key, 'cloud_hashes') and 'crc32c' in key.cloud_hashes:
        crc32c = base64.encodestring(key.cloud_hashes['crc32c']).rstrip('\n')
    md5_hash = None
    if not fields or 'md5Hash' in fields:
      if hasattr(key, 'cloud_hashes') and 'md5' in key.cloud_hashes:
        md5_hash = base64.encodestring(key.cloud_hashes['md5']).rstrip('\n')
      elif self._GetMD5FromETag(getattr(key, 'etag', None)):
        md5_hash = Base64EncodeHash(self._GetMD5FromETag(key.etag))
      elif self.provider == 's3':
        # S3 etags are MD5s for non-multi-part objects, but multi-part objects
        # (which include all objects >= 5 GB) have a custom checksum
        # implementation that is not currently supported by gsutil.
        self.logger.warn(
            'Non-MD5 etag (%s) present for key %s, data integrity checks are '
            'not possible.', key.etag, key)

    # Serialize the boto key in the media link if it is requested.  This
    # way we can later access the key without adding an HTTP call.
    media_link = None
    if not fields or 'mediaLink' in fields:
      media_link = binascii.b2a_base64(
          pickle.dumps(key, pickle.HIGHEST_PROTOCOL))
    size = None
    if not fields or 'size' in fields:
      size = key.size or 0
    storage_class = None
    if not fields or 'storageClass' in fields:
      storage_class = getattr(key, 'storage_class', None)

    cloud_api_object = apitools_messages.Object(
        bucket=key.bucket.name,
        name=key.name,
        size=size,
        contentEncoding=key.content_encoding,
        contentLanguage=key.content_language,
        contentType=key.content_type,
        cacheControl=cache_control,
        contentDisposition=content_disposition,
        etag=etag,
        crc32c=crc32c,
        md5Hash=md5_hash,
        generation=generation,
        metageneration=metageneration,
        componentCount=component_count,
        updated=updated,
        metadata=custom_metadata,
        mediaLink=media_link,
        storageClass=storage_class)

    # Remaining functions amend cloud_api_object.
    self._TranslateDeleteMarker(key, cloud_api_object)
    if not fields or 'acl' in fields:
      generation_str = GenerationFromUrlAndString(
          StorageUrlFromString(self.provider), generation)
      self._TranslateBotoKeyAcl(key, cloud_api_object,
                                generation=generation_str)

    return cloud_api_object

  def _TranslateBotoKeyCustomMetadata(self, key):
    """Populates an apitools message from custom metadata in the boto key."""
    custom_metadata = None
    if getattr(key, 'metadata', None):
      custom_metadata = apitools_messages.Object.MetadataValue(
          additionalProperties=[])
      for k, v in key.metadata.iteritems():
        if k.lower() == 'content-language':
          # Work around content-language being inserted into custom metadata.
          continue
        custom_metadata.additionalProperties.append(
            apitools_messages.Object.MetadataValue.AdditionalProperty(
                key=k, value=v))
    return custom_metadata

  def _TranslateBotoKeyGeneration(self, key):
    """Returns the generation/version_id number from the boto key if present."""
    generation = None
    if self.provider == 'gs':
      if getattr(key, 'generation', None):
        generation = long(key.generation)
    elif self.provider == 's3':
      if getattr(key, 'version_id', None):
        generation = EncodeStringAsLong(key.version_id)
    return generation
, key
):
1312 """Returns the metageneration number from the boto key if present."""
1313 metageneration
= None
1314 if self
.provider
== 'gs':
1315 if getattr(key
, 'metageneration', None):
1316 metageneration
= long(key
.metageneration
)
1317 return metageneration

  def _TranslateBotoKeyTimestamp(self, key):
    """Parses the timestamp from the boto key into a datetime object.

    This avoids a dependency on dateutil.

    Args:
      key: Boto key to get timestamp from.

    Returns:
      datetime object if string is parsed successfully, None otherwise.
    """
    if key.last_modified:
      if '.' in key.last_modified:
        key_us_timestamp = key.last_modified.rstrip('Z') + '000Z'
      else:
        key_us_timestamp = key.last_modified.rstrip('Z') + '.000000Z'
      fmt = '%Y-%m-%dT%H:%M:%S.%fZ'
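      # E.g. (illustrative): GCS-style '2013-04-05T12:00:00.000Z' was padded
      # above to '2013-04-05T12:00:00.000000Z' to satisfy the %f directive.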
      try:
        return datetime.datetime.strptime(key_us_timestamp, fmt)
      except ValueError:
        try:
          # Try alternate format
          fmt = '%a, %d %b %Y %H:%M:%S %Z'
          return datetime.datetime.strptime(key.last_modified, fmt)
        except ValueError:
          # Could not parse the time; leave updated as None.
          return None

  def _TranslateDeleteMarker(self, key, cloud_api_object):
    """Marks deleted objects with a metadata value (for S3 compatibility)."""
    if isinstance(key, DeleteMarker):
      if not cloud_api_object.metadata:
        cloud_api_object.metadata = apitools_messages.Object.MetadataValue()
        cloud_api_object.metadata.additionalProperties = []
      cloud_api_object.metadata.additionalProperties.append(
          apitools_messages.Object.MetadataValue.AdditionalProperty(
              key=S3_DELETE_MARKER_GUID, value=True))

  def _TranslateBotoKeyAcl(self, key, cloud_api_object, generation=None):
    """Updates cloud_api_object with the ACL from the boto key."""
    storage_uri_for_key = self._StorageUriForObject(key.bucket.name, key.name,
                                                    generation=generation)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    try:
      if self.provider == 'gs':
        key_acl = storage_uri_for_key.get_acl(headers=headers)
        # key.get_acl() does not support versioning so we need to use
        # storage_uri to ensure we're getting the versioned ACL.
        for acl in AclTranslation.BotoObjectAclToMessage(key_acl):
          cloud_api_object.acl.append(acl)
      if self.provider == 's3':
        key_acl = key.get_xml_acl(headers=headers)
        # ACLs for s3 are different and we use special markers to represent
        # them in the gsutil Cloud API.
        AddS3MarkerAclToObjectMetadata(cloud_api_object, key_acl)
    except boto.exception.GSResponseError, e:
      if e.status == 403:
        # Consume access denied exceptions to mimic JSON behavior of simply
        # returning None if sufficient permission is not present.  The caller
        # needs to handle the case where the ACL is not populated.
        pass
      else:
        raise

  def _TranslateExceptionAndRaise(self, e, bucket_name=None, object_name=None,
                                  generation=None):
    """Translates a Boto exception and raises the translated or original value.

    Args:
      e: Any Exception.
      bucket_name: Optional bucket name in request that caused the exception.
      object_name: Optional object name in request that caused the exception.
      generation: Optional generation in request that caused the exception.

    Raises:
      Translated CloudApi exception, or the original exception if it was not
      translatable.
    """
    translated_exception = self._TranslateBotoException(
        e, bucket_name=bucket_name, object_name=object_name,
        generation=generation)
    if translated_exception:
      raise translated_exception
    else:
      raise

  def _TranslateBotoException(self, e, bucket_name=None, object_name=None,
                              generation=None):
    """Translates boto exceptions into their gsutil Cloud API equivalents.

    Args:
      e: Any exception in TRANSLATABLE_BOTO_EXCEPTIONS.
      bucket_name: Optional bucket name in request that caused the exception.
      object_name: Optional object name in request that caused the exception.
      generation: Optional generation in request that caused the exception.

    Returns:
      CloudStorageApiServiceException for translatable exceptions, None
      otherwise.
    """
    # Because we're using isinstance, check for subtypes first.
    if isinstance(e, boto.exception.StorageResponseError):
      if e.status == 400:
        return BadRequestException(e.code, status=e.status, body=e.body)
      elif e.status == 401 or e.status == 403:
        return AccessDeniedException(e.code, status=e.status, body=e.body)
      elif e.status == 404:
        if bucket_name:
          if object_name:
            return CreateObjectNotFoundException(e.status, self.provider,
                                                 bucket_name, object_name,
                                                 generation=generation)
          return CreateBucketNotFoundException(e.status, self.provider,
                                               bucket_name)
        return NotFoundException(e.code, status=e.status, body=e.body)
      elif e.status == 409 and e.code and 'BucketNotEmpty' in e.code:
        return NotEmptyException('BucketNotEmpty (%s)' % bucket_name,
                                 status=e.status, body=e.body)
      elif e.status == 410:
        # 410 errors should always cause us to start over - either the UploadID
        # has expired or there was a server-side problem that requires starting
        # the upload over from scratch.
        return ResumableUploadStartOverException(e.message)
      elif e.status == 412:
        return PreconditionException(e.code, status=e.status, body=e.body)
    if isinstance(e, boto.exception.StorageCreateError):
      return ServiceException('Bucket already exists.', status=e.status,
                              body=e.body)

    if isinstance(e, boto.exception.BotoServerError):
      return ServiceException(e.message, status=e.status, body=e.body)

    if isinstance(e, boto.exception.InvalidUriError):
      # Work around textwrap when searching for this string.
      if e.message and NON_EXISTENT_OBJECT_REGEX.match(e.message.encode(UTF8)):
        return NotFoundException(e.message, status=404)
      return InvalidUrlError(e.message)

    if isinstance(e, boto.exception.ResumableUploadException):
      if e.disposition == boto.exception.ResumableTransferDisposition.ABORT:
        return ResumableUploadAbortException(e.message)
      elif (e.disposition ==
            boto.exception.ResumableTransferDisposition.START_OVER):
        return ResumableUploadStartOverException(e.message)
      else:
        return ResumableUploadException(e.message)

    if isinstance(e, boto.exception.ResumableDownloadException):
      return ResumableDownloadException(e.message)

    return None

  # For function docstrings, see CloudApiDelegator class.
  def XmlPassThroughGetAcl(self, storage_url, def_obj_acl=False):
    """See CloudApiDelegator class for function doc strings."""
    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      if def_obj_acl:
        return uri.get_def_acl()
      else:
        return uri.get_acl()
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

  def XmlPassThroughSetAcl(self, acl_text, storage_url, canned=True,
                           def_obj_acl=False):
    """See CloudApiDelegator class for function doc strings."""
    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      if canned:
        if def_obj_acl:
          canned_acls = uri.canned_acls()
          if acl_text not in canned_acls:
            raise CommandException('Invalid canned ACL "%s".' % acl_text)
          uri.set_def_acl(acl_text, uri.object_name)
        else:
          canned_acls = uri.canned_acls()
          if acl_text not in canned_acls:
            raise CommandException('Invalid canned ACL "%s".' % acl_text)
          uri.set_acl(acl_text, uri.object_name)
      else:
        if def_obj_acl:
          uri.set_def_xml_acl(acl_text, uri.object_name)
        else:
          uri.set_xml_acl(acl_text, uri.object_name)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

  # pylint: disable=catching-non-exception
  def XmlPassThroughSetCors(self, cors_text, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    # Parse XML document and convert into Cors object.
    if storage_url.scheme == 's3':
      cors_obj = S3Cors()
    else:
      cors_obj = Cors()
    h = handler.XmlHandler(cors_obj, None)
    try:
      xml.sax.parseString(cors_text, h)
    except SaxExceptions.SAXParseException, e:
      raise CommandException('Requested CORS is invalid: %s at line %s, '
                             'column %s' % (e.getMessage(), e.getLineNumber(),
                                            e.getColumnNumber()))

    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      uri.set_cors(cors_obj, False)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

  def XmlPassThroughGetCors(self, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    uri = boto.storage_uri(
        storage_url.url_string, suppress_consec_slashes=False,
        bucket_storage_uri_class=self.bucket_storage_uri_class,
        debug=self.debug)
    try:
      cors = uri.get_cors(False)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

    parsed_xml = xml.dom.minidom.parseString(cors.to_xml().encode(UTF8))
    # Pretty-print the XML to make it more easily human editable.
    return parsed_xml.toprettyxml(indent='    ')

  def XmlPassThroughGetLifecycle(self, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      lifecycle = uri.get_lifecycle_config(False)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

    parsed_xml = xml.dom.minidom.parseString(lifecycle.to_xml().encode(UTF8))
    # Pretty-print the XML to make it more easily human editable.
    return parsed_xml.toprettyxml(indent='    ')

  def XmlPassThroughSetLifecycle(self, lifecycle_text, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    # Parse XML document and convert into lifecycle object.
    if storage_url.scheme == 's3':
      lifecycle_obj = S3Lifecycle()
    else:
      lifecycle_obj = LifecycleConfig()
    h = handler.XmlHandler(lifecycle_obj, None)
    try:
      xml.sax.parseString(lifecycle_text, h)
    except SaxExceptions.SAXParseException, e:
      raise CommandException(
          'Requested lifecycle config is invalid: %s at line %s, column %s' %
          (e.getMessage(), e.getLineNumber(), e.getColumnNumber()))

    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      uri.configure_lifecycle(lifecycle_obj, False)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

  def XmlPassThroughGetLogging(self, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      logging_config_xml = UnaryDictToXml(uri.get_logging_config())
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

    return XmlParseString(logging_config_xml).toprettyxml()

  def XmlPassThroughGetWebsite(self, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      web_config_xml = UnaryDictToXml(uri.get_website_config())
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

    return XmlParseString(web_config_xml).toprettyxml()