# -*- coding: utf-8 -*-
# Copyright 2013 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""XML/boto gsutil Cloud API implementation for GCS and Amazon S3."""

from __future__ import absolute_import

import base64
import binascii
import datetime
import errno
import httplib
import json
import multiprocessing
import os
import pickle
import random
import re
import socket
import tempfile
import textwrap
import time
import xml
from xml.dom.minidom import parseString as XmlParseString
from xml.sax import _exceptions as SaxExceptions

import boto
from boto import handler
from boto.exception import ResumableDownloadException as BotoResumableDownloadException
from boto.exception import ResumableTransferDisposition
from boto.gs.cors import Cors
from boto.gs.lifecycle import LifecycleConfig
from boto.s3.cors import CORSConfiguration as S3Cors
from boto.s3.deletemarker import DeleteMarker
from boto.s3.lifecycle import Lifecycle as S3Lifecycle
from boto.s3.prefix import Prefix

from gslib.boto_resumable_upload import BotoResumableUpload
from gslib.cloud_api import AccessDeniedException
from gslib.cloud_api import ArgumentException
from gslib.cloud_api import BadRequestException
from gslib.cloud_api import CloudApi
from gslib.cloud_api import NotEmptyException
from gslib.cloud_api import NotFoundException
from gslib.cloud_api import PreconditionException
from gslib.cloud_api import ResumableDownloadException
from gslib.cloud_api import ResumableUploadAbortException
from gslib.cloud_api import ResumableUploadException
from gslib.cloud_api import ResumableUploadStartOverException
from gslib.cloud_api import ServiceException
from gslib.cloud_api_helper import ValidateDstObjectMetadata
# Imported for boto AuthHandler purposes.
import gslib.devshell_auth_plugin  # pylint: disable=unused-import
from gslib.exception import CommandException
from gslib.exception import InvalidUrlError
from gslib.hashing_helper import Base64EncodeHash
from gslib.hashing_helper import Base64ToHexHash
from gslib.project_id import GOOG_PROJ_ID_HDR
from gslib.project_id import PopulateProjectId
from gslib.storage_url import StorageUrlFromString
from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages
from gslib.translation_helper import AclTranslation
from gslib.translation_helper import AddS3MarkerAclToObjectMetadata
from gslib.translation_helper import CorsTranslation
from gslib.translation_helper import CreateBucketNotFoundException
from gslib.translation_helper import CreateObjectNotFoundException
from gslib.translation_helper import DEFAULT_CONTENT_TYPE
from gslib.translation_helper import EncodeStringAsLong
from gslib.translation_helper import GenerationFromUrlAndString
from gslib.translation_helper import HeadersFromObjectMetadata
from gslib.translation_helper import LifecycleTranslation
from gslib.translation_helper import REMOVE_CORS_CONFIG
from gslib.translation_helper import S3MarkerAclFromObjectMetadata
from gslib.util import ConfigureNoOpAuthIfNeeded
from gslib.util import DEFAULT_FILE_BUFFER_SIZE
from gslib.util import GetFileSize
from gslib.util import GetMaxRetryDelay
from gslib.util import GetNumRetries
from gslib.util import MultiprocessingIsAvailable
from gslib.util import S3_DELETE_MARKER_GUID
from gslib.util import TWO_MIB
from gslib.util import UnaryDictToXml
from gslib.util import UTF8
from gslib.util import XML_PROGRESS_CALLBACKS

TRANSLATABLE_BOTO_EXCEPTIONS = (boto.exception.BotoServerError,
                                boto.exception.InvalidUriError,
                                boto.exception.ResumableDownloadException,
                                boto.exception.ResumableUploadException,
                                boto.exception.StorageCreateError,
                                boto.exception.StorageResponseError)

# If multiprocessing is available, this will be overridden to a (thread-safe)
# multiprocessing.Value in a call to InitializeMultiprocessingVariables.
boto_auth_initialized = False

NON_EXISTENT_OBJECT_REGEX = re.compile(r'.*non-\s*existent\s*object',
                                       flags=re.DOTALL)
# Determines whether an etag is a valid MD5.
MD5_REGEX = re.compile(r'^"*[a-fA-F0-9]{32}"*$')
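
# Illustrative sketch (not part of the original module): MD5_REGEX matches
# 32-digit hex etags with or without surrounding quotes, which is how
# _GetMD5FromETag below decides whether an etag can serve as an MD5:
#
#   MD5_REGEX.search('"900150983cd24fb0d6963f7d28e17f72"')  # match
#   MD5_REGEX.search('900150983cd24fb0d6963f7d28e17f72')    # match
#   MD5_REGEX.search('multipart-etag-2')                    # None (no MD5)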


def InitializeMultiprocessingVariables():
  """Perform necessary initialization for multiprocessing.

  See gslib.command.InitializeMultiprocessingVariables for an explanation
  of why this is necessary.
  """
  global boto_auth_initialized  # pylint: disable=global-variable-undefined
  boto_auth_initialized = multiprocessing.Value('i', 0)


class BotoTranslation(CloudApi):
  """Boto-based XML translation implementation of gsutil Cloud API.

  This class takes gsutil Cloud API objects, translates them to XML service
  calls, and translates the results back into gsutil Cloud API objects for
  use by the caller.
  """

  def __init__(self, bucket_storage_uri_class, logger, provider=None,
               credentials=None, debug=0):
    """Performs necessary setup for interacting with the cloud storage provider.

    Args:
      bucket_storage_uri_class: boto storage_uri class, used by APIs that
                                provide boto translation or mocking.
      logger: logging.logger for outputting log messages.
      provider: Prefix describing the cloud storage provider to connect to.
                'gs' and 's3' are supported. Function implementations ignore
                the provider argument and use this one instead.
      credentials: Unused.
      debug: Debug level for the API implementation (0..3).
    """
    super(BotoTranslation, self).__init__(bucket_storage_uri_class, logger,
                                          provider=provider, debug=debug)
    _ = credentials
    global boto_auth_initialized  # pylint: disable=global-variable-undefined
    if MultiprocessingIsAvailable()[0] and not boto_auth_initialized.value:
      ConfigureNoOpAuthIfNeeded()
      boto_auth_initialized.value = 1
    elif not boto_auth_initialized:
      ConfigureNoOpAuthIfNeeded()
      boto_auth_initialized = True
    self.api_version = boto.config.get_value(
        'GSUtil', 'default_api_version', '1')
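
  # Illustrative usage sketch (not part of the original module); assumes a
  # configured boto environment, an imported `logging` module, and boto's
  # BucketStorageUri class:
  #
  #   from boto.storage_uri import BucketStorageUri
  #   api = BotoTranslation(BucketStorageUri, logging.getLogger(),
  #                         provider='gs')
  #   bucket = api.GetBucket('my-bucket')  # hypothetical bucket name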

  def GetBucket(self, bucket_name, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    bucket_uri = self._StorageUriForBucket(bucket_name)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    try:
      return self._BotoBucketToBucket(bucket_uri.get_bucket(validate=True,
                                                            headers=headers),
                                      fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)

  def ListBuckets(self, project_id=None, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    get_fields = self._ListToGetFields(list_fields=fields)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    if self.provider == 'gs':
      headers[GOOG_PROJ_ID_HDR] = PopulateProjectId(project_id)
    try:
      provider_uri = boto.storage_uri(
          '%s://' % self.provider,
          suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)

      buckets_iter = provider_uri.get_all_buckets(headers=headers)
      for bucket in buckets_iter:
        if self.provider == 's3' and bucket.name.lower() != bucket.name:
          # S3 listings can return buckets with upper-case names, but boto
          # can't successfully call them.
          continue
        yield self._BotoBucketToBucket(bucket, fields=get_fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

  def PatchBucket(self, bucket_name, metadata, canned_acl=None,
                  canned_def_acl=None, preconditions=None, provider=None,
                  fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    bucket_uri = self._StorageUriForBucket(bucket_name)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    try:
      self._AddPreconditionsToHeaders(preconditions, headers)
      if metadata.acl:
        boto_acl = AclTranslation.BotoAclFromMessage(metadata.acl)
        bucket_uri.set_xml_acl(boto_acl.to_xml(), headers=headers)
      if canned_acl:
        canned_acls = bucket_uri.canned_acls()
        if canned_acl not in canned_acls:
          raise CommandException('Invalid canned ACL "%s".' % canned_acl)
        bucket_uri.set_acl(canned_acl, bucket_uri.object_name)
      if canned_def_acl:
        canned_acls = bucket_uri.canned_acls()
        if canned_def_acl not in canned_acls:
          raise CommandException('Invalid canned ACL "%s".' % canned_def_acl)
        bucket_uri.set_def_acl(canned_def_acl, bucket_uri.object_name)
      if metadata.cors:
        if metadata.cors == REMOVE_CORS_CONFIG:
          metadata.cors = []
        boto_cors = CorsTranslation.BotoCorsFromMessage(metadata.cors)
        bucket_uri.set_cors(boto_cors, False)
      if metadata.defaultObjectAcl:
        boto_acl = AclTranslation.BotoAclFromMessage(
            metadata.defaultObjectAcl)
        bucket_uri.set_def_xml_acl(boto_acl.to_xml(), headers=headers)
      if metadata.lifecycle:
        boto_lifecycle = LifecycleTranslation.BotoLifecycleFromMessage(
            metadata.lifecycle)
        bucket_uri.configure_lifecycle(boto_lifecycle, False)
      if metadata.logging:
        if self.provider == 'gs':
          headers[GOOG_PROJ_ID_HDR] = PopulateProjectId(None)
        if metadata.logging.logBucket and metadata.logging.logObjectPrefix:
          bucket_uri.enable_logging(metadata.logging.logBucket,
                                    metadata.logging.logObjectPrefix,
                                    False, headers)
        else:  # Logging field is present and empty. Disable logging.
          bucket_uri.disable_logging(False, headers)
      if metadata.versioning:
        bucket_uri.configure_versioning(metadata.versioning.enabled,
                                        headers=headers)
      if metadata.website:
        main_page_suffix = metadata.website.mainPageSuffix
        error_page = metadata.website.notFoundPage
        bucket_uri.set_website_config(main_page_suffix, error_page)
      return self.GetBucket(bucket_name, fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)

  def CreateBucket(self, bucket_name, project_id=None, metadata=None,
                   provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    bucket_uri = self._StorageUriForBucket(bucket_name)
    location = ''
    if metadata and metadata.location:
      location = metadata.location
    # Pass storage_class param only if this is a GCS bucket. (In S3 the
    # storage class is specified on the key object.)
    headers = {}
    if bucket_uri.scheme == 'gs':
      self._AddApiVersionToHeaders(headers)
      headers[GOOG_PROJ_ID_HDR] = PopulateProjectId(project_id)
      storage_class = ''
      if metadata and metadata.storageClass:
        storage_class = metadata.storageClass
      try:
        bucket_uri.create_bucket(headers=headers, location=location,
                                 storage_class=storage_class)
      except TRANSLATABLE_BOTO_EXCEPTIONS, e:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
    else:
      try:
        bucket_uri.create_bucket(headers=headers, location=location)
      except TRANSLATABLE_BOTO_EXCEPTIONS, e:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)
    return self.GetBucket(bucket_name, fields=fields)

  def DeleteBucket(self, bucket_name, preconditions=None, provider=None):
    """See CloudApi class for function doc strings."""
    _ = provider, preconditions
    bucket_uri = self._StorageUriForBucket(bucket_name)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    try:
      bucket_uri.delete_bucket(headers=headers)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      translated_exception = self._TranslateBotoException(
          e, bucket_name=bucket_name)
      if (translated_exception and
          'BucketNotEmpty' in translated_exception.reason):
        try:
          if bucket_uri.get_versioning_config():
            if self.provider == 's3':
              raise NotEmptyException(
                  'VersionedBucketNotEmpty (%s). Currently, gsutil does not '
                  'support listing or removing S3 DeleteMarkers, so you may '
                  'need to delete these using another tool to successfully '
                  'delete this bucket.' % bucket_name, status=e.status)
            raise NotEmptyException(
                'VersionedBucketNotEmpty (%s)' % bucket_name, status=e.status)
          else:
            raise NotEmptyException('BucketNotEmpty (%s)' % bucket_name,
                                    status=e.status)
        except TRANSLATABLE_BOTO_EXCEPTIONS, e2:
          self._TranslateExceptionAndRaise(e2, bucket_name=bucket_name)
      elif translated_exception and translated_exception.status == 404:
        raise NotFoundException('Bucket %s does not exist.' % bucket_name)
      else:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)

  def ListObjects(self, bucket_name, prefix=None, delimiter=None,
                  all_versions=None, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    get_fields = self._ListToGetFields(list_fields=fields)
    bucket_uri = self._StorageUriForBucket(bucket_name)
    prefix_list = []
    headers = {}
    self._AddApiVersionToHeaders(headers)
    try:
      objects_iter = bucket_uri.list_bucket(prefix=prefix or '',
                                            delimiter=delimiter or '',
                                            all_versions=all_versions,
                                            headers=headers)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)

    try:
      for key in objects_iter:
        if isinstance(key, Prefix):
          prefix_list.append(key.name)
          yield CloudApi.CsObjectOrPrefix(key.name,
                                          CloudApi.CsObjectOrPrefixType.PREFIX)
        else:
          key_to_convert = key

          # Listed keys are populated with these fields during bucket listing.
          key_http_fields = set(['bucket', 'etag', 'name', 'updated',
                                 'generation', 'metageneration', 'size'])

          # When fields == None, the caller is requesting all possible fields.
          # If the caller requested any fields that are not populated by bucket
          # listing, we'll need to make a separate HTTP call for each object to
          # get its metadata and populate the remaining fields with the result.
          if not get_fields or (get_fields and not
                                get_fields.issubset(key_http_fields)):

            generation = None
            if getattr(key, 'generation', None):
              generation = key.generation
            if getattr(key, 'version_id', None):
              generation = key.version_id
            key_to_convert = self._GetBotoKey(bucket_name, key.name,
                                              generation=generation)
          return_object = self._BotoKeyToObject(key_to_convert,
                                                fields=get_fields)

          yield CloudApi.CsObjectOrPrefix(return_object,
                                          CloudApi.CsObjectOrPrefixType.OBJECT)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name)

  def GetObjectMetadata(self, bucket_name, object_name, generation=None,
                        provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    try:
      return self._BotoKeyToObject(self._GetBotoKey(bucket_name, object_name,
                                                    generation=generation),
                                   fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                       object_name=object_name,
                                       generation=generation)

  def _CurryDigester(self, digester_object):
    """Curries a digester object into a form consumable by boto.

    Key instantiates its own digesters by calling hash_algs[alg]() [note there
    are no arguments to this function]. So in order to pass in our caught-up
    digesters during a resumable download, we need to pass the digester
    object but don't get to look it up based on the algorithm name. Here we
    use a lambda to make lookup implicit.

    Args:
      digester_object: Input object to be returned by the created function.

    Returns:
      A function which when called will return the input object.
    """
    return lambda: digester_object
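
  # Illustrative sketch (not part of the original module): boto calls
  # hash_algs[alg]() with no arguments to obtain a digester, so a caught-up
  # digester is wrapped in a lambda that simply returns it:
  #
  #   md5_digester = hashlib.md5()  # assumes hashlib were imported here
  #   md5_digester.update(already_downloaded_bytes)
  #   hash_algs = {'md5': self._CurryDigester(md5_digester)}
  #   assert hash_algs['md5']() is md5_digester  # no fresh digester created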

  def GetObjectMedia(
      self, bucket_name, object_name, download_stream, provider=None,
      generation=None, object_size=None,
      download_strategy=CloudApi.DownloadStrategy.ONE_SHOT,
      start_byte=0, end_byte=None, progress_callback=None,
      serialization_data=None, digesters=None):
    """See CloudApi class for function doc strings."""
    # This implementation will get the object metadata first if we don't pass
    # it in via serialization_data.
    headers = {}
    self._AddApiVersionToHeaders(headers)
    if 'accept-encoding' not in headers:
      headers['accept-encoding'] = 'gzip'
    if end_byte:
      headers['range'] = 'bytes=%s-%s' % (start_byte, end_byte)
    elif start_byte > 0:
      headers['range'] = 'bytes=%s-' % (start_byte)
    else:
      headers['range'] = 'bytes=%s' % (start_byte)
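
    # For example (illustrative, not part of the original module):
    #   start_byte=5, end_byte=9     ->  'bytes=5-9'  (inclusive range)
    #   start_byte=5, end_byte=None  ->  'bytes=5-'   (from offset to end)
    #   start_byte=-5                ->  'bytes=-5'   (suffix range: the last
    #                                                  5 bytes of the object)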

    # Since in most cases we already made a call to get the object metadata,
    # here we avoid an extra HTTP call by unpickling the key. This is coupled
    # with the implementation in _BotoKeyToObject.
    if serialization_data:
      serialization_dict = json.loads(serialization_data)
      key = pickle.loads(binascii.a2b_base64(serialization_dict['url']))
    else:
      key = self._GetBotoKey(bucket_name, object_name, generation=generation)

    if digesters and self.provider == 'gs':
      hash_algs = {}
      for alg in digesters:
        hash_algs[alg] = self._CurryDigester(digesters[alg])
    else:
      hash_algs = {}

    total_size = object_size or 0
    if serialization_data:
      total_size = json.loads(serialization_data)['total_size']

    if total_size:
      num_progress_callbacks = max(int(total_size) / TWO_MIB,
                                   XML_PROGRESS_CALLBACKS)
    else:
      num_progress_callbacks = XML_PROGRESS_CALLBACKS

    try:
      if download_strategy is CloudApi.DownloadStrategy.RESUMABLE:
        self._PerformResumableDownload(
            download_stream, key, headers=headers, callback=progress_callback,
            num_callbacks=num_progress_callbacks, hash_algs=hash_algs)
      elif download_strategy is CloudApi.DownloadStrategy.ONE_SHOT:
        self._PerformSimpleDownload(
            download_stream, key, progress_callback=progress_callback,
            num_progress_callbacks=num_progress_callbacks, headers=headers,
            hash_algs=hash_algs)
      else:
        raise ArgumentException('Unsupported DownloadStrategy: %s' %
                                download_strategy)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                       object_name=object_name,
                                       generation=generation)

    if self.provider == 's3':
      if digesters:

        class HashToDigester(object):
          """Wrapper class to expose hash digests.

          boto creates its own digesters in s3's get_file, returning on-the-fly
          hashes only by way of key.local_hashes. To propagate the digest back
          to the caller, this stub class implements the digest() function.
          """

          def __init__(self, hash_val):
            self.hash_val = hash_val

          def digest(self):  # pylint: disable=invalid-name
            return self.hash_val

        for alg_name in digesters:
          if ((download_strategy == CloudApi.DownloadStrategy.RESUMABLE and
               start_byte != 0) or
              not ((getattr(key, 'local_hashes', None) and
                    alg_name in key.local_hashes))):
            # For resumable downloads, boto does not provide a mechanism to
            # catch up the hash in the case of a partially complete download.
            # In this case or in the case where no digest was successfully
            # calculated, set the digester to None, which indicates that we'll
            # need to manually calculate the hash from the local file once it
            # is complete.
            digesters[alg_name] = None
          else:
            # Use the on-the-fly hash.
            digesters[alg_name] = HashToDigester(key.local_hashes[alg_name])

  def _PerformSimpleDownload(self, download_stream, key, progress_callback=None,
                             num_progress_callbacks=XML_PROGRESS_CALLBACKS,
                             headers=None, hash_algs=None):
    if not headers:
      headers = {}
      self._AddApiVersionToHeaders(headers)
    try:
      key.get_contents_to_file(download_stream, cb=progress_callback,
                               num_cb=num_progress_callbacks, headers=headers,
                               hash_algs=hash_algs)
    except TypeError:  # s3 and mocks do not support hash_algs
      key.get_contents_to_file(download_stream, cb=progress_callback,
                               num_cb=num_progress_callbacks, headers=headers)

  def _PerformResumableDownload(self, fp, key, headers=None, callback=None,
                                num_callbacks=XML_PROGRESS_CALLBACKS,
                                hash_algs=None):
    """Downloads bytes from key to fp, resuming as needed.

    Args:
      fp: File pointer into which data should be downloaded.
      key: Key object from which data is to be downloaded.
      headers: Headers to send when retrieving the file.
      callback: (optional) a callback function that will be called to report
           progress on the download. The callback should accept two integer
           parameters. The first integer represents the number of
           bytes that have been successfully transmitted from the service. The
           second represents the total number of bytes that need to be
           transmitted.
      num_callbacks: (optional) If a callback is specified with the callback
           parameter, this determines the granularity of the callback
           by defining the maximum number of times the callback will be
           called during the file transfer.
      hash_algs: Dict of hash algorithms to apply to downloaded bytes.

    Raises:
      ResumableDownloadException on error.
    """
    if not headers:
      headers = {}
      self._AddApiVersionToHeaders(headers)

    retryable_exceptions = (httplib.HTTPException, IOError, socket.error,
                            socket.gaierror)

    debug = key.bucket.connection.debug

    num_retries = GetNumRetries()
    progress_less_iterations = 0

    while True:  # Retry as long as we're making progress.
      had_file_bytes_before_attempt = GetFileSize(fp)
      try:
        cur_file_size = GetFileSize(fp, position_to_eof=True)

        def DownloadProxyCallback(total_bytes_downloaded, total_size):
          """Translates a boto callback into a gsutil Cloud API callback.

          Callbacks are originally made by boto.s3.Key.get_file(); here we take
          into account that we're resuming a download.

          Args:
            total_bytes_downloaded: Actual bytes downloaded so far, not
                including the point we resumed from.
            total_size: Total size of the download.
          """
          if callback:
            callback(cur_file_size + total_bytes_downloaded, total_size)

        headers = headers.copy()
        headers['Range'] = 'bytes=%d-%d' % (cur_file_size, key.size - 1)
        cb = DownloadProxyCallback

        # Disable AWSAuthConnection-level retry behavior, since that would
        # cause downloads to restart from scratch.
        try:
          key.get_file(fp, headers, cb, num_callbacks, override_num_retries=0,
                       hash_algs=hash_algs)
        except TypeError:
          key.get_file(fp, headers, cb, num_callbacks, override_num_retries=0)
        fp.flush()
        # Download succeeded.
        return
      except retryable_exceptions, e:
        if debug >= 1:
          self.logger.info('Caught exception (%s)', repr(e))
        if isinstance(e, IOError) and e.errno == errno.EPIPE:
          # Broken pipe error causes httplib to immediately
          # close the socket (http://bugs.python.org/issue5542),
          # so we need to close and reopen the key before resuming
          # the download.
          if self.provider == 's3':
            key.get_file(fp, headers, cb, num_callbacks,
                         override_num_retries=0)
          else:  # self.provider == 'gs'
            key.get_file(fp, headers, cb, num_callbacks,
                         override_num_retries=0, hash_algs=hash_algs)
      except BotoResumableDownloadException, e:
        if (e.disposition ==
            ResumableTransferDisposition.ABORT_CUR_PROCESS):
          raise ResumableDownloadException(e.message)
        else:
          if debug >= 1:
            self.logger.info('Caught ResumableDownloadException (%s) - will '
                             'retry', e.message)

      # At this point we had a retryable failure; see if we made progress.
      if GetFileSize(fp) > had_file_bytes_before_attempt:
        progress_less_iterations = 0
      else:
        progress_less_iterations += 1

      if progress_less_iterations > num_retries:
        # Don't retry any longer in the current process.
        raise ResumableDownloadException(
            'Too many resumable download attempts failed without '
            'progress. You might try this download again later')

      # Close the key, in case a previous download died partway
      # through and left data in the underlying key HTTP buffer.
      # Do this within a try/except block in case the connection is
      # closed (since key.close() attempts to do a final read, in which
      # case this read attempt would get an IncompleteRead exception,
      # which we can safely ignore).
      try:
        key.close()
      except httplib.IncompleteRead:
        pass

      sleep_time_secs = min(random.random() * (2 ** progress_less_iterations),
                            GetMaxRetryDelay())
      if debug >= 1:
        self.logger.info(
            'Got retryable failure (%d progress-less in a row).\nSleeping %d '
            'seconds before re-trying', progress_less_iterations,
            sleep_time_secs)
      time.sleep(sleep_time_secs)

  def PatchObjectMetadata(self, bucket_name, object_name, metadata,
                          canned_acl=None, generation=None, preconditions=None,
                          provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    object_uri = self._StorageUriForObject(bucket_name, object_name,
                                           generation=generation)

    headers = {}
    self._AddApiVersionToHeaders(headers)
    meta_headers = HeadersFromObjectMetadata(metadata, self.provider)

    metadata_plus = {}
    metadata_minus = set()
    metadata_changed = False
    for k, v in meta_headers.iteritems():
      metadata_changed = True
      if v is None:
        metadata_minus.add(k)
      else:
        metadata_plus[k] = v

    self._AddPreconditionsToHeaders(preconditions, headers)

    if metadata_changed:
      try:
        object_uri.set_metadata(metadata_plus, metadata_minus, False,
                                headers=headers)
      except TRANSLATABLE_BOTO_EXCEPTIONS, e:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                         object_name=object_name,
                                         generation=generation)

    if metadata.acl:
      boto_acl = AclTranslation.BotoAclFromMessage(metadata.acl)
      try:
        object_uri.set_xml_acl(boto_acl.to_xml(), key_name=object_name)
      except TRANSLATABLE_BOTO_EXCEPTIONS, e:
        self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                         object_name=object_name,
                                         generation=generation)
    if canned_acl:
      canned_acls = object_uri.canned_acls()
      if canned_acl not in canned_acls:
        raise CommandException('Invalid canned ACL "%s".' % canned_acl)
      object_uri.set_acl(canned_acl, object_uri.object_name)

    return self.GetObjectMetadata(bucket_name, object_name,
                                  generation=generation, fields=fields)

  def _PerformSimpleUpload(self, dst_uri, upload_stream, md5=None,
                           canned_acl=None, progress_callback=None,
                           headers=None):
    dst_uri.set_contents_from_file(upload_stream, md5=md5, policy=canned_acl,
                                   cb=progress_callback, headers=headers)

  def _PerformStreamingUpload(self, dst_uri, upload_stream, canned_acl=None,
                              progress_callback=None, headers=None):
    if dst_uri.get_provider().supports_chunked_transfer():
      dst_uri.set_contents_from_stream(upload_stream, policy=canned_acl,
                                       cb=progress_callback, headers=headers)
    else:
      # Provider doesn't support chunked transfer, so copy to a temporary
      # file.
      (temp_fh, temp_path) = tempfile.mkstemp()
      try:
        with open(temp_path, 'wb') as out_fp:
          stream_bytes = upload_stream.read(DEFAULT_FILE_BUFFER_SIZE)
          while stream_bytes:
            out_fp.write(stream_bytes)
            stream_bytes = upload_stream.read(DEFAULT_FILE_BUFFER_SIZE)
        with open(temp_path, 'rb') as in_fp:
          dst_uri.set_contents_from_file(in_fp, policy=canned_acl,
                                         headers=headers)
      finally:
        os.close(temp_fh)
        os.unlink(temp_path)

  def _PerformResumableUpload(self, key, upload_stream, upload_size,
                              tracker_callback, canned_acl=None,
                              serialization_data=None, progress_callback=None,
                              headers=None):
    resumable_upload = BotoResumableUpload(
        tracker_callback, self.logger, resume_url=serialization_data)
    resumable_upload.SendFile(key, upload_stream, upload_size,
                              canned_acl=canned_acl, cb=progress_callback,
                              headers=headers)

  def _UploadSetup(self, object_metadata, preconditions=None):
    """Shared upload implementation.

    Args:
      object_metadata: Object metadata describing destination object.
      preconditions: Optional gsutil Cloud API preconditions.

    Returns:
      Headers dictionary, StorageUri for upload (based on inputs).
    """
    ValidateDstObjectMetadata(object_metadata)

    headers = HeadersFromObjectMetadata(object_metadata, self.provider)
    self._AddApiVersionToHeaders(headers)

    if object_metadata.crc32c:
      if 'x-goog-hash' in headers:
        headers['x-goog-hash'] += (
            ',crc32c=%s' % object_metadata.crc32c.rstrip('\n'))
      else:
        headers['x-goog-hash'] = (
            'crc32c=%s' % object_metadata.crc32c.rstrip('\n'))
    if object_metadata.md5Hash:
      if 'x-goog-hash' in headers:
        headers['x-goog-hash'] += (
            ',md5=%s' % object_metadata.md5Hash.rstrip('\n'))
      else:
        headers['x-goog-hash'] = (
            'md5=%s' % object_metadata.md5Hash.rstrip('\n'))
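
    # For example (illustrative, not part of the original module): if both
    # hashes are present, the header accumulates comma-separated values:
    #   headers['x-goog-hash'] = 'crc32c=<base64-crc32c>,md5=<base64-md5>'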

    if 'content-type' in headers and not headers['content-type']:
      headers['content-type'] = 'application/octet-stream'

    self._AddPreconditionsToHeaders(preconditions, headers)

    dst_uri = self._StorageUriForObject(object_metadata.bucket,
                                        object_metadata.name)
    return headers, dst_uri

  def _HandleSuccessfulUpload(self, dst_uri, object_metadata, fields=None):
    """Set ACLs on an uploaded object and return its metadata.

    Args:
      dst_uri: Generation-specific StorageUri describing the object.
      object_metadata: Metadata for the object, including an ACL if applicable.
      fields: If present, return only these Object metadata fields.

    Returns:
      gsutil Cloud API Object metadata.

    Raises:
      CommandException if the object was overwritten / deleted concurrently.
    """
    try:
      # The XML API does not support if-generation-match for GET requests.
      # Therefore, if the object gets overwritten before the ACL and get_key
      # operations, the best we can do is warn that it happened.
      self._SetObjectAcl(object_metadata, dst_uri)
      return self._BotoKeyToObject(dst_uri.get_key(), fields=fields)
    except boto.exception.InvalidUriError as e:
      if e.message and NON_EXISTENT_OBJECT_REGEX.match(e.message.encode(UTF8)):
        raise CommandException('\n'.join(textwrap.wrap(
            'Uploaded object (%s) was deleted or overwritten immediately '
            'after it was uploaded. This can happen if you attempt to upload '
            'to the same object multiple times concurrently.' % dst_uri.uri)))
      else:
        raise

  def _SetObjectAcl(self, object_metadata, dst_uri):
    """Sets the ACL (if present in object_metadata) on an uploaded object."""
    if object_metadata.acl:
      boto_acl = AclTranslation.BotoAclFromMessage(object_metadata.acl)
      dst_uri.set_xml_acl(boto_acl.to_xml())
    elif self.provider == 's3':
      s3_acl = S3MarkerAclFromObjectMetadata(object_metadata)
      if s3_acl:
        dst_uri.set_xml_acl(s3_acl)

  def UploadObjectResumable(
      self, upload_stream, object_metadata, canned_acl=None, preconditions=None,
      provider=None, fields=None, size=None, serialization_data=None,
      tracker_callback=None, progress_callback=None):
    """See CloudApi class for function doc strings."""
    if self.provider == 's3':
      # Resumable uploads are not supported for s3.
      return self.UploadObject(
          upload_stream, object_metadata, canned_acl=canned_acl,
          preconditions=preconditions, fields=fields, size=size)
    headers, dst_uri = self._UploadSetup(object_metadata,
                                         preconditions=preconditions)
    if not tracker_callback:
      raise ArgumentException('No tracker callback function set for '
                              'resumable upload of %s' % dst_uri)
    try:
      self._PerformResumableUpload(dst_uri.new_key(headers=headers),
                                   upload_stream, size, tracker_callback,
                                   canned_acl=canned_acl,
                                   serialization_data=serialization_data,
                                   progress_callback=progress_callback,
                                   headers=headers)
      return self._HandleSuccessfulUpload(dst_uri, object_metadata,
                                          fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket,
                                       object_name=object_metadata.name)

  def UploadObjectStreaming(self, upload_stream, object_metadata,
                            canned_acl=None, progress_callback=None,
                            preconditions=None, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    headers, dst_uri = self._UploadSetup(object_metadata,
                                         preconditions=preconditions)

    try:
      self._PerformStreamingUpload(
          dst_uri, upload_stream, canned_acl=canned_acl,
          progress_callback=progress_callback, headers=headers)
      return self._HandleSuccessfulUpload(dst_uri, object_metadata,
                                          fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket,
                                       object_name=object_metadata.name)

  def UploadObject(self, upload_stream, object_metadata, canned_acl=None,
                   preconditions=None, size=None, progress_callback=None,
                   provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    headers, dst_uri = self._UploadSetup(object_metadata,
                                         preconditions=preconditions)

    try:
      md5 = None
      if object_metadata.md5Hash:
        md5 = []
        # boto expects hex at index 0, base64 at index 1.
        md5.append(Base64ToHexHash(object_metadata.md5Hash))
        md5.append(object_metadata.md5Hash.strip('\n"\''))
      self._PerformSimpleUpload(dst_uri, upload_stream, md5=md5,
                                canned_acl=canned_acl,
                                progress_callback=progress_callback,
                                headers=headers)
      return self._HandleSuccessfulUpload(dst_uri, object_metadata,
                                          fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=object_metadata.bucket,
                                       object_name=object_metadata.name)

  def DeleteObject(self, bucket_name, object_name, preconditions=None,
                   generation=None, provider=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    headers = {}
    self._AddApiVersionToHeaders(headers)
    self._AddPreconditionsToHeaders(preconditions, headers)

    uri = self._StorageUriForObject(bucket_name, object_name,
                                    generation=generation)
    try:
      uri.delete_key(validate=False, headers=headers)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                       object_name=object_name,
                                       generation=generation)

  def CopyObject(self, src_obj_metadata, dst_obj_metadata, src_generation=None,
                 canned_acl=None, preconditions=None, progress_callback=None,
                 max_bytes_per_call=None, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider

    if max_bytes_per_call is not None:
      raise NotImplementedError('XML API does not support max_bytes_per_call')
    dst_uri = self._StorageUriForObject(dst_obj_metadata.bucket,
                                        dst_obj_metadata.name)

    # Usually it's okay to treat version_id and generation as
    # the same, but in this case the underlying boto call determines the
    # provider based on the presence of one or the other.
    src_version_id = None
    if self.provider == 's3':
      src_version_id = src_generation
      src_generation = None

    headers = HeadersFromObjectMetadata(dst_obj_metadata, self.provider)
    self._AddApiVersionToHeaders(headers)
    self._AddPreconditionsToHeaders(preconditions, headers)

    if canned_acl:
      headers[dst_uri.get_provider().acl_header] = canned_acl

    preserve_acl = True if dst_obj_metadata.acl else False
    if self.provider == 's3':
      s3_acl = S3MarkerAclFromObjectMetadata(dst_obj_metadata)
      if s3_acl:
        preserve_acl = True

    try:
      new_key = dst_uri.copy_key(
          src_obj_metadata.bucket, src_obj_metadata.name,
          preserve_acl=preserve_acl, headers=headers,
          src_version_id=src_version_id, src_generation=src_generation)

      return self._BotoKeyToObject(new_key, fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, dst_obj_metadata.bucket,
                                       dst_obj_metadata.name)

  def ComposeObject(self, src_objs_metadata, dst_obj_metadata,
                    preconditions=None, provider=None, fields=None):
    """See CloudApi class for function doc strings."""
    _ = provider
    ValidateDstObjectMetadata(dst_obj_metadata)

    dst_obj_name = dst_obj_metadata.name
    dst_obj_metadata.name = None
    dst_bucket_name = dst_obj_metadata.bucket
    dst_obj_metadata.bucket = None
    headers = HeadersFromObjectMetadata(dst_obj_metadata, self.provider)
    if not dst_obj_metadata.contentType:
      dst_obj_metadata.contentType = DEFAULT_CONTENT_TYPE
    headers['content-type'] = dst_obj_metadata.contentType
    self._AddApiVersionToHeaders(headers)
    self._AddPreconditionsToHeaders(preconditions, headers)

    dst_uri = self._StorageUriForObject(dst_bucket_name, dst_obj_name)

    src_components = []
    for src_obj in src_objs_metadata:
      src_uri = self._StorageUriForObject(dst_bucket_name, src_obj.name,
                                          generation=src_obj.generation)
      src_components.append(src_uri)

    try:
      dst_uri.compose(src_components, headers=headers)

      return self.GetObjectMetadata(dst_bucket_name, dst_obj_name,
                                    fields=fields)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      # dst_obj_metadata.bucket and .name were cleared above, so report the
      # exception using the saved values.
      self._TranslateExceptionAndRaise(e, dst_bucket_name, dst_obj_name)

  def _AddPreconditionsToHeaders(self, preconditions, headers):
    """Adds preconditions (if any) to headers."""
    if preconditions and self.provider == 'gs':
      if preconditions.gen_match is not None:
        headers['x-goog-if-generation-match'] = str(preconditions.gen_match)
      if preconditions.meta_gen_match is not None:
        headers['x-goog-if-metageneration-match'] = str(
            preconditions.meta_gen_match)
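
  # Illustrative sketch (not part of the original module): for a GCS request
  # with gen_match=1234 and meta_gen_match=5, this populates
  #
  #   {'x-goog-if-generation-match': '1234',
  #    'x-goog-if-metageneration-match': '5'}
  #
  # so the service rejects the call with a 412 if either value has changed.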

  def _AddApiVersionToHeaders(self, headers):
    if self.provider == 'gs':
      headers['x-goog-api-version'] = self.api_version

  def _GetMD5FromETag(self, src_etag):
    """Returns an MD5 from the etag iff the etag is a valid MD5 hash.

    Args:
      src_etag: Object etag for which to return the MD5.

    Returns:
      MD5 in hex string format, or None.
    """
    if src_etag and MD5_REGEX.search(src_etag):
      return src_etag.strip('"\'').lower()
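
  # For example (illustrative, not part of the original module):
  #   self._GetMD5FromETag('"900150983CD24FB0D6963F7D28E17F72"')
  # returns '900150983cd24fb0d6963f7d28e17f72', while a multipart-style etag
  # such as '"d41d8cd98f00b204e9800998ecf8427e-2"' returns None.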

  def _StorageUriForBucket(self, bucket):
    """Returns a boto storage_uri for the given bucket name.

    Args:
      bucket: Bucket name (string).

    Returns:
      Boto storage_uri for the bucket.
    """
    return boto.storage_uri(
        '%s://%s' % (self.provider, bucket),
        suppress_consec_slashes=False,
        bucket_storage_uri_class=self.bucket_storage_uri_class,
        debug=self.debug)

  def _StorageUriForObject(self, bucket, object_name, generation=None):
    """Returns a boto storage_uri for the given object.

    Args:
      bucket: Bucket name (string).
      object_name: Object name (string).
      generation: Generation or version_id of object. If None, live version
                  of the object is used.

    Returns:
      Boto storage_uri for the object.
    """
    uri_string = '%s://%s/%s' % (self.provider, bucket, object_name)
    if generation:
      uri_string += '#%s' % generation
    return boto.storage_uri(
        uri_string, suppress_consec_slashes=False,
        bucket_storage_uri_class=self.bucket_storage_uri_class,
        debug=self.debug)
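
  # For example (illustrative, not part of the original module), a versioned
  # object is addressed by appending the generation after '#':
  #   self._StorageUriForObject('my-bucket', 'obj.txt',
  #                             generation=1360887697105000)
  # builds the URI string 'gs://my-bucket/obj.txt#1360887697105000'.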

  def _GetBotoKey(self, bucket_name, object_name, generation=None):
    """Gets the boto key for an object.

    Args:
      bucket_name: Bucket containing the object.
      object_name: Object name.
      generation: Generation or version of the object to retrieve.

    Returns:
      Boto key for the object.
    """
    object_uri = self._StorageUriForObject(bucket_name, object_name,
                                           generation=generation)
    try:
      key = object_uri.get_key()
      if not key:
        raise CreateObjectNotFoundException('404', self.provider,
                                            bucket_name, object_name,
                                            generation=generation)
      return key
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                       object_name=object_name,
                                       generation=generation)

  def _ListToGetFields(self, list_fields=None):
    """Removes 'items/' from the input fields and converts them to a set.

    This way field sets requested for ListBucket/ListObject can be used in
    _BotoBucketToBucket and _BotoKeyToObject calls.

    Args:
      list_fields: Iterable fields usable in ListBucket/ListObject calls.

    Returns:
      Set of fields usable in GetBucket/GetObject or
      _BotoBucketToBucket/_BotoKeyToObject calls.
    """
    if list_fields:
      get_fields = set()
      for field in list_fields:
        if field in ['kind', 'nextPageToken', 'prefixes']:
          # These are not actually object / bucket metadata fields.
          # They are fields specific to listing, so we don't consider them.
          continue
        get_fields.add(re.sub(r'items/', '', field))
      return get_fields
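
  # For example (illustrative, not part of the original module):
  #   self._ListToGetFields(['items/name', 'items/size', 'prefixes'])
  # returns set(['name', 'size']) -- listing-only fields are dropped and the
  # 'items/' prefix is stripped.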

  # pylint: disable=too-many-statements
  def _BotoBucketToBucket(self, bucket, fields=None):
    """Constructs an apitools Bucket from a boto bucket.

    Args:
      bucket: Boto bucket.
      fields: If present, construct the apitools Bucket with only this set of
              metadata fields.

    Returns:
      apitools Bucket.
    """
    bucket_uri = self._StorageUriForBucket(bucket.name)

    cloud_api_bucket = apitools_messages.Bucket(name=bucket.name,
                                                id=bucket.name)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    if self.provider == 'gs':
      if not fields or 'storageClass' in fields:
        if hasattr(bucket, 'get_storage_class'):
          cloud_api_bucket.storageClass = bucket.get_storage_class()
      if not fields or 'acl' in fields:
        for acl in AclTranslation.BotoBucketAclToMessage(
            bucket.get_acl(headers=headers)):
          try:
            cloud_api_bucket.acl.append(acl)
          except TRANSLATABLE_BOTO_EXCEPTIONS, e:
            translated_exception = self._TranslateBotoException(
                e, bucket_name=bucket.name)
            if (translated_exception and
                isinstance(translated_exception,
                           AccessDeniedException)):
              # JSON API doesn't differentiate between a blank ACL list
              # and an access denied, so this is intentionally left blank.
              pass
            else:
              self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'cors' in fields:
        try:
          boto_cors = bucket_uri.get_cors()
          cloud_api_bucket.cors = CorsTranslation.BotoCorsToMessage(boto_cors)
        except TRANSLATABLE_BOTO_EXCEPTIONS, e:
          self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'defaultObjectAcl' in fields:
        for acl in AclTranslation.BotoObjectAclToMessage(
            bucket.get_def_acl(headers=headers)):
          try:
            cloud_api_bucket.defaultObjectAcl.append(acl)
          except TRANSLATABLE_BOTO_EXCEPTIONS, e:
            translated_exception = self._TranslateBotoException(
                e, bucket_name=bucket.name)
            if (translated_exception and
                isinstance(translated_exception,
                           AccessDeniedException)):
              # JSON API doesn't differentiate between a blank ACL list
              # and an access denied, so this is intentionally left blank.
              pass
            else:
              self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'lifecycle' in fields:
        try:
          boto_lifecycle = bucket_uri.get_lifecycle_config()
          cloud_api_bucket.lifecycle = (
              LifecycleTranslation.BotoLifecycleToMessage(boto_lifecycle))
        except TRANSLATABLE_BOTO_EXCEPTIONS, e:
          self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'logging' in fields:
        try:
          boto_logging = bucket_uri.get_logging_config()
          if boto_logging and 'Logging' in boto_logging:
            logging_config = boto_logging['Logging']
            log_object_prefix_present = 'LogObjectPrefix' in logging_config
            log_bucket_present = 'LogBucket' in logging_config
            if log_object_prefix_present or log_bucket_present:
              cloud_api_bucket.logging = apitools_messages.Bucket.LoggingValue()
              if log_object_prefix_present:
                cloud_api_bucket.logging.logObjectPrefix = (
                    logging_config['LogObjectPrefix'])
              if log_bucket_present:
                cloud_api_bucket.logging.logBucket = logging_config['LogBucket']
        except TRANSLATABLE_BOTO_EXCEPTIONS, e:
          self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
      if not fields or 'website' in fields:
        try:
          boto_website = bucket_uri.get_website_config()
          if boto_website and 'WebsiteConfiguration' in boto_website:
            website_config = boto_website['WebsiteConfiguration']
            main_page_suffix_present = 'MainPageSuffix' in website_config
            not_found_page_present = 'NotFoundPage' in website_config
            if main_page_suffix_present or not_found_page_present:
              cloud_api_bucket.website = apitools_messages.Bucket.WebsiteValue()
              if main_page_suffix_present:
                cloud_api_bucket.website.mainPageSuffix = (
                    website_config['MainPageSuffix'])
              if not_found_page_present:
                cloud_api_bucket.website.notFoundPage = (
                    website_config['NotFoundPage'])
        except TRANSLATABLE_BOTO_EXCEPTIONS, e:
          self._TranslateExceptionAndRaise(e, bucket_name=bucket.name)
    if not fields or 'location' in fields:
      cloud_api_bucket.location = bucket_uri.get_location()
    if not fields or 'versioning' in fields:
      versioning = bucket_uri.get_versioning_config(headers=headers)
      if versioning:
        if (self.provider == 's3' and 'Versioning' in versioning and
            versioning['Versioning'] == 'Enabled'):
          cloud_api_bucket.versioning = (
              apitools_messages.Bucket.VersioningValue(enabled=True))
        elif self.provider == 'gs':
          cloud_api_bucket.versioning = (
              apitools_messages.Bucket.VersioningValue(enabled=True))

    # For S3 long bucket listing we do not support CORS, lifecycle, website,
    # and logging translation. The individual commands can be used to get
    # the XML equivalents for S3.
    return cloud_api_bucket

  def _BotoKeyToObject(self, key, fields=None):
    """Constructs an apitools Object from a boto key.

    Args:
      key: Boto key to construct Object from.
      fields: If present, construct the apitools Object with only this set of
              metadata fields.

    Returns:
      apitools Object corresponding to key.
    """
    custom_metadata = None
    if not fields or 'metadata' in fields:
      custom_metadata = self._TranslateBotoKeyCustomMetadata(key)
    cache_control = None
    if not fields or 'cacheControl' in fields:
      cache_control = getattr(key, 'cache_control', None)
    component_count = None
    if not fields or 'componentCount' in fields:
      component_count = getattr(key, 'component_count', None)
    content_disposition = None
    if not fields or 'contentDisposition' in fields:
      content_disposition = getattr(key, 'content_disposition', None)
    # Other fields like updated and ACL depend on the generation
    # of the object, so populate that regardless of whether it was requested.
    generation = self._TranslateBotoKeyGeneration(key)
    metageneration = None
    if not fields or 'metageneration' in fields:
      metageneration = self._TranslateBotoKeyMetageneration(key)
    updated = None
    # Translation code to avoid a dependency on dateutil.
    if not fields or 'updated' in fields:
      updated = self._TranslateBotoKeyTimestamp(key)
    etag = None
    if not fields or 'etag' in fields:
      etag = getattr(key, 'etag', None)
      if etag:
        etag = etag.strip('"\'')
    crc32c = None
    if not fields or 'crc32c' in fields:
      if hasattr(key, 'cloud_hashes') and 'crc32c' in key.cloud_hashes:
        crc32c = base64.encodestring(key.cloud_hashes['crc32c']).rstrip('\n')
    md5_hash = None
    if not fields or 'md5Hash' in fields:
      if hasattr(key, 'cloud_hashes') and 'md5' in key.cloud_hashes:
        md5_hash = base64.encodestring(key.cloud_hashes['md5']).rstrip('\n')
      elif self._GetMD5FromETag(getattr(key, 'etag', None)):
        md5_hash = Base64EncodeHash(self._GetMD5FromETag(key.etag))
      elif self.provider == 's3':
        # S3 etags are MD5s for non-multi-part objects, but multi-part objects
        # (which include all objects >= 5 GB) have a custom checksum
        # implementation that is not currently supported by gsutil.
        self.logger.warn(
            'Non-MD5 etag (%s) present for key %s, data integrity checks are '
            'not possible.', key.etag, key)

    # Serialize the boto key in the media link if it is requested. This
    # way we can later access the key without adding an HTTP call.
    media_link = None
    if not fields or 'mediaLink' in fields:
      media_link = binascii.b2a_base64(
          pickle.dumps(key, pickle.HIGHEST_PROTOCOL))
    size = None
    if not fields or 'size' in fields:
      size = key.size or 0
    storage_class = None
    if not fields or 'storageClass' in fields:
      storage_class = getattr(key, 'storage_class', None)

    cloud_api_object = apitools_messages.Object(
        bucket=key.bucket.name,
        name=key.name,
        size=size,
        contentEncoding=key.content_encoding,
        contentLanguage=key.content_language,
        contentType=key.content_type,
        cacheControl=cache_control,
        contentDisposition=content_disposition,
        etag=etag,
        crc32c=crc32c,
        md5Hash=md5_hash,
        generation=generation,
        metageneration=metageneration,
        componentCount=component_count,
        updated=updated,
        metadata=custom_metadata,
        mediaLink=media_link,
        storageClass=storage_class)

    # Remaining functions amend cloud_api_object.
    self._TranslateDeleteMarker(key, cloud_api_object)
    if not fields or 'acl' in fields:
      generation_str = GenerationFromUrlAndString(
          StorageUrlFromString(self.provider), generation)
      self._TranslateBotoKeyAcl(key, cloud_api_object,
                                generation=generation_str)

    return cloud_api_object

  def _TranslateBotoKeyCustomMetadata(self, key):
    """Populates an apitools message from custom metadata in the boto key."""
    custom_metadata = None
    if getattr(key, 'metadata', None):
      custom_metadata = apitools_messages.Object.MetadataValue(
          additionalProperties=[])
      for k, v in key.metadata.iteritems():
        if k.lower() == 'content-language':
          # Work around content-language being inserted into custom metadata.
          continue
        custom_metadata.additionalProperties.append(
            apitools_messages.Object.MetadataValue.AdditionalProperty(
                key=k, value=v))
    return custom_metadata

  def _TranslateBotoKeyGeneration(self, key):
    """Returns the generation/version_id number from the boto key if present."""
    generation = None
    if self.provider == 'gs':
      if getattr(key, 'generation', None):
        generation = long(key.generation)
    elif self.provider == 's3':
      if getattr(key, 'version_id', None):
        generation = EncodeStringAsLong(key.version_id)
    return generation

  def _TranslateBotoKeyMetageneration(self, key):
    """Returns the metageneration number from the boto key if present."""
    metageneration = None
    if self.provider == 'gs':
      if getattr(key, 'metageneration', None):
        metageneration = long(key.metageneration)
    return metageneration

  def _TranslateBotoKeyTimestamp(self, key):
    """Parses the timestamp from the boto key into a datetime object.

    This avoids a dependency on dateutil.

    Args:
      key: Boto key to get timestamp from.

    Returns:
      datetime object if string is parsed successfully, None otherwise.
    """
    if key.last_modified:
      if '.' in key.last_modified:
        key_us_timestamp = key.last_modified.rstrip('Z') + '000Z'
      else:
        key_us_timestamp = key.last_modified.rstrip('Z') + '.000000Z'
      fmt = '%Y-%m-%dT%H:%M:%S.%fZ'
      try:
        return datetime.datetime.strptime(key_us_timestamp, fmt)
      except ValueError:
        try:
          # Try alternate format.
          fmt = '%a, %d %b %Y %H:%M:%S %Z'
          return datetime.datetime.strptime(key.last_modified, fmt)
        except ValueError:
          # Could not parse the time; leave updated as None.
          return None
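
  # For example (illustrative, not part of the original module):
  # '2013-02-14T23:01:37.123Z' gains trailing zeros to become
  # '2013-02-14T23:01:37.123000Z' and parses with the ISO format above, while
  # an HTTP-date such as 'Wed, 09 Aug 2017 10:00:00 GMT' falls through to the
  # alternate '%a, %d %b %Y %H:%M:%S %Z' format.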

  def _TranslateDeleteMarker(self, key, cloud_api_object):
    """Marks deleted objects with a metadata value (for S3 compatibility)."""
    if isinstance(key, DeleteMarker):
      if not cloud_api_object.metadata:
        cloud_api_object.metadata = apitools_messages.Object.MetadataValue()
        cloud_api_object.metadata.additionalProperties = []
      cloud_api_object.metadata.additionalProperties.append(
          apitools_messages.Object.MetadataValue.AdditionalProperty(
              key=S3_DELETE_MARKER_GUID, value=True))

  def _TranslateBotoKeyAcl(self, key, cloud_api_object, generation=None):
    """Updates cloud_api_object with the ACL from the boto key."""
    storage_uri_for_key = self._StorageUriForObject(key.bucket.name, key.name,
                                                    generation=generation)
    headers = {}
    self._AddApiVersionToHeaders(headers)
    try:
      if self.provider == 'gs':
        key_acl = storage_uri_for_key.get_acl(headers=headers)
        # key.get_acl() does not support versioning so we need to use
        # storage_uri to ensure we're getting the versioned ACL.
        for acl in AclTranslation.BotoObjectAclToMessage(key_acl):
          cloud_api_object.acl.append(acl)
      if self.provider == 's3':
        key_acl = key.get_xml_acl(headers=headers)
        # ACLs for s3 are different and we use special markers to represent
        # them in the gsutil Cloud API.
        AddS3MarkerAclToObjectMetadata(cloud_api_object, key_acl)
    except boto.exception.GSResponseError, e:
      if e.status == 403:
        # Consume access denied exceptions to mimic JSON behavior of simply
        # returning None if sufficient permission is not present. The caller
        # needs to handle the case where the ACL is not populated.
        pass
      else:
        raise

  def _TranslateExceptionAndRaise(self, e, bucket_name=None, object_name=None,
                                  generation=None):
    """Translates a Boto exception and raises the translated or original value.

    Args:
      e: Any Exception.
      bucket_name: Optional bucket name in request that caused the exception.
      object_name: Optional object name in request that caused the exception.
      generation: Optional generation in request that caused the exception.

    Raises:
      Translated CloudApi exception, or the original exception if it was not
      translatable.
    """
    translated_exception = self._TranslateBotoException(
        e, bucket_name=bucket_name, object_name=object_name,
        generation=generation)
    if translated_exception:
      raise translated_exception
    else:
      raise

  def _TranslateBotoException(self, e, bucket_name=None, object_name=None,
                              generation=None):
    """Translates boto exceptions into their gsutil Cloud API equivalents.

    Args:
      e: Any exception in TRANSLATABLE_BOTO_EXCEPTIONS.
      bucket_name: Optional bucket name in request that caused the exception.
      object_name: Optional object name in request that caused the exception.
      generation: Optional generation in request that caused the exception.

    Returns:
      CloudStorageApiServiceException for translatable exceptions, None
      otherwise.

    Because we're using isinstance, check for subtypes first.
    """
    if isinstance(e, boto.exception.StorageResponseError):
      if e.status == 400:
        return BadRequestException(e.code, status=e.status, body=e.body)
      elif e.status == 401 or e.status == 403:
        return AccessDeniedException(e.code, status=e.status, body=e.body)
      elif e.status == 404:
        if bucket_name:
          if object_name:
            return CreateObjectNotFoundException(e.status, self.provider,
                                                 bucket_name, object_name,
                                                 generation=generation)
          return CreateBucketNotFoundException(e.status, self.provider,
                                               bucket_name)
        return NotFoundException(e.code, status=e.status, body=e.body)
      elif e.status == 409 and e.code and 'BucketNotEmpty' in e.code:
        return NotEmptyException('BucketNotEmpty (%s)' % bucket_name,
                                 status=e.status, body=e.body)
      elif e.status == 410:
        # 410 errors should always cause us to start over - either the UploadID
        # has expired or there was a server-side problem that requires starting
        # the upload over from scratch.
        return ResumableUploadStartOverException(e.message)
      elif e.status == 412:
        return PreconditionException(e.code, status=e.status, body=e.body)
    if isinstance(e, boto.exception.StorageCreateError):
      return ServiceException('Bucket already exists.', status=e.status,
                              body=e.body)

    if isinstance(e, boto.exception.BotoServerError):
      return ServiceException(e.message, status=e.status, body=e.body)

    if isinstance(e, boto.exception.InvalidUriError):
      # Work around textwrap when searching for this string.
      if e.message and NON_EXISTENT_OBJECT_REGEX.match(e.message.encode(UTF8)):
        return NotFoundException(e.message, status=404)
      return InvalidUrlError(e.message)

    if isinstance(e, boto.exception.ResumableUploadException):
      if e.disposition == boto.exception.ResumableTransferDisposition.ABORT:
        return ResumableUploadAbortException(e.message)
      elif (e.disposition ==
            boto.exception.ResumableTransferDisposition.START_OVER):
        return ResumableUploadStartOverException(e.message)
      else:
        return ResumableUploadException(e.message)

    if isinstance(e, boto.exception.ResumableDownloadException):
      return ResumableDownloadException(e.message)

    return None

  # For function docstrings, see CloudApiDelegator class.
  def XmlPassThroughGetAcl(self, storage_url, def_obj_acl=False):
    """See CloudApiDelegator class for function doc strings."""
    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      if def_obj_acl:
        return uri.get_def_acl()
      else:
        return uri.get_acl()
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

  def XmlPassThroughSetAcl(self, acl_text, storage_url, canned=True,
                           def_obj_acl=False):
    """See CloudApiDelegator class for function doc strings."""
    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      if canned:
        if def_obj_acl:
          canned_acls = uri.canned_acls()
          if acl_text not in canned_acls:
            raise CommandException('Invalid canned ACL "%s".' % acl_text)
          uri.set_def_acl(acl_text, uri.object_name)
        else:
          canned_acls = uri.canned_acls()
          if acl_text not in canned_acls:
            raise CommandException('Invalid canned ACL "%s".' % acl_text)
          uri.set_acl(acl_text, uri.object_name)
      else:
        if def_obj_acl:
          uri.set_def_xml_acl(acl_text, uri.object_name)
        else:
          uri.set_xml_acl(acl_text, uri.object_name)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

  # pylint: disable=catching-non-exception
  def XmlPassThroughSetCors(self, cors_text, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    # Parse XML document and convert into Cors object.
    if storage_url.scheme == 's3':
      cors_obj = S3Cors()
    else:
      cors_obj = Cors()
    h = handler.XmlHandler(cors_obj, None)
    try:
      xml.sax.parseString(cors_text, h)
    except SaxExceptions.SAXParseException, e:
      raise CommandException('Requested CORS is invalid: %s at line %s, '
                             'column %s' % (e.getMessage(), e.getLineNumber(),
                                            e.getColumnNumber()))

    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      uri.set_cors(cors_obj, False)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

  def XmlPassThroughGetCors(self, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    uri = boto.storage_uri(
        storage_url.url_string, suppress_consec_slashes=False,
        bucket_storage_uri_class=self.bucket_storage_uri_class,
        debug=self.debug)
    try:
      cors = uri.get_cors(False)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

    parsed_xml = xml.dom.minidom.parseString(cors.to_xml().encode(UTF8))
    # Pretty-print the XML to make it more easily human editable.
    return parsed_xml.toprettyxml(indent='    ')

  def XmlPassThroughGetLifecycle(self, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      lifecycle = uri.get_lifecycle_config(False)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

    parsed_xml = xml.dom.minidom.parseString(lifecycle.to_xml().encode(UTF8))
    # Pretty-print the XML to make it more easily human editable.
    return parsed_xml.toprettyxml(indent='    ')

  def XmlPassThroughSetLifecycle(self, lifecycle_text, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    # Parse XML document and convert into lifecycle object.
    if storage_url.scheme == 's3':
      lifecycle_obj = S3Lifecycle()
    else:
      lifecycle_obj = LifecycleConfig()
    h = handler.XmlHandler(lifecycle_obj, None)
    try:
      xml.sax.parseString(lifecycle_text, h)
    except SaxExceptions.SAXParseException, e:
      raise CommandException(
          'Requested lifecycle config is invalid: %s at line %s, column %s' %
          (e.getMessage(), e.getLineNumber(), e.getColumnNumber()))

    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      uri.configure_lifecycle(lifecycle_obj, False)
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

  def XmlPassThroughGetLogging(self, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      logging_config_xml = UnaryDictToXml(uri.get_logging_config())
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

    return XmlParseString(logging_config_xml).toprettyxml()

  def XmlPassThroughGetWebsite(self, storage_url):
    """See CloudApiDelegator class for function doc strings."""
    try:
      uri = boto.storage_uri(
          storage_url.url_string, suppress_consec_slashes=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class,
          debug=self.debug)
      web_config_xml = UnaryDictToXml(uri.get_website_config())
    except TRANSLATABLE_BOTO_EXCEPTIONS, e:
      self._TranslateExceptionAndRaise(e)

    return XmlParseString(web_config_xml).toprettyxml()