Revert of Update the extension whitelist for application host change. (patchset ...
[chromium-blink-merge.git] / third_party / google_appengine_cloudstorage / cloudstorage / cloudstorage_api.py
blob51b4c842b798d9d148c59a60747ae851ee218958
1 # Copyright 2012 Google Inc. All Rights Reserved.
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing,
10 # software distributed under the License is distributed on an
11 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 # either express or implied. See the License for the specific
13 # language governing permissions and limitations under the License.
15 """File Interface for Google Cloud Storage."""
19 from __future__ import with_statement
23 __all__ = ['delete',
24 'listbucket',
25 'open',
26 'stat',
29 import logging
30 import StringIO
31 import urllib
32 import xml.etree.cElementTree as ET
33 from . import api_utils
34 from . import common
35 from . import errors
36 from . import storage_api
def open(filename,
         mode='r',
         content_type=None,
         options=None,
         read_buffer_size=storage_api.ReadBuffer.DEFAULT_BUFFER_SIZE,
         retry_params=None,
         _account_id=None):
  """Open a Google Cloud Storage file and return a file-like buffer for it.

  Args:
    filename: A Google Cloud Storage filename of form '/bucket/filename'.
    mode: 'r' to read, 'w' to write. Reading requires the file to exist;
      writing creates the file or overwrites an existing one.
    content_type: The MIME type of the file. str. Writing mode only.
    options: A str->basestring dict of extra headers to send to GCS,
      e.g. {'x-goog-acl': 'private', 'x-goog-meta-foo': 'foo'}.
      Supported options are x-goog-acl, x-goog-meta-, cache-control,
      content-disposition, and content-encoding. Writing mode only.
      See https://developers.google.com/storage/docs/reference-headers
      for details.
    read_buffer_size: The buffer size for reads. The reader keeps one buffer
      and prefetches another. To minimize blocking on large files, always
      read by buffer size. To minimize the number of RPC requests on small
      files, set a large buffer size. Max is 30MB.
    retry_params: An api_utils.RetryParams used for subsequent GCS calls made
      through the returned handle. If None, the default one is used.
    _account_id: Internal-use only.

  Returns:
    A reading or writing buffer that supports a file-like interface. The
    buffer must be closed once operations on it are done.

  Raises:
    errors.AuthorizationError: if authorization failed.
    errors.NotFoundError: if an object that's expected to exist doesn't.
    ValueError: invalid open mode, or content_type or options were given
      in reading mode.
  """
  common.validate_file_path(filename)
  api = storage_api._get_storage_api(retry_params=retry_params,
                                     account_id=_account_id)
  quoted_name = api_utils._quote_filename(filename)

  if mode == 'r':
    # Headers and MIME type only make sense when creating an object.
    if content_type or options:
      raise ValueError('Options and content_type can only be specified '
                       'for writing mode.')
    return storage_api.ReadBuffer(api,
                                  quoted_name,
                                  buffer_size=read_buffer_size)

  if mode == 'w':
    common.validate_options(options)
    return storage_api.StreamingBuffer(api, quoted_name, content_type, options)

  raise ValueError('Invalid mode %s.' % mode)
def delete(filename, retry_params=None, _account_id=None):
  """Delete a Google Cloud Storage file.

  Args:
    filename: A Google Cloud Storage filename of form '/bucket/filename'.
    retry_params: An api_utils.RetryParams for this call to GCS. If None,
      the default one is used.
    _account_id: Internal-use only.

  Raises:
    errors.NotFoundError: if the file doesn't exist prior to deletion.
  """
  api = storage_api._get_storage_api(retry_params=retry_params,
                                     account_id=_account_id)
  common.validate_file_path(filename)
  quoted_name = api_utils._quote_filename(filename)
  status, resp_headers, _content = api.delete_object(quoted_name)
  # Only 204 is accepted as success; the quoted name is what gets reported.
  errors.check_status(status, [204], quoted_name, resp_headers=resp_headers)
def stat(filename, retry_params=None, _account_id=None):
  """Fetch the GCSFileStat of a Google Cloud Storage file.

  Args:
    filename: A Google Cloud Storage filename of form '/bucket/filename'.
    retry_params: An api_utils.RetryParams for this call to GCS. If None,
      the default one is used.
    _account_id: Internal-use only.

  Returns:
    A GCSFileStat object built from the response headers of a HEAD request
    on the file.

  Raises:
    errors.AuthorizationError: if authorization failed.
    errors.NotFoundError: if an object that's expected to exist doesn't.
  """
  common.validate_file_path(filename)
  api = storage_api._get_storage_api(retry_params=retry_params,
                                     account_id=_account_id)
  quoted_name = api_utils._quote_filename(filename)
  status, headers, _content = api.head_object(quoted_name)
  # The unquoted name is used for error reporting and in the returned stat.
  errors.check_status(status, [200], filename, resp_headers=headers)
  return common.GCSFileStat(
      filename=filename,
      st_size=headers.get('content-length'),
      st_ctime=common.http_time_to_posix(headers.get('last-modified')),
      etag=headers.get('etag'),
      content_type=headers.get('content-type'),
      metadata=common.get_metadata(headers))
def _copy2(src, dst, metadata=None, retry_params=None):
  """Copy the file content from src to dst.

  Internal use only!

  The copy is requested with a PUT on dst that carries an
  'x-goog-copy-source' header naming src.

  Args:
    src: /bucket/filename
    dst: /bucket/filename
    metadata: a dict of metadata for this copy. If None, old metadata is copied.
      For example, {'x-goog-meta-foo': 'bar'}. The dict is not modified.
    retry_params: An api_utils.RetryParams for this call to GCS. If None,
      the default one is used.

  Raises:
    errors.AuthorizationError: if authorization failed.
    errors.NotFoundError: if an object that's expected to exist doesn't.
  """
  common.validate_file_path(src)
  common.validate_file_path(dst)

  if metadata is None:
    headers = {}
    copy_meta = 'COPY'
  else:
    # Work on a copy so the caller's dict is not polluted with the
    # x-goog-copy-source / x-goog-metadata-directive request headers.
    headers = dict(metadata)
    copy_meta = 'REPLACE'
  headers.update({'x-goog-copy-source': src,
                  'x-goog-metadata-directive': copy_meta})

  api = storage_api._get_storage_api(retry_params=retry_params)
  status, resp_headers, _ = api.put_object(
      api_utils._quote_filename(dst), headers=headers)
  errors.check_status(status, [200], src, headers, resp_headers)
def listbucket(path_prefix, marker=None, prefix=None, max_keys=None,
               delimiter=None, retry_params=None, _account_id=None):
  """Return a GCSFileStat iterator over a bucket.

  Optional arguments can limit the result to a subset of files under bucket.

  This function has two modes:
  1. List bucket mode: Lists all files in the bucket without any concept of
     hierarchy. GCS doesn't have real directory hierarchies.
  2. Directory emulation mode: If you specify the 'delimiter' argument,
     it is used as a path separator to emulate a hierarchy of directories.
     In this mode, the "path_prefix" argument should end in the delimiter
     specified (thus designating a logical directory). The logical
     directory's contents, both files and subdirectories, are listed. The
     names of subdirectories returned will end with the delimiter, so
     listbucket can be called again with a subdirectory name to list that
     subdirectory's contents.

  Args:
    path_prefix: A Google Cloud Storage path of format "/bucket" or
      "/bucket/prefix". Only objects whose fullpath starts with the
      path_prefix will be returned.
    marker: Another path prefix. Only objects whose fullpath starts
      lexicographically after marker will be returned (exclusive).
    prefix: Deprecated. Use path_prefix.
    max_keys: The limit on the number of objects to return. int.
      For best performance, specify max_keys only if you know how many objects
      you want. Otherwise, this method requests large batches and handles
      pagination for you.
    delimiter: Use to turn on directory mode. str of one or multiple chars
      that your bucket uses as its directory separator.
    retry_params: An api_utils.RetryParams for this call to GCS. If None,
      the default one is used.
    _account_id: Internal-use only.

  Examples:
    For files "/bucket/a",
              "/bucket/bar/1"
              "/bucket/foo",
              "/bucket/foo/1", "/bucket/foo/2/1", "/bucket/foo/3/1",

    Regular mode:
    listbucket("/bucket/f", marker="/bucket/foo/1")
    will match "/bucket/foo/2/1", "/bucket/foo/3/1".

    Directory mode:
    listbucket("/bucket/", delimiter="/")
    will match "/bucket/a", "/bucket/bar/", "/bucket/foo", "/bucket/foo/".
    listbucket("/bucket/foo/", delimiter="/")
    will match "/bucket/foo/1", "/bucket/foo/2/", "/bucket/foo/3/"

  Returns:
    Regular mode:
    A GCSFileStat iterator over matched files ordered by filename.
    The iterator returns GCSFileStat objects. filename, etag, st_size,
    st_ctime, and is_dir are set.

    Directory emulation mode:
    A GCSFileStat iterator over matched files and directories ordered by
    name. The iterator returns GCSFileStat objects. For directories,
    only the filename and is_dir fields are set.

    The last name yielded can be used as next call's marker.
  """
  if prefix:
    # Deprecated calling convention: path_prefix is the bare bucket path.
    common.validate_bucket_path(path_prefix)
    bucket = path_prefix
  else:
    bucket, prefix = common._process_path_prefix(path_prefix)

  # Markers are sent relative to the bucket: drop '<bucket>/' from the front.
  if marker and marker.startswith(bucket):
    marker = marker[len(bucket) + 1:]

  api = storage_api._get_storage_api(retry_params=retry_params,
                                     account_id=_account_id)

  # Only options the caller actually supplied are sent.
  options = {}
  candidates = (('marker', marker),
                ('max-keys', max_keys),
                ('prefix', prefix),
                ('delimiter', delimiter))
  for name, value in candidates:
    if value:
      options[name] = value

  return _Bucket(api, bucket, options)
class _Bucket(object):
  """A wrapper for a GCS bucket as the return value of listbucket."""

  def __init__(self, api, path, options):
    """Initialize.

    Args:
      api: storage_api instance.
      path: bucket path of form '/bucket'.
      options: a dict of listbucket options. Please see listbucket doc.
    """
    self._init(api, path, options)

  def _init(self, api, path, options):
    """Set up instance state and issue the first GET bucket request.

    Shared by __init__ and __setstate__.

    Args:
      api: storage_api instance.
      path: bucket path of form '/bucket'.
      options: a dict of listbucket options. It is copied, not retained.
    """
    self._api = api
    self._path = path
    self._options = options.copy()
    # The request is started right away; __iter__ collects its result.
    self._get_bucket_fut = self._api.get_bucket_async(
        self._path + '?' + urllib.urlencode(self._options))
    # Last GCSFileStat handed out by __iter__; used as the resume marker.
    self._last_yield = None
    # Number of results still allowed under 'max-keys', or None if unlimited.
    self._new_max_keys = self._options.get('max-keys')

  def __getstate__(self):
    """Return the state needed to resume listing after the last yield.

    Returns:
      A dict with 'api', 'path' and 'options'. 'options' is a snapshot of
      the current request options with 'marker' set to the last yielded
      name (relative to the bucket path) and 'max-keys' set to the number
      of results still to be returned.
    """
    # Snapshot the options: writing the resume marker / remaining max-keys
    # into self._options would change the requests of the live iterator.
    options = self._options.copy()
    if self._last_yield:
      options['marker'] = self._last_yield.filename[len(self._path) + 1:]
    if self._new_max_keys is not None:
      options['max-keys'] = self._new_max_keys
    return {'api': self._api,
            'path': self._path,
            'options': options}

  def __setstate__(self, state):
    """Restore from a __getstate__ dict; this issues a new GET bucket call."""
    self._init(state['api'], state['path'], state['options'])

  def __iter__(self):
    """Iter over the bucket.

    Yields:
      GCSFileStat: a GCSFileStat for an object in the bucket.
        They are ordered by GCSFileStat.filename.
    """
    total = 0
    max_keys = self._options.get('max-keys')

    while self._get_bucket_fut:
      status, resp_headers, content = self._get_bucket_fut.get_result()
      errors.check_status(status, [200], self._path, resp_headers=resp_headers,
                          extras=self._options)

      # Start fetching the next batch before parsing the current one.
      if self._should_get_another_batch(content):
        self._get_bucket_fut = self._api.get_bucket_async(
            self._path + '?' + urllib.urlencode(self._options))
      else:
        self._get_bucket_fut = None

      root = ET.fromstring(content)
      dirs = self._next_dir_gen(root)
      files = self._next_file_gen(root)
      # Both generators end with a None sentinel, so .next() never raises
      # StopIteration inside the merge loop below.
      next_file = files.next()
      next_dir = dirs.next()

      # Merge the file and directory streams, smaller entry first.
      while ((max_keys is None or total < max_keys) and
             not (next_file is None and next_dir is None)):
        total += 1
        if next_file is None:
          self._last_yield = next_dir
          next_dir = dirs.next()
        elif next_dir is None:
          self._last_yield = next_file
          next_file = files.next()
        elif next_dir < next_file:
          self._last_yield = next_dir
          next_dir = dirs.next()
        elif next_file < next_dir:
          self._last_yield = next_file
          next_file = files.next()
        else:
          # NOTE(review): neither generator is advanced here, so if a file
          # and a directory ever compare equal the previous entry is yielded
          # again on every pass. Behavior intentionally left unchanged.
          logging.error(
              'Should never reach. next file is %r. next dir is %r.',
              next_file, next_dir)
        if self._new_max_keys:
          self._new_max_keys -= 1
        yield self._last_yield

  def _next_file_gen(self, root):
    """Generator for next file element in the document.

    Args:
      root: root element of the XML tree.

    Yields:
      GCSFileStat for the next file, then a final None once all file
      elements have been consumed.
    """
    for e in root.getiterator(common._T_CONTENTS):
      st_ctime, size, etag, key = None, None, None, None
      for child in e.getiterator('*'):
        if child.tag == common._T_LAST_MODIFIED:
          st_ctime = common.dt_str_to_posix(child.text)
        elif child.tag == common._T_ETAG:
          etag = child.text
        elif child.tag == common._T_SIZE:
          size = child.text
        elif child.tag == common._T_KEY:
          key = child.text
      yield common.GCSFileStat(self._path + '/' + key,
                               size, etag, st_ctime)
      # Clear the element once it has been converted.
      e.clear()
    yield None

  def _next_dir_gen(self, root):
    """Generator for next directory element in the document.

    Args:
      root: root element in the XML tree.

    Yields:
      GCSFileStat for the next directory (is_dir=True, other fields None),
      then a final None once all directory elements have been consumed.
    """
    for e in root.getiterator(common._T_COMMON_PREFIXES):
      yield common.GCSFileStat(
          self._path + '/' + e.find(common._T_PREFIX).text,
          st_size=None, etag=None, st_ctime=None, is_dir=True)
      e.clear()
    yield None

  def _should_get_another_batch(self, content):
    """Whether to issue another GET bucket call.

    Args:
      content: response XML.

    Returns:
      True if should, also update self._options for the next request.
      False otherwise.
    """
    # A max-keys at or below common._MAX_GET_BUCKET_RESULT never triggers a
    # follow-up request (presumably one response can hold that many results).
    if ('max-keys' in self._options and
        self._options['max-keys'] <= common._MAX_GET_BUCKET_RESULT):
      return False

    elements = self._find_elements(
        content, set([common._T_IS_TRUNCATED,
                      common._T_NEXT_MARKER]))
    if elements.get(common._T_IS_TRUNCATED, 'false').lower() != 'true':
      return False

    next_marker = elements.get(common._T_NEXT_MARKER)
    if next_marker is None:
      # Truncated but no marker to continue from: drop any stale marker.
      self._options.pop('marker', None)
      return False
    self._options['marker'] = next_marker
    return True

  def _find_elements(self, result, elements):
    """Find interesting elements from XML.

    This function tries to only look for specified elements
    without parsing the entire XML. The specified elements are better
    located near the beginning.

    Args:
      result: response XML.
      elements: a set of interesting element tags. It is not modified.

    Returns:
      A dict from element tag to element value.
    """
    element_mapping = {}
    # Track outstanding tags in a private copy so the caller's set survives.
    pending = set(elements)
    result = StringIO.StringIO(result)
    for _, e in ET.iterparse(result, events=('end',)):
      # Stop parsing as soon as every requested tag has been seen.
      if not pending:
        break
      if e.tag in pending:
        element_mapping[e.tag] = e.text
        pending.remove(e.tag)
    return element_mapping