Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / third_party / google_appengine_cloudstorage / cloudstorage / common.py
blob9976b919f7c981cdccf2efd18d5308c995abaf49
1 # Copyright 2012 Google Inc. All Rights Reserved.
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing,
10 # software distributed under the License is distributed on an
11 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 # either express or implied. See the License for the specific
13 # language governing permissions and limitations under the License.
15 """Helpers shared by cloudstorage_stub and cloudstorage_api."""
# Public API of this module. NOTE: the closing bracket of this list was
# missing in the reviewed copy (truncated paste); restored here.
__all__ = ['CS_XML_NS',
           'CSFileStat',
           'dt_str_to_posix',
           'local_api_url',
           'LOCAL_GCS_ENDPOINT',
           'local_run',
           'get_access_token',
           'get_metadata',
           'GCSFileStat',
           'http_time_to_posix',
           'memory_usage',
           'posix_time_to_http',
           'posix_to_dt_str',
           'set_access_token',
           'validate_options',
           'validate_bucket_name',
           'validate_bucket_path',
           'validate_file_path',
          ]
import calendar
import datetime
from email import utils as email_utils
import functools
import logging
import os
import re

try:
  from google.appengine.api import runtime
except ImportError:
  from google.appengine.api import runtime
# Bucket names: 3-63 characters drawn from a-z, 0-9, dot, dash, underscore.
_GCS_BUCKET_REGEX_BASE = r'[a-z0-9\.\-_]{3,63}'
# A bare bucket name, e.g. 'my-bucket'.
_GCS_BUCKET_REGEX = re.compile(_GCS_BUCKET_REGEX_BASE + r'$')
# A bucket path, e.g. '/my-bucket' (leading slash, nothing after the name).
_GCS_BUCKET_PATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'$')
# A bucket path optionally followed by anything: '/bucket', '/bucket/',
# or '/bucket/prefix'.
_GCS_PATH_PREFIX_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'.*')
# A full file path: '/bucket/' followed by the object name.
_GCS_FULLPATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'/.*')
# Header (prefixes) treated as user-settable file metadata.
_GCS_METADATA = ['x-goog-meta-',
                 'content-disposition',
                 'cache-control',
                 'content-encoding']
# All supported per-file options: metadata headers plus the canned-ACL header.
_GCS_OPTIONS = _GCS_METADATA + ['x-goog-acl']
# XML namespace used in GCS GET-bucket (list) responses.
CS_XML_NS = 'http://doc.s3.amazonaws.com/2006-03-01'
# Dev-appserver endpoint that emulates GCS locally.
LOCAL_GCS_ENDPOINT = '/_ah/gcs'
# Shared access token; managed via set_access_token()/get_access_token().
_access_token = ''
# Maximum number of results a single GET bucket request may return.
_MAX_GET_BUCKET_RESULT = 1000
def set_access_token(access_token):
  """Set the shared access token used to talk to Google Cloud Storage.

  Once a token is set, the library always contacts the real Google Cloud
  Storage with it, even when running on the dev appserver. Tokens expire;
  renewing them is the caller's responsibility.

  When no token is set, the library requests and refreshes one
  automatically on appserver, and talks to the local Google Cloud Storage
  stub on the dev appserver.

  Args:
    access_token: a bearer token, e.g. the string that follows 'Bearer'
      in the output of running 'gsutil -d ls'.
  """
  global _access_token
  _access_token = access_token
def get_access_token():
  """Return the access token last stored via set_access_token (or '')."""
  return _access_token
class GCSFileStat(object):
  """Container for GCS file stat."""

  def __init__(self,
               filename,
               st_size,
               etag,
               st_ctime,
               content_type=None,
               metadata=None,
               is_dir=False):
    """Initialize.

    For files, the non optional arguments are always set.
    For directories, only filename and is_dir is set.

    Args:
      filename: a Google Cloud Storage filename of form '/bucket/filename'.
      st_size: file size in bytes. long compatible.
      etag: hex digest of the md5 hash of the file's content. str.
      st_ctime: posix file creation time. float compatible.
      content_type: content type. str.
      metadata: a str->str dict of user specified options when creating
        the file. Possible keys are x-goog-meta-, content-disposition,
        content-encoding, and cache-control.
      is_dir: True if this represents a directory. False if this is a real file.
    """
    self.filename = filename
    self.is_dir = is_dir
    # Directories carry no size/ctime/etag; leave them None.
    self.st_size = None
    self.st_ctime = None
    self.etag = None
    self.content_type = content_type
    self.metadata = metadata

    if not is_dir:
      self.st_size = long(st_size)
      self.st_ctime = float(st_ctime)
      # GCS wraps the etag in double quotes; strip them for callers.
      if etag[0] == '"' and etag[-1] == '"':
        etag = etag[1:-1]
      self.etag = etag

  def __repr__(self):
    if self.is_dir:
      return '(directory: %s)' % self.filename

    return (
        '(filename: %(filename)s, st_size: %(st_size)s, '
        'st_ctime: %(st_ctime)s, etag: %(etag)s, '
        'content_type: %(content_type)s, '
        'metadata: %(metadata)s)' %
        dict(filename=self.filename,
             st_size=self.st_size,
             st_ctime=self.st_ctime,
             etag=self.etag,
             content_type=self.content_type,
             metadata=self.metadata))

  def __cmp__(self, other):
    """Order stats by filename. Raises ValueError on type mismatch."""
    if not isinstance(other, self.__class__):
      # BUG FIX: the message was passed logging-style as extra ValueError
      # args and never actually %-formatted; format it explicitly.
      raise ValueError('Argument to cmp must have the same type. '
                       'Expect %s, got %s' % (self.__class__.__name__,
                                              other.__class__.__name__))
    if self.filename > other.filename:
      return 1
    elif self.filename < other.filename:
      return -1
    return 0

  def __hash__(self):
    # Prefer the content hash when present; directories fall back to name.
    if self.etag:
      return hash(self.etag)
    return hash(self.filename)
172 CSFileStat = GCSFileStat
def get_metadata(headers):
  """Get user defined options from HTTP response headers."""
  result = {}
  for key, value in headers.iteritems():
    if any(key.lower().startswith(prefix) for prefix in _GCS_METADATA):
      result[key] = value
  return result
def validate_bucket_name(name):
  """Validate a Google Storage bucket name.

  Args:
    name: a Google Storage bucket name with no prefix or suffix.

  Raises:
    ValueError: if name is invalid.
  """
  _validate_path(name)
  if _GCS_BUCKET_REGEX.match(name) is None:
    raise ValueError('Bucket should be 3-63 characters long using only a-z,'
                     '0-9, underscore, dash or dot but got %s' % name)
def validate_bucket_path(path):
  """Validate a Google Cloud Storage bucket path.

  Args:
    path: a Google Storage bucket path. It should have form '/bucket'.

  Raises:
    ValueError: if path is invalid.
  """
  _validate_path(path)
  if _GCS_BUCKET_PATH_REGEX.match(path) is None:
    raise ValueError('Bucket should have format /bucket '
                     'but got %s' % path)
def validate_file_path(path):
  """Validate a Google Cloud Storage file path.

  Args:
    path: a Google Storage file path. It should have form '/bucket/filename'.

  Raises:
    ValueError: if path is invalid.
  """
  _validate_path(path)
  if _GCS_FULLPATH_REGEX.match(path) is None:
    raise ValueError('Path should have format /bucket/filename '
                     'but got %s' % path)
def _process_path_prefix(path_prefix):
  """Validate and process a Google Cloud Storage path prefix.

  Args:
    path_prefix: a Google Cloud Storage path prefix of format '/bucket/prefix'
      or '/bucket/' or '/bucket'.

  Raises:
    ValueError: if path is invalid.

  Returns:
    a tuple of /bucket and prefix. prefix can be None.
  """
  _validate_path(path_prefix)
  if _GCS_PATH_PREFIX_REGEX.match(path_prefix) is None:
    raise ValueError('Path prefix should have format /bucket, /bucket/, '
                     'or /bucket/prefix but got %s.' % path_prefix)
  # Find the slash separating the bucket from the (optional) prefix.
  sep_index = path_prefix.find('/', 1)
  if sep_index == -1:
    return path_prefix, None
  bucket = path_prefix[:sep_index]
  # An empty remainder ('/bucket/') normalizes to None.
  prefix = path_prefix[sep_index + 1:] or None
  return bucket, prefix
252 def _validate_path(path):
253 """Basic validation of Google Storage paths.
255 Args:
256 path: a Google Storage path. It should have form '/bucket/filename'
257 or '/bucket'.
259 Raises:
260 ValueError: if path is invalid.
261 TypeError: if path is not of type basestring.
263 if not path:
264 raise ValueError('Path is empty')
265 if not isinstance(path, basestring):
266 raise TypeError('Path should be a string but is %s (%s).' %
267 (path.__class__, path))
def validate_options(options):
  """Validate Google Cloud Storage options.

  Args:
    options: a str->basestring dict of options to pass to Google Cloud Storage.

  Raises:
    ValueError: if option is not supported.
    TypeError: if option is not of type str or value of an option
      is not of type basestring.
  """
  if not options:
    return

  for key, value in options.iteritems():
    if not isinstance(key, str):
      raise TypeError('option %r should be a str.' % key)
    lowered = key.lower()
    if not any(lowered.startswith(valid) for valid in _GCS_OPTIONS):
      raise ValueError('option %s is not supported.' % key)
    if not isinstance(value, basestring):
      raise TypeError('value %r for option %s should be of type basestring.' %
                      (value, key))
def http_time_to_posix(http_time):
  """Convert HTTP time format to posix time.

  See http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3.1
  for http time format.

  Args:
    http_time: time in RFC 2616 format. e.g.
      "Mon, 20 Nov 1995 19:12:08 GMT".

  Returns:
    Seconds from unix epoch, or None when http_time is None.
  """
  if http_time is None:
    return None
  parsed = email_utils.parsedate_tz(http_time)
  return email_utils.mktime_tz(parsed)
def posix_time_to_http(posix_time):
  """Convert posix time to HTML header time format.

  Args:
    posix_time: unix time.

  Returns:
    A datetime str in RFC 2616 format, or None for falsy input.
  """
  if not posix_time:
    return None
  return email_utils.formatdate(posix_time, usegmt=True)
# Date/time layout shared by dt_str_to_posix and posix_to_dt_str.
_DT_FORMAT = '%Y-%m-%dT%H:%M:%S'


def dt_str_to_posix(dt_str):
  """Convert a datetime str to posix time.

  datetime str is of format %Y-%m-%dT%H:%M:%S.%fZ,
  e.g. 2013-04-12T00:22:27.978Z. According to ISO 8601, T is a separator
  between date and time when they are on the same line.
  Z indicates UTC (zero meridian). The fractional-seconds part is
  optional and is ignored when present.

  A pointer: http://www.cl.cam.ac.uk/~mgk25/iso-time.html

  This is used to parse LastModified node from GCS's GET bucket XML response.

  Args:
    dt_str: A datetime str.

  Returns:
    Seconds from unix epoch. By posix definition, epoch is midnight
    1970/1/1 UTC.
  """
  # BUG FIX: the old `parsable, _ = dt_str.split('.')` raised ValueError on
  # valid ISO 8601 timestamps without fractional seconds (e.g.
  # '2013-04-12T00:22:27Z'). Drop any fraction and the trailing 'Z'
  # before parsing; inputs with a fraction behave exactly as before.
  parsable = dt_str.split('.')[0].rstrip('Z')
  dt = datetime.datetime.strptime(parsable, _DT_FORMAT)
  return calendar.timegm(dt.utctimetuple())
def posix_to_dt_str(posix):
  """Reverse of str_to_datetime.

  This is used by GCS stub to generate GET bucket XML response.

  Args:
    posix: A float of secs from unix epoch.

  Returns:
    A datetime str.
  """
  stamp = datetime.datetime.utcfromtimestamp(posix)
  # GCS timestamps carry a fractional part; the stub always emits .000.
  return '%s.000Z' % stamp.strftime(_DT_FORMAT)
def local_run():
  """Whether we should hit GCS dev appserver stub."""
  server_software = os.environ.get('SERVER_SOFTWARE')
  # No SERVER_SOFTWARE at all means a plain local process: use the stub.
  if server_software is None:
    return True
  # remote_api shells talk to the real service even from a dev machine.
  if 'remote_api' in server_software:
    return False
  return server_software.startswith(('Development', 'testutil'))
def local_api_url():
  """Return URL for GCS emulation on dev appserver."""
  host = os.environ.get('HTTP_HOST')
  return 'http://{0}{1}'.format(host, LOCAL_GCS_ENDPOINT)
def memory_usage(method):
  """Log memory usage before and after a method.

  Args:
    method: the callable to wrap.

  Returns:
    A wrapper that logs runtime memory usage around each call to method.
  """
  # BUG FIX: without functools.wraps the wrapper hid the wrapped
  # function's __name__/__doc__, which breaks introspection and makes
  # the log lines above useless for stacked decorators.
  @functools.wraps(method)
  def wrapper(*args, **kwargs):
    logging.info('Memory before method %s is %s.',
                 method.__name__, runtime.memory_usage().current())
    result = method(*args, **kwargs)
    logging.info('Memory after method %s is %s',
                 method.__name__, runtime.memory_usage().current())
    return result
  return wrapper
def _add_ns(tagname):
  """Qualify tagname with the GCS XML namespace, ElementTree-style."""
  return '{%s}%s' % (CS_XML_NS, tagname)
# Namespace-qualified tag names used when parsing GET bucket XML responses.
_T_CONTENTS = _add_ns('Contents')
_T_LAST_MODIFIED = _add_ns('LastModified')
_T_ETAG = _add_ns('ETag')
_T_KEY = _add_ns('Key')
_T_SIZE = _add_ns('Size')
_T_PREFIX = _add_ns('Prefix')
_T_COMMON_PREFIXES = _add_ns('CommonPrefixes')
_T_NEXT_MARKER = _add_ns('NextMarker')
_T_IS_TRUNCATED = _add_ns('IsTruncated')