1 # Copyright 2012 Google Inc. All Rights Reserved.
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing,
10 # software distributed under the License is distributed on an
11 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 # either express or implied. See the License for the specific
13 # language governing permissions and limitations under the License.
15 """Helpers shared by cloudstorage_stub and cloudstorage_api."""
21 __all__
= ['CS_XML_NS',
36 'validate_bucket_name',
37 'validate_bucket_path',
44 from email
import utils
as email_utils
50 from google
.appengine
.api
import runtime
52 from google
.appengine
.api
import runtime
# Character set and length limits shared by every bucket pattern below.
_GCS_BUCKET_REGEX_BASE = r'[a-z0-9\.\-_]{3,63}'

# The fully anchored patterns end in \Z rather than $: with match(), '$' also
# succeeds just before a trailing newline, so 'bucket\n' would be accepted as
# a valid name. \Z only matches at the very end of the string.
_GCS_BUCKET_REGEX = re.compile(_GCS_BUCKET_REGEX_BASE + r'\Z')
_GCS_BUCKET_PATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'\Z')

# These two deliberately carry no end anchor; whatever follows the bucket
# part is consumed by the trailing '.*'.
_GCS_PATH_PREFIX_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'.*')
_GCS_FULLPATH_REGEX = re.compile(r'/' + _GCS_BUCKET_REGEX_BASE + r'/.*')
60 _GCS_METADATA
= ['x-goog-meta-',
61 'content-disposition',
# Option name prefixes: every metadata prefix plus the ACL header.
_GCS_OPTIONS = _GCS_METADATA + ['x-goog-acl']

# XML namespace used to qualify tag names.
CS_XML_NS = 'http://doc.s3.amazonaws.com/2006-03-01'

# Path component of the GCS emulation URL on dev appserver.
LOCAL_GCS_ENDPOINT = '/_ah/gcs'

# NOTE(review): presumably the cap on entries in one GET bucket response;
# confirm against the code that reads it.
_MAX_GET_BUCKET_RESULT = 1000
73 def set_access_token(access_token
):
74 """Set the shared access token to authenticate with Google Cloud Storage.
76 When set, the library will always attempt to communicate with the
77 real Google Cloud Storage with this token even when running on dev appserver.
78 Note the token could expire so it's up to you to renew it.
80 When absent, the library will automatically request and refresh a token
81 on appserver, or when on dev appserver, talk to a Google Cloud Storage
85 access_token: you can get one by running 'gsutil -d ls' and copying the
89 _access_token
= access_token
92 def get_access_token():
93 """Returns the shared access token."""
97 class GCSFileStat(object):
98 """Container for GCS file stat."""
110 For files, the non optional arguments are always set.
111 For directories, only filename and is_dir are set.
114 filename: a Google Cloud Storage filename of form '/bucket/filename'.
115 st_size: file size in bytes. long compatible.
116 etag: hex digest of the md5 hash of the file's content. str.
117 st_ctime: posix file creation time. float compatible.
118 content_type: content type. str.
119 metadata: a str->str dict of user specified options when creating
120 the file. Possible keys are x-goog-meta-, content-disposition,
121 content-encoding, and cache-control.
122 is_dir: True if this represents a directory. False if this is a real file.
124 self
.filename
= filename
129 self
.content_type
= content_type
130 self
.metadata
= metadata
133 self
.st_size
= long(st_size
)
134 self
.st_ctime
= float(st_ctime
)
135 if etag
[0] == '"' and etag
[-1] == '"':
141 return '(directory: %s)' % self
.filename
144 '(filename: %(filename)s, st_size: %(st_size)s, '
145 'st_ctime: %(st_ctime)s, etag: %(etag)s, '
146 'content_type: %(content_type)s, '
147 'metadata: %(metadata)s)' %
148 dict(filename
=self
.filename
,
149 st_size
=self
.st_size
,
150 st_ctime
=self
.st_ctime
,
152 content_type
=self
.content_type
,
153 metadata
=self
.metadata
))
155 def __cmp__(self
, other
):
156 if not isinstance(other
, self
.__class
__):
157 raise ValueError('Argument to cmp must have the same type. '
158 'Expect %s, got %s', self
.__class
__.__name
__,
159 other
.__class
__.__name
__)
160 if self
.filename
> other
.filename
:
162 elif self
.filename
< other
.filename
:
168 return hash(self
.etag
)
169 return hash(self
.filename
)
172 CSFileStat
= GCSFileStat
def get_metadata(headers):
  """Get user defined options from HTTP response headers.

  Args:
    headers: a mapping of HTTP header names to values. Only its items()
      method is used.

  Returns:
    A new dict holding the headers whose lower-cased name starts with one of
    the prefixes listed in _GCS_METADATA. Keys keep their original casing.
  """
  metadata_prefixes = tuple(_GCS_METADATA)
  # items() instead of iteritems(): it is available on both Python 2 and
  # Python 3 mappings, whereas iteritems() only exists on Python 2.
  return dict((name, value) for name, value in headers.items()
              if name.lower().startswith(metadata_prefixes))
181 def validate_bucket_name(name
):
182 """Validate a Google Storage bucket name.
185 name: a Google Storage bucket name with no prefix or suffix.
188 ValueError: if name is invalid.
191 if not _GCS_BUCKET_REGEX
.match(name
):
192 raise ValueError('Bucket should be 3-63 characters long using only a-z,'
193 '0-9, underscore, dash or dot but got %s' % name
)
196 def validate_bucket_path(path
):
197 """Validate a Google Cloud Storage bucket path.
200 path: a Google Storage bucket path. It should have form '/bucket'.
203 ValueError: if path is invalid.
206 if not _GCS_BUCKET_PATH_REGEX
.match(path
):
207 raise ValueError('Bucket should have format /bucket '
211 def validate_file_path(path
):
212 """Validate a Google Cloud Storage file path.
215 path: a Google Storage file path. It should have form '/bucket/filename'.
218 ValueError: if path is invalid.
221 if not _GCS_FULLPATH_REGEX
.match(path
):
222 raise ValueError('Path should have format /bucket/filename '
226 def _process_path_prefix(path_prefix
):
227 """Validate and process a Google Cloud Storage path prefix.
230 path_prefix: a Google Cloud Storage path prefix of format '/bucket/prefix'
231 or '/bucket/' or '/bucket'.
234 ValueError: if path is invalid.
237 a tuple of /bucket and prefix. prefix can be None.
239 _validate_path(path_prefix
)
240 if not _GCS_PATH_PREFIX_REGEX
.match(path_prefix
):
241 raise ValueError('Path prefix should have format /bucket, /bucket/, '
242 'or /bucket/prefix but got %s.' % path_prefix
)
243 bucket_name_end
= path_prefix
.find('/', 1)
246 if bucket_name_end
!= -1:
247 bucket
= path_prefix
[:bucket_name_end
]
248 prefix
= path_prefix
[bucket_name_end
+ 1:] or None
249 return bucket
, prefix
252 def _validate_path(path
):
253 """Basic validation of Google Storage paths.
256 path: a Google Storage path. It should have form '/bucket/filename'
260 ValueError: if path is invalid.
261 TypeError: if path is not of type basestring.
264 raise ValueError('Path is empty')
265 if not isinstance(path
, basestring
):
266 raise TypeError('Path should be a string but is %s (%s).' %
267 (path
.__class
__, path
))
270 def validate_options(options
):
271 """Validate Google Cloud Storage options.
274 options: a str->basestring dict of options to pass to Google Cloud Storage.
277 ValueError: if option is not supported.
278 TypeError: if option is not of type str or value of an option
279 is not of type basestring.
284 for k
, v
in options
.iteritems():
285 if not isinstance(k
, str):
286 raise TypeError('option %r should be a str.' % k
)
287 if not any(k
.lower().startswith(valid
) for valid
in _GCS_OPTIONS
):
288 raise ValueError('option %s is not supported.' % k
)
289 if not isinstance(v
, basestring
):
290 raise TypeError('value %r for option %s should be of type basestring.' %
def http_time_to_posix(http_time):
  """Translate an HTTP date string into posix time.

  The HTTP time format is described at
  http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3.1

  Args:
    http_time: time in RFC 2616 format, e.g.
      "Mon, 20 Nov 1995 19:12:08 GMT". May be None.

  Returns:
    Secs from unix epoch, or None when http_time is None.
  """
  if http_time is None:
    return None
  # parsedate_tz keeps the timezone offset so mktime_tz can fold it back in.
  parsed = email_utils.parsedate_tz(http_time)
  return email_utils.mktime_tz(parsed)
311 def posix_time_to_http(posix_time
):
312 """Convert posix time to HTTP header time format.
315 posix_time: unix time.
318 A datetime str in RFC 2616 format.
321 return email_utils
.formatdate(posix_time
, usegmt
=True)
# strptime/strftime layout of the date and time part of a GCS datetime str,
# without fractional seconds and without the trailing 'Z'.
_DT_FORMAT = '%Y-%m-%dT%H:%M:%S'


def dt_str_to_posix(dt_str):
  """Convert a UTC datetime str to posix time.

  The datetime str is of format %Y-%m-%dT%H:%M:%S.%fZ,
  e.g. 2013-04-12T00:22:27.978Z. According to ISO 8601, T is a separator
  between date and time when they are on the same line.
  Z indicates UTC (zero meridian). A str without the fractional part,
  e.g. 2013-04-12T00:22:27Z, is accepted as well.

  A pointer: http://www.cl.cam.ac.uk/~mgk25/iso-time.html

  This is used to parse LastModified node from GCS's GET bucket XML response.

  Args:
    dt_str: A datetime str.

  Returns:
    Whole secs from unix epoch (midnight 1970-01-01 UTC), as computed by
    calendar.timegm. The fractional part of dt_str is discarded.

  Raises:
    ValueError: if the date and time part does not match _DT_FORMAT.
  """
  # partition() never fails on a str without '.', unlike unpacking the result
  # of split('.') into exactly two names.
  parsable, dot, _ = dt_str.partition('.')
  if not dot and parsable.endswith('Z'):
    # No fraction was cut off, so the UTC designator is still attached.
    parsable = parsable[:-1]
  dt = datetime.datetime.strptime(parsable, _DT_FORMAT)
  return calendar.timegm(dt.utctimetuple())
def posix_to_dt_str(posix):
  """Format posix time as a UTC datetime str; reverse of dt_str_to_posix.

  This is used by GCS stub to generate GET bucket XML response.

  Args:
    posix: A float of secs from unix epoch.

  Returns:
    A datetime str of the form %Y-%m-%dT%H:%M:%S.000Z. The fractional part
    is always written as .000, whatever the fraction of posix is.
  """
  utc_dt = datetime.datetime.utcfromtimestamp(posix)
  return utc_dt.strftime(_DT_FORMAT) + '.000Z'
368 """Whether we should hit GCS dev appserver stub."""
369 server_software
= os
.environ
.get('SERVER_SOFTWARE')
370 if server_software
is None:
372 if 'remote_api' in server_software
:
374 if server_software
.startswith(('Development', 'testutil')):
380 """Return URL for GCS emulation on dev appserver."""
381 return 'http://%s%s' % (os
.environ
.get('HTTP_HOST'), LOCAL_GCS_ENDPOINT
)
384 def memory_usage(method
):
385 """Log memory usage before and after a method."""
386 def wrapper(*args
, **kwargs
):
387 logging
.info('Memory before method %s is %s.',
388 method
.__name
__, runtime
.memory_usage().current())
389 result
= method(*args
, **kwargs
)
390 logging
.info('Memory after method %s is %s',
391 method
.__name
__, runtime
.memory_usage().current())
396 def _add_ns(tagname
):
397 return '{%(ns)s}%(tag)s' % {'ns': CS_XML_NS
,
# Tag names passed through _add_ns, one constant per tag.
_T_CONTENTS = _add_ns('Contents')
_T_LAST_MODIFIED = _add_ns('LastModified')
_T_ETAG = _add_ns('ETag')
_T_KEY = _add_ns('Key')
_T_SIZE = _add_ns('Size')
_T_PREFIX = _add_ns('Prefix')
_T_COMMON_PREFIXES = _add_ns('CommonPrefixes')
_T_NEXT_MARKER = _add_ns('NextMarker')
_T_IS_TRUNCATED = _add_ns('IsTruncated')