# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import json
import posixpath
import urllib

from docs_server_utils import StringIdentity
from environment_wrappers import CreateUrlFetcher
from file_system import FileSystem, FileNotFoundError, StatInfo
from future import Future
from path_util import (AssertIsDirectory, AssertIsFile, AssertIsValid,
                       IsDirectory, Join)


# See gcs_file_system_provider.py for documentation on using Google Cloud
# Storage as a filesystem.
#
# Note that the path requirements for GCS are different for the docserver;
# GCS requires that paths start with a '/', while we require that they don't.


# Name of the file containing the Git hash of the latest commit sync'ed
# to Cloud Storage. This file is generated by the Github->GCS sync script.
_LAST_COMMIT_HASH_FILENAME = '.__lastcommit.txt'

# Base URL for GCS requests.
_STORAGE_API_BASE = 'https://www.googleapis.com/storage/v1'
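# Objects are fetched from URLs of the form
#   https://www.googleapis.com/storage/v1/b/<bucket>/o/<escaped object name>
# (see _FetchObject below).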


class CloudStorageFileSystem(FileSystem):
  '''FileSystem implementation which fetches resources from Google Cloud
  Storage.
  '''
  def __init__(self, bucket, debug_bucket_prefix=None):
    self._bucket = bucket
    self._access_token = None
    self._last_commit_hash = None
    AssertIsValid(self._bucket)

  def Read(self, paths, skip_not_found=False):
    def resolve():
      result = {}
      for path in paths:
        if IsDirectory(path):
          result[path] = self._ListDir(path)
        else:
          result[path] = self._ReadFile(path)
      return result

    return Future(callback=resolve)

  def Refresh(self):
    return Future(value=())

  def Stat(self, path):
    AssertIsValid(path)
    return self._CreateStatInfo(path)

  def GetIdentity(self):
    return '@'.join((self.__class__.__name__, StringIdentity(self._bucket)))

  def _CreateStatInfo(self, path):
    # Everything in the bucket was synced at the same commit, so every path
    # shares the version stamp recorded in _LAST_COMMIT_HASH_FILENAME.
    if not self._last_commit_hash:
      self._last_commit_hash = self._ReadFile(_LAST_COMMIT_HASH_FILENAME)
    if IsDirectory(path):
      child_versions = dict((filename, self._last_commit_hash)
                            for filename in self._ListDir(path))
    else:
      child_versions = None
    return StatInfo(self._last_commit_hash, child_versions)

  def _ReadFile(self, path):
    AssertIsFile(path)
    return self._FetchObjectData(path)

  def _ListDir(self, path, recursive=False):
    AssertIsDirectory(path)
    # The listbucket method uses a prefix approach to simulate hierarchy.
    # Calling it with the "delimiter" argument set to '/' gets only files
    # directly inside the directory, not all recursive content.
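    #
    # A listing response looks roughly like this (hypothetical excerpt of
    # the JSON consumed below):
    #   { "items":    [ {"name": "docs/index.html"}, ... ],
    #     "prefixes": [ "docs/templates/", ... ] }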
    #
    # Subdirectories are returned in the 'prefixes' property, but they are
    # full paths from the root. This plucks off the name of the leaf with a
    # trailing '/'.
    def path_from_prefix(prefix):
      return posixpath.split(posixpath.split(prefix)[0])[1] + '/'
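    # For example, with the hypothetical prefix 'docs/templates/':
    #   posixpath.split('docs/templates/') -> ('docs/templates', '')
    #   posixpath.split('docs/templates')  -> ('docs', 'templates')
    # so path_from_prefix('docs/templates/') returns 'templates/'.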

    query = { 'prefix': path }
    if not recursive:
      query['delimiter'] = '/'
    root_object = json.loads(self._FetchObject('', query=query))
    files = [posixpath.basename(o['name'])
             for o in root_object.get('items', [])]
    dirs = [path_from_prefix(prefix)
            for prefix in root_object.get('prefixes', [])]
    return files + dirs

  def _FetchObject(self, path, query={}):
    # Escape the path, including slashes.
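    # For example, urllib.quote('docs/a b.html', safe='') returns
    # 'docs%2Fa%20b.html' (a hypothetical name), which the objects endpoint
    # treats as a single object name rather than a nested URL path.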
    url_path = urllib.quote(path.lstrip('/'), safe='')
    fetcher = CreateUrlFetcher()
    object_url = '%s/b/%s/o/%s' % (_STORAGE_API_BASE, self._bucket, url_path)
    response = fetcher.Fetch(object_url, query=query)
    if response.status_code != 200:
      raise FileNotFoundError(
          'Path %s not found in GCS bucket %s' % (path, self._bucket))
    return response.content

  def _FetchObjectData(self, path, query={}):
    # Copy the query so the caller's dict (or the mutable default) is never
    # modified; 'alt=media' asks the API for the object's content rather
    # than its JSON metadata.
    q = query.copy()
    q.update({ 'alt': 'media' })
    return self._FetchObject(path, query=q)

  def __repr__(self):
    return 'CloudStorageFileSystem(%s)' % self._bucket
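

# A minimal usage sketch, assuming a reachable bucket named 'example-docs'
# (hypothetical) that the Github->GCS sync script has populated:
#
#   fs = CloudStorageFileSystem('example-docs')
#   data = fs.Read(['manifest.json', 'docs/']).Get()
#   data['manifest.json']  # file contents
#   data['docs/']          # names of files/subdirectories under docs/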