Disable view source for Developer Tools.
[chromium-blink-merge.git] / chrome / common / extensions / docs / server2 / new_github_file_system.py
blobf9d0266d4b9bcd9b7f182962949e3ed829e8da76
1 # Copyright 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 import json
6 import logging
7 from cStringIO import StringIO
8 import posixpath
9 import traceback
10 from zipfile import ZipFile
12 import appengine_blobstore as blobstore
13 from appengine_url_fetcher import AppEngineUrlFetcher
14 from appengine_wrappers import urlfetch
15 from docs_server_utils import StringIdentity
16 from file_system import FileNotFoundError, FileSystem, FileSystemError, StatInfo
17 from future import Future, Gettable
18 from object_store_creator import ObjectStoreCreator
19 import url_constants
# Namespace passed to the blobstore Get/Set calls that read and write the
# repository zipballs.
_GITHUB_REPOS_NAMESPACE = 'GithubRepos'
def _LoadCredentials(object_store_creator):
  '''Returns a (username, password) tuple read from the 'password' category
  object store that |object_store_creator| creates for GithubFileSystem.

  Each element is whatever the fetched data's get() yields for that key
  (presumably None when the key is absent -- confirm against ObjectStore).
  '''
  credential_keys = ('username', 'password')
  # start_empty=False is passed explicitly; app_version=None is presumably so
  # the credentials are not tied to a particular app version -- TODO confirm.
  credentials = object_store_creator.Create(
      GithubFileSystem,
      app_version=None,
      category='password',
      start_empty=False).GetMulti(credential_keys).Get()
  return tuple(credentials.get(key) for key in credential_keys)
class _GithubZipFile(object):
  '''A view of a ZipFile with a more convenient interface which ignores the
  'zipball' prefix that all paths have. The zip files that come straight from
  GitHub have paths like ['zipball/foo.txt', 'zipball/bar.txt'] but we only
  care about ['foo.txt', 'bar.txt'].
  '''

  @classmethod
  def Create(cls, repo_name, blob):
    '''Builds a _GithubZipFile from the raw zip bytes in |blob|.

    |repo_name| is only used in log messages. Returns None, after logging a
    warning, if |blob| cannot be opened as a zip, if the zip has no entries, or
    if its entries do not all share one top-level directory prefix.
    '''
    try:
      zipball = ZipFile(StringIO(blob))
    except Exception:
      # Not a bare "except:" so that KeyboardInterrupt, SystemExit and other
      # BaseException-only signals keep propagating instead of being reported
      # as a corrupt zip.
      logging.warning('zipball "%s" is not a valid zip' % repo_name)
      return None

    names = zipball.namelist()
    if not names:
      logging.warning('zipball "%s" is empty' % repo_name)
      return None

    name_prefix = None  # probably 'zipball'
    paths = []
    for name in names:
      prefix, separator, path = name.partition('/')
      # An entry without any '/' has no top-level directory at all, so it
      # cannot share the common prefix either. Compare against None rather
      # than truthiness so that an empty first prefix is still enforced.
      if not separator or (name_prefix is not None and prefix != name_prefix):
        logging.warning('zipball "%s" has names with inconsistent prefix: %s' %
                        (repo_name, names))
        return None
      name_prefix = prefix
      paths.append(path)
    return cls(zipball, name_prefix, paths)

  def __init__(self, zipball, name_prefix, paths):
    # |zipball| is the underlying ZipFile, |name_prefix| the top-level
    # directory shared by every entry, and |paths| the entry names with that
    # prefix (and the following '/') removed.
    self._zipball = zipball
    self._name_prefix = name_prefix
    self._paths = paths

  def Paths(self):
    '''Return all file paths in this zip file, relative to the common prefix.
    The returned list is the internal one, not a copy.
    '''
    return self._paths

  def List(self, path):
    '''Returns all files within a directory at |path|. Not recursive. Paths
    are returned relative to |path|. |path| must be '' or end with '/'.
    '''
    assert path == '' or path.endswith('/')
    # Keep direct children only: after dropping |path| and one optional
    # trailing '/' (which marks a subdirectory entry) no '/' may remain.
    return [p[len(path):] for p in self._paths
            if p != path and
               p.startswith(path) and
               '/' not in p[len(path):].rstrip('/')]

  def Read(self, path):
    '''Returns the contents of |path|. Raises a KeyError if it doesn't exist.
    '''
    return self._zipball.read(posixpath.join(self._name_prefix, path))
class GithubFileSystem(FileSystem):
  '''Allows reading from a github.com repository.
  '''
  @staticmethod
  def Create(owner, repo, object_store_creator):
    '''Creates a GithubFileSystem that corresponds to a single github repository
    specified by |owner| and |repo|.
    '''
    return GithubFileSystem(
        url_constants.GITHUB_REPOS,
        owner,
        repo,
        object_store_creator,
        AppEngineUrlFetcher)

  @staticmethod
  def ForTest(repo, fake_fetcher, path=None, object_store_creator=None):
    '''Creates a GithubFileSystem that can be used for testing. It reads zip
    files and commit data from server2/test_data/github_file_system/test_owner
    instead of github.com. It reads from files specified by |repo|.

    |path| replaces the default 'test_data/github_file_system' base when it is
    not None. |object_store_creator| defaults to ObjectStoreCreator.ForTest().
    '''
    return GithubFileSystem(
        path if path is not None else 'test_data/github_file_system',
        'test_owner',
        repo,
        object_store_creator or ObjectStoreCreator.ForTest(),
        fake_fetcher)

  def __init__(self, base_url, owner, repo, object_store_creator, Fetcher):
    '''|Fetcher| is a class (or factory) which is called with the repository
    URL to build the fetcher used for all GitHub requests.
    '''
    self._repo_key = '%s/%s' % (owner, repo)
    self._repo_url = '%s/%s/%s' % (base_url, owner, repo)
    self._username, self._password = _LoadCredentials(object_store_creator)
    self._blobstore = blobstore.AppEngineBlobstore()
    self._fetcher = Fetcher(self._repo_url)
    # Stores whether the github is up-to-date. This will either be True or
    # empty, the emptiness most likely due to this being a cron run.
    self._up_to_date_cache = object_store_creator.Create(
        GithubFileSystem, category='up-to-date')
    # Caches the zip file's stat. Overrides start_empty=False and use
    # |self._up_to_date_cache| to determine whether we need to refresh.
    self._stat_cache = object_store_creator.Create(
        GithubFileSystem, category='stat-cache', start_empty=False)

    # Created lazily in |_EnsureRepoZip|.
    self._repo_zip = None

  def _EnsureRepoZip(self):
    '''Initializes |self._repo_zip| if it hasn't already been (i.e. if
    _EnsureRepoZip has never been called before). In that case |self._repo_zip|
    will be set to a Future of _GithubZipFile and the fetch process started,
    whether that be from a blobstore or if necessary all the way from GitHub.
    '''
    if self._repo_zip is not None:
      return

    repo_key, repo_url, username, password = (
        self._repo_key, self._repo_url, self._username, self._password)

    def fetch_from_blobstore(version):
      '''Returns a Future which resolves to the _GithubZipFile for this repo
      fetched from blobstore. Falls back to fetch_from_github(version) if the
      blob is missing or cannot be parsed.
      '''
      # NOTE(review): |version| is the cached version and may be None when
      # nothing has been cached; on fallback it is written to the stat cache
      # as-is. Confirm that is intended.
      blob = None
      try:
        blob = self._blobstore.Get(repo_url, _GITHUB_REPOS_NAMESPACE)
      except blobstore.BlobNotFoundError:
        pass

      if blob is None:
        logging.warning('No blob for %s found in datastore' % repo_key)
        return fetch_from_github(version)

      repo_zip = _GithubZipFile.Create(repo_key, blob)
      if repo_zip is None:
        logging.warning('Blob for %s was corrupted in blobstore!?' % repo_key)
        return fetch_from_github(version)

      return Future(value=repo_zip)

    def fetch_from_github(version):
      '''Returns a Future which resolves to the _GithubZipFile for this repo
      fetched new from GitHub, then writes it to blobstore and |version| to the
      stat caches.
      '''
      github_future = self._fetcher.FetchAsync(
          'zipball', username=username, password=password)
      def resolve():
        try:
          blob = github_future.Get().content
        except urlfetch.DownloadError:
          raise FileSystemError('Failed to download repo %s file from %s' %
                                (repo_key, repo_url))

        repo_zip = _GithubZipFile.Create(repo_key, blob)
        if repo_zip is None:
          raise FileSystemError('Blob for %s was fetched corrupted from %s' %
                                (repo_key, repo_url))

        # Only persist once the download has been validated as a usable zip.
        self._blobstore.Set(repo_url, blob, _GITHUB_REPOS_NAMESPACE)
        self._up_to_date_cache.Set(repo_key, True)
        self._stat_cache.Set(repo_key, version)
        return repo_zip
      return Future(delegate=Gettable(resolve))

    # To decide whether we need to re-stat, and from there whether to re-fetch,
    # make use of ObjectStore's start-empty configuration. If
    # |object_store_creator| is configured to start empty then our creator
    # wants to refresh (e.g. running a cron), so fetch the live stat from
    # GitHub. If the stat hasn't changed since last time then no reason to
    # re-fetch from GitHub, just take from blobstore.

    cached_version = self._stat_cache.Get(repo_key).Get()
    if self._up_to_date_cache.Get(repo_key).Get() is None:
      # This is either a cron or an instance where a cron has never been run.
      live_version = self._FetchLiveVersion(username, password)
      if cached_version != live_version:
        # Note: branch intentionally triggered if |cached_version| is None.
        logging.info('%s has changed, fetching from GitHub.' % repo_url)
        self._repo_zip = fetch_from_github(live_version)
      else:
        # Already up to date. Fetch from blobstore. No need to set up-to-date
        # to True here since it'll already be set for instances, and it'll
        # never be set for crons.
        logging.info('%s is up to date.' % repo_url)
        self._repo_zip = fetch_from_blobstore(cached_version)
    else:
      # Instance where cron has been run. It should be in blobstore.
      self._repo_zip = fetch_from_blobstore(cached_version)

    assert self._repo_zip is not None

  def _FetchLiveVersion(self, username, password):
    '''Fetches the current repository version from github.com and returns it.
    The version is a 'sha' hash value.

    Raises a FileSystemError if the response cannot be parsed as a JSON object
    containing a 'sha' key.
    '''
    # TODO(kalman): Do this asynchronously (use FetchAsync).
    result = self._fetcher.Fetch(
        'commits/HEAD', username=username, password=password)

    try:
      return json.loads(result.content)['sha']
    except (KeyError, TypeError, ValueError):
      # TypeError covers a response whose JSON is not an object (for example a
      # list) or whose content is not a string at all; report it the same way
      # as any other unparseable response.
      raise FileSystemError('Error parsing JSON from repo %s: %s' %
                            (self._repo_url, traceback.format_exc()))

  def Refresh(self):
    '''Refreshes by reading the root directory (via ReadSingle, which is not
    defined in this class), which in turn ensures the repository zip is loaded.
    '''
    return self.ReadSingle('')

  def Read(self, paths):
    '''Returns a Future which resolves to a dictionary mapping |paths| to the
    contents of the file at each path. If path ends with a '/' (or is ''), it
    is treated as a directory and is mapped to a list of filenames in that
    directory.

    Resolving the Future raises a FileNotFoundError if any path is not in the
    repository zip.
    '''
    self._EnsureRepoZip()
    def resolve():
      repo_zip = self._repo_zip.Get()
      # Build the lookup set once rather than scanning the path list for every
      # requested path.
      known_paths = set(repo_zip.Paths())
      reads = {}
      for path in paths:
        if path not in known_paths:
          raise FileNotFoundError('"%s": %s not found' % (self._repo_key, path))
        if path == '' or path.endswith('/'):
          reads[path] = repo_zip.List(path)
        else:
          reads[path] = repo_zip.Read(path)
      return reads
    return Future(delegate=Gettable(resolve))

  def Stat(self, path):
    '''Stats |path| returning its version as a StatInfo object. If |path| ends
    with a '/' (or is ''), it is assumed to be a directory and the StatInfo
    object returned includes child_versions for all paths in the directory.

    File paths do not include the name of the zip file, which is arbitrary and
    useless to consumers.

    Every path reports the same version: the repository version held in the
    stat cache for this repo, which is written whenever the zipball is fetched
    from GitHub.

    Raises a FileNotFoundError if |path| is not in the repository zip.
    '''
    self._EnsureRepoZip()
    repo_zip = self._repo_zip.Get()

    if path not in repo_zip.Paths():
      raise FileNotFoundError('"%s" does not contain file "%s"' %
                              (self._repo_key, path))

    version = self._stat_cache.Get(self._repo_key).Get()
    assert version is not None, ('There was a zipball in datastore; there '
                                 'should be a version cached for it')

    stat_info = StatInfo(version)
    if path == '' or path.endswith('/'):
      stat_info.child_versions = dict((p, StatInfo(version))
                                      for p in repo_zip.List(path))
    return stat_info

  def GetIdentity(self):
    '''Returns a string identity derived from the class name and repo key.
    '''
    return '%s' % StringIdentity(self.__class__.__name__ + self._repo_key)

  def __repr__(self):
    return '%s(key=%s, url=%s)' % (type(self).__name__,
                                   self._repo_key,
                                   self._repo_url)