Enable Enterprise enrollment on desktop builds.
[chromium-blink-merge.git] / chrome / common / extensions / docs / server2 / new_github_file_system.py
blob5aa1c2d9d82bdda36e22785ea0b893f16bcc0f52
1 # Copyright 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 import json
6 import logging
7 from cStringIO import StringIO
8 import posixpath
9 import traceback
10 from zipfile import ZipFile
12 import appengine_blobstore as blobstore
13 from appengine_url_fetcher import AppEngineUrlFetcher
14 from appengine_wrappers import urlfetch
15 from docs_server_utils import StringIdentity
16 from file_system import FileNotFoundError, FileSystem, FileSystemError, StatInfo
17 from future import Future
18 from object_store_creator import ObjectStoreCreator
19 from path_util import AssertIsDirectory, IsDirectory
20 import url_constants
# Blobstore namespace under which GithubFileSystem stores and looks up the
# downloaded repository zipballs.
_GITHUB_REPOS_NAMESPACE = 'GithubRepos'
def _LoadCredentials(object_store_creator):
  '''Returns a (username, password) tuple read from the 'password' category
  object store created via |object_store_creator|.

  Both values are looked up with .get(), so an element is presumably None when
  nothing has been stored for it (assumes GetMulti resolves to a dict-like
  object; confirm against the object store implementation).
  '''
  password_store = object_store_creator.Create(
      GithubFileSystem,
      app_version=None,
      category='password',
      # The credentials must survive refreshes, so never start this store
      # empty regardless of how |object_store_creator| is configured.
      start_empty=False)
  password_data = password_store.GetMulti(('username', 'password')).Get()
  return password_data.get('username'), password_data.get('password')
class _GithubZipFile(object):
  '''A view of a ZipFile with a more convenient interface which ignores the
  'zipball' prefix that all paths have. The zip files that come straight from
  GitHub have paths like ['zipball/foo.txt', 'zipball/bar.txt'] but we only
  care about ['foo.txt', 'bar.txt'].
  '''

  @classmethod
  def Create(cls, repo_name, blob):
    '''Builds a _GithubZipFile from the raw zip bytes in |blob|. |repo_name| is
    only used in log messages.

    Returns None, after logging a warning, if |blob| cannot be opened as a zip,
    if the zip has no entries, or if the entries do not all live under one
    common top-level directory.
    '''
    try:
      zipball = ZipFile(StringIO(blob))
    except Exception:
      # Deliberately broad: whatever went wrong, |blob| is unusable as a zip.
      # Unlike a bare "except:", this still lets KeyboardInterrupt and
      # SystemExit propagate.
      logging.warning('zipball "%s" is not a valid zip' % repo_name)
      return None

    names = zipball.namelist()
    if not names:
      logging.warning('zipball "%s" is empty' % repo_name)
      return None

    name_prefix = None  # probably 'zipball'
    paths = []
    for name in names:
      prefix, separator, path = name.partition('/')
      # An entry without a '/' sits outside any top-level directory, so it
      # cannot share the common prefix; reject it the same way as a mismatched
      # prefix rather than failing to unpack. Compare against None explicitly
      # so that an empty first prefix is still checked for consistency.
      if not separator or (name_prefix is not None and prefix != name_prefix):
        logging.warning('zipball "%s" has names with inconsistent prefix: %s' %
                        (repo_name, names))
        return None
      name_prefix = prefix
      paths.append(path)
    return cls(zipball, name_prefix, paths)

  def __init__(self, zipball, name_prefix, paths):
    # |zipball| is the underlying ZipFile, |name_prefix| the top-level directory
    # shared by every entry, and |paths| the entry names with that directory
    # stripped off.
    self._zipball = zipball
    self._name_prefix = name_prefix
    self._paths = paths

  def Paths(self):
    '''Return all file paths in this zip file.
    '''
    return self._paths

  def List(self, path):
    '''Returns all files within a directory at |path|. Not recursive. Paths
    are returned relative to |path|.
    '''
    AssertIsDirectory(path)
    # Keep entries below |path| whose remainder has no further '/', other than
    # a single trailing one (which marks an immediate subdirectory).
    return [p[len(path):] for p in self._paths
            if p != path and
               p.startswith(path) and
               '/' not in p[len(path):].rstrip('/')]

  def Read(self, path):
    '''Returns the contents of |path|. Raises a KeyError if it doesn't exist.
    '''
    return self._zipball.read(posixpath.join(self._name_prefix, path))
class GithubFileSystem(FileSystem):
  '''Allows reading from a github.com repository.
  '''
  @staticmethod
  def Create(owner, repo, object_store_creator):
    '''Creates a GithubFileSystem that corresponds to a single github repository
    specified by |owner| and |repo|.
    '''
    return GithubFileSystem(
        url_constants.GITHUB_REPOS,
        owner,
        repo,
        object_store_creator,
        AppEngineUrlFetcher)

  @staticmethod
  def ForTest(repo, fake_fetcher, path=None, object_store_creator=None):
    '''Creates a GithubFileSystem that can be used for testing. It reads zip
    files and commit data from server2/test_data/github_file_system/test_owner
    instead of github.com. It reads from files specified by |repo|.
    '''
    return GithubFileSystem(
        path if path is not None else 'test_data/github_file_system',
        'test_owner',
        repo,
        object_store_creator or ObjectStoreCreator.ForTest(),
        fake_fetcher)

  def __init__(self, base_url, owner, repo, object_store_creator, Fetcher):
    '''|base_url|, |owner| and |repo| are joined to form the repository URL.
    |Fetcher| is called with that URL to build the fetcher used for all
    requests. |object_store_creator| supplies the credential, up-to-date and
    stat caches.
    '''
    self._repo_key = posixpath.join(owner, repo)
    self._repo_url = posixpath.join(base_url, owner, repo)
    self._username, self._password = _LoadCredentials(object_store_creator)
    self._blobstore = blobstore.AppEngineBlobstore()
    self._fetcher = Fetcher(self._repo_url)
    # Stores whether the github is up-to-date. This will either be True or
    # empty, the emptiness most likely due to this being a cron run.
    self._up_to_date_cache = object_store_creator.Create(
        GithubFileSystem, category='up-to-date')
    # Caches the zip file's stat. Overrides start_empty=False and uses
    # |self._up_to_date_cache| to determine whether we need to refresh.
    self._stat_cache = object_store_creator.Create(
        GithubFileSystem, category='stat-cache', start_empty=False)

    # Created lazily in |_EnsureRepoZip|.
    self._repo_zip = None

  def _EnsureRepoZip(self):
    '''Initializes |self._repo_zip| if it hasn't already been (i.e. if
    _EnsureRepoZip has never been called before). In that case |self._repo_zip|
    will be set to a Future of _GithubZipFile and the fetch process started,
    whether that be from a blobstore or if necessary all the way from GitHub.
    '''
    if self._repo_zip is not None:
      return

    repo_key, repo_url, username, password = (
        self._repo_key, self._repo_url, self._username, self._password)

    def fetch_from_blobstore(version):
      '''Returns a Future which resolves to the _GithubZipFile for this repo
      fetched from blobstore. Falls back to fetch_from_github(version) if the
      blob is missing or cannot be parsed.
      '''
      blob = None
      try:
        blob = self._blobstore.Get(repo_url, _GITHUB_REPOS_NAMESPACE)
      except blobstore.BlobNotFoundError:
        # Handled below together with a None result.
        pass

      if blob is None:
        logging.warning('No blob for %s found in datastore' % repo_key)
        return fetch_from_github(version)

      repo_zip = _GithubZipFile.Create(repo_key, blob)
      if repo_zip is None:
        logging.warning('Blob for %s was corrupted in blobstore!?' % repo_key)
        return fetch_from_github(version)

      return Future(value=repo_zip)

    def fetch_from_github(version):
      '''Returns a Future which resolves to the _GithubZipFile for this repo
      fetched new from GitHub, then writes it to blobstore and |version| to the
      stat caches.
      '''
      github_future = self._fetcher.FetchAsync(
          'zipball', username=username, password=password)
      def resolve():
        try:
          blob = github_future.Get().content
        except urlfetch.DownloadError:
          raise FileSystemError('Failed to download repo %s file from %s' %
                                (repo_key, repo_url))

        repo_zip = _GithubZipFile.Create(repo_key, blob)
        if repo_zip is None:
          raise FileSystemError('Blob for %s was fetched corrupted from %s' %
                                (repo_key, repo_url))

        # Only persist the blob and mark the caches once the download has been
        # validated as a usable zip.
        self._blobstore.Set(self._repo_url, blob, _GITHUB_REPOS_NAMESPACE)
        self._up_to_date_cache.Set(repo_key, True)
        self._stat_cache.Set(repo_key, version)
        return repo_zip
      return Future(callback=resolve)

    # To decide whether we need to re-stat, and from there whether to re-fetch,
    # make use of ObjectStore's start-empty configuration. If
    # |object_store_creator| is configured to start empty then our creator
    # wants to refresh (e.g. running a cron), so fetch the live stat from
    # GitHub. If the stat hasn't changed since last time then no reason to
    # re-fetch from GitHub, just take from blobstore.

    cached_version = self._stat_cache.Get(repo_key).Get()
    if self._up_to_date_cache.Get(repo_key).Get() is None:
      # This is either a cron or an instance where a cron has never been run.
      live_version = self._FetchLiveVersion(username, password)
      if cached_version != live_version:
        # Note: branch intentionally triggered if |cached_version| is None.
        logging.info('%s has changed, fetching from GitHub.' % repo_url)
        self._repo_zip = fetch_from_github(live_version)
      else:
        # Already up to date. Fetch from blobstore. No need to set up-to-date
        # to True here since it'll already be set for instances, and it'll
        # never be set for crons.
        logging.info('%s is up to date.' % repo_url)
        self._repo_zip = fetch_from_blobstore(cached_version)
    else:
      # Instance where cron has been run. It should be in blobstore.
      self._repo_zip = fetch_from_blobstore(cached_version)

    assert self._repo_zip is not None

  def _FetchLiveVersion(self, username, password):
    '''Fetches the current repository version from github.com and returns it.
    The version is a 'sha' hash value.

    Raises a FileSystemError if the response is not JSON or does not carry a
    'sha' entry.
    '''
    # TODO(kalman): Do this asynchronously (use FetchAsync).
    result = self._fetcher.Fetch(
        'commits/HEAD', username=username, password=password)

    try:
      return json.loads(result.content)['sha']
    except (KeyError, TypeError, ValueError):
      # ValueError: the content is not JSON. KeyError: a JSON object without
      # 'sha'. TypeError: valid JSON that is not an object (e.g. a list), which
      # cannot be indexed by 'sha'.
      raise FileSystemError('Error parsing JSON from repo %s: %s' %
                            (self._repo_url, traceback.format_exc()))

  def Refresh(self):
    '''Returns the result of reading the repository root (the empty path).
    '''
    return self.ReadSingle('')

  def Read(self, paths, skip_not_found=False):
    '''Returns a directory mapping |paths| to the contents of the file at each
    path. If path ends with a '/', it is treated as a directory and is mapped to
    a list of filenames in that directory.

    The result is a Future. If a path is not in the repository, resolving the
    Future raises FileNotFoundError, unless |skip_not_found| is True, in which
    case that path is simply left out of the result.
    NOTE(review): this is presumed to match the FileSystem.Read contract for
    |skip_not_found| -- confirm against the base class.
    '''
    self._EnsureRepoZip()
    def resolve():
      repo_zip = self._repo_zip.Get()
      # Build the lookup once instead of scanning the path list per request.
      known_paths = set(repo_zip.Paths())
      reads = {}
      for path in paths:
        if path not in known_paths:
          if skip_not_found:
            continue
          raise FileNotFoundError('"%s": %s not found' % (self._repo_key, path))
        if IsDirectory(path):
          reads[path] = repo_zip.List(path)
        else:
          reads[path] = repo_zip.Read(path)
      return reads
    return Future(callback=resolve)

  def Stat(self, path):
    '''Stats |path| returning its version as as StatInfo object. If |path| ends
    with a '/', it is assumed to be a directory and the StatInfo object returned
    includes child_versions for all paths in the directory.

    File paths do not include the name of the zip file, which is arbitrary and
    useless to consumers.

    The repository is versioned as a whole: every path, and every child of a
    directory, reports the version held in the stat cache for this repository
    (the commit 'sha' recorded when the zipball was fetched from GitHub).

    Raises FileNotFoundError if |path| is not in the repository. Note that this
    blocks on the repository zip rather than returning a Future.
    '''
    self._EnsureRepoZip()
    repo_zip = self._repo_zip.Get()

    if path not in repo_zip.Paths():
      raise FileNotFoundError('"%s" does not contain file "%s"' %
                              (self._repo_key, path))

    version = self._stat_cache.Get(self._repo_key).Get()
    assert version is not None, ('There was a zipball in datastore; there '
                                 'should be a version cached for it')

    stat_info = StatInfo(version)
    if IsDirectory(path):
      stat_info.child_versions = dict((p, StatInfo(version))
                                      for p in repo_zip.List(path))
    return stat_info

  def GetIdentity(self):
    '''Returns the string form of StringIdentity applied to this class's name
    concatenated with the repository key.
    '''
    return '%s' % StringIdentity(self.__class__.__name__ + self._repo_key)

  def __repr__(self):
    return '%s(key=%s, url=%s)' % (type(self).__name__,
                                   self._repo_key,
                                   self._repo_url)