# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import json
import posixpath
import urllib

from docs_server_utils import StringIdentity
from environment_wrappers import CreateUrlFetcher
from file_system import FileSystem, FileNotFoundError, StatInfo
from future import Future
from path_util import (AssertIsDirectory, AssertIsFile, AssertIsValid,
                       IsDirectory, Join)


# See gcs_file_system_provider.py for documentation on using Google Cloud
# Storage as a filesystem.
#
# Note that the path requirements for GCS are different for the docserver;
# GCS requires that paths start with a '/', while we require that they don't.


# Name of the file containing the Git hash of the latest commit sync'ed
# to Cloud Storage. This file is generated by the Github->GCS sync script.
_LAST_COMMIT_HASH_FILENAME = '.__lastcommit.txt'

# Base URL for GCS requests.
_STORAGE_API_BASE = 'https://www.googleapis.com/storage/v1'
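# Objects are fetched from URLs of the form
#   https://www.googleapis.com/storage/v1/b/<bucket>/o/<escaped object name>
# (see _FetchObject below).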


class CloudStorageFileSystem(FileSystem):
  '''FileSystem implementation which fetches resources from Google Cloud
  Storage.
  '''
  def __init__(self, bucket, debug_bucket_prefix=None):
    self._bucket = bucket
    self._access_token = None
    self._last_commit_hash = None
    AssertIsValid(self._bucket)

  def Read(self, paths, skip_not_found=False):
    def resolve():
      result = {}
      for path in paths:
        if IsDirectory(path):
          result[path] = self._ListDir(path)
        else:
          result[path] = self._ReadFile(path)
      return result

    return Future(callback=resolve)

  def Refresh(self):
    return Future(value=())

  def Stat(self, path):
    AssertIsValid(path)
    return self._CreateStatInfo(path)

  def GetIdentity(self):
    return '@'.join((self.__class__.__name__, StringIdentity(self._bucket)))

  def _CreateStatInfo(self, path):
    # Everything in the bucket was synced at the same commit, so every path
    # shares the version stamp recorded in _LAST_COMMIT_HASH_FILENAME.
    if not self._last_commit_hash:
      self._last_commit_hash = self._ReadFile(_LAST_COMMIT_HASH_FILENAME)
    if IsDirectory(path):
      child_versions = dict((filename, self._last_commit_hash)
                            for filename in self._ListDir(path))
    else:
      child_versions = None
    return StatInfo(self._last_commit_hash, child_versions)

  def _ReadFile(self, path):
    AssertIsFile(path)
    return self._FetchObjectData(path)

  def _ListDir(self, path, recursive=False):
    AssertIsDirectory(path)
    # The listbucket method uses a prefix approach to simulate hierarchy.
    # Calling it with the "delimiter" argument set to '/' gets only files
    # directly inside the directory, not all recursive content.
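    #
    # A listing response looks roughly like this (hypothetical excerpt of
    # the JSON consumed below):
    #   { "items":    [ {"name": "docs/index.html"}, ... ],
    #     "prefixes": [ "docs/templates/", ... ] }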
    #
    # Subdirectories are returned in the 'prefixes' property, but they are
    # full paths from the root. This plucks off the name of the leaf with a
    # trailing '/'.
    def path_from_prefix(prefix):
      return posixpath.split(posixpath.split(prefix)[0])[1] + '/'
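    # For example, with the hypothetical prefix 'docs/templates/':
    #   posixpath.split('docs/templates/') -> ('docs/templates', '')
    #   posixpath.split('docs/templates')  -> ('docs', 'templates')
    # so path_from_prefix('docs/templates/') returns 'templates/'.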

    query = { 'prefix': path }
    if not recursive:
      query['delimiter'] = '/'
    root_object = json.loads(self._FetchObject('', query=query))
    files = [posixpath.basename(o['name'])
             for o in root_object.get('items', [])]
    dirs = [path_from_prefix(prefix)
            for prefix in root_object.get('prefixes', [])]
    return files + dirs

  def _FetchObject(self, path, query={}):
    # Escape the path, including slashes.
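    # For example, urllib.quote('docs/a b.html', safe='') returns
    # 'docs%2Fa%20b.html' (a hypothetical name), which the objects endpoint
    # treats as a single object name rather than a nested URL path.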
    url_path = urllib.quote(path.lstrip('/'), safe='')
    fetcher = CreateUrlFetcher()
    object_url = '%s/b/%s/o/%s' % (_STORAGE_API_BASE, self._bucket, url_path)
    response = fetcher.Fetch(object_url, query=query)
    if response.status_code != 200:
      raise FileNotFoundError(
          'Path %s not found in GCS bucket %s' % (path, self._bucket))
    return response.content

  def _FetchObjectData(self, path, query={}):
    # Copy the query so the caller's dict (or the mutable default) is never
    # modified; 'alt=media' asks the API for the object's content rather
    # than its JSON metadata.
    q = query.copy()
    q.update({ 'alt': 'media' })
    return self._FetchObject(path, query=q)

  def __repr__(self):
    return 'CloudStorageFileSystem(%s)' % self._bucket
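

# A minimal usage sketch, assuming a reachable bucket named 'example-docs'
# (hypothetical) that the Github->GCS sync script has populated:
#
#   fs = CloudStorageFileSystem('example-docs')
#   data = fs.Read(['manifest.json', 'docs/']).Get()
#   data['manifest.json']  # file contents
#   data['docs/']          # names of files/subdirectories under docs/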