chrome/common/extensions/docs/server2/subversion_file_system.py

   1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 # Use of this source code is governed by a BSD-style license that can be
   3 # found in the LICENSE file.
   4
   5 import posixpath
   6 import traceback
   7 import xml.dom.minidom as xml
   8 from xml.parsers.expat import ExpatError
   9
  10 from appengine_url_fetcher import AppEngineUrlFetcher
  11 from docs_server_utils import StringIdentity
  12 from file_system import (
  13     FileNotFoundError, FileSystem, FileSystemError, StatInfo)
  14 from future import Future
  15 import url_constants
  16
  17
  18 def _ParseHTML(html):
  19   '''Unfortunately, the viewvc page has a stray </div> tag, so this takes care
  20   of all mismatched tags.
  21   '''
  22   try:
  23     return xml.parseString(html)
  24   except ExpatError as e:
  25     return _ParseHTML('\n'.join(
  26         line for (i, line) in enumerate(html.split('\n'))
  27         if e.lineno != i + 1))
  28
  29 def _InnerText(node):
  30   '''Like node.innerText in JS DOM, but strips surrounding whitespace.
  31   '''
  32   text = []
  33   if node.nodeValue:
  34     text.append(node.nodeValue)
  35   if hasattr(node, 'childNodes'):
  36     for child_node in node.childNodes:
  37       text.append(_InnerText(child_node))
  38   return ''.join(text).strip()
  39
  40 def _CreateStatInfo(html):
  41   parent_version = None
  42   child_versions = {}
  43
  44   # Try all of the tables until we find the ones that contain the data (the
  45   # directory and file versions are in different tables).
  46   for table in _ParseHTML(html).getElementsByTagName('table'):
  47     # Within the table there is a list of files. However, there may be some
  48     # things beforehand; a header, "parent directory" list, etc. We will deal
  49     # with that below by being generous and just ignoring such rows.
  50     rows = table.getElementsByTagName('tr')
  51
  52     for row in rows:
  53       cells = row.getElementsByTagName('td')
  54
  55       # The version of the directory will eventually appear in the soup of
  56       # table rows, like this:
  57       #
  58       # <tr>
  59       #   <td>Directory revision:</td>
  60       #   <td><a href=... title="Revision 214692">214692</a> (of...)</td>
  61       # </tr>
  62       #
  63       # So look out for that.
  64       if len(cells) == 2 and _InnerText(cells[0]) == 'Directory revision:':
  65         links = cells[1].getElementsByTagName('a')
  66         if len(links) != 2:
  67           raise FileSystemError('ViewVC assumption invalid: directory ' +
  68                                 'revision content did not have 2 <a> ' +
  69                                 ' elements, instead %s' % _InnerText(cells[1]))
  70         this_parent_version = _InnerText(links[0])
  71         int(this_parent_version)  # sanity check
  72         if parent_version is not None:
  73           raise FileSystemError('There was already a parent version %s, and ' +
  74                                 ' we just found a second at %s' %
  75                                 (parent_version, this_parent_version))
  76         parent_version = this_parent_version
  77
  78       # The version of each file is a list of rows with 5 cells: name, version,
  79       # age, author, and last log entry. Maybe the columns will change; we're
  80       # at the mercy viewvc, but this constant can be easily updated.
  81       if len(cells) != 5:
  82         continue
  83       name_element, version_element, _, __, ___ = cells
  84
  85       name = _InnerText(name_element)  # note: will end in / for directories
  86       try:
  87         version = int(_InnerText(version_element))
  88       except StandardError:
  89         continue
  90       child_versions[name] = str(version)
  91
  92     if parent_version and child_versions:
  93       break
  94
  95   return StatInfo(parent_version, child_versions)
  96
  97 class _AsyncFetchFuture(object):
  98   def __init__(self, paths, fetcher, args=None):
  99     def apply_args(path):
 100       return path if args is None else '%s?%s' % (path, args)
 101     # A list of tuples of the form (path, Future).
 102     self._fetches = [(path, fetcher.FetchAsync(apply_args(path)))
 103                      for path in paths]
 104     self._value = {}
 105     self._error = None
 106
 107   def _ListDir(self, directory):
 108     dom = xml.parseString(directory)
 109     files = [elem.childNodes[0].data for elem in dom.getElementsByTagName('a')]
 110     if '..' in files:
 111       files.remove('..')
 112     return files
 113
 114   def Get(self):
 115     for path, future in self._fetches:
 116       try:
 117         result = future.Get()
 118       except Exception as e:
 119         raise FileSystemError('Error fetching %s for Get: %s' %
 120             (path, traceback.format_exc()))
 121
 122       if result.status_code == 404:
 123         raise FileNotFoundError('Got 404 when fetching %s for Get, content %s' %
 124             (path, result.content))
 125       if result.status_code != 200:
 126         raise FileSystemError('Got %s when fetching %s for Get, content %s' %
 127             (result.status_code, path, result.content))
 128
 129       if path.endswith('/'):
 130         self._value[path] = self._ListDir(result.content)
 131       else:
 132         self._value[path] = result.content
 133     if self._error is not None:
 134       raise self._error
 135     return self._value
 136
 137 class SubversionFileSystem(FileSystem):
 138   '''Class to fetch resources from src.chromium.org.
 139   '''
 140   @staticmethod
 141   def Create(branch='trunk', revision=None):
 142     if branch == 'trunk':
 143       svn_path = 'trunk/src'
 144     else:
 145       svn_path = 'branches/%s/src' % branch
 146     return SubversionFileSystem(
 147         AppEngineUrlFetcher('%s/%s' % (url_constants.SVN_URL, svn_path)),
 148         AppEngineUrlFetcher('%s/%s' % (url_constants.VIEWVC_URL, svn_path)),
 149         svn_path,
 150         revision=revision)
 151
 152   def __init__(self, file_fetcher, stat_fetcher, svn_path, revision=None):
 153     self._file_fetcher = file_fetcher
 154     self._stat_fetcher = stat_fetcher
 155     self._svn_path = svn_path
 156     self._revision = revision
 157
 158   def Read(self, paths):
 159     args = None
 160     if self._revision is not None:
 161       # |fetcher| gets from svn.chromium.org which uses p= for version.
 162       args = 'p=%s' % self._revision
 163     return Future(delegate=_AsyncFetchFuture(paths,
 164                                              self._file_fetcher,
 165                                              args=args))
 166
 167   def Refresh(self):
 168     return Future(value=())
 169
 170   def Stat(self, path):
 171     directory, filename = posixpath.split(path)
 172     if self._revision is not None:
 173       # |stat_fetch| uses viewvc which uses pathrev= for version.
 174       directory += '?pathrev=%s' % self._revision
 175
 176     try:
 177       result = self._stat_fetcher.Fetch(directory)
 178     except Exception as e:
 179       raise FileSystemError('Error fetching %s for Stat: %s' %
 180           (path, traceback.format_exc()))
 181
 182     if result.status_code == 404:
 183       raise FileNotFoundError('Got 404 when fetching %s for Stat, content %s' %
 184           (path, result.content))
 185     if result.status_code != 200:
 186       raise FileNotFoundError('Got %s when fetching %s for Stat, content %s' %
 187           (result.status_code, path, result.content))
 188
 189     stat_info = _CreateStatInfo(result.content)
 190     if stat_info.version is None:
 191       raise FileSystemError('Failed to find version of dir %s' % directory)
 192     if path == '' or path.endswith('/'):
 193       return stat_info
 194     if filename not in stat_info.child_versions:
 195       raise FileNotFoundError(
 196           '%s from %s was not in child versions for Stat' % (filename, path))
 197     return StatInfo(stat_info.child_versions[filename])
 198
 199   def GetIdentity(self):
 200     # NOTE: no revision here, since it would mess up the caching of reads. It
 201     # probably doesn't matter since all the caching classes will use the result
 202     # of Stat to decide whether to re-read - and Stat has a ceiling of the
 203     # revision - so when the revision changes, so might Stat. That is enough.
 204     return '@'.join((self.__class__.__name__, StringIdentity(self._svn_path)))