1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
7 import xml
.dom
.minidom
as xml
8 from xml
.parsers
.expat
import ExpatError
10 from appengine_url_fetcher
import AppEngineUrlFetcher
11 from docs_server_utils
import StringIdentity
12 from file_system
import (
13 FileNotFoundError
, FileSystem
, FileSystemError
, StatInfo
)
14 from future
import Future
def _ParseHTML(html):
  '''Parses |html| with xml.dom.minidom, dropping lines that break the parse.

  Unfortunately, the viewvc page has a stray </div> tag, so this takes care
  of all mismatched tags: whenever the parser reports an error, the line it
  points at is removed and the remaining text is parsed again.

  Returns the parsed minidom document.

  Raises ExpatError if removing the reported line leaves the text unchanged
  (for example when the text is empty), because retrying could never succeed.
  '''
  # Iterating rather than recursing means a page with many bad lines cannot
  # exhaust the interpreter's recursion limit.
  while True:
    try:
      return xml.parseString(html)
    except ExpatError as e:
      # ExpatError.lineno is 1-based while enumerate() counts from 0.
      remaining = '\n'.join(
          line for (i, line) in enumerate(html.split('\n'))
          if e.lineno != i + 1)
      if remaining == html:
        # Nothing was removed, so the next attempt would fail identically;
        # surface the parser error instead of looping forever.
        raise
      html = remaining
def _InnerText(node):
  '''Like node.innerText in JS DOM, but strips surrounding whitespace.

  Joins |node|'s own nodeValue (when it has one) with the inner text of each
  of its child nodes, in document order. Every recursive call strips its own
  result, so whitespace at the edges of each nested node is dropped as well.
  '''
  # NOTE(review): the condition guarding the nodeValue append was not visible
  # in the reviewed extract; a truthiness test is assumed -- confirm.
  pieces = []
  own_value = node.nodeValue
  if own_value:
    pieces.append(own_value)
  # Nodes without a childNodes attribute simply contribute no children.
  children = getattr(node, 'childNodes', ())
  pieces.extend(_InnerText(child) for child in children)
  return ''.join(pieces).strip()
def _CreateStatInfo(html):
  '''Builds a StatInfo from the HTML of a ViewVC directory page.

  The directory's own version is the text of the first link in the
  "Directory revision:" row; it stays None if no such row is found. Each
  5-cell row whose second cell is an integer contributes an entry mapping the
  row's name to that version (as a string).

  Raises FileSystemError if the "Directory revision:" row does not hold two
  links, or if a second such row is encountered.
  '''
  # NOTE(review): a few control-flow lines of this function (the two initial
  # assignments, the row loop, the link-count and cell-count checks, the
  # try/except around int() and the early exit from the table loop) were not
  # visible in the reviewed extract. They are reconstructed from the
  # surrounding comments and messages -- confirm against the repository.
  parent_version = None
  child_versions = {}

  # Try all of the tables until we find the ones that contain the data (the
  # directory and file versions are in different tables).
  for table in _ParseHTML(html).getElementsByTagName('table'):
    # Within the table there is a list of files. However, there may be some
    # things beforehand; a header, "parent directory" list, etc. We will deal
    # with that below by being generous and just ignoring such rows.
    rows = table.getElementsByTagName('tr')

    for row in rows:
      cells = row.getElementsByTagName('td')

      # The version of the directory will eventually appear in the soup of
      # table rows, like this:
      #
      # <tr>
      #   <td>Directory revision:</td>
      #   <td><a href=... title="Revision 214692">214692</a> (of...)</td>
      # </tr>
      #
      # So look out for that.
      if len(cells) == 2 and _InnerText(cells[0]) == 'Directory revision:':
        links = cells[1].getElementsByTagName('a')
        if len(links) != 2:
          raise FileSystemError(
              'ViewVC assumption invalid: directory '
              'revision content did not have 2 <a> '
              ' elements, instead %s' % _InnerText(cells[1]))
        this_parent_version = _InnerText(links[0])
        int(this_parent_version)  # sanity check
        if parent_version is not None:
          # The whole message is parenthesised before formatting: '%' binds
          # tighter than '+', so formatting only the trailing fragment with
          # two values would raise TypeError instead of this error.
          raise FileSystemError(
              ('There was already a parent version %s, and '
               ' we just found a second at %s') %
              (parent_version, this_parent_version))
        parent_version = this_parent_version

      # The version of each file is a list of rows with 5 cells: name, version,
      # age, author, and last log entry. Maybe the columns will change; we're
      # at the mercy of viewvc, but this constant can be easily updated.
      if len(cells) != 5:
        continue
      name_element, version_element, _, __, ___ = cells

      name = _InnerText(name_element)  # note: will end in / for directories
      try:
        version = int(_InnerText(version_element))
      except ValueError:
        # Not a version row (e.g. a header); ignore it.
        continue
      child_versions[name] = str(version)

    if parent_version and child_versions:
      break

  return StatInfo(parent_version, child_versions)
class _AsyncFetchFuture(object):
  '''Delegate that resolves a batch of asynchronous fetches into a dict.

  Every path is requested from |fetcher| as soon as the object is created;
  Get() then waits on each request in turn and maps each path to either the
  fetched content or, for paths ending in '/', the list of names linked from
  the fetched page.
  '''

  def __init__(self, paths, fetcher, args=None):
    '''Starts one FetchAsync per entry of |paths|.

    When |args| is given it is appended to every requested path as a query
    string ("path?args").
    '''
    # NOTE(review): the end of this constructor was not visible in the
    # reviewed extract; the iteration over |paths| and the initial values of
    # _value and _error are inferred from how Get() uses them -- confirm.
    self._value = {}
    self._error = None
    # A list of tuples of the form (path, Future).
    self._fetches = []
    for path in paths:
      url = path if args is None else '%s?%s' % (path, args)
      self._fetches.append((path, fetcher.FetchAsync(url)))

  def _ListDir(self, directory):
    '''Returns the text of every <a> element in the |directory| markup.'''
    listing = xml.parseString(directory)
    names = []
    for anchor in listing.getElementsByTagName('a'):
      names.append(anchor.childNodes[0].data)
    # NOTE(review): everything after the list construction was not visible in
    # the reviewed extract. Dropping a single '..' entry and returning the
    # list is assumed -- confirm against the repository.
    try:
      names.remove('..')
    except ValueError:
      pass
    return names

  def Get(self):
    '''Waits for every fetch and returns a dict keyed by requested path.

    Raises FileNotFoundError on a 404 response, and FileSystemError when a
    fetch raises or answers with any other non-200 status.
    '''
    # NOTE(review): this method's header and its final two statements were not
    # visible in the reviewed extract; they are inferred -- confirm.
    for path, future in self._fetches:
      try:
        result = future.Get()
      except Exception:
        raise FileSystemError('Error fetching %s for Get: %s' %
                              (path, traceback.format_exc()))

      status = result.status_code
      if status == 404:
        raise FileNotFoundError(
            'Got 404 when fetching %s for Get, content %s' %
            (path, result.content))
      if status != 200:
        raise FileSystemError(
            'Got %s when fetching %s for Get, content %s' %
            (status, path, result.content))

      content = result.content
      # A trailing slash marks a directory, whose page is reduced to names.
      self._value[path] = (
          self._ListDir(content) if path.endswith('/') else content)

    if self._error is not None:
      raise self._error
    return self._value
class SubversionFileSystem(FileSystem):
  '''Class to fetch resources from src.chromium.org.
  '''
  @staticmethod
  def Create(branch='trunk', revision=None):
    '''Returns a SubversionFileSystem for |branch|.

    'trunk' maps to trunk/src and any other branch to branches/<branch>/src.
    File contents are fetched relative to url_constants.SVN_URL and stat data
    relative to url_constants.VIEWVC_URL.
    '''
    # NOTE(review): the trailing constructor arguments were not visible in
    # the reviewed extract; they are inferred from __init__'s signature.
    svn_path = 'trunk/src' if branch == 'trunk' else 'branches/%s/src' % branch
    file_fetcher = AppEngineUrlFetcher(
        '%s/%s' % (url_constants.SVN_URL, svn_path))
    stat_fetcher = AppEngineUrlFetcher(
        '%s/%s' % (url_constants.VIEWVC_URL, svn_path))
    return SubversionFileSystem(file_fetcher,
                                stat_fetcher,
                                svn_path,
                                revision=revision)

  def __init__(self, file_fetcher, stat_fetcher, svn_path, revision=None):
    '''Stores the two fetchers, the svn path and the optional revision.'''
    self._revision = revision
    self._svn_path = svn_path
    self._stat_fetcher = stat_fetcher
    self._file_fetcher = file_fetcher

  def Read(self, paths):
    '''Returns a Future that resolves |paths| through the file fetcher.'''
    # NOTE(review): the fetcher and args passed to _AsyncFetchFuture were not
    # visible in the reviewed extract; they are inferred -- confirm.
    # |fetcher| gets from svn.chromium.org which uses p= for version.
    args = None if self._revision is None else 'p=%s' % self._revision
    return Future(delegate=_AsyncFetchFuture(paths,
                                             self._file_fetcher,
                                             args=args))

  # NOTE(review): the header of this method was not visible in the reviewed
  # extract; the name Refresh is assumed -- confirm against the repository.
  def Refresh(self):
    '''Returns an already-resolved Future holding an empty tuple.'''
    return Future(value=())

  def Stat(self, path):
    '''Returns version information for |path|.

    The page for the directory containing |path| is fetched through the stat
    fetcher. For '' or a path ending in '/', the whole directory StatInfo is
    returned; otherwise a StatInfo holding only the named child's version.

    Raises FileNotFoundError when the response is not 200 or the file is not
    listed, and FileSystemError when the fetch raises or no directory version
    can be found.
    '''
    directory, filename = posixpath.split(path)
    if self._revision is not None:
      # |stat_fetch| uses viewvc which uses pathrev= for version.
      directory = '%s?pathrev=%s' % (directory, self._revision)

    try:
      result = self._stat_fetcher.Fetch(directory)
    except Exception:
      raise FileSystemError('Error fetching %s for Stat: %s' %
                            (path, traceback.format_exc()))

    status = result.status_code
    if status == 404:
      raise FileNotFoundError(
          'Got 404 when fetching %s for Stat, content %s' %
          (path, result.content))
    if status != 200:
      # NOTE(review): _AsyncFetchFuture.Get reports the same condition as
      # FileSystemError; confirm that FileNotFoundError is intended here.
      raise FileNotFoundError(
          'Got %s when fetching %s for Stat, content %s' %
          (status, path, result.content))

    stat_info = _CreateStatInfo(result.content)
    if stat_info.version is None:
      raise FileSystemError('Failed to find version of dir %s' % directory)

    names_a_directory = path == '' or path.endswith('/')
    if names_a_directory:
      return stat_info

    child_versions = stat_info.child_versions
    if filename not in child_versions:
      raise FileNotFoundError(
          '%s from %s was not in child versions for Stat' % (filename, path))
    return StatInfo(child_versions[filename])

  def GetIdentity(self):
    '''Returns "<class name>@<StringIdentity of the svn path>".'''
    # NOTE: no revision here, since it would mess up the caching of reads. It
    # probably doesn't matter since all the caching classes will use the result
    # of Stat to decide whether to re-read - and Stat has a ceiling of the
    # revision - so when the revision changes, so might Stat. That is enough.
    class_name = self.__class__.__name__
    path_identity = StringIdentity(self._svn_path)
    return '@'.join((class_name, path_identity))