1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
7 import xml
.dom
.minidom
as xml
8 from xml
.parsers
.expat
import ExpatError
10 from appengine_url_fetcher
import AppEngineUrlFetcher
11 from appengine_wrappers
import IsDownloadError
12 from docs_server_utils
import StringIdentity
13 from file_system
import (
14 FileNotFoundError
, FileSystem
, FileSystemError
, StatInfo
)
15 from future
import Future
def _ParseHTML(html):
  '''Parses |html| into a minidom document, tolerating mismatched tags.

  Unfortunately, the viewvc page has a stray </div> tag, so whenever the
  parser rejects the input, the line it complained about is dropped and the
  remaining text is parsed again.

  Raises ExpatError if the parser still fails once there is no reported line
  left to drop (e.g. empty or truncated input), instead of retrying forever.
  '''
  lines = html.split('\n')
  while True:
    try:
      return xml.parseString('\n'.join(lines))
    except ExpatError as e:
      # ExpatError line numbers are 1-based.
      bad_index = e.lineno - 1
      if not 0 <= bad_index < len(lines):
        # Nothing can be removed, so another attempt would fail identically.
        raise
      del lines[bad_index]
def _InnerText(node):
  '''Like node.innerText in JS DOM, but strips surrounding whitespace.

  The node's own value (if any) is followed by the inner text of each child,
  in document order. Every child's text is stripped before being joined, and
  the joined result is stripped once more.
  '''
  pieces = [node.nodeValue] if node.nodeValue else []
  if hasattr(node, 'childNodes'):
    pieces.extend(_InnerText(child) for child in node.childNodes)
  joined = ''.join(pieces)
  return joined.strip()
def _CreateStatInfo(html):
  '''Builds a StatInfo from the HTML of a viewvc directory listing.

  |html| is parsed with _ParseHTML and its tables are scanned for the
  directory's own revision and for the revisions of its children.

  Returns StatInfo(parent_version, child_versions), where |parent_version| is
  the directory revision text (None if it was never found) and
  |child_versions| maps each child name to its revision as a string.

  Raises FileSystemError if the "Directory revision:" row does not contain
  exactly 2 links, or if a second such row is encountered. Raises ValueError
  if the directory revision text is not an integer.
  '''
  parent_version = None
  child_versions = {}

  # Try all of the tables until we find the ones that contain the data (the
  # directory and file versions are in different tables).
  for table in _ParseHTML(html).getElementsByTagName('table'):
    # Within the table there is a list of files. However, there may be some
    # things beforehand; a header, "parent directory" list, etc. We will deal
    # with that below by being generous and just ignoring such rows.
    rows = table.getElementsByTagName('tr')

    for row in rows:
      cells = row.getElementsByTagName('td')

      # The version of the directory will eventually appear in the soup of
      # table rows, like this:
      #
      # <tr>
      #   <td>Directory revision:</td>
      #   <td><a href=... title="Revision 214692">214692</a> (of...)</td>
      # </tr>
      #
      # So look out for that.
      if len(cells) == 2 and _InnerText(cells[0]) == 'Directory revision:':
        links = cells[1].getElementsByTagName('a')
        if len(links) != 2:
          raise FileSystemError(('ViewVC assumption invalid: directory '
                                 'revision content did not have 2 <a> '
                                 ' elements, instead %s') %
                                _InnerText(cells[1]))
        this_parent_version = _InnerText(links[0])
        int(this_parent_version)  # sanity check
        if parent_version is not None:
          # The whole message is parenthesized before formatting: '%' binds
          # tighter than '+', so formatting only the last fragment with two
          # values raises TypeError instead of reporting this error.
          raise FileSystemError(('There was already a parent version %s, and '
                                 ' we just found a second at %s') %
                                (parent_version, this_parent_version))
        parent_version = this_parent_version

      # The version of each file is a list of rows with 5 cells: name, version,
      # age, author, and last log entry. Maybe the columns will change; we're
      # at the mercy of viewvc, but this constant can be easily updated.
      if len(cells) != 5:
        continue
      name_element, version_element, _, __, ___ = cells

      name = _InnerText(name_element)  # note: will end in / for directories
      try:
        version = int(_InnerText(version_element))
      except ValueError:
        # Not a file row (e.g. a header); ignore it as described above.
        continue
      child_versions[name] = str(version)

    # Stop scanning tables once both pieces of data have been found.
    if parent_version and child_versions:
      break

  return StatInfo(parent_version, child_versions)
def _GetAsyncFetchCallback(paths, fetcher, args=None, skip_not_found=False):
  '''Starts fetching every path and returns a callback that collects results.

  |fetcher.FetchAsync| is called immediately for each path in |paths|, with
  '?<args>' appended to the path when |args| is not None. The returned
  callable waits on those fetches and returns a dict mapping each path to its
  content: a list of entry names for paths ending in '/', otherwise the raw
  response content.

  When |skip_not_found| is True, paths that fail with a download error or a
  404 are left out of the result. Otherwise the callback raises
  FileNotFoundError for download errors and 404s, and FileSystemError for any
  other failure or non-200 status.
  '''
  def apply_args(path):
    return path if args is None else '%s?%s' % (path, args)

  def list_dir(directory):
    '''Returns the link texts found in the directory listing |directory|.'''
    dom = xml.parseString(directory)
    files = []
    for elem in dom.getElementsByTagName('a'):
      label = elem.firstChild
      # An <a> with no leading text node carries no entry name; skip it
      # rather than failing on the missing child or missing .data attribute.
      if label is None or not hasattr(label, 'data'):
        continue
      files.append(label.data)
    # NOTE(review): the parent-directory link is not a child entry, so it is
    # dropped; this step was not visible in the reviewed text - confirm.
    if '..' in files:
      files.remove('..')
    return files

  # A list of tuples of the form (path, Future).
  fetches = [(path, fetcher.FetchAsync(apply_args(path))) for path in paths]

  def resolve():
    value = {}
    for path, future in fetches:
      try:
        result = future.Get()
      except Exception as e:
        if skip_not_found and IsDownloadError(e):
          continue
        exc_type = FileNotFoundError if IsDownloadError(e) else FileSystemError
        raise exc_type('%s fetching %s for Get: %s' %
                       (type(e).__name__, path, traceback.format_exc()))
      if result.status_code == 404:
        if skip_not_found:
          continue
        raise FileNotFoundError('Got 404 when fetching %s for Get, content %s' %
                                (path, result.content))
      if result.status_code != 200:
        raise FileSystemError('Got %s when fetching %s for Get, content %s' %
                              (result.status_code, path, result.content))
      if path.endswith('/'):
        value[path] = list_dir(result.content)
      else:
        value[path] = result.content
    return value

  return resolve
class SubversionFileSystem(FileSystem):
  '''Class to fetch resources from src.chromium.org.

  File contents are read through one fetcher and version information (Stat)
  through a second one that is pointed at viewvc.
  '''
  @staticmethod
  def Create(branch='trunk', revision=None):
    '''Returns a SubversionFileSystem for |branch|, optionally pinned to
    |revision|.
    '''
    svn_path = 'trunk/src' if branch == 'trunk' else 'branches/%s/src' % branch
    file_fetcher = AppEngineUrlFetcher(
        '%s/%s' % (url_constants.SVN_URL, svn_path))
    stat_fetcher = AppEngineUrlFetcher(
        '%s/%s' % (url_constants.VIEWVC_URL, svn_path))
    return SubversionFileSystem(file_fetcher,
                                stat_fetcher,
                                svn_path,
                                revision=revision)

  def __init__(self, file_fetcher, stat_fetcher, svn_path, revision=None):
    self._revision = revision
    self._svn_path = svn_path
    self._stat_fetcher = stat_fetcher
    self._file_fetcher = file_fetcher

  def Read(self, paths, skip_not_found=False):
    '''Returns a Future resolving to a dict of path -> content for |paths|.'''
    # The file fetcher gets from svn.chromium.org which uses p= for version.
    query = None if self._revision is None else 'p=%s' % self._revision
    fetch_callback = _GetAsyncFetchCallback(paths,
                                            self._file_fetcher,
                                            args=query,
                                            skip_not_found=skip_not_found)
    return Future(callback=fetch_callback)

  # NOTE(review): the header of this method was not visible in the reviewed
  # text; the name Refresh is an assumption - confirm against FileSystem.
  def Refresh(self):
    return Future(value=())

  def Stat(self, path):
    '''Synchronous form of StatAsync.'''
    pending = self.StatAsync(path)
    return pending.Get()

  def StatAsync(self, path):
    '''Returns a Future resolving to the StatInfo of |path|.

    For '' or a path ending in '/', the result is the StatInfo of the whole
    directory; otherwise it is a StatInfo holding just that file's version.
    Resolving raises FileNotFoundError or FileSystemError on failure.
    '''
    directory, filename = posixpath.split(path)
    if self._revision is not None:
      # The stat fetcher uses viewvc which uses pathrev= for version.
      directory = '%s?pathrev=%s' % (directory, self._revision)

    result_future = self._stat_fetcher.FetchAsync(directory)

    def resolve():
      try:
        result = result_future.Get()
      except Exception as e:
        if IsDownloadError(e):
          exc_type = FileNotFoundError
        else:
          exc_type = FileSystemError
        raise exc_type('%s fetching %s for Stat: %s' %
                       (type(e).__name__, path, traceback.format_exc()))

      status = result.status_code
      if status == 404:
        raise FileNotFoundError(
            'Got 404 when fetching %s for Stat, content %s' %
            (path, result.content))
      if status != 200:
        # NOTE(review): every non-200 status is reported as not-found here.
        raise FileNotFoundError(
            'Got %s when fetching %s for Stat, content %s' %
            (status, path, result.content))

      stat_info = _CreateStatInfo(result.content)
      if stat_info.version is None:
        raise FileSystemError('Failed to find version of dir %s' % directory)

      if path == '' or path.endswith('/'):
        return stat_info

      if filename in stat_info.child_versions:
        return StatInfo(stat_info.child_versions[filename])
      raise FileNotFoundError(
          '%s from %s was not in child versions for Stat' % (filename, path))

    return Future(callback=resolve)

  def GetIdentity(self):
    # NOTE: the revision is deliberately left out, since it would mess up the
    # caching of reads. It probably doesn't matter: the caching classes use
    # the result of Stat to decide whether to re-read, and Stat has a ceiling
    # of the revision, so when the revision changes, so might Stat.
    class_name = self.__class__.__name__
    path_identity = StringIdentity(self._svn_path)
    return '@'.join((class_name, path_identity))