2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
# Where all the data lives: the waterfall's "builders" page. Individual build
# URLs are formed relative to this root.
ROOT_URL = "http://build.chromium.org/p/chromium.memory.fyi/builders"
18 # TODO(groby) - support multi-line search from the command line. Useful when
19 # scanning for classes of failures, see below.
20 SEARCH_STRING
= """<p class=\"failure result\">
21 Failed memory test: content
# Location of the log cache; main() joins this onto the script's own directory.
CACHE_DIR = "buildlogs.tmp"
27 # If we don't find anything after searching |CUTOFF| logs, we're probably done.
31 """Makes sure |path| does exist, tries to create it if it doesn't."""
34 except OSError as exception
:
35 if exception
.errno
!= errno
.EEXIST
:
40 def __init__(self
, root_dir
):
41 self
._root
_dir
= os
.path
.abspath(root_dir
)
43 def _LocalName(self
, name
):
44 """If name is a relative path, treat it as relative to cache root.
45 If it is absolute and under cache root, pass it through.
46 Otherwise, raise error.
48 if os
.path
.isabs(name
):
49 assert os
.path
.commonprefix([name
, self
._root
_dir
]) == self
._root
_dir
51 name
= os
.path
.join(self
._root
_dir
, name
)
54 def _FetchLocal(self
, local_name
):
55 local_name
= self
._LocalName
(local_name
)
56 EnsurePath(os
.path
.dirname(local_name
))
57 if os
.path
.exists(local_name
):
58 f
= open(local_name
, 'r')
62 def _FetchRemote(self
, remote_name
):
64 response
= urllib2
.urlopen(remote_name
)
66 print "Could not fetch", remote_name
68 return response
.read()
70 def Update(self
, local_name
, remote_name
):
71 local_name
= self
._LocalName
(local_name
)
72 EnsurePath(os
.path
.dirname(local_name
))
73 blob
= self
._FetchRemote
(remote_name
)
74 f
= open(local_name
, "w")
76 return blob
.splitlines()
78 def FetchData(self
, local_name
, remote_name
):
79 result
= self
._FetchLocal
(local_name
)
82 # If we get here, the local cache does not exist yet. Fetch, and store.
83 return self
.Update(local_name
, remote_name
)
86 class Builder(object):
87 def __init__(self
, waterfall
, name
):
89 self
._waterfall
= waterfall
94 def LatestBuild(self
):
95 return self
._waterfall
.GetLatestBuild(self
._name
)
97 def GetBuildPath(self
, build_num
):
98 return "%s/%s/builds/%d" % (
99 self
._waterfall
._root
_url
, urllib
.quote(self
._name
), build_num
)
101 def _FetchBuildLog(self
, build_num
):
102 local_build_path
= "builds/%s" % self
._name
103 local_build_file
= os
.path
.join(local_build_path
, "%d.log" % build_num
)
104 return self
._waterfall
._cache
.FetchData(local_build_file
,
105 self
.GetBuildPath(build_num
))
107 def _CheckLog(self
, build_num
, tester
):
108 log_lines
= self
._FetchBuildLog
(build_num
)
109 return any(tester(line
) for line
in log_lines
)
111 def ScanLogs(self
, tester
):
113 build
= self
.LatestBuild()
115 while build
!= 0 and no_results
< CUTOFF
:
116 if self
._CheckLog
(build
, tester
):
117 occurrences
.append(build
)
119 no_results
= no_results
+ 1
124 class Waterfall(object):
125 def __init__(self
, root_url
, cache_dir
):
126 self
._root
_url
= root_url
128 self
._top
_revision
= {}
129 self
._cache
= Cache(cache_dir
)
132 return self
._builders
.values()
135 self
._cache
.Update("builders", self
._root
_url
)
139 if self
._top
_revision
:
142 html
= self
._cache
.FetchData("builders", self
._root
_url
)
144 """ Search for both builders and latest build number in HTML
145 <td class="box"><a href="builders/<builder-name>"> identifies a builder
146 <a href="builders/<builder-name>/builds/<build-num>"> is the latest build.
148 box_matcher
= re
.compile('.*a href[^>]*>([^<]*)\<')
149 build_matcher
= re
.compile('.*a href=\"builders/(.*)/builds/([0-9]+)\".*')
152 if 'a href="builders/' in line
:
153 if 'td class="box"' in line
:
154 last_builder
= box_matcher
.match(line
).group(1)
155 self
._builders
[last_builder
] = Builder(self
, last_builder
)
157 result
= build_matcher
.match(line
)
158 builder
= result
.group(1)
159 assert builder
== urllib
.quote(last_builder
)
160 self
._top
_revision
[last_builder
] = int(result
.group(2))
162 def GetLatestBuild(self
, name
):
164 assert self
._top
_revision
165 return self
._top
_revision
[name
]
168 class MultiLineChange(object):
169 def __init__(self
, lines
):
170 self
._tracked
_lines
= lines
173 def __call__(self
, line
):
174 """ Test a single line against multi-line change.
176 If it matches the currently active line, advance one line.
177 If the current line is the last line, report a match.
179 if self
._tracked
_lines
[self
._current
] in line
:
180 self
._current
= self
._current
+ 1
181 if self
._current
== len(self
._tracked
_lines
):
190 # Create argument parser.
191 parser
= argparse
.ArgumentParser()
192 commands
= parser
.add_mutually_exclusive_group(required
=True)
193 commands
.add_argument("--update", action
='store_true')
194 commands
.add_argument("--find", metavar
='search term')
195 parser
.add_argument("--json", action
='store_true',
196 help="Output in JSON format")
197 args
= parser
.parse_args()
199 path
= os
.path
.abspath(os
.path
.dirname(argv
[0]))
200 cache_path
= os
.path
.join(path
, CACHE_DIR
)
202 fyi
= Waterfall(ROOT_URL
, cache_path
)
206 for builder
in fyi
.Builders():
207 print "Updating", builder
.Name()
208 builder
.ScanLogs(lambda x
:False)
212 tester
= MultiLineChange(args
.find
.splitlines())
216 print "SCANNING FOR ", args
.find
217 for builder
in fyi
.Builders():
219 print "Scanning", builder
.Name()
220 occurrences
= builder
.ScanLogs(tester
)
222 min_build
= min(occurrences
)
223 path
= builder
.GetBuildPath(min_build
)
226 data
['builder'] = builder
.Name()
227 data
['first_affected'] = min_build
228 data
['last_affected'] = max(occurrences
)
229 data
['last_build'] = builder
.LatestBuild()
230 data
['frequency'] = ((int(builder
.LatestBuild()) - int(min_build
)) /
232 data
['total'] = len(occurrences
)
233 data
['first_url'] = path
236 print "Earliest occurrence in build %d" % min_build
237 print "Latest occurrence in build %d" % max(occurrences
)
238 print "Latest build: %d" % builder
.LatestBuild()
240 print "%d total" % len(occurrences
)
242 json
.dump(result
, sys
.stdout
, indent
=2, sort_keys
=True)
if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)