We started redesigning GpuMemoryBuffer interface to handle multiple buffers [0].
[chromium-blink-merge.git] / gpu / tools / check_gpu_bots.py
blob bd7c9389e78864bf2beeffe5ff8285ea6d1bfe9f
1 #!/usr/bin/env python
3 # Copyright 2014 The Chromium Authors. All rights reserved.
4 # Use of this source code is governed by a BSD-style license that can be
5 # found in the LICENSE file.
7 import argparse
8 import datetime
9 import getpass
10 import json
11 import os
12 import smtplib
13 import sys
14 import time
15 import urllib
16 import urllib2
class Emailer:
    """Sends GPU bot notification emails through Gmail's SMTP server.

    The sending account's password is loaded once, at construction time,
    either from a file or by prompting on the terminal.
    """

    DEFAULT_EMAIL_PASSWORD_FILE = '.email_password'
    GMAIL_SMTP_SERVER = 'smtp.gmail.com:587'
    SUBJECT = 'Chrome GPU Bots Notification'

    def __init__(self, email_from, email_to, email_password_file):
        """Remember the addresses and load the sender's password.

        Args:
            email_from: address to send from; also used as the SMTP login.
            email_to: list of recipient addresses.
            email_password_file: path of a file holding the password, or
                None to fall back to DEFAULT_EMAIL_PASSWORD_FILE.
        """
        self.email_from = email_from
        self.email_to = email_to
        self.email_password = Emailer._getEmailPassword(email_password_file)

    @staticmethod
    def format_email_body(time_str, offline_str, failed_str, noteworthy_str):
        """Concatenate the four report sections into one email body."""
        return '%s%s%s%s' % (time_str, offline_str, failed_str,
                             noteworthy_str)

    def _openSession(self):
        """Return an SMTP connection that has done STARTTLS and logged in.

        If either step fails the socket is closed before the exception is
        re-raised, so a failed login does not leave a connection behind.
        """
        server = smtplib.SMTP(Emailer.GMAIL_SMTP_SERVER)
        try:
            server.starttls()
            server.login(self.email_from, self.email_password)
        except Exception:
            server.close()
            raise
        return server

    def send_email(self, body):
        """Send `body` to every recipient; failures are printed, not raised."""
        message = 'From: %s\r\nTo: %s\r\nSubject: %s\r\n\r\n%s' % (
            self.email_from, ','.join(self.email_to), Emailer.SUBJECT, body)

        try:
            server = self._openSession()
            try:
                server.sendmail(self.email_from, self.email_to, message)
            finally:
                # Always end the session, even when sendmail() raised.
                server.quit()
        except Exception as e:
            print('Error sending email: %s' % str(e))

    def testEmailLogin(self):
        """Log in and immediately log out; raises if the login fails."""
        server = self._openSession()
        server.quit()

    @staticmethod
    def _getEmailPassword(email_password_file):
        """Return the password from the given (or default) file, or prompt.

        The file content is stripped of surrounding whitespace. When the
        file does not exist the user is asked for the password instead.
        """
        password_file = email_password_file
        if password_file is None:
            password_file = Emailer.DEFAULT_EMAIL_PASSWORD_FILE

        if os.path.isfile(password_file):
            with open(password_file, 'r') as f:
                return f.read().strip()

        return getpass.getpass(
            'Please enter email password for source email account: ')
class GpuBot:
    """A single builder on a waterfall plus the status gathered about it."""

    def __init__(self, waterfall_name, bot_name, bot_data):
        """Create a bot record; all status fields start out unset (None)."""
        self.waterfall_name = waterfall_name
        self.bot_name = bot_name
        self.bot_data = bot_data
        self.failure_string = None
        self.bot_url = None
        self.build_url = None
        self._end_time = None
        self._hours_since_last_run = None

    def getEndTime(self):
        """Return the end time recorded by setEndTime, or None."""
        return self._end_time

    def setEndTime(self, end_time):
        """Record `end_time` and how many hours ago that was, as of now."""
        self._end_time = end_time
        now = time.localtime()
        self._hours_since_last_run = roughTimeDiffInHours(end_time, now)

    def getHoursSinceLastRun(self):
        """Return the hours computed by setEndTime, or None if never set."""
        return self._hours_since_last_run

    def toDict(self):
        """Return a JSON-friendly dict holding only the fields that are set."""
        record = {}
        record['waterfall_name'] = self.waterfall_name
        record['bot_name'] = self.bot_name

        if self._end_time is not None:
            record['end_time'] = serialTime(self._end_time)
            record['hours_since_last_run'] = self._hours_since_last_run

        for field in ('failure_string', 'bot_url', 'build_url'):
            value = getattr(self, field)
            if value is not None:
                record[field] = value

        return record

    @staticmethod
    def fromDict(dict):
        """Rebuild a GpuBot (with bot_data=None) from a toDict() result."""
        restored = GpuBot(dict['waterfall_name'], dict['bot_name'], None)

        if 'end_time' in dict:
            restored._end_time = unserializeTime(dict['end_time'])

        # Remaining keys map straight onto attributes when present.
        plain_fields = (('hours_since_last_run', '_hours_since_last_run'),
                        ('failure_string', 'failure_string'),
                        ('bot_url', 'bot_url'),
                        ('build_url', 'build_url'))
        for key, attribute in plain_fields:
            if key in dict:
                setattr(restored, attribute, dict[key])

        return restored
def errorNoMostRecentBuild(waterfall_name, bot_name):
    """Print a notice that no recent build was found for the given bot."""
    notice = 'No most recent build available: %s::%s' % (waterfall_name,
                                                         bot_name)
    print(notice)
class Waterfall:
    """Queries the JSON API of the Chromium GPU waterfalls."""

    BASE_URL = 'http://build.chromium.org/p/'
    BASE_BUILD_URL = BASE_URL + '%s/builders/%s'
    SPECIFIC_BUILD_URL = BASE_URL + '%s/builders/%s/builds/%s'
    BASE_JSON_BUILDERS_URL = BASE_URL + '%s/json/builders'
    BASE_JSON_BUILDS_URL = BASE_URL + '%s/json/builders/%s/builds'
    # Waterfalls on which every builder is checked.
    REGULAR_WATERFALLS = ['chromium.gpu', 'chromium.gpu.fyi']
    WEBKIT_GPU_BOTS = ['GPU Win Builder',
                       'GPU Win Builder (dbg)',
                       'GPU Win7 (NVIDIA)',
                       'GPU Win7 (dbg) (NVIDIA)',
                       'GPU Mac Builder',
                       'GPU Mac Builder (dbg)',
                       'GPU Mac10.7',
                       'GPU Mac10.7 (dbg)',
                       'GPU Linux Builder',
                       'GPU Linux Builder (dbg)',
                       'GPU Linux (NVIDIA)',
                       'GPU Linux (dbg) (NVIDIA)']
    # (waterfall, builder names) pairs where only the listed builders are
    # checked.
    FILTERED_WATERFALLS = [('chromium.webkit', WEBKIT_GPU_BOTS)]

    @staticmethod
    def getJsonFromUrl(url):
        """Fetch `url` and return its body parsed as JSON."""
        conn = urllib2.urlopen(url)
        try:
            result = conn.read()
        finally:
            # Close the connection even when read() raises.
            conn.close()
        return json.loads(result)

    @staticmethod
    def getBuildersJsonForWaterfall(waterfall):
        """Return the builders JSON for every builder on `waterfall`."""
        querystring = '?filter'
        return Waterfall.getJsonFromUrl(
            (Waterfall.BASE_JSON_BUILDERS_URL + '%s')
            % (waterfall, querystring))

    @staticmethod
    def getLastNBuildsForBuilder(n, waterfall, builder):
        """Return JSON for builds '-1' .. '-n' of `builder`; {} when n <= 0."""
        if n <= 0:
            return {}

        selectors = ''.join('select=-%d&' % (i + 1) for i in range(n))
        querystring = '?' + selectors + 'filter'

        return Waterfall.getJsonFromUrl(
            (Waterfall.BASE_JSON_BUILDS_URL + '%s')
            % (waterfall, urllib.quote(builder), querystring))

    @staticmethod
    def getFilteredBuildersJsonForWaterfall(waterfall, filter):
        """Return the builders JSON for only the builder names in `filter`."""
        selectors = ''.join('select=%s&' % urllib.quote(bot_name)
                            for bot_name in filter)
        querystring = '?' + selectors + 'filter'

        return Waterfall.getJsonFromUrl(
            (Waterfall.BASE_JSON_BUILDERS_URL + '%s')
            % (waterfall, querystring))

    @staticmethod
    def getAllGpuBots():
        """Return {waterfall name: builders JSON} for all known waterfalls."""
        allbots = {k: Waterfall.getBuildersJsonForWaterfall(k)
                   for k in Waterfall.REGULAR_WATERFALLS}

        filteredbots = {
            name: Waterfall.getFilteredBuildersJsonForWaterfall(name, names)
            for name, names in Waterfall.FILTERED_WATERFALLS}

        allbots.update(filteredbots)

        return allbots

    @staticmethod
    def getOfflineBots(bots):
        """Return a GpuBot for every builder whose 'state' is 'offline'.

        Args:
            bots: mapping as returned by getAllGpuBots().

        Each returned bot has bot_url set and, when a completed build with
        'times' is available, its end time set from times[1].
        """
        offline_bots = []

        for waterfall_name in bots:
            waterfall = bots[waterfall_name]

            for bot_name in waterfall:
                bot = waterfall[bot_name]

                if bot['state'] != 'offline':
                    continue

                gpu_bot = GpuBot(waterfall_name, bot_name, bot)
                gpu_bot.bot_url = Waterfall.BASE_BUILD_URL % (
                    waterfall_name, urllib.quote(bot_name))

                most_recent_build = (
                    Waterfall.getMostRecentlyCompletedBuildForBot(gpu_bot))

                if (most_recent_build and 'times' in most_recent_build and
                        most_recent_build['times']):
                    gpu_bot.setEndTime(
                        time.localtime(most_recent_build['times'][1]))
                else:
                    errorNoMostRecentBuild(waterfall_name, bot_name)

                offline_bots.append(gpu_bot)

        return offline_bots

    @staticmethod
    def getMostRecentlyCompletedBuildForBot(bot):
        """Return the newest build of `bot` that has results, or None.

        The answer is cached in bot.bot_data['most_recent_build'] (when
        bot_data is available) so repeated calls do not hit the network.
        """
        if bot.bot_data is not None and 'most_recent_build' in bot.bot_data:
            return bot.bot_data['most_recent_build']

        # Unfortunately, the JSON API doesn't provide a "most recent completed
        # build" call. We just have to get some number of the most recent
        # (including current, in-progress builds) and give up if that's not
        # enough.
        NUM_BUILDS = 10
        builds = Waterfall.getLastNBuildsForBuilder(
            NUM_BUILDS, bot.waterfall_name, bot.bot_name)

        for i in range(NUM_BUILDS):
            current_build_name = '-%d' % (i + 1)
            # The response is not guaranteed to hold every requested entry
            # (e.g. a builder with little history); skip missing ones rather
            # than raise KeyError.
            current_build = builds.get(current_build_name)
            if not current_build:
                continue

            if ('results' in current_build and
                    current_build['results'] is not None):
                if bot.bot_data is not None:
                    bot.bot_data['most_recent_build'] = current_build

                return current_build

        return None

    @staticmethod
    def getFailedBots(bots):
        """Return a GpuBot for every builder whose latest build failed.

        Args:
            bots: mapping as returned by getAllGpuBots().

        A build counts as failed when its 'text' list contains 'failed'.
        Returned bots carry failure_string, bot_url and build_url.
        """
        failed_bots = []

        for waterfall_name in bots:
            waterfall = bots[waterfall_name]

            for bot_name in waterfall:
                bot = waterfall[bot_name]
                gpu_bot = GpuBot(waterfall_name, bot_name, bot)
                gpu_bot.bot_url = Waterfall.BASE_BUILD_URL % (
                    waterfall_name, urllib.quote(bot_name))

                most_recent_build = (
                    Waterfall.getMostRecentlyCompletedBuildForBot(gpu_bot))

                if (most_recent_build and 'text' in most_recent_build and
                        'failed' in most_recent_build['text']):
                    gpu_bot.failure_string = ' '.join(
                        most_recent_build['text'])
                    gpu_bot.build_url = Waterfall.SPECIFIC_BUILD_URL % (
                        waterfall_name, urllib.quote(bot_name),
                        most_recent_build['number'])
                    failed_bots.append(gpu_bot)
                elif not most_recent_build:
                    errorNoMostRecentBuild(waterfall_name, bot_name)

        return failed_bots
def formatTime(t):
    """Format a time tuple like 'Thu, 02 Jan 2014 03:04:05'."""
    layout = "%a, %d %b %Y %H:%M:%S"
    return time.strftime(layout, t)
def roughTimeDiffInHours(t1, t2):
    """Return the absolute difference between two time tuples, in hours.

    Only the year..second fields are used, read through the tm_* attributes.
    """
    def to_datetime(t):
        return datetime.datetime(t.tm_year, t.tm_mon, t.tm_mday,
                                 t.tm_hour, t.tm_min, t.tm_sec)

    first = to_datetime(t1)
    second = to_datetime(t2)
    elapsed = first - second

    return abs(float(elapsed.total_seconds()) / 3600.0)
def getBotStr(bot):
    """Return a multi-line, human-readable description of one bot.

    Optional lines (failure, end time, URLs) appear only when the matching
    value on `bot` is not None; the text ends with a blank line.
    """
    parts = [' %s::%s\n' % (bot.waterfall_name, bot.bot_name)]

    if bot.failure_string is not None:
        parts.append(' failure: %s\n' % bot.failure_string)

    if bot.getEndTime() is not None:
        parts.append(' last build end time: %s (roughly %f hours ago)\n' %
                     (formatTime(bot.getEndTime()),
                      bot.getHoursSinceLastRun()))

    if bot.bot_url is not None:
        parts.append(' bot url: %s\n' % bot.bot_url)

    if bot.build_url is not None:
        parts.append(' build url: %s\n' % bot.build_url)

    parts.append('\n')
    return ''.join(parts)
def getBotsStr(bots):
    """Return the descriptions of all `bots` followed by a newline."""
    descriptions = [getBotStr(bot) for bot in bots]
    descriptions.append('\n')
    return ''.join(descriptions)
def getOfflineBotsStr(offline_bots):
    """Return the 'Offline bots:' section of the report."""
    listing = getBotsStr(offline_bots)
    return 'Offline bots:\n' + listing
def getFailedBotsStr(failed_bots):
    """Return the 'Failed bots:' section of the report."""
    listing = getBotsStr(failed_bots)
    return 'Failed bots:\n' + listing
def getBotDicts(bots):
    """Return the toDict() form of every bot, in order."""
    return [bot.toDict() for bot in bots]
def unserializeTime(t):
    """Rebuild a time.struct_time from a dict produced by serialTime.

    Args:
        t: dict with integer 'year', 'mon', 'day', 'hour', 'min', 'sec'.

    Returns:
        A time.struct_time whose weekday and day-of-year are derived from
        the date (so '%a' formats correctly) and whose tm_isdst is -1
        (unknown), since DST information is not part of the serialized form.
    """
    # datetime rejects seconds above 59, while a struct_time may carry a leap
    # second; clamp so such a value still round-trips.
    seconds = min(t['sec'], 59)
    moment = datetime.datetime(t['year'], t['mon'], t['day'], t['hour'],
                               t['min'], seconds)
    return moment.timetuple()
def serialTime(t):
    """Return the year..second fields of a time tuple as a plain dict."""
    fields = (('year', t.tm_year), ('mon', t.tm_mon), ('day', t.tm_mday),
              ('hour', t.tm_hour), ('min', t.tm_min), ('sec', t.tm_sec))
    return dict(fields)
def getSummary(offline_bots, failed_bots):
    """Return {'offline': [...], 'failed': [...]} of bot dicts for storage."""
    summary = {}
    summary['offline'] = [bot.toDict() for bot in offline_bots]
    summary['failed'] = [bot.toDict() for bot in failed_bots]
    return summary
def findBot(name, lst):
    """Return the first bot in `lst` whose bot_name equals `name`, or None."""
    matches = (candidate for candidate in lst if candidate.bot_name == name)
    return next(matches, None)
def getNoteworthyEvents(offline_bots, failed_bots, previous_results):
    """Compare the current bot status against the previous run.

    Args:
        offline_bots: bots that are currently offline.
        failed_bots: bots whose latest build failed.
        previous_results: dict with optional 'offline' and 'failed' lists of
            bots from the previous run (an empty dict on the first run).

    Returns:
        A dict with four lists of bots:
          'offline': offline for at least CRITICAL_NUM_HOURS now, and not so
              in the previous run.
          'failed': failing now but not in the previous run.
          'recovered_offline': offline for at least CRITICAL_NUM_HOURS in the
              previous run and no longer offline.
          'recovered_failures': failing in the previous run, not failing now.

    Bots are matched on (waterfall_name, bot_name): the same builder name can
    be listed on more than one waterfall, so matching on the name alone would
    let one waterfall's status hide another's.
    """
    CRITICAL_NUM_HOURS = 1.0

    def bot_key(bot):
        return (bot.waterfall_name, bot.bot_name)

    def hours_offline(bot):
        # The duration is None when no end time was recorded; treat that as
        # "below the threshold" explicitly instead of comparing with None.
        hours = bot.getHoursSinceLastRun()
        return 0.0 if hours is None else hours

    def index_by_key(bots):
        indexed = {}
        for bot in bots:
            # Keep the first occurrence should a key ever repeat.
            indexed.setdefault(bot_key(bot), bot)
        return indexed

    previous_offline = previous_results.get('offline', [])
    previous_failures = previous_results.get('failed', [])

    previous_offline_by_key = index_by_key(previous_offline)
    previous_failures_by_key = index_by_key(previous_failures)
    current_offline_by_key = index_by_key(offline_bots)
    current_failures_by_key = index_by_key(failed_bots)

    noteworthy_offline = []
    for bot in offline_bots:
        if hours_offline(bot) < CRITICAL_NUM_HOURS:
            continue

        previous_bot = previous_offline_by_key.get(bot_key(bot))
        if (previous_bot is None or
                hours_offline(previous_bot) < CRITICAL_NUM_HOURS):
            noteworthy_offline.append(bot)

    noteworthy_new_failures = [
        bot for bot in failed_bots
        if bot_key(bot) not in previous_failures_by_key]

    noteworthy_new_offline_recoveries = [
        bot for bot in previous_offline
        if hours_offline(bot) >= CRITICAL_NUM_HOURS and
        bot_key(bot) not in current_offline_by_key]

    noteworthy_new_failure_recoveries = [
        bot for bot in previous_failures
        if bot_key(bot) not in current_failures_by_key]

    return {'offline': noteworthy_offline,
            'failed': noteworthy_new_failures,
            'recovered_failures': noteworthy_new_failure_recoveries,
            'recovered_offline': noteworthy_new_offline_recoveries}
def getNoteworthyStr(noteworthy_events):
    """Return the 'IMPORTANT ...' report sections for non-empty categories.

    Args:
        noteworthy_events: dict with 'offline', 'failed', 'recovered_offline'
            and 'recovered_failures' lists of bots.
    """
    # (category key, heading) in the order the sections are emitted.
    sections = (
        ('offline', 'IMPORTANT bots newly offline for over an hour:\n'),
        ('failed', 'IMPORTANT new failing bots:\n'),
        ('recovered_offline',
         'IMPORTANT newly recovered previously offline bots:\n'),
        ('recovered_failures', 'IMPORTANT newly recovered failing bots:\n'),
    )

    pieces = []
    for category, heading in sections:
        bots = noteworthy_events[category]
        if not bots:
            continue

        pieces.append(heading)
        pieces.extend(getBotStr(bot) for bot in bots)
        pieces.append('\n')

    return ''.join(pieces)
def dictsToBots(bots):
    """Convert stored 'offline'/'failed' bot dicts back into GpuBot objects."""
    return {
        'offline': [GpuBot.fromDict(entry) for entry in bots['offline']],
        'failed': [GpuBot.fromDict(entry) for entry in bots['failed']],
    }
class GpuBotPoller:
    """Runs one status check of the GPU bots and reports what changed."""

    DEFAULT_PREVIOUS_RESULTS_FILE = '.check_gpu_bots_previous_results'

    def __init__(self, emailer, send_email_for_recovered_offline_bots,
                 send_email_for_recovered_failing_bots, send_email_on_error,
                 previous_results_file):
        """Store the poller configuration.

        Args:
            emailer: Emailer used for notifications, or None to only print.
            send_email_for_recovered_offline_bots: also email when a
                previously offline bot recovers.
            send_email_for_recovered_failing_bots: also email when a
                previously failing bot recovers.
            send_email_on_error: email the error text when a check fails.
            previous_results_file: where results are kept between runs, or
                None for DEFAULT_PREVIOUS_RESULTS_FILE.
        """
        self.emailer = emailer

        self.send_email_for_recovered_offline_bots = (
            send_email_for_recovered_offline_bots)

        self.send_email_for_recovered_failing_bots = (
            send_email_for_recovered_failing_bots)

        self.send_email_on_error = send_email_on_error
        self.previous_results_file = previous_results_file

    def _resultsFilePath(self):
        """Return the configured results file, or the default one."""
        if self.previous_results_file is not None:
            return self.previous_results_file
        return GpuBotPoller.DEFAULT_PREVIOUS_RESULTS_FILE

    def shouldEmail(self, noteworthy_events):
        """Return True when `noteworthy_events` warrants a notification."""
        if noteworthy_events['offline'] or noteworthy_events['failed']:
            return True

        if (self.send_email_for_recovered_offline_bots and
                noteworthy_events['recovered_offline']):
            return True

        if (self.send_email_for_recovered_failing_bots and
                noteworthy_events['recovered_failures']):
            return True

        return False

    def writeResults(self, summary):
        """Write `summary` as JSON to the results file, replacing it."""
        with open(self._resultsFilePath(), 'w') as f:
            f.write(json.dumps(summary))

    def getPreviousResults(self):
        """Return the stored results as bots, or {} when no file exists."""
        previous_results_file = self._resultsFilePath()

        previous_results = {}
        if os.path.isfile(previous_results_file):
            with open(previous_results_file, 'r') as f:
                previous_results = dictsToBots(json.loads(f.read()))

        return previous_results

    def checkBots(self):
        """Query the waterfalls, print the report, store it and maybe email.

        Any exception raised during the check is printed (and emailed when
        send_email_on_error is set and an emailer is configured) rather than
        propagated.
        """
        time_str = 'Current time: %s\n\n' % (formatTime(time.localtime()))
        print(time_str)

        try:
            bots = Waterfall.getAllGpuBots()

            offline_bots = Waterfall.getOfflineBots(bots)
            offline_str = getOfflineBotsStr(offline_bots)
            print(offline_str)

            failed_bots = Waterfall.getFailedBots(bots)
            failed_str = getFailedBotsStr(failed_bots)
            print(failed_str)

            previous_results = self.getPreviousResults()
            noteworthy_events = getNoteworthyEvents(offline_bots, failed_bots,
                                                    previous_results)

            noteworthy_str = getNoteworthyStr(noteworthy_events)
            print(noteworthy_str)

            summary = getSummary(offline_bots, failed_bots)
            self.writeResults(summary)

            if (self.emailer is not None and
                    self.shouldEmail(noteworthy_events)):
                self.emailer.send_email(Emailer.format_email_body(
                    time_str, offline_str, failed_str, noteworthy_str))
        except Exception as e:
            error_str = 'Error: %s' % str(e)
            print(error_str)

            # Same guard as the success path: there may be no emailer.
            if self.send_email_on_error and self.emailer is not None:
                self.emailer.send_email(error_str)
def parseArgs(sys_args):
    """Parse and validate the command line.

    Args:
        sys_args: full argument vector; element 0 is the program name.

    Returns:
        The argparse namespace. Invalid option combinations are reported
        through parser.error(), which exits the program.
    """
    parser = argparse.ArgumentParser(
        prog=sys_args[0],
        description=('Query the Chromium GPU Bots Waterfall, output '
                     'potential problems, and optionally repeat '
                     'automatically and/or '
                     'email notifications of results.'))

    parser.add_argument(
        '--repeat-delay', type=int, dest='repeat_delay',
        help='How often to automatically re-run the script, in minutes.')

    parser.add_argument(
        '--email-from', type=str, dest='email_from',
        help=('Email address to send from. Requires also specifying '
              "'--email-to'."))

    parser.add_argument(
        '--email-to', type=str, dest='email_to', nargs='+',
        help=('Email address(es) to send to. Requires also specifying '
              "'--email-from'"))

    parser.add_argument(
        '--send-email-for-recovered-offline-bots',
        dest='send_email_for_recovered_offline_bots', action='store_true',
        help=('Send an email out when a bot which has been offline for more '
              'than 1 hour goes back online.'))

    parser.add_argument(
        '--send-email-for-recovered-failing-bots',
        dest='send_email_for_recovered_failing_bots', action='store_true',
        help='Send an email when a failing bot recovers.')

    parser.add_argument(
        '--send-email-on-error',
        dest='send_email_on_error', action='store_true',
        help=('Send an email when the script has an error. For example, if '
              'the server is unreachable.'))

    password_help = (
        'File containing the plaintext password of the source email '
        "account. By default, '%s' will be tried. If it does not exist, "
        'you will be prompted. If you opt to store your password on disk '
        'in plaintext, use of a dummy account is strongly recommended.')
    parser.add_argument(
        '--email-password-file', dest='email_password_file',
        help=password_help % Emailer.DEFAULT_EMAIL_PASSWORD_FILE)

    results_help = (
        'File to store the results of the previous invocation of '
        "this script. By default, '%s' will be used.")
    parser.add_argument(
        '--previous-results-file', dest='previous_results_file',
        help=results_help % GpuBotPoller.DEFAULT_PREVIOUS_RESULTS_FILE)

    args = parser.parse_args(sys_args[1:])

    has_sender = args.email_from is not None
    has_recipients = args.email_to is not None

    # parser.error() exits, so only the first violated rule is reported.
    if has_sender and not has_recipients:
        parser.error('--email-from requires --email-to.')
    if has_recipients and not has_sender:
        parser.error('--email-to requires --email-from.')
    if not has_sender and args.send_email_for_recovered_offline_bots:
        parser.error('--send-email-for-recovered-offline-bots requires '
                     '--email-to and --email-from.')
    if not has_sender and args.send_email_for_recovered_failing_bots:
        parser.error('--send-email-for-recovered-failing-bots '
                     'requires --email-to and --email-from.')
    if not has_sender and args.send_email_on_error:
        parser.error('--send-email-on-error '
                     'requires --email-to and --email-from.')
    if (args.email_password_file and
            not os.path.isfile(args.email_password_file)):
        parser.error('File does not exist: %s' % args.email_password_file)

    return args
def main(sys_args):
    """Run the bot check once, or repeatedly when --repeat-delay is given.

    Returns:
        1 when the email login test fails, 0 otherwise.
    """
    args = parseArgs(sys_args)

    emailer = None
    if not (args.email_from is None or args.email_to is None):
        emailer = Emailer(args.email_from, args.email_to,
                          args.email_password_file)

        # Fail fast on bad credentials instead of at the first notification.
        try:
            emailer.testEmailLogin()
        except Exception as e:
            print('Error logging into email account: %s' % str(e))
            return 1

    poller = GpuBotPoller(emailer,
                          args.send_email_for_recovered_offline_bots,
                          args.send_email_for_recovered_failing_bots,
                          args.send_email_on_error,
                          args.previous_results_file)

    poller.checkBots()
    while args.repeat_delay is not None:
        print('Will run again in %d minutes...\n' % args.repeat_delay)
        time.sleep(args.repeat_delay * 60)
        poller.checkBots()

    return 0
if __name__ == '__main__':
    # Hand main()'s return value to the shell as the process exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)