Unify expand and collapse icons.
[chromium-blink-merge.git] / gpu / tools / check_gpu_bots.py
bloba072eae3eeb691da28bab15f2b78256cc2255ca3
1 #!/usr/bin/env python
3 # Copyright 2014 The Chromium Authors. All rights reserved.
4 # Use of this source code is governed by a BSD-style license that can be
5 # found in the LICENSE file.
7 import argparse
8 import datetime
9 import getpass
10 import json
11 import os
12 import smtplib
13 import sys
14 import time
15 import urllib
16 import urllib2
class Emailer:
  """Sends notification emails through the Gmail SMTP server.

  The password of the sending account is loaded once, at construction time,
  either from a file or from an interactive prompt.
  """

  DEFAULT_EMAIL_PASSWORD_FILE = '.email_password'
  GMAIL_SMTP_SERVER = 'smtp.gmail.com:587'
  SUBJECT = 'Chrome GPU Bots Notification'

  def __init__(self, email_from, email_to, email_password_file):
    """Stores the addresses and loads the account password.

    Args:
      email_from: Address to send from; also used as the SMTP login name.
      email_to: List of recipient addresses.
      email_password_file: Path of a file holding the password, or None to
          fall back to DEFAULT_EMAIL_PASSWORD_FILE.
    """
    self.email_from = email_from
    self.email_to = email_to
    self.email_password = Emailer._getEmailPassword(email_password_file)

  @staticmethod
  def format_email_body(time_str, offline_str, failed_str, noteworthy_str):
    """Returns the four report sections concatenated in the given order."""
    return '%s%s%s%s' % (time_str, offline_str, failed_str, noteworthy_str)

  def _openSession(self):
    """Returns an SMTP connection that has done STARTTLS and logged in.

    The connection is closed before the exception propagates if either step
    fails, so a failed login never leaves a socket behind.
    """
    server = smtplib.SMTP(Emailer.GMAIL_SMTP_SERVER)
    try:
      server.starttls()
      server.login(self.email_from, self.email_password)
    except Exception:
      server.close()
      raise
    return server

  def send_email(self, body):
    """Sends |body| to all recipients.

    Failures are reported on stdout rather than raised, so a mail problem
    never aborts the caller.
    """
    message = 'From: %s\r\nTo: %s\r\nSubject: %s\r\n\r\n%s' % (
        self.email_from, ','.join(self.email_to), Emailer.SUBJECT, body)

    try:
      server = self._openSession()
      try:
        server.sendmail(self.email_from, self.email_to, message)
        server.quit()
      finally:
        # quit() already closes on success; this only matters when sendmail()
        # or quit() raised. close() is a no-op on a closed connection.
        server.close()
    except Exception as e:
      print('Error sending email: %s' % str(e))

  def testEmailLogin(self):
    """Logs in and out again; raises if the server rejects the session."""
    server = self._openSession()
    try:
      server.quit()
    finally:
      server.close()

  @staticmethod
  def _getEmailPassword(email_password_file):
    """Returns the password from the file if it exists, else prompts for it.

    Args:
      email_password_file: Path to read, or None for the default file name.

    Returns:
      The file contents with surrounding whitespace stripped, or whatever the
      user typed at the prompt.
    """
    password_file = (email_password_file if email_password_file is not None
                     else Emailer.DEFAULT_EMAIL_PASSWORD_FILE)

    if os.path.isfile(password_file):
      with open(password_file, 'r') as f:
        return f.read().strip()

    return getpass.getpass(
        'Please enter email password for source email account: ')
class GpuBot:
  """One bot on one waterfall, plus what is known about its latest build."""

  # Optional attributes that are copied verbatim to and from the dict form.
  _OPTIONAL_FIELDS = ('failure_string', 'bot_url', 'build_url')

  def __init__(self, waterfall_name, bot_name, bot_data):
    """Creates a bot with no end time, failure text or URLs recorded yet.

    Args:
      waterfall_name: Name of the waterfall the bot belongs to.
      bot_name: Name of the bot (builder).
      bot_data: Builder JSON for the bot, or None when it is not available.
    """
    self.waterfall_name = waterfall_name
    self.bot_name = bot_name
    self.bot_data = bot_data
    self.failure_string = None
    self.bot_url = None
    self.build_url = None
    self._end_time = None
    self._hours_since_last_run = None

  def getEndTime(self):
    """Returns the recorded end time of the last build, or None."""
    return self._end_time

  def setEndTime(self, end_time):
    """Records |end_time| and recomputes the hours elapsed since then."""
    self._end_time = end_time
    now = time.localtime()
    self._hours_since_last_run = roughTimeDiffInHours(end_time, now)

  def getHoursSinceLastRun(self):
    """Returns the hours between the end time and when it was set, or None."""
    return self._hours_since_last_run

  def toDict(self):
    """Returns a dict of this bot's fields; unset fields are omitted."""
    result = {'waterfall_name': self.waterfall_name, 'bot_name': self.bot_name}

    if self._end_time is not None:
      result['end_time'] = serialTime(self._end_time)
      result['hours_since_last_run'] = self._hours_since_last_run

    for field in GpuBot._OPTIONAL_FIELDS:
      value = getattr(self, field)
      if value is not None:
        result[field] = value

    return result

  @staticmethod
  def fromDict(dict):
    """Rebuilds a bot from the output of toDict(); bot_data is left as None."""
    restored = GpuBot(dict['waterfall_name'], dict['bot_name'], None)

    if 'end_time' in dict:
      restored._end_time = unserializeTime(dict['end_time'])

    if 'hours_since_last_run' in dict:
      restored._hours_since_last_run = dict['hours_since_last_run']

    for field in GpuBot._OPTIONAL_FIELDS:
      if field in dict:
        setattr(restored, field, dict[field])

    return restored
def errorNoMostRecentBuild(waterfall_name, bot_name):
  """Prints a notice that no completed build is available for the bot."""
  bot_id = '%s::%s' % (waterfall_name, bot_name)
  print('No most recent build available: ' + bot_id)
class Waterfall:
  """Fetches builder and build status from the build.chromium.org JSON pages.

  All methods are static; the class groups the URL templates and the lists of
  waterfalls and bots to inspect.
  """

  BASE_URL = 'http://build.chromium.org/p/'
  BASE_BUILD_URL = BASE_URL + '%s/builders/%s'
  SPECIFIC_BUILD_URL = BASE_URL + '%s/builders/%s/builds/%s'
  BASE_JSON_BUILDERS_URL = BASE_URL + '%s/json/builders'
  BASE_JSON_BUILDS_URL = BASE_URL + '%s/json/builders/%s/builds'
  # Waterfalls on which every builder is inspected.
  REGULAR_WATERFALLS = ['chromium.gpu',
                        'tryserver.chromium.gpu',
                        'chromium.gpu.fyi']
  WEBKIT_GPU_BOTS = ['GPU Win Builder',
                     'GPU Win Builder (dbg)',
                     'GPU Win7 (NVIDIA)',
                     'GPU Win7 (dbg) (NVIDIA)',
                     'GPU Mac Builder',
                     'GPU Mac Builder (dbg)',
                     'GPU Mac10.7',
                     'GPU Mac10.7 (dbg)',
                     'GPU Linux Builder',
                     'GPU Linux Builder (dbg)',
                     'GPU Linux (NVIDIA)',
                     'GPU Linux (dbg) (NVIDIA)']
  # (waterfall name, bot names) pairs for which only the listed bots are
  # inspected.
  FILTERED_WATERFALLS = [('chromium.webkit', WEBKIT_GPU_BOTS)]

  @staticmethod
  def getJsonFromUrl(url):
    """Downloads |url| and returns its body parsed as JSON."""
    conn = urllib2.urlopen(url)
    try:
      result = conn.read()
    finally:
      # Release the connection even when the read fails part-way.
      conn.close()
    return json.loads(result)

  @staticmethod
  def getBuildersJsonForWaterfall(waterfall):
    """Returns the builders JSON for every builder on |waterfall|."""
    querystring = '?filter'
    return (Waterfall.getJsonFromUrl((Waterfall.BASE_JSON_BUILDERS_URL + '%s')
                                     % (waterfall, querystring)))

  @staticmethod
  def getLastNBuildsForBuilder(n, waterfall, builder):
    """Returns JSON for the |n| most recent builds of |builder|.

    The builds are requested with the relative selectors -1 .. -n. Returns an
    empty dict, without any request, when |n| is not positive.
    """
    if n <= 0:
      return {}

    querystring = '?'

    for i in range(n):
      querystring += 'select=-%d&' % (i + 1)

    querystring += 'filter'

    return Waterfall.getJsonFromUrl((Waterfall.BASE_JSON_BUILDS_URL + '%s') %
                                    (waterfall, urllib.quote(builder),
                                     querystring))

  @staticmethod
  def getFilteredBuildersJsonForWaterfall(waterfall, filter):
    """Returns the builders JSON for only the bot names listed in |filter|."""
    querystring = '?'

    for bot_name in filter:
      querystring += 'select=%s&' % urllib.quote(bot_name)

    querystring += 'filter'

    return Waterfall.getJsonFromUrl((Waterfall.BASE_JSON_BUILDERS_URL + '%s')
                                    % (waterfall, querystring))

  @staticmethod
  def getAllGpuBots():
    """Returns {waterfall name: builders JSON} for all watched waterfalls."""
    allbots = {k: Waterfall.getBuildersJsonForWaterfall(k)
               for k in Waterfall.REGULAR_WATERFALLS}

    filteredbots = {k[0]:
                    Waterfall.getFilteredBuildersJsonForWaterfall(k[0], k[1])
                    for k in Waterfall.FILTERED_WATERFALLS}

    allbots.update(filteredbots)

    return allbots

  @staticmethod
  def getOfflineBots(bots):
    """Returns a GpuBot for every bot whose 'state' is 'offline'.

    Args:
      bots: Mapping as returned by getAllGpuBots().

    Returns:
      A list of GpuBot objects. The end time is set on a bot when its most
      recent completed build reports 'times'; otherwise a notice is printed
      and the bot is still included.
    """
    offline_bots = []

    for waterfall_name in bots:
      waterfall = bots[waterfall_name]

      for bot_name in waterfall:
        bot = waterfall[bot_name]

        if bot['state'] != 'offline':
          continue

        gpu_bot = GpuBot(waterfall_name, bot_name, bot)
        gpu_bot.bot_url = Waterfall.BASE_BUILD_URL % (waterfall_name,
                                                      urllib.quote(bot_name))

        most_recent_build = Waterfall.getMostRecentlyCompletedBuildForBot(
            gpu_bot)

        if (most_recent_build and 'times' in most_recent_build and
            most_recent_build['times']):
          # Element 1 of 'times' is used as the build's end timestamp.
          gpu_bot.setEndTime(time.localtime(most_recent_build['times'][1]))
        else:
          errorNoMostRecentBuild(waterfall_name, bot_name)

        offline_bots.append(gpu_bot)

    return offline_bots

  @staticmethod
  def getMostRecentlyCompletedBuildForBot(bot):
    """Returns the JSON of the newest build of |bot| that has results.

    The answer is cached in bot.bot_data['most_recent_build'] when bot_data is
    available, so a later call for the same builder data makes no request.

    Returns:
      The build dict, or None if none of the inspected builds has results.
    """
    if bot.bot_data is not None and 'most_recent_build' in bot.bot_data:
      return bot.bot_data['most_recent_build']

    # Unfortunately, the JSON API doesn't provide a "most recent completed
    # build" call. We just have to get some number of the most recent (including
    # current, in-progress builds) and give up if that's not enough.
    NUM_BUILDS = 10
    builds = Waterfall.getLastNBuildsForBuilder(NUM_BUILDS, bot.waterfall_name,
                                                bot.bot_name)

    for i in range(NUM_BUILDS):
      current_build_name = '-%d' % (i + 1)
      # A reply that lacks one of the requested entries is treated like a
      # build without results instead of aborting the whole check.
      current_build = builds.get(current_build_name)

      if current_build is None:
        continue

      if 'results' in current_build and current_build['results'] is not None:
        if bot.bot_data is not None:
          bot.bot_data['most_recent_build'] = current_build

        return current_build

    return None

  @staticmethod
  def getFailedBots(bots):
    """Returns a GpuBot for every bot whose latest completed build failed.

    A build counts as failed when 'failed' is an element of its 'text' list.
    A notice is printed for bots that have no completed build at all.

    Args:
      bots: Mapping as returned by getAllGpuBots().
    """
    failed_bots = []

    for waterfall_name in bots:
      waterfall = bots[waterfall_name]

      for bot_name in waterfall:
        bot = waterfall[bot_name]
        gpu_bot = GpuBot(waterfall_name, bot_name, bot)
        gpu_bot.bot_url = Waterfall.BASE_BUILD_URL % (waterfall_name,
                                                      urllib.quote(bot_name))

        most_recent_build = Waterfall.getMostRecentlyCompletedBuildForBot(
            gpu_bot)

        if (most_recent_build and 'text' in most_recent_build and
            'failed' in most_recent_build['text']):
          gpu_bot.failure_string = ' '.join(most_recent_build['text'])
          gpu_bot.build_url = Waterfall.SPECIFIC_BUILD_URL % (
              waterfall_name, urllib.quote(bot_name),
              most_recent_build['number'])
          failed_bots.append(gpu_bot)
        elif not most_recent_build:
          errorNoMostRecentBuild(waterfall_name, bot_name)

    return failed_bots
def formatTime(t):
  """Formats a time tuple like 'Thu, 02 Jan 2014 03:04:05'."""
  layout = "%a, %d %b %Y %H:%M:%S"
  return time.strftime(layout, t)
def roughTimeDiffInHours(t1, t2):
  """Returns the absolute difference between two time tuples, in hours.

  Only the year, month, day, hour, minute and second fields are used.
  """
  first, second = [
      datetime.datetime(t.tm_year, t.tm_mon, t.tm_mday,
                        t.tm_hour, t.tm_min, t.tm_sec)
      for t in (t1, t2)]

  elapsed_seconds = float((first - second).total_seconds())

  return abs(elapsed_seconds / 3600.0)
def getBotStr(bot):
  """Returns the multi-line text entry describing one bot.

  The entry starts with 'waterfall::bot', then one line for each of failure
  text, last build end time, bot URL and build URL that is set, and ends with
  an empty line.
  """
  lines = [' %s::%s\n' % (bot.waterfall_name, bot.bot_name)]

  if bot.failure_string is not None:
    lines.append(' failure: %s\n' % bot.failure_string)

  end_time = bot.getEndTime()
  if end_time is not None:
    lines.append(' last build end time: %s (roughly %f hours ago)\n' %
                 (formatTime(end_time), bot.getHoursSinceLastRun()))

  if bot.bot_url is not None:
    lines.append(' bot url: %s\n' % bot.bot_url)

  if bot.build_url is not None:
    lines.append(' build url: %s\n' % bot.build_url)

  lines.append('\n')
  return ''.join(lines)
def getBotsStr(bots):
  """Returns the entries of all |bots| in order, followed by an empty line."""
  entries = [getBotStr(bot) for bot in bots]
  return ''.join(entries) + '\n'
def getOfflineBotsStr(offline_bots):
  """Returns the 'Offline bots:' report section for |offline_bots|."""
  heading = 'Offline bots:\n'
  return heading + getBotsStr(offline_bots)
def getFailedBotsStr(failed_bots):
  """Returns the 'Failed bots:' report section for |failed_bots|."""
  heading = 'Failed bots:\n'
  return heading + getBotsStr(failed_bots)
def getBotDicts(bots):
  """Returns a list with the toDict() form of every bot, in order."""
  return [bot.toDict() for bot in bots]
def unserializeTime(t):
  """Rebuilds a time.struct_time from the dict produced by serialTime().

  The weekday and day-of-year fields are derived from the date, so that
  formatting the result with '%a' names the correct day. The DST flag is 0.

  Args:
    t: Dict with the keys 'year', 'mon', 'day', 'hour', 'min' and 'sec'.

  Returns:
    A time.struct_time holding those six values.
  """
  fields = (t['year'], t['mon'], t['day'], t['hour'], t['min'], t['sec'])

  try:
    derived = datetime.datetime(*fields).timetuple()
    weekday, yearday = derived.tm_wday, derived.tm_yday
  except (TypeError, ValueError):
    # Not a valid calendar date: keep the neutral placeholders rather than
    # refusing to load the stored value.
    weekday, yearday = 0, 0

  return time.struct_time(fields + (weekday, yearday, 0))
def serialTime(t):
  """Returns the date and time-of-day fields of a time tuple as a dict."""
  names = ('year', 'mon', 'day', 'hour', 'min', 'sec')
  values = (t.tm_year, t.tm_mon, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec)
  return dict(zip(names, values))
def getSummary(offline_bots, failed_bots):
  """Returns the dict form of both bot lists under 'offline' and 'failed'."""
  summary = {}
  summary['offline'] = getBotDicts(offline_bots)
  summary['failed'] = getBotDicts(failed_bots)
  return summary
def findBot(name, lst):
  """Returns the first bot in |lst| whose bot_name equals |name|, or None.

  Only the bot name is compared; the waterfall name is not considered.
  """
  matches = (bot for bot in lst if bot.bot_name == name)
  return next(matches, None)
def getNoteworthyEvents(offline_bots, failed_bots, previous_results):
  """Compares the current bot status with the previous run's status.

  Bots are matched between runs by waterfall name and bot name together, so
  bots that share a name on different waterfalls are tracked separately.

  Args:
    offline_bots: Bots that are offline now.
    failed_bots: Bots that are failing now.
    previous_results: Dict with optional 'offline' and 'failed' lists of bots
        from the previous run; a missing key counts as an empty list.

  Returns:
    A dict with four lists of bots:
      'offline': offline for at least CRITICAL_NUM_HOURS now, but not so in
          the previous run.
      'failed': failing now but not in the previous run.
      'recovered_offline': offline for at least CRITICAL_NUM_HOURS in the
          previous run and not offline now.
      'recovered_failures': failing in the previous run and not failing now.
  """
  CRITICAL_NUM_HOURS = 1.0

  def offlineTooLong(bot):
    # A bot without a known last run never counts as offline too long.
    hours = bot.getHoursSinceLastRun()
    return hours is not None and hours >= CRITICAL_NUM_HOURS

  def findSameBot(bot, candidates):
    key = (bot.waterfall_name, bot.bot_name)
    for candidate in candidates:
      if (candidate.waterfall_name, candidate.bot_name) == key:
        return candidate
    return None

  previous_offline = previous_results.get('offline', [])
  previous_failures = previous_results.get('failed', [])

  noteworthy_offline = []
  for bot in offline_bots:
    if not offlineTooLong(bot):
      continue

    previous_bot = findSameBot(bot, previous_offline)
    if previous_bot is None or not offlineTooLong(previous_bot):
      noteworthy_offline.append(bot)

  noteworthy_new_failures = [
      bot for bot in failed_bots
      if findSameBot(bot, previous_failures) is None]

  noteworthy_new_offline_recoveries = [
      bot for bot in previous_offline
      if offlineTooLong(bot) and findSameBot(bot, offline_bots) is None]

  noteworthy_new_failure_recoveries = [
      bot for bot in previous_failures
      if findSameBot(bot, failed_bots) is None]

  return {'offline': noteworthy_offline, 'failed': noteworthy_new_failures,
          'recovered_failures': noteworthy_new_failure_recoveries,
          'recovered_offline': noteworthy_new_offline_recoveries}
def getNoteworthyStr(noteworthy_events):
  """Returns the text report for the output of getNoteworthyEvents().

  Every non-empty category contributes a heading, one entry per bot and an
  empty line; empty categories contribute nothing.
  """
  sections = (
      ('offline', 'IMPORTANT bots newly offline for over an hour:\n'),
      ('failed', 'IMPORTANT new failing bots:\n'),
      ('recovered_offline',
       'IMPORTANT newly recovered previously offline bots:\n'),
      ('recovered_failures', 'IMPORTANT newly recovered failing bots:\n'),
  )

  pieces = []
  for key, heading in sections:
    bots = noteworthy_events[key]
    if not bots:
      continue

    pieces.append(heading)
    pieces.extend([getBotStr(bot) for bot in bots])
    pieces.append('\n')

  return ''.join(pieces)
def dictsToBots(bots):
  """Converts the 'offline' and 'failed' lists of bot dicts into GpuBots."""
  restored = {}
  for category in ('offline', 'failed'):
    restored[category] = [GpuBot.fromDict(entry) for entry in bots[category]]
  return restored
class GpuBotPoller:
  """Runs one status check at a time and remembers the previous outcome."""

  DEFAULT_PREVIOUS_RESULTS_FILE = '.check_gpu_bots_previous_results'

  def __init__(self, emailer, send_email_for_recovered_offline_bots,
               send_email_for_recovered_failing_bots, send_email_on_error,
               previous_results_file):
    """Stores the notification settings.

    Args:
      emailer: Emailer used for notifications, or None to send no email.
      send_email_for_recovered_offline_bots: Whether recovered offline bots
          alone justify an email.
      send_email_for_recovered_failing_bots: Whether recovered failing bots
          alone justify an email.
      send_email_on_error: Whether to email when a check raises.
      previous_results_file: Path used to persist results between runs, or
          None to use DEFAULT_PREVIOUS_RESULTS_FILE.
    """
    self.emailer = emailer
    self.send_email_for_recovered_offline_bots = (
        send_email_for_recovered_offline_bots)
    self.send_email_for_recovered_failing_bots = (
        send_email_for_recovered_failing_bots)
    self.send_email_on_error = send_email_on_error
    self.previous_results_file = previous_results_file

  def _resultsFile(self):
    """Returns the configured results path, or the default one."""
    if self.previous_results_file is not None:
      return self.previous_results_file
    return GpuBotPoller.DEFAULT_PREVIOUS_RESULTS_FILE

  def shouldEmail(self, noteworthy_events):
    """Returns True if |noteworthy_events| warrants sending an email.

    New offline or failing bots always do; recoveries only do when the
    matching option was enabled.
    """
    if noteworthy_events['offline'] or noteworthy_events['failed']:
      return True

    if (self.send_email_for_recovered_offline_bots and
        noteworthy_events['recovered_offline']):
      return True

    if (self.send_email_for_recovered_failing_bots and
        noteworthy_events['recovered_failures']):
      return True

    return False

  def writeResults(self, summary):
    """Writes |summary| as JSON to the results file, replacing its content."""
    with open(self._resultsFile(), 'w') as f:
      f.write(json.dumps(summary))

  def getPreviousResults(self):
    """Returns the bots stored by the previous run, or {} if there is no file."""
    previous_results_file = self._resultsFile()

    previous_results = {}
    if os.path.isfile(previous_results_file):
      with open(previous_results_file, 'r') as f:
        previous_results = dictsToBots(json.loads(f.read()))

    return previous_results

  def checkBots(self):
    """Runs one check: query, print the report, persist it, maybe email.

    Any exception raised during the check is printed instead of propagated,
    and additionally emailed when send_email_on_error is set and an emailer is
    available.
    """
    time_str = 'Current time: %s\n\n' % (formatTime(time.localtime()))
    print(time_str)

    try:
      bots = Waterfall.getAllGpuBots()

      offline_bots = Waterfall.getOfflineBots(bots)
      offline_str = getOfflineBotsStr(offline_bots)
      print(offline_str)

      failed_bots = Waterfall.getFailedBots(bots)
      failed_str = getFailedBotsStr(failed_bots)
      print(failed_str)

      # The previous results must be read before they are overwritten below.
      previous_results = self.getPreviousResults()
      noteworthy_events = getNoteworthyEvents(offline_bots, failed_bots,
                                              previous_results)

      noteworthy_str = getNoteworthyStr(noteworthy_events)
      print(noteworthy_str)

      summary = getSummary(offline_bots, failed_bots)
      self.writeResults(summary)

      if (self.emailer is not None and self.shouldEmail(noteworthy_events)):
        self.emailer.send_email(Emailer.format_email_body(
            time_str, offline_str, failed_str, noteworthy_str))
    except Exception as e:
      error_str = 'Error: %s' % str(e)
      print(error_str)

      # Without an emailer there is nowhere to send the error; do not let the
      # handler itself fail.
      if self.send_email_on_error and self.emailer is not None:
        self.emailer.send_email(error_str)
def parseArgs(sys_args):
  """Parses and validates the command line.

  Args:
    sys_args: Full argument vector; element 0 is used as the program name.

  Returns:
    The argparse namespace. Invalid option combinations and a missing
    password file are reported through parser.error(), which exits.
  """
  parser = argparse.ArgumentParser(
      prog=sys_args[0],
      description=('Query the Chromium GPU Bots Waterfall, output '
                   'potential problems, and optionally repeat automatically '
                   'and/or email notifications of results.'))

  parser.add_argument(
      '--repeat-delay', type=int, dest='repeat_delay',
      help='How often to automatically re-run the script, in minutes.')

  parser.add_argument(
      '--email-from', type=str, dest='email_from',
      help=('Email address to send from. Requires also specifying '
            "'--email-to'."))

  parser.add_argument(
      '--email-to', type=str, dest='email_to', nargs='+',
      help=('Email address(es) to send to. Requires also specifying '
            "'--email-from'"))

  parser.add_argument(
      '--send-email-for-recovered-offline-bots',
      dest='send_email_for_recovered_offline_bots', action='store_true',
      default=False,
      help=('Send an email out when a bot which has been offline for more '
            'than 1 hour goes back online.'))

  parser.add_argument(
      '--send-email-for-recovered-failing-bots',
      dest='send_email_for_recovered_failing_bots', action='store_true',
      default=False,
      help='Send an email when a failing bot recovers.')

  parser.add_argument(
      '--send-email-on-error',
      dest='send_email_on_error', action='store_true', default=False,
      help=('Send an email when the script has an error. For example, if '
            'the server is unreachable.'))

  password_help = (
      'File containing the plaintext password of the source email '
      "account. By default, '%s' will be tried. If it does not exist, "
      'you will be prompted. If you opt to store your password on disk '
      'in plaintext, use of a dummy account is strongly recommended.')
  parser.add_argument(
      '--email-password-file', dest='email_password_file',
      help=password_help % Emailer.DEFAULT_EMAIL_PASSWORD_FILE)

  results_help = (
      'File to store the results of the previous invocation of '
      "this script. By default, '%s' will be used.")
  parser.add_argument(
      '--previous-results-file', dest='previous_results_file',
      help=results_help % GpuBotPoller.DEFAULT_PREVIOUS_RESULTS_FILE)

  args = parser.parse_args(sys_args[1:])

  has_from = args.email_from is not None
  has_to = args.email_to is not None

  # parser.error() exits, so only the first failing check is ever reported.
  if has_from and not has_to:
    parser.error('--email-from requires --email-to.')
  if has_to and not has_from:
    parser.error('--email-to requires --email-from.')

  if not has_from:
    if args.send_email_for_recovered_offline_bots:
      parser.error('--send-email-for-recovered-offline-bots requires '
                   '--email-to and --email-from.')
    if args.send_email_for_recovered_failing_bots:
      parser.error('--send-email-for-recovered-failing-bots '
                   'requires --email-to and --email-from.')
    if args.send_email_on_error:
      parser.error('--send-email-on-error '
                   'requires --email-to and --email-from.')

  if (args.email_password_file and
      not os.path.isfile(args.email_password_file)):
    parser.error('File does not exist: %s' % args.email_password_file)

  return args
def main(sys_args):
  """Script entry point.

  Checks the bots once, or repeatedly when --repeat-delay was given.

  Returns:
    1 if email was requested but the login test failed, 0 once checking ends.
  """
  args = parseArgs(sys_args)

  emailer = None
  wants_email = args.email_from is not None and args.email_to is not None
  if wants_email:
    emailer = Emailer(args.email_from, args.email_to, args.email_password_file)

    # Verify the credentials up front instead of at the first notification.
    try:
      emailer.testEmailLogin()
    except Exception as e:
      print('Error logging into email account: %s' % str(e))
      return 1

  poller = GpuBotPoller(emailer,
                        args.send_email_for_recovered_offline_bots,
                        args.send_email_for_recovered_failing_bots,
                        args.send_email_on_error,
                        args.previous_results_file)

  poller.checkBots()
  while args.repeat_delay is not None:
    print('Will run again in %d minutes...\n' % args.repeat_delay)
    time.sleep(args.repeat_delay * 60)
    poller.checkBots()

  return 0
# Run the poller only when executed as a script; its result is the exit code.
if __name__ == '__main__':
  exit_code = main(sys.argv)
  sys.exit(exit_code)