3 # Copyright 2014 The Chromium Authors. All rights reserved.
4 # Use of this source code is governed by a BSD-style license that can be
5 # found in the LICENSE file.
# Password file consulted by _getEmailPassword when no explicit password file
# is given.
DEFAULT_EMAIL_PASSWORD_FILE = '.email_password'
# 'host:port' string handed to smtplib.SMTP when connecting.
GMAIL_SMTP_SERVER = 'smtp.gmail.com:587'
# Value placed in the Subject header of every message built by send_email.
SUBJECT = 'Chrome GPU Bots Notification'
def __init__(self, email_from, email_to, email_password_file):
    """Remember sender and recipients and resolve the sender's password.

    Args:
      email_from: address used for the From header, the SMTP login and the
          envelope sender.
      email_to: recipients; send_email joins this value with ',' for the To
          header, so presumably a list of address strings -- confirm against
          the caller.
      email_password_file: path handed to Emailer._getEmailPassword; may be
          None, in which case that helper falls back to
          Emailer.DEFAULT_EMAIL_PASSWORD_FILE.
    """
    self.email_from, self.email_to = email_from, email_to
    resolved_password = Emailer._getEmailPassword(email_password_file)
    self.email_password = resolved_password
def format_email_body(time_str, offline_str, failed_str, noteworthy_str):
    """Concatenate the four report sections, in argument order, into one body.

    Each argument is rendered with '%s'; no separator is inserted between the
    sections beyond what they already contain.
    """
    sections = (time_str, offline_str, failed_str, noteworthy_str)
    return '%s%s%s%s' % sections
32 def send_email(self
, body
):
33 message
= 'From: %s\r\nTo: %s\r\nSubject: %s\r\n\r\n%s' % (self
.email_from
,
34 ','.join(self
.email_to
), Emailer
.SUBJECT
, body
)
37 server
= smtplib
.SMTP(Emailer
.GMAIL_SMTP_SERVER
)
39 server
.login(self
.email_from
, self
.email_password
)
40 server
.sendmail(self
.email_from
, self
.email_to
, message
)
42 except Exception as e
:
43 print 'Error sending email: %s' % str(e
)
45 def testEmailLogin(self
):
46 server
= smtplib
.SMTP(Emailer
.GMAIL_SMTP_SERVER
)
48 server
.login(self
.email_from
, self
.email_password
)
52 def _getEmailPassword(email_password_file
):
55 password_file
= (email_password_file
if email_password_file
is not None
56 else Emailer
.DEFAULT_EMAIL_PASSWORD_FILE
)
58 if os
.path
.isfile(password_file
):
59 with
open(password_file
, 'r') as f
:
60 password
= f
.read().strip()
62 password
= getpass
.getpass(
63 'Please enter email password for source email account: ')
68 def __init__(self
, waterfall_name
, bot_name
, bot_data
):
69 self
.waterfall_name
= waterfall_name
70 self
.bot_name
= bot_name
71 self
.bot_data
= bot_data
73 self
._hours
_since
_last
_run
= None
74 self
.failure_string
= None
def setEndTime(self, end_time):
    """Store the build end time and the rough number of hours since then.

    Args:
      end_time: end time of the build. It is passed to roughTimeDiffInHours
          together with time.localtime(), so presumably a time.struct_time
          expressed in local time -- confirm against the callers.
    """
    self._end_time = end_time
    now = time.localtime()
    hours_elapsed = roughTimeDiffInHours(end_time, now)
    self._hours_since_last_run = hours_elapsed
def getHoursSinceLastRun(self):
    """Return the hours-since-last-run value stored on this bot.

    The value is whatever was last assigned to _hours_since_last_run (for
    example by setEndTime); nothing is recomputed here.
    """
    hours = self._hours_since_last_run
    return hours
90 dict = {'waterfall_name': self
.waterfall_name
, 'bot_name': self
.bot_name
}
92 if self
._end
_time
is not None:
93 dict['end_time'] = serialTime(self
._end
_time
)
94 dict['hours_since_last_run'] = self
._hours
_since
_last
_run
96 if self
.failure_string
is not None:
97 dict['failure_string'] = self
.failure_string
99 if self
.bot_url
is not None:
100 dict['bot_url'] = self
.bot_url
102 if self
.build_url
is not None:
103 dict['build_url'] = self
.build_url
109 gpu_bot
= GpuBot(dict['waterfall_name'], dict['bot_name'], None)
111 if 'end_time' in dict:
112 gpu_bot
._end
_time
= unserializeTime(dict['end_time'])
114 if 'hours_since_last_run' in dict:
115 gpu_bot
._hours
_since
_last
_run
= dict['hours_since_last_run']
117 if 'failure_string' in dict:
118 gpu_bot
.failure_string
= dict['failure_string']
120 if 'bot_url' in dict:
121 gpu_bot
.bot_url
= dict['bot_url']
123 if 'build_url' in dict:
124 gpu_bot
.build_url
= dict['build_url']
def errorNoMostRecentBuild(waterfall_name, bot_name):
    """Report on stdout that no most recent build is available for a bot.

    Args:
      waterfall_name: name of the waterfall the bot belongs to.
      bot_name: name of the bot.
    """
    bot_id = '%s::%s' % (waterfall_name, bot_name)
    # A single parenthesized argument prints the same text under the
    # Python 2 print statement used throughout this file.
    print('No most recent build available: %s' % bot_id)
132 BASE_URL
= 'http://build.chromium.org/p/'
133 BASE_BUILD_URL
= BASE_URL
+ '%s/builders/%s'
134 SPECIFIC_BUILD_URL
= BASE_URL
+ '%s/builders/%s/builds/%s'
135 BASE_JSON_BUILDERS_URL
= BASE_URL
+ '%s/json/builders'
136 BASE_JSON_BUILDS_URL
= BASE_URL
+ '%s/json/builders/%s/builds'
137 REGULAR_WATERFALLS
= ['chromium.gpu', 'chromium.gpu.fyi']
138 WEBKIT_GPU_BOTS
= ['GPU Win Builder',
139 'GPU Win Builder (dbg)',
141 'GPU Win7 (dbg) (NVIDIA)',
143 'GPU Mac Builder (dbg)',
147 'GPU Linux Builder (dbg)',
148 'GPU Linux (NVIDIA)',
149 'GPU Linux (dbg) (NVIDIA)']
150 FILTERED_WATERFALLS
= [('chromium.webkit', WEBKIT_GPU_BOTS
)]
153 def getJsonFromUrl(url
):
154 conn
= urllib2
.urlopen(url
)
157 return json
.loads(result
)
def getBuildersJsonForWaterfall(waterfall):
    """Fetch the builders JSON for one waterfall.

    Args:
      waterfall: waterfall name substituted into
          Waterfall.BASE_JSON_BUILDERS_URL.

    Returns:
      Whatever Waterfall.getJsonFromUrl returns for the builders URL with
      '?filter' appended.
    """
    url_template = Waterfall.BASE_JSON_BUILDERS_URL + '%s'
    url = url_template % (waterfall, '?filter')
    return Waterfall.getJsonFromUrl(url)
166 def getLastNBuildsForBuilder(n
, waterfall
, builder
):
173 querystring
+= 'select=-%d&' % (i
+ 1)
175 querystring
+= 'filter'
177 return Waterfall
.getJsonFromUrl((Waterfall
.BASE_JSON_BUILDS_URL
+ '%s') %
178 (waterfall
, urllib
.quote(builder
), querystring
))
181 def getFilteredBuildersJsonForWaterfall(waterfall
, filter):
184 for bot_name
in filter:
185 querystring
+= 'select=%s&' % urllib
.quote(bot_name
)
187 querystring
+= 'filter'
189 return Waterfall
.getJsonFromUrl((Waterfall
.BASE_JSON_BUILDERS_URL
+ '%s')
190 % (waterfall
, querystring
))
194 allbots
= {k
: Waterfall
.getBuildersJsonForWaterfall(k
)
195 for k
in Waterfall
.REGULAR_WATERFALLS
}
197 filteredbots
= {k
[0]:
198 Waterfall
.getFilteredBuildersJsonForWaterfall(k
[0], k
[1])
199 for k
in Waterfall
.FILTERED_WATERFALLS
}
201 allbots
.update(filteredbots
)
206 def getOfflineBots(bots
):
209 for waterfall_name
in bots
:
210 waterfall
= bots
[waterfall_name
]
212 for bot_name
in waterfall
:
213 bot
= waterfall
[bot_name
]
215 if bot
['state'] != 'offline':
218 gpu_bot
= GpuBot(waterfall_name
, bot_name
, bot
)
219 gpu_bot
.bot_url
= Waterfall
.BASE_BUILD_URL
% (waterfall_name
,
220 urllib
.quote(bot_name
))
222 most_recent_build
= Waterfall
.getMostRecentlyCompletedBuildForBot(
225 if (most_recent_build
and 'times' in most_recent_build
and
226 most_recent_build
['times']):
227 gpu_bot
.setEndTime(time
.localtime(most_recent_build
['times'][1]))
229 errorNoMostRecentBuild(waterfall_name
, bot_name
)
231 offline_bots
.append(gpu_bot
)
236 def getMostRecentlyCompletedBuildForBot(bot
):
237 if bot
.bot_data
is not None and 'most_recent_build' in bot
.bot_data
:
238 return bot
.bot_data
['most_recent_build']
240 # Unfortunately, the JSON API doesn't provide a "most recent completed
241 # build" call. We just have to get some number of the most recent (including
242 # current, in-progress builds) and give up if that's not enough.
244 builds
= Waterfall
.getLastNBuildsForBuilder(NUM_BUILDS
, bot
.waterfall_name
,
247 for i
in range(NUM_BUILDS
):
248 current_build_name
= '-%d' % (i
+ 1)
249 current_build
= builds
[current_build_name
]
251 if 'results' in current_build
and current_build
['results'] is not None:
252 if bot
.bot_data
is not None:
253 bot
.bot_data
['most_recent_build'] = current_build
260 def getFailedBots(bots
):
263 for waterfall_name
in bots
:
264 waterfall
= bots
[waterfall_name
]
266 for bot_name
in waterfall
:
267 bot
= waterfall
[bot_name
]
268 gpu_bot
= GpuBot(waterfall_name
, bot_name
, bot
)
269 gpu_bot
.bot_url
= Waterfall
.BASE_BUILD_URL
% (waterfall_name
,
270 urllib
.quote(bot_name
))
272 most_recent_build
= Waterfall
.getMostRecentlyCompletedBuildForBot(
275 if (most_recent_build
and 'text' in most_recent_build
and
276 'failed' in most_recent_build
['text']):
277 gpu_bot
.failure_string
= ' '.join(most_recent_build
['text'])
278 gpu_bot
.build_url
= Waterfall
.SPECIFIC_BUILD_URL
% (waterfall_name
,
279 urllib
.quote(bot_name
), most_recent_build
['number'])
280 failed_bots
.append(gpu_bot
)
281 elif not most_recent_build
:
282 errorNoMostRecentBuild(waterfall_name
, bot_name
)
287 return time
.strftime("%a, %d %b %Y %H:%M:%S", t
)
289 def roughTimeDiffInHours(t1
, t2
):
293 datetimes
.append(datetime
.datetime(t
.tm_year
, t
.tm_mon
, t
.tm_mday
,
294 t
.tm_hour
, t
.tm_min
, t
.tm_sec
))
296 datetime_diff
= datetimes
[0] - datetimes
[1]
298 hours
= float(datetime_diff
.total_seconds()) / 3600.0
303 s
= ' %s::%s\n' % (bot
.waterfall_name
, bot
.bot_name
)
305 if bot
.failure_string
is not None:
306 s
+= ' failure: %s\n' % bot
.failure_string
308 if bot
.getEndTime() is not None:
309 s
+= (' last build end time: %s (roughly %f hours ago)\n' %
310 (formatTime(bot
.getEndTime()), bot
.getHoursSinceLastRun()))
312 if bot
.bot_url
is not None:
313 s
+= ' bot url: %s\n' % bot
.bot_url
315 if bot
.build_url
is not None:
316 s
+= ' build url: %s\n' % bot
.build_url
321 def getBotsStr(bots
):
def getOfflineBotsStr(offline_bots):
    """Return the 'Offline bots:' heading followed by getBotsStr's listing."""
    listing = getBotsStr(offline_bots)
    return 'Offline bots:\n%s' % listing
def getFailedBotsStr(failed_bots):
    """Return the 'Failed bots:' heading followed by getBotsStr's listing."""
    listing = getBotsStr(failed_bots)
    return 'Failed bots:\n%s' % listing
336 def getBotDicts(bots
):
340 dicts
.append(bot
.toDict())
344 def unserializeTime(t
):
345 return time
.struct_time((t
['year'], t
['mon'], t
['day'], t
['hour'], t
['min'],
349 return {'year': t
.tm_year
, 'mon': t
.tm_mon
, 'day': t
.tm_mday
,
350 'hour': t
.tm_hour
, 'min': t
.tm_min
, 'sec': t
.tm_sec
}
def getSummary(offline_bots, failed_bots):
    """Build the summary dict of the current offline and failed bots.

    Returns:
      A dict with the keys 'offline' and 'failed', each holding the result of
      getBotDicts for the corresponding argument.
    """
    summary = {}
    summary['offline'] = getBotDicts(offline_bots)
    summary['failed'] = getBotDicts(failed_bots)
    return summary
357 def findBot(name
, lst
):
359 if bot
.bot_name
== name
:
364 def getNoteworthyEvents(offline_bots
, failed_bots
, previous_results
):
365 CRITICAL_NUM_HOURS
= 1.0
367 previous_offline
= (previous_results
['offline'] if 'offline'
368 in previous_results
else [])
370 previous_failures
= (previous_results
['failed'] if 'failed'
371 in previous_results
else [])
373 noteworthy_offline
= []
374 for bot
in offline_bots
:
375 if bot
.getHoursSinceLastRun() >= CRITICAL_NUM_HOURS
:
376 previous_bot
= findBot(bot
.bot_name
, previous_offline
)
378 if (previous_bot
is None or
379 previous_bot
.getHoursSinceLastRun() < CRITICAL_NUM_HOURS
):
380 noteworthy_offline
.append(bot
)
382 noteworthy_new_failures
= []
383 for bot
in failed_bots
:
384 previous_bot
= findBot(bot
.bot_name
, previous_failures
)
386 if previous_bot
is None:
387 noteworthy_new_failures
.append(bot
)
389 noteworthy_new_offline_recoveries
= []
390 for bot
in previous_offline
:
391 if bot
.getHoursSinceLastRun() < CRITICAL_NUM_HOURS
:
394 current_bot
= findBot(bot
.bot_name
, offline_bots
)
395 if current_bot
is None:
396 noteworthy_new_offline_recoveries
.append(bot
)
398 noteworthy_new_failure_recoveries
= []
399 for bot
in previous_failures
:
400 current_bot
= findBot(bot
.bot_name
, failed_bots
)
402 if current_bot
is None:
403 noteworthy_new_failure_recoveries
.append(bot
)
405 return {'offline': noteworthy_offline
, 'failed': noteworthy_new_failures
,
406 'recovered_failures': noteworthy_new_failure_recoveries
,
407 'recovered_offline': noteworthy_new_offline_recoveries
}
409 def getNoteworthyStr(noteworthy_events
):
412 if noteworthy_events
['offline']:
413 s
+= 'IMPORTANT bots newly offline for over an hour:\n'
415 for bot
in noteworthy_events
['offline']:
420 if noteworthy_events
['failed']:
421 s
+= 'IMPORTANT new failing bots:\n'
423 for bot
in noteworthy_events
['failed']:
428 if noteworthy_events
['recovered_offline']:
429 s
+= 'IMPORTANT newly recovered previously offline bots:\n'
431 for bot
in noteworthy_events
['recovered_offline']:
436 if noteworthy_events
['recovered_failures']:
437 s
+= 'IMPORTANT newly recovered failing bots:\n'
439 for bot
in noteworthy_events
['recovered_failures']:
446 def dictsToBots(bots
):
448 for bot
in bots
['offline']:
449 offline_bots
.append(GpuBot
.fromDict(bot
))
452 for bot
in bots
['failed']:
453 failed_bots
.append(GpuBot
.fromDict(bot
))
455 return {'offline': offline_bots
, 'failed': failed_bots
}
# Results file used by writeResults and getPreviousResults when the poller
# was constructed with previous_results_file set to None.
DEFAULT_PREVIOUS_RESULTS_FILE = '.check_gpu_bots_previous_results'
def __init__(self, emailer, send_email_for_recovered_offline_bots,
             send_email_for_recovered_failing_bots, send_email_on_error,
             previous_results_file):
    """Store the emailer and the notification options on the poller.

    Args:
      emailer: object whose send_email method is used for notifications; the
          poll code checks it against None before sending a report.
      send_email_for_recovered_offline_bots: when true, shouldEmail also
          considers 'recovered_offline' events.
      send_email_for_recovered_failing_bots: when true, shouldEmail also
          considers 'recovered_failures' events.
      send_email_on_error: when true, errors raised while polling are emailed.
      previous_results_file: path of the results file, or None to use
          GpuBotPoller.DEFAULT_PREVIOUS_RESULTS_FILE.
    """
    self.emailer = emailer
    self.previous_results_file = previous_results_file
    self.send_email_on_error = send_email_on_error

    recovered_offline = send_email_for_recovered_offline_bots
    recovered_failing = send_email_for_recovered_failing_bots
    self.send_email_for_recovered_offline_bots = recovered_offline
    self.send_email_for_recovered_failing_bots = recovered_failing
474 def shouldEmail(self
, noteworthy_events
):
475 if noteworthy_events
['offline'] or noteworthy_events
['failed']:
478 if (self
.send_email_for_recovered_offline_bots
and
479 noteworthy_events
['recovered_offline']):
482 if (self
.send_email_for_recovered_failing_bots
and
483 noteworthy_events
['recovered_failures']):
def writeResults(self, summary):
    """Persist the current summary as JSON for the next invocation to read.

    Args:
      summary: JSON-serializable object (see getSummary). It is written to
          self.previous_results_file, or to
          GpuBotPoller.DEFAULT_PREVIOUS_RESULTS_FILE when that is None.

    Raises:
      TypeError or ValueError from json.dumps when summary cannot be
      serialized; in that case the existing results file is left untouched.
    """
    results_file = self.previous_results_file
    if results_file is None:
        results_file = GpuBotPoller.DEFAULT_PREVIOUS_RESULTS_FILE

    # Serialize before opening: open(..., 'w') truncates the file, so a
    # serialization failure after that point would wipe the previous results.
    serialized = json.dumps(summary)

    with open(results_file, 'w') as f:
        f.write(serialized)
def getPreviousResults(self):
    """Load the results saved by the previous invocation, if any.

    The file read is self.previous_results_file, or
    GpuBotPoller.DEFAULT_PREVIOUS_RESULTS_FILE when that is None.

    Returns:
      The result of dictsToBots applied to the decoded JSON, or an empty dict
      when the file does not exist or its contents cannot be used.
    """
    results_path = self.previous_results_file
    if results_path is None:
        results_path = GpuBotPoller.DEFAULT_PREVIOUS_RESULTS_FILE

    if not os.path.isfile(results_path):
        return {}

    with open(results_path, 'r') as f:
        contents = f.read()

    try:
        raw_results = json.loads(contents)
        return dictsToBots(raw_results)
    except (ValueError, KeyError, TypeError) as e:
        # The poll loop reads the previous results before it rewrites them,
        # so raising here would abort every later poll and the bad file
        # would never be replaced. Treat it as "no previous results".
        print('Ignoring unreadable previous results file %s: %s' %
              (results_path, str(e)))
        return {}
509 time_str
= 'Current time: %s\n\n' % (formatTime(time
.localtime()))
513 bots
= Waterfall
.getAllGpuBots()
515 offline_bots
= Waterfall
.getOfflineBots(bots
)
516 offline_str
= getOfflineBotsStr(offline_bots
)
519 failed_bots
= Waterfall
.getFailedBots(bots
)
520 failed_str
= getFailedBotsStr(failed_bots
)
523 previous_results
= self
.getPreviousResults()
524 noteworthy_events
= getNoteworthyEvents(offline_bots
, failed_bots
,
527 noteworthy_str
= getNoteworthyStr(noteworthy_events
)
530 summary
= getSummary(offline_bots
, failed_bots
)
531 self
.writeResults(summary
)
533 if (self
.emailer
is not None and self
.shouldEmail(noteworthy_events
)):
534 self
.emailer
.send_email(Emailer
.format_email_body(time_str
, offline_str
,
535 failed_str
, noteworthy_str
))
536 except Exception as e
:
537 error_str
= 'Error: %s' % str(e
)
540 if self
.send_email_on_error
:
541 self
.emailer
.send_email(error_str
)
543 def parseArgs(sys_args
):
544 parser
= argparse
.ArgumentParser(prog
=sys_args
[0],
545 description
='Query the Chromium GPU Bots Waterfall, output ' +
546 'potential problems, and optionally repeat automatically and/or ' +
547 'email notifications of results.')
549 parser
.add_argument('--repeat-delay', type=int, dest
='repeat_delay',
551 help='How often to automatically re-run the script, in minutes.')
553 parser
.add_argument('--email-from', type=str, dest
='email_from',
555 help='Email address to send from. Requires also specifying ' +
558 parser
.add_argument('--email-to', type=str, dest
='email_to', required
=False,
560 help='Email address(es) to send to. Requires also specifying ' +
563 parser
.add_argument('--send-email-for-recovered-offline-bots',
564 dest
='send_email_for_recovered_offline_bots', action
='store_true',
566 help='Send an email out when a bot which has been offline for more ' +
567 'than 1 hour goes back online.')
569 parser
.add_argument('--send-email-for-recovered-failing-bots',
570 dest
='send_email_for_recovered_failing_bots',
571 action
='store_true', default
=False,
572 help='Send an email when a failing bot recovers.')
574 parser
.add_argument('--send-email-on-error',
575 dest
='send_email_on_error',
576 action
='store_true', default
=False,
577 help='Send an email when the script has an error. For example, if ' +
578 'the server is unreachable.')
580 parser
.add_argument('--email-password-file',
581 dest
='email_password_file',
583 help=(('File containing the plaintext password of the source email ' +
584 'account. By default, \'%s\' will be tried. If it does not exist, ' +
585 'you will be prompted. If you opt to store your password on disk ' +
586 'in plaintext, use of a dummy account is strongly recommended.')
587 % Emailer
.DEFAULT_EMAIL_PASSWORD_FILE
))
589 parser
.add_argument('--previous-results-file',
590 dest
='previous_results_file',
592 help=(('File to store the results of the previous invocation of ' +
593 'this script. By default, \'%s\' will be used.')
594 % GpuBotPoller
.DEFAULT_PREVIOUS_RESULTS_FILE
))
596 args
= parser
.parse_args(sys_args
[1:])
598 if args
.email_from
is not None and args
.email_to
is None:
599 parser
.error('--email-from requires --email-to.')
600 elif args
.email_to
is not None and args
.email_from
is None:
601 parser
.error('--email-to requires --email-from.')
602 elif args
.email_from
is None and args
.send_email_for_recovered_offline_bots
:
603 parser
.error('--send-email-for-recovered-offline-bots requires ' +
604 '--email-to and --email-from.')
605 elif (args
.email_from
is None and args
.send_email_for_recovered_failing_bots
):
606 parser
.error('--send-email-for-recovered-failing-bots ' +
607 'requires --email-to and --email-from.')
608 elif (args
.email_from
is None and args
.send_email_on_error
):
609 parser
.error('--send-email-on-error ' +
610 'requires --email-to and --email-from.')
611 elif (args
.email_password_file
and
612 not os
.path
.isfile(args
.email_password_file
)):
613 parser
.error('File does not exist: %s' % args
.email_password_file
)
618 args
= parseArgs(sys_args
)
621 if args
.email_from
is not None and args
.email_to
is not None:
622 emailer
= Emailer(args
.email_from
, args
.email_to
, args
.email_password_file
)
625 emailer
.testEmailLogin()
626 except Exception as e
:
627 print 'Error logging into email account: %s' % str(e
)
630 poller
= GpuBotPoller(emailer
,
631 args
.send_email_for_recovered_offline_bots
,
632 args
.send_email_for_recovered_failing_bots
,
633 args
.send_email_on_error
,
634 args
.previous_results_file
)
639 if args
.repeat_delay
is None:
642 print 'Will run again in %d minutes...\n' % args
.repeat_delay
643 time
.sleep(args
.repeat_delay
* 60)
if __name__ == '__main__':
    # Pass the full argv (program name included) to main and use its return
    # value as the process exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)