dev-support/flaky-tests/report-flakies.py

   1 #!/usr/bin/env python
   2 ##
   3 # Licensed to the Apache Software Foundation (ASF) under one
   4 # or more contributor license agreements.  See the NOTICE file
   5 # distributed with this work for additional information
   6 # regarding copyright ownership.  The ASF licenses this file
   7 # to you under the Apache License, Version 2.0 (the
   8 # "License"); you may not use this file except in compliance
   9 # with the License.  You may obtain a copy of the License at
  10 #
  11 #     http://www.apache.org/licenses/LICENSE-2.0
  12 #
  13 # Unless required by applicable law or agreed to in writing, software
  14 # distributed under the License is distributed on an "AS IS" BASIS,
  15 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16 # See the License for the specific language governing permissions and
  17 # limitations under the License.
  18
  19 # pylint: disable=invalid-name
  20 # To disable 'invalid constant name' warnings.
  21 # pylint: disable=import-error
  22 # Testing environment may not have all dependencies.
  23
  24 """
  25 This script uses Jenkins REST api to collect test result(s) of given build/builds and generates
  26 flakyness data about unittests.
  27 Print help: report-flakies.py -h
  28 """
  29
  30 import argparse
  31 import logging
  32 import os
  33 import time
  34 from collections import OrderedDict
  35 from jinja2 import Template
  36
  37 import requests
  38
  39 import findHangingTests
  40
  41 parser = argparse.ArgumentParser()
  42 parser.add_argument(
  43     '--urls', metavar='URL', action='append', required=True,
  44     help='Urls to analyze, which can refer to simple projects, multi-configuration projects or '
  45          'individual build run.')
  46 parser.add_argument('--excluded-builds', metavar='n1,n2', action='append',
  47                     help='List of build numbers to exclude (or "None"). Not required, '
  48                          'but if specified, number of uses should be same as that of --urls '
  49                          'since the values are matched.')
  50 parser.add_argument('--max-builds', metavar='n', action='append', type=int,
  51                     help='The maximum number of builds to use (if available on jenkins). Specify '
  52                          '0 to analyze all builds. Not required, but if specified, number of uses '
  53                          'should be same as that of --urls since the values are matched.')
  54 parser.add_argument('--is-yetus', metavar='True/False', action='append', choices=['True', 'False'],
  55                     help='True, if build is yetus style i.e. look for maven output in artifacts; '
  56                          'False, if maven output is in <url>/consoleText itself.')
  57 parser.add_argument(
  58     "--mvn", action="store_true",
  59     help="Writes two strings for including/excluding these flaky tests using maven flags. These "
  60          "strings are written to files so they can be saved as artifacts and easily imported in "
  61          "other projects. Also writes timeout and failing tests in separate files for "
  62          "reference.")
  63 parser.add_argument("-v", "--verbose", help="Prints more logs.", action="store_true")
  64 args = parser.parse_args()
  65
  66 logging.basicConfig()
  67 logger = logging.getLogger(__name__)
  68 if args.verbose:
  69     logger.setLevel(logging.INFO)
  70
  71
  72 def get_bad_tests(build_url, is_yetus):
  73     """
  74     Given url of an executed build, analyzes its maven output, and returns
  75     [list of all tests, list of timeout tests, list of failed tests].
  76     Returns None if can't get maven output from the build or if there is any other error.
  77     """
  78     logger.info("Analyzing %s", build_url)
  79     needed_fields="_class,building"
  80     if is_yetus:
  81         needed_fields+=",artifacts[fileName,relativePath]"
  82     response = requests.get(build_url + "/api/json?tree=" + needed_fields).json()
  83     if response["building"]:
  84         logger.info("Skipping this build since it is in progress.")
  85         return {}
  86     console_url = None
  87     if is_yetus:
  88         for artifact in response["artifacts"]:
  89             if artifact["fileName"] == "patch-unit-root.txt":
  90                 console_url = build_url + "/artifact/" + artifact["relativePath"]
  91                 break
  92         if console_url is None:
  93             logger.info("Can't find 'patch-unit-root.txt' artifact for Yetus build %s\n. Ignoring "
  94                         "this build.", build_url)
  95             return
  96     else:
  97         console_url = build_url + "/consoleText"
  98     build_result = findHangingTests.get_bad_tests(console_url)
  99     if not build_result:
 100         logger.info("Ignoring build %s", build_url)
 101         return
 102     return build_result
 103
 104
 105 def expand_multi_config_projects(cli_args):
 106     """
 107     If any url is of type multi-configuration project (i.e. has key 'activeConfigurations'),
 108     get urls for individual jobs.
 109     """
 110     job_urls = cli_args.urls
 111     excluded_builds_arg = cli_args.excluded_builds
 112     max_builds_arg = cli_args.max_builds
 113     is_yetus_arg = cli_args.is_yetus
 114     if excluded_builds_arg is not None and len(excluded_builds_arg) != len(job_urls):
 115         raise Exception("Number of --excluded-builds arguments should be same as that of --urls "
 116                         "since values are matched.")
 117     if max_builds_arg is not None and len(max_builds_arg) != len(job_urls):
 118         raise Exception("Number of --max-builds arguments should be same as that of --urls "
 119                         "since values are matched.")
 120     final_expanded_urls = []
 121     for (i, job_url) in enumerate(job_urls):
 122         max_builds = 10000  # Some high number
 123         is_yetus = False
 124         if is_yetus_arg is not None:
 125             is_yetus = is_yetus_arg[i] == "True"
 126         if max_builds_arg is not None and max_builds_arg[i] != 0:
 127             max_builds = int(max_builds_arg[i])
 128         excluded_builds = []
 129         if excluded_builds_arg is not None and excluded_builds_arg[i] != "None":
 130             excluded_builds = [int(x) for x in excluded_builds_arg[i].split(",")]
 131         request = requests.get(job_url + "/api/json?tree=_class,activeConfigurations%5Burl%5D")
 132         if request.status_code != 200:
 133             raise Exception("Failed to get job information from jenkins for url '" + job_url +
 134                             "'. Jenkins returned HTTP status " + str(request.status_code))
 135         response = request.json()
 136         if response.has_key("activeConfigurations"):
 137             for config in response["activeConfigurations"]:
 138                 final_expanded_urls.append({'url':config["url"], 'max_builds': max_builds,
 139                                             'excludes': excluded_builds, 'is_yetus': is_yetus})
 140         else:
 141             final_expanded_urls.append({'url':job_url, 'max_builds': max_builds,
 142                                         'excludes': excluded_builds, 'is_yetus': is_yetus})
 143     return final_expanded_urls
 144
 145
 146 # Set of timeout/failed tests across all given urls.
 147 all_timeout_tests = set()
 148 all_failed_tests = set()
 149 all_hanging_tests = set()
 150 # Contains { <url> : { <bad_test> : { 'all': [<build ids>], 'failed': [<build ids>],
 151 #                                     'timeout': [<build ids>], 'hanging': [<builds ids>] } } }
 152 url_to_bad_test_results = OrderedDict()
 153 # Contains { <url> : [run_ids] }
 154 # Used for common min/max build ids when generating sparklines.
 155 url_to_build_ids = OrderedDict()
 156
 157 # Iterates over each url, gets test results and prints flaky tests.
 158 expanded_urls = expand_multi_config_projects(args)
 159 for url_max_build in expanded_urls:
 160     url = url_max_build["url"]
 161     excludes = url_max_build["excludes"]
 162     json_response = requests.get(url + "/api/json?tree=id,builds%5Bnumber,url%5D").json()
 163     if json_response.has_key("builds"):
 164         builds = json_response["builds"]
 165         logger.info("Analyzing job: %s", url)
 166     else:
 167         builds = [{'number': json_response["id"], 'url': url}]
 168         logger.info("Analyzing build : %s", url)
 169     build_id_to_results = {}
 170     num_builds = 0
 171     url_to_build_ids[url] = []
 172     build_ids_without_tests_run = []
 173     for build in builds:
 174         build_id = build["number"]
 175         if build_id in excludes:
 176             continue
 177         result = get_bad_tests(build["url"], url_max_build['is_yetus'])
 178         if not result:
 179             continue
 180         if len(result[0]) > 0:
 181             build_id_to_results[build_id] = result
 182         else:
 183             build_ids_without_tests_run.append(build_id)
 184         num_builds += 1
 185         url_to_build_ids[url].append(build_id)
 186         if num_builds == url_max_build["max_builds"]:
 187             break
 188     url_to_build_ids[url].sort()
 189
 190     # Collect list of bad tests.
 191     bad_tests = set()
 192     for build in build_id_to_results:
 193         [_, failed_tests, timeout_tests, hanging_tests] = build_id_to_results[build]
 194         all_timeout_tests.update(timeout_tests)
 195         all_failed_tests.update(failed_tests)
 196         all_hanging_tests.update(hanging_tests)
 197         # Note that timedout tests are already included in failed tests.
 198         bad_tests.update(failed_tests.union(hanging_tests))
 199
 200     # For each bad test, get build ids where it ran, timed out, failed or hanged.
 201     test_to_build_ids = {key : {'all' : set(), 'timeout': set(), 'failed': set(),
 202                                 'hanging' : set(), 'bad_count' : 0}
 203                          for key in bad_tests}
 204     for build in build_id_to_results:
 205         [all_tests, failed_tests, timeout_tests, hanging_tests] = build_id_to_results[build]
 206         for bad_test in test_to_build_ids:
 207             is_bad = False
 208             if all_tests.issuperset([bad_test]):
 209                 test_to_build_ids[bad_test]["all"].add(build)
 210             if timeout_tests.issuperset([bad_test]):
 211                 test_to_build_ids[bad_test]['timeout'].add(build)
 212                 is_bad = True
 213             if failed_tests.issuperset([bad_test]):
 214                 test_to_build_ids[bad_test]['failed'].add(build)
 215                 is_bad = True
 216             if hanging_tests.issuperset([bad_test]):
 217                 test_to_build_ids[bad_test]['hanging'].add(build)
 218                 is_bad = True
 219             if is_bad:
 220                 test_to_build_ids[bad_test]['bad_count'] += 1
 221
 222     # Calculate flakyness % and successful builds for each test. Also sort build ids.
 223     for bad_test in test_to_build_ids:
 224         test_result = test_to_build_ids[bad_test]
 225         test_result['flakyness'] = test_result['bad_count'] * 100.0 / len(test_result['all'])
 226         test_result['success'] = (test_result['all'].difference(
 227             test_result['failed'].union(test_result['hanging'])))
 228         for key in ['all', 'timeout', 'failed', 'hanging', 'success']:
 229             test_result[key] = sorted(test_result[key])
 230
 231
 232     # Sort tests in descending order by flakyness.
 233     sorted_test_to_build_ids = OrderedDict(
 234         sorted(test_to_build_ids.iteritems(), key=lambda x: x[1]['flakyness'], reverse=True))
 235     url_to_bad_test_results[url] = sorted_test_to_build_ids
 236
 237     if len(sorted_test_to_build_ids) > 0:
 238         print "URL: {}".format(url)
 239         print "{:>60}  {:10}  {:25}  {}".format(
 240             "Test Name", "Total Runs", "Bad Runs(failed/timeout/hanging)", "Flakyness")
 241         for bad_test in sorted_test_to_build_ids:
 242             test_status = sorted_test_to_build_ids[bad_test]
 243             print "{:>60}  {:10}  {:7} ( {:4} / {:5} / {:5} )  {:2.0f}%".format(
 244                 bad_test, len(test_status['all']), test_status['bad_count'],
 245                 len(test_status['failed']), len(test_status['timeout']),
 246                 len(test_status['hanging']), test_status['flakyness'])
 247     else:
 248         print "No flaky tests founds."
 249         if len(url_to_build_ids[url]) == len(build_ids_without_tests_run):
 250             print "None of the analyzed builds have test result."
 251
 252     print "Builds analyzed: {}".format(url_to_build_ids[url])
 253     print "Builds without any test runs: {}".format(build_ids_without_tests_run)
 254     print ""
 255
 256
 257 all_bad_tests = all_hanging_tests.union(all_failed_tests)
 258 if args.mvn:
 259     includes = ",".join(all_bad_tests)
 260     with open("./includes", "w") as inc_file:
 261         inc_file.write(includes)
 262
 263     excludes = ["**/{0}.java".format(bad_test) for bad_test in all_bad_tests]
 264     with open("./excludes", "w") as exc_file:
 265         exc_file.write(",".join(excludes))
 266
 267     with open("./timeout", "w") as timeout_file:
 268         timeout_file.write(",".join(all_timeout_tests))
 269
 270     with open("./failed", "w") as failed_file:
 271         failed_file.write(",".join(all_failed_tests))
 272
 273 dev_support_dir = os.path.dirname(os.path.abspath(__file__))
 274 with open(os.path.join(dev_support_dir, "flaky-dashboard-template.html"), "r") as f:
 275     template = Template(f.read())
 276
 277 with open("dashboard.html", "w") as f:
 278     datetime = time.strftime("%m/%d/%Y %H:%M:%S")
 279     f.write(template.render(datetime=datetime, bad_tests_count=len(all_bad_tests),
 280                             results=url_to_bad_test_results, build_ids=url_to_build_ids))