dev-support/flaky-tests/report-flakies.py

   1 #!/usr/bin/env python
   2 ##
   3 # Licensed to the Apache Software Foundation (ASF) under one
   4 # or more contributor license agreements.  See the NOTICE file
   5 # distributed with this work for additional information
   6 # regarding copyright ownership.  The ASF licenses this file
   7 # to you under the Apache License, Version 2.0 (the
   8 # "License"); you may not use this file except in compliance
   9 # with the License.  You may obtain a copy of the License at
  10 #
  11 #     http://www.apache.org/licenses/LICENSE-2.0
  12 #
  13 # Unless required by applicable law or agreed to in writing, software
  14 # distributed under the License is distributed on an "AS IS" BASIS,
  15 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16 # See the License for the specific language governing permissions and
  17 # limitations under the License.
  18
  19 # pylint: disable=invalid-name
  20 # To disable 'invalid constant name' warnings.
  21 # pylint: disable=import-error
  22 # Testing environment may not have all dependencies.
  23
  24 """
  25 This script uses Jenkins REST api to collect test result(s) of given build/builds and generates
  26 flakyness data about unittests.
  27 Print help: report-flakies.py -h
  28 """
  29
  30 import argparse
  31 import logging
  32 import os
  33 import time
  34 from collections import OrderedDict
  35 from jinja2 import Template
  36
  37 import requests
  38
  39 import findHangingTests
  40
  41 parser = argparse.ArgumentParser()
  42 parser.add_argument(
  43     '--urls', metavar='URL', action='append', required=True,
  44     help='Urls to analyze, which can refer to simple projects, multi-configuration projects or '
  45          'individual build run.')
  46 parser.add_argument('--excluded-builds', metavar='n1,n2', action='append',
  47                     help='List of build numbers to exclude (or "None"). Not required, '
  48                          'but if specified, number of uses should be same as that of --urls '
  49                          'since the values are matched.')
  50 parser.add_argument('--max-builds', metavar='n', action='append', type=int,
  51                     help='The maximum number of builds to use (if available on jenkins). Specify '
  52                          '0 to analyze all builds. Not required, but if specified, number of uses '
  53                          'should be same as that of --urls since the values are matched.')
  54 parser.add_argument('--is-yetus', metavar='True/False', action='append', choices=['True', 'False'],
  55                     help='True, if build is yetus style i.e. look for maven output in artifacts; '
  56                          'False, if maven output is in <url>/consoleText itself.')
  57 parser.add_argument(
  58     "--mvn", action="store_true",
  59     help="Writes two strings for including/excluding these flaky tests using maven flags. These "
  60          "strings are written to files so they can be saved as artifacts and easily imported in "
  61          "other projects. Also writes timeout and failing tests in separate files for "
  62          "reference.")
  63 parser.add_argument("-o", "--output", metavar='dir', action='store', required=False,
  64                     help="the output directory")
  65 parser.add_argument("-v", "--verbose", help="Prints more logs.", action="store_true")
  66 args = parser.parse_args()
  67
  68 logging.basicConfig()
  69 logger = logging.getLogger(__name__)
  70 if args.verbose:
  71     logger.setLevel(logging.INFO)
  72
  73 output_dir = '.'
  74 if args.output is not None:
  75     output_dir = args.output
  76     if not os.path.exists(output_dir):
  77       os.makedirs(output_dir)
  78
  79 def get_bad_tests(build_url, is_yetus):
  80     """
  81     Given url of an executed build, analyzes its maven output, and returns
  82     [list of all tests, list of timeout tests, list of failed tests].
  83     Returns None if can't get maven output from the build or if there is any other error.
  84     """
  85     logger.info("Analyzing %s", build_url)
  86     needed_fields="_class,building"
  87     if is_yetus:
  88         needed_fields+=",artifacts[fileName,relativePath]"
  89     response = requests.get(build_url + "/api/json?tree=" + needed_fields).json()
  90     if response["building"]:
  91         logger.info("Skipping this build since it is in progress.")
  92         return {}
  93     console_url = None
  94     if is_yetus:
  95         for artifact in response["artifacts"]:
  96             if artifact["fileName"] == "patch-unit-root.txt":
  97                 console_url = build_url + "/artifact/" + artifact["relativePath"]
  98                 break
  99         if console_url is None:
 100             logger.info("Can't find 'patch-unit-root.txt' artifact for Yetus build %s\n. Ignoring "
 101                         "this build.", build_url)
 102             return
 103     else:
 104         console_url = build_url + "/consoleText"
 105     build_result = findHangingTests.get_bad_tests(console_url)
 106     if not build_result:
 107         logger.info("Ignoring build %s", build_url)
 108         return
 109     return build_result
 110
 111
 112 def expand_multi_config_projects(cli_args):
 113     """
 114     If any url is of type multi-configuration project (i.e. has key 'activeConfigurations'),
 115     get urls for individual jobs.
 116     """
 117     job_urls = cli_args.urls
 118     excluded_builds_arg = cli_args.excluded_builds
 119     max_builds_arg = cli_args.max_builds
 120     is_yetus_arg = cli_args.is_yetus
 121     if excluded_builds_arg is not None and len(excluded_builds_arg) != len(job_urls):
 122         raise Exception("Number of --excluded-builds arguments should be same as that of --urls "
 123                         "since values are matched.")
 124     if max_builds_arg is not None and len(max_builds_arg) != len(job_urls):
 125         raise Exception("Number of --max-builds arguments should be same as that of --urls "
 126                         "since values are matched.")
 127     final_expanded_urls = []
 128     for (i, job_url) in enumerate(job_urls):
 129         max_builds = 10000  # Some high number
 130         is_yetus = False
 131         if is_yetus_arg is not None:
 132             is_yetus = is_yetus_arg[i] == "True"
 133         if max_builds_arg is not None and max_builds_arg[i] != 0:
 134             max_builds = int(max_builds_arg[i])
 135         excluded_builds = []
 136         if excluded_builds_arg is not None and excluded_builds_arg[i] != "None":
 137             excluded_builds = [int(x) for x in excluded_builds_arg[i].split(",")]
 138         request = requests.get(job_url + "/api/json?tree=_class,activeConfigurations%5Burl%5D")
 139         if request.status_code != 200:
 140             raise Exception("Failed to get job information from jenkins for url '" + job_url +
 141                             "'. Jenkins returned HTTP status " + str(request.status_code))
 142         response = request.json()
 143         if response.has_key("activeConfigurations"):
 144             for config in response["activeConfigurations"]:
 145                 final_expanded_urls.append({'url':config["url"], 'max_builds': max_builds,
 146                                             'excludes': excluded_builds, 'is_yetus': is_yetus})
 147         else:
 148             final_expanded_urls.append({'url':job_url, 'max_builds': max_builds,
 149                                         'excludes': excluded_builds, 'is_yetus': is_yetus})
 150     return final_expanded_urls
 151
 152
 153 # Set of timeout/failed tests across all given urls.
 154 all_timeout_tests = set()
 155 all_failed_tests = set()
 156 all_hanging_tests = set()
 157 # Contains { <url> : { <bad_test> : { 'all': [<build ids>], 'failed': [<build ids>],
 158 #                                     'timeout': [<build ids>], 'hanging': [<builds ids>] } } }
 159 url_to_bad_test_results = OrderedDict()
 160 # Contains { <url> : [run_ids] }
 161 # Used for common min/max build ids when generating sparklines.
 162 url_to_build_ids = OrderedDict()
 163
 164 # Iterates over each url, gets test results and prints flaky tests.
 165 expanded_urls = expand_multi_config_projects(args)
 166 for url_max_build in expanded_urls:
 167     url = url_max_build["url"]
 168     excludes = url_max_build["excludes"]
 169     json_response = requests.get(url + "/api/json?tree=id,builds%5Bnumber,url%5D").json()
 170     if json_response.has_key("builds"):
 171         builds = json_response["builds"]
 172         logger.info("Analyzing job: %s", url)
 173     else:
 174         builds = [{'number': json_response["id"], 'url': url}]
 175         logger.info("Analyzing build : %s", url)
 176     build_id_to_results = {}
 177     num_builds = 0
 178     url_to_build_ids[url] = []
 179     build_ids_without_tests_run = []
 180     for build in builds:
 181         build_id = build["number"]
 182         if build_id in excludes:
 183             continue
 184         result = get_bad_tests(build["url"], url_max_build['is_yetus'])
 185         if not result:
 186             continue
 187         if len(result[0]) > 0:
 188             build_id_to_results[build_id] = result
 189         else:
 190             build_ids_without_tests_run.append(build_id)
 191         num_builds += 1
 192         url_to_build_ids[url].append(build_id)
 193         if num_builds == url_max_build["max_builds"]:
 194             break
 195     url_to_build_ids[url].sort()
 196
 197     # Collect list of bad tests.
 198     bad_tests = set()
 199     for build in build_id_to_results:
 200         [_, failed_tests, timeout_tests, hanging_tests] = build_id_to_results[build]
 201         all_timeout_tests.update(timeout_tests)
 202         all_failed_tests.update(failed_tests)
 203         all_hanging_tests.update(hanging_tests)
 204         # Note that timedout tests are already included in failed tests.
 205         bad_tests.update(failed_tests.union(hanging_tests))
 206
 207     # For each bad test, get build ids where it ran, timed out, failed or hanged.
 208     test_to_build_ids = {key : {'all' : set(), 'timeout': set(), 'failed': set(),
 209                                 'hanging' : set(), 'bad_count' : 0}
 210                          for key in bad_tests}
 211     for build in build_id_to_results:
 212         [all_tests, failed_tests, timeout_tests, hanging_tests] = build_id_to_results[build]
 213         for bad_test in test_to_build_ids:
 214             is_bad = False
 215             if all_tests.issuperset([bad_test]):
 216                 test_to_build_ids[bad_test]["all"].add(build)
 217             if timeout_tests.issuperset([bad_test]):
 218                 test_to_build_ids[bad_test]['timeout'].add(build)
 219                 is_bad = True
 220             if failed_tests.issuperset([bad_test]):
 221                 test_to_build_ids[bad_test]['failed'].add(build)
 222                 is_bad = True
 223             if hanging_tests.issuperset([bad_test]):
 224                 test_to_build_ids[bad_test]['hanging'].add(build)
 225                 is_bad = True
 226             if is_bad:
 227                 test_to_build_ids[bad_test]['bad_count'] += 1
 228
 229     # Calculate flakyness % and successful builds for each test. Also sort build ids.
 230     for bad_test in test_to_build_ids:
 231         test_result = test_to_build_ids[bad_test]
 232         test_result['flakyness'] = test_result['bad_count'] * 100.0 / len(test_result['all'])
 233         test_result['success'] = (test_result['all'].difference(
 234             test_result['failed'].union(test_result['hanging'])))
 235         for key in ['all', 'timeout', 'failed', 'hanging', 'success']:
 236             test_result[key] = sorted(test_result[key])
 237
 238
 239     # Sort tests in descending order by flakyness.
 240     sorted_test_to_build_ids = OrderedDict(
 241         sorted(test_to_build_ids.iteritems(), key=lambda x: x[1]['flakyness'], reverse=True))
 242     url_to_bad_test_results[url] = sorted_test_to_build_ids
 243
 244     if len(sorted_test_to_build_ids) > 0:
 245         print "URL: {}".format(url)
 246         print "{:>60}  {:10}  {:25}  {}".format(
 247             "Test Name", "Total Runs", "Bad Runs(failed/timeout/hanging)", "Flakyness")
 248         for bad_test in sorted_test_to_build_ids:
 249             test_status = sorted_test_to_build_ids[bad_test]
 250             print "{:>60}  {:10}  {:7} ( {:4} / {:5} / {:5} )  {:2.0f}%".format(
 251                 bad_test, len(test_status['all']), test_status['bad_count'],
 252                 len(test_status['failed']), len(test_status['timeout']),
 253                 len(test_status['hanging']), test_status['flakyness'])
 254     else:
 255         print "No flaky tests founds."
 256         if len(url_to_build_ids[url]) == len(build_ids_without_tests_run):
 257             print "None of the analyzed builds have test result."
 258
 259     print "Builds analyzed: {}".format(url_to_build_ids[url])
 260     print "Builds without any test runs: {}".format(build_ids_without_tests_run)
 261     print ""
 262
 263
 264 all_bad_tests = all_hanging_tests.union(all_failed_tests)
 265 if args.mvn:
 266     includes = ",".join(all_bad_tests)
 267     with open(output_dir + "/includes", "w") as inc_file:
 268         inc_file.write(includes)
 269
 270     excludes = ["**/{0}.java".format(bad_test) for bad_test in all_bad_tests]
 271     with open(output_dir + "/excludes", "w") as exc_file:
 272         exc_file.write(",".join(excludes))
 273
 274     with open(output_dir + "/timeout", "w") as timeout_file:
 275         timeout_file.write(",".join(all_timeout_tests))
 276
 277     with open(output_dir + "/failed", "w") as failed_file:
 278         failed_file.write(",".join(all_failed_tests))
 279
 280 dev_support_dir = os.path.dirname(os.path.abspath(__file__))
 281 with open(os.path.join(dev_support_dir, "flaky-dashboard-template.html"), "r") as f:
 282     template = Template(f.read())
 283
 284 with open(output_dir + "/dashboard.html", "w") as f:
 285     datetime = time.strftime("%m/%d/%Y %H:%M:%S")
 286     f.write(template.render(datetime=datetime, bad_tests_count=len(all_bad_tests),
 287                             results=url_to_bad_test_results, build_ids=url_to_build_ids))