tools/lint/perfdocs/verifier.py

   1 # This Source Code Form is subject to the terms of the Mozilla Public
   2 # License, v. 2.0. If a copy of the MPL was not distributed with this
   3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
   4 import os
   5 import pathlib
   6 import re
   7
   8 import jsonschema
   9
  10 from perfdocs.gatherer import Gatherer
  11 from perfdocs.logger import PerfDocLogger
  12 from perfdocs.utils import read_file, read_yaml
  13
# Module-level logger shared by every validation routine in this file.
logger = PerfDocLogger()

"""
Schema for the config.yml file.
Expecting a YAML file with a format such as this:

name: raptor
manifest: testing/raptor/raptor/raptor.toml
static-only: False
suites:
    desktop:
        description: "Desktop tests."
        tests:
            raptor-tp6: "Raptor TP6 tests."
    mobile:
        description: "Mobile tests"
    benchmarks:
        description: "Benchmark tests."
        tests:
            wasm: "All wasm tests."

"""
# JSON schema handed to `jsonschema.validate` by `Verifier.validate_yaml`.
#
# NOTE(review): "metric_name", "suite_name" and "test_name" below look like
# placeholders for arbitrary user-chosen keys. With JSON Schema `properties`,
# a sub-schema presumably only applies to a key with that literal name, so the
# per-metric/per-suite/per-test constraints may not be enforced for real
# entries - TODO confirm against the JSON Schema specification before relying
# on them (the code in this file indexes `aliases` and `description` directly).
CONFIG_SCHEMA = {
    "definitions": {
        # Referenced by the top-level "metrics" property via "$ref".
        "metrics_schema": {
            "metric_name": {
                "type": "object",
                "properties": {
                    "aliases": {"type": "array", "items": {"type": "string"}},
                    "description": {"type": "string"},
                    "matcher": {"type": "string"},
                },
                "required": ["description", "aliases"],
            },
        },
    },
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "manifest": {"type": "string"},
        "static-only": {"type": "boolean"},
        "metrics": {"$ref": "#/definitions/metrics_schema"},
        "suites": {
            "type": "object",
            "properties": {
                "suite_name": {
                    "type": "object",
                    "properties": {
                        "tests": {
                            "type": "object",
                            "properties": {
                                "test_name": {"type": "string"},
                            },
                        },
                        "description": {"type": "string"},
                        "owner": {"type": "string"},
                    },
                    "required": ["description"],
                }
            },
        },
    },
    # "metrics" is the only top-level key that may be omitted.
    "required": ["name", "manifest", "static-only", "suites"],
}
  78
  79
  80 class Verifier(object):
  81     """
  82     Verifier is used for validating the perfdocs folders/tree. In the future,
  83     the generator will make use of this class to obtain a validated set of
  84     descriptions that can be used to build up a document.
  85     """
  86
  87     def __init__(self, workspace_dir, taskgraph=None):
  88         """
  89         Initialize the Verifier.
  90
  91         :param str workspace_dir: Path to the top-level checkout directory.
  92         """
  93         self.workspace_dir = workspace_dir
  94         self.metrics_info = {}
  95         self._gatherer = Gatherer(workspace_dir, taskgraph)
  96         self._compiled_matchers = {}
  97
  98     def _is_yaml_test_match(
  99         self, target_test_name, test_name, suite="", global_descriptions={}
 100     ):
 101         """Determine if a target name (from a YAML) matches with a test."""
 102         tb = os.path.basename(target_test_name)
 103         tb = re.sub(r"\..*", "", tb)
 104         if test_name == tb:
 105             # Found an exact match for the test_name
 106             return True
 107         if test_name in tb:
 108             # Found a 'fuzzy' match for the test_name
 109             # i.e. 'wasm' could exist for all raptor wasm tests
 110             global_descriptions.setdefault(suite, []).append(test_name)
 111             return True
 112
 113     def _validate_desc_yaml_direction(
 114         self, suite, framework_info, yaml_content, global_descriptions
 115     ):
 116         """Validate the descriptions in the YAML.
 117
 118         This validation ensures that all tests defined in the YAML exist in the test
 119         harness. Failures here suggest that there's a typo in the YAML or that
 120         a test was removed.
 121         """
 122         ytests = yaml_content["suites"][suite]
 123         global_descriptions[suite] = []
 124         if not ytests.get("tests"):
 125             # It's possible a suite entry has no tests
 126             return True
 127
 128         # Suite found - now check if any tests in YAML
 129         # definitions don't exist
 130         ytests = ytests["tests"]
 131         for test_name in ytests:
 132             foundtest = False
 133             for t in framework_info["test_list"][suite]:
 134                 if self._is_yaml_test_match(
 135                     t, test_name, suite=suite, global_descriptions=global_descriptions
 136                 ):
 137                     foundtest = True
 138                     break
 139             if not foundtest:
 140                 logger.warning(
 141                     "Could not find an existing test for {} - bad test name?".format(
 142                         test_name
 143                     ),
 144                     framework_info["yml_path"],
 145                 )
 146                 return False
 147
 148     def _validate_desc_harness_direction(
 149         self, suite, test_list, yaml_content, global_descriptions
 150     ):
 151         """Validate that the tests have a description in the YAML.
 152
 153         This stage of validation ensures that all the tests have some
 154         form of description, or that global descriptions are available.
 155         Failures here suggest a new test was added, or the config.yml
 156         file was changed.
 157         """
 158         # If only a description is provided for the suite, assume
 159         # that this is a suite-wide description and don't check for
 160         # it's tests
 161         stests = yaml_content["suites"][suite].get("tests", None)
 162         if not stests:
 163             return
 164
 165         tests_found = 0
 166         missing_tests = []
 167         test_to_manifest = {}
 168         for test_name, test_info in test_list.items():
 169             manifest_path = test_info.get("path", test_info.get("manifest", ""))
 170             tb = os.path.basename(manifest_path)
 171             tb = re.sub(r"\..*", "", tb)
 172             if (
 173                 stests.get(tb, None) is not None
 174                 or stests.get(test_name, None) is not None
 175             ):
 176                 # Test description exists, continue with the next test
 177                 tests_found += 1
 178                 continue
 179             test_to_manifest[test_name] = manifest_path
 180             missing_tests.append(test_name)
 181
 182         # Check if global test descriptions exist (i.e.
 183         # ones that cover all of tp6) for the missing tests
 184         new_mtests = []
 185         for mt in missing_tests:
 186             found = False
 187             for test_name in global_descriptions[suite]:
 188                 # Global test exists for this missing test
 189                 if mt.startswith(test_name):
 190                     found = True
 191                     break
 192                 if test_name in mt:
 193                     found = True
 194                     break
 195             if not found:
 196                 new_mtests.append(mt)
 197
 198         if len(new_mtests):
 199             # Output an error for each manifest with a missing
 200             # test description
 201             for test_name in new_mtests:
 202                 logger.warning(
 203                     "Could not find a test description for {}".format(test_name),
 204                     test_to_manifest[test_name],
 205                 )
 206
 207     def _match_metrics(self, target_metric_name, target_metric_info, measured_metrics):
 208         """Find all metrics that match the given information.
 209
 210         It either checks for the metric through a direct equality check, and if
 211         a regex matcher was provided, we will use that afterwards.
 212         """
 213         verified_metrics = []
 214
 215         metric_names = target_metric_info["aliases"] + [target_metric_name]
 216         for measured_metric in measured_metrics:
 217             if measured_metric in metric_names:
 218                 verified_metrics.append(measured_metric)
 219
 220         if target_metric_info.get("matcher", ""):
 221             # Compile the regex separately to capture issues in the regex
 222             # compilation
 223             matcher = self._compiled_matchers.get(target_metric_name, None)
 224             if not matcher:
 225                 matcher = re.compile(target_metric_info.get("matcher"))
 226                 self._compiled_matchers[target_metric_name] = matcher
 227
 228             # Search the measured metrics
 229             for measured_metric in measured_metrics:
 230                 if matcher.search(measured_metric):
 231                     verified_metrics.append(measured_metric)
 232
 233         return verified_metrics
 234
 235     def _validate_metrics_yaml_direction(
 236         self, suite, framework_info, yaml_content, global_metrics
 237     ):
 238         """Validate the metric descriptions in the YAML.
 239
 240         This direction (`yaml_direction`) checks that the YAML definitions exist in
 241         the test harness as real metrics. Failures here suggest that a metric
 242         changed name, is missing an alias, is misnamed, duplicated, or was removed.
 243         """
 244         for global_metric_name, global_metric_info in global_metrics["global"].items():
 245             for test, test_info in framework_info["test_list"][suite].items():
 246                 verified_metrics = self._match_metrics(
 247                     global_metric_name, global_metric_info, test_info.get("metrics", [])
 248                 )
 249                 if len(verified_metrics) == 0:
 250                     continue
 251
 252                 if global_metric_info.get("verified", False):
 253                     # We already verified this global metric, but add any
 254                     # extra verified metrics here
 255                     global_metrics["verified"].extend(verified_metrics)
 256                 else:
 257                     global_metric_info["verified"] = True
 258                     global_metrics["yaml-verified"].extend(
 259                         [global_metric_name] + global_metric_info["aliases"]
 260                     )
 261                     global_metrics["verified"].extend(
 262                         [global_metric_name]
 263                         + global_metric_info["aliases"]
 264                         + verified_metrics
 265                     )
 266
 267                 global_metric_info.setdefault("location", {}).setdefault(
 268                     suite, []
 269                 ).append(test)
 270
 271     def _validate_metrics_harness_direction(
 272         self, suite, test_list, yaml_content, global_metrics
 273     ):
 274         """Validate that metrics in the harness are documented."""
 275         # Gather all the metrics being measured
 276         all_measured_metrics = {}
 277         for test_name, test_info in test_list.items():
 278             metrics = test_info.get("metrics", [])
 279             for metric in metrics:
 280                 all_measured_metrics.setdefault(metric, []).append(test_name)
 281
 282         if len(all_measured_metrics) == 0:
 283             # There are no metrics measured by this suite
 284             return
 285
 286         for metric, tests in all_measured_metrics.items():
 287             if metric not in global_metrics["verified"]:
 288                 # Log a warning in all files that have this metric
 289                 for test in tests:
 290                     logger.warning(
 291                         "Missing description for the metric `{}` in test `{}`".format(
 292                             metric, test
 293                         ),
 294                         test_list[test].get(
 295                             "path", test_list[test].get("manifest", "")
 296                         ),
 297                     )
 298
 299     def validate_descriptions(self, framework_info):
 300         """
 301         Cross-validate the tests found in the manifests and the YAML
 302         test definitions. This function doesn't return a valid flag. Instead,
 303         the StructDocLogger.VALIDATION_LOG is used to determine validity.
 304
 305         The validation proceeds as follows:
 306             1. Check that all tests/suites in the YAML exist in the manifests.
 307                 - At the same time, build a list of global descriptions which
 308                    define descriptions for groupings of tests.
 309             2. Check that all tests/suites found in the manifests exist in the YAML.
 310                 - For missing tests, check if a global description for them exists.
 311
 312         As the validation is completed, errors are output into the validation log
 313         for any issues that are found.
 314
 315         The same is done for the metrics field expect it also has regex matching,
 316         and the definitions cannot be duplicated in a single harness. We make use
 317         of two `*verified` fields to simplify the two stages/directions, and checking
 318         for any duplication.
 319
 320         :param dict framework_info: Contains information about the framework. See
 321             `Gatherer.get_test_list` for information about its structure.
 322         """
 323         yaml_content = framework_info["yml_content"]
 324
 325         # Check for any bad test/suite names in the yaml config file
 326         # TODO: Combine global settings into a single dictionary
 327         global_descriptions = {}
 328         global_metrics = {
 329             "global": yaml_content.get("metrics", {}),
 330             "verified": [],
 331             "yaml-verified": [],
 332         }
 333         for suite, ytests in yaml_content["suites"].items():
 334             # Find the suite, then check against the tests within it
 335             if framework_info["test_list"].get(suite, None) is None:
 336                 logger.warning(
 337                     "Could not find an existing suite for {} - bad suite name?".format(
 338                         suite
 339                     ),
 340                     framework_info["yml_path"],
 341                 )
 342                 continue
 343
 344             # Validate descriptions
 345             self._validate_desc_yaml_direction(
 346                 suite, framework_info, yaml_content, global_descriptions
 347             )
 348
 349             # Validate metrics
 350             self._validate_metrics_yaml_direction(
 351                 suite, framework_info, yaml_content, global_metrics
 352             )
 353
 354         # The suite and test levels were properly checked, but we can only
 355         # check the global level after all suites were checked. If the metric
 356         # isn't in the verified
 357         for global_metric_name, _ in global_metrics["global"].items():
 358             if global_metric_name not in global_metrics["verified"]:
 359                 logger.warning(
 360                     (
 361                         "Cannot find documented metric `{}` "
 362                         "being used in the specified harness `{}`."
 363                     ).format(global_metric_name, yaml_content["name"]),
 364                     framework_info["yml_path"],
 365                 )
 366
 367         # Check for duplicate metrics/aliases in the verified metrics
 368         unique_metrics = set()
 369         warned = set()
 370         for metric in global_metrics["yaml-verified"]:
 371             if (
 372                 metric in unique_metrics or unique_metrics.add(metric)
 373             ) and metric not in warned:
 374                 logger.warning(
 375                     "Duplicate definitions found for `{}`.".format(metric),
 376                     framework_info["yml_path"],
 377                 )
 378                 warned.add(metric)
 379
 380         # Check for duplicate metrics in the global level
 381         unique_metrics = set()
 382         warned = set()
 383         for metric, metric_info in global_metrics["global"].items():
 384             if (
 385                 metric in unique_metrics or unique_metrics.add(metric)
 386             ) and metric not in warned:
 387                 logger.warning(
 388                     "Duplicate definitions found for `{}`.".format(metric),
 389                     framework_info["yml_path"],
 390                 )
 391                 for alias in metric_info.get("aliases", []):
 392                     unique_metrics.add(alias)
 393                     warned.add(alias)
 394                 warned.add(metric)
 395
 396         # Check for any missing tests/suites
 397         for suite, test_list in framework_info["test_list"].items():
 398             if not yaml_content["suites"].get(suite):
 399                 # Description doesn't exist for the suite
 400                 logger.warning(
 401                     "Missing suite description for {}".format(suite),
 402                     [t.get("path") for _, t in test_list.items()],
 403                     False,
 404                 )
 405                 continue
 406
 407             self._validate_desc_harness_direction(
 408                 suite, test_list, yaml_content, global_descriptions
 409             )
 410
 411             self._validate_metrics_harness_direction(
 412                 suite, test_list, yaml_content, global_metrics
 413             )
 414
 415         self.metrics_info[framework_info["name"]] = global_metrics["global"]
 416
 417     def validate_yaml(self, yaml_path):
 418         """
 419         Validate that the YAML file has all the fields that are
 420         required and parse the descriptions into strings in case
 421         some are give as relative file paths.
 422
 423         :param str yaml_path: Path to the YAML to validate.
 424         :return bool: True/False => Passed/Failed Validation
 425         """
 426
 427         def _get_description(desc):
 428             """
 429             Recompute the description in case it's a file.
 430             """
 431             desc_path = pathlib.Path(self.workspace_dir, desc)
 432
 433             try:
 434                 if desc_path.exists() and desc_path.is_file():
 435                     with open(desc_path, "r") as f:
 436                         desc = f.readlines()
 437             except OSError:
 438                 pass
 439
 440             return desc
 441
 442         def _parse_descriptions(content):
 443             for suite, sinfo in content.items():
 444                 desc = sinfo["description"]
 445                 sinfo["description"] = _get_description(desc)
 446
 447                 # It's possible that the suite has no tests and
 448                 # only a description. If they exist, then parse them.
 449                 if "tests" in sinfo:
 450                     for test, desc in sinfo["tests"].items():
 451                         sinfo["tests"][test] = _get_description(desc)
 452
 453         valid = False
 454         yaml_content = read_yaml(yaml_path)
 455
 456         try:
 457             jsonschema.validate(instance=yaml_content, schema=CONFIG_SCHEMA)
 458             _parse_descriptions(yaml_content["suites"])
 459             valid = True
 460         except Exception as e:
 461             logger.warning("YAML ValidationError: {}".format(str(e)), yaml_path)
 462
 463         return valid
 464
 465     def validate_rst_content(self, rst_path, expected_str):
 466         """
 467         Validate that a given RST has the expected string in it
 468         so that the generated documentation can be inserted there.
 469
 470         :param str rst_path: Path to the RST file.
 471         :return bool: True/False => Passed/Failed Validation
 472         """
 473         rst_content = read_file(rst_path)
 474
 475         # Check for a {documentation} entry in some line,
 476         # if we can't find one, then the validation fails.
 477         valid = False
 478         docs_match = re.compile(f".*{expected_str}.*")
 479         for line in rst_content:
 480             if docs_match.search(line):
 481                 valid = True
 482                 break
 483         if not valid:
 484             logger.warning(  # noqa: PLE1205
 485                 f"Cannot find a '{expected_str}' entry in the given index file",
 486                 rst_path,
 487             )
 488
 489         return valid
 490
 491     def _check_framework_descriptions(self, item):
 492         """
 493         Helper method for validating descriptions
 494         """
 495         framework_info = self._gatherer.get_test_list(item)
 496         self.validate_descriptions(framework_info)
 497
 498     def validate_tree(self):
 499         """
 500         Validate the `perfdocs` directory that was found.
 501         Returns True if it is good, false otherwise.
 502
 503         :return bool: True/False => Passed/Failed Validation
 504         """
 505         found_good = 0
 506
 507         # For each framework, check their files and validate descriptions
 508         for matched in self._gatherer.perfdocs_tree:
 509             # Get the paths to the YAML and RST for this framework
 510             matched_yml = pathlib.Path(matched["path"], matched["yml"])
 511             matched_rst = pathlib.Path(matched["path"], matched["rst"])
 512
 513             _valid_files = {
 514                 "yml": self.validate_yaml(matched_yml),
 515                 "rst": True,
 516                 "metrics": True,
 517             }
 518             if not read_yaml(matched_yml)["static-only"]:
 519                 _valid_files["rst"] = self.validate_rst_content(
 520                     matched_rst, "{documentation}"
 521                 )
 522
 523                 if matched.get("metrics"):
 524                     _valid_files["metrics"] = self.validate_rst_content(
 525                         pathlib.Path(matched["path"], matched["metrics"]),
 526                         "{metrics_documentation}",
 527                     )
 528
 529             # Log independently the errors found for the matched files
 530             for file_format, valid in _valid_files.items():
 531                 if not valid:
 532                     logger.log("File validation error: {}".format(file_format))
 533             if not all(_valid_files.values()):
 534                 continue
 535             found_good += 1
 536
 537             self._check_framework_descriptions(matched)
 538
 539         if not found_good:
 540             raise Exception("No valid perfdocs directories found")