1 # -*- coding: utf-8 -*-
2 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3 # See https://llvm.org/LICENSE.txt for license information.
4 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5 """ This module is responsible to generate 'index.html' for the report.
7 The input for this step is the output directory, where individual reports
8 could be found. It parses those reports and generates 'index.html'. """
20 from libscanbuild
import duplicate_check
21 from libscanbuild
.clang
import get_version
# Public API of this module: only 'document' is exported.
23 __all__
= ["document"]
# NOTE(review): fragmented dump — the embedded numbers (27, 29, ...) are
# original-file line numbers, and every gap in them marks lines missing from
# this view (including the 'def document(args):' line itself, original line
# 26). Code tokens below are kept byte-identical; only comments are added.
27 """Generates cover report and returns the number of bugs/crashes."""
# Which report flavours were requested, decided from the output format flag.
29 html_reports_available
= args
.output_format
in {"html", "plist-html", "sarif-html"}
30 sarif_reports_available
= args
.output_format
in {"sarif", "sarif-html"}
32 logging
.debug("count crashes and bugs")
# Count analyzer crashes recorded in the output directory.
33 crash_count
= sum(1 for _
in read_crashes(args
.output
))
34 bug_counter
= create_counters()
# Feed every parsed bug into the counter (the loop body, original line 36,
# is missing here — presumably 'bug_counter(bug)'; confirm before use).
35 for bug
in read_bugs(args
.output
, html_reports_available
):
37 result
= crash_count
+ bug_counter
.total
# Only assemble index.html when HTML reports exist and something was found.
39 if html_reports_available
and result
:
40 use_cdb
= os
.path
.exists(args
.cdb
)
42 logging
.debug("generate index.html file")
43 # common prefix for source files to have shorter path
44 prefix
= commonprefix_from(args
.cdb
) if use_cdb
else os
.getcwd()
45 # assemble the cover from multiple fragments
# (original lines 46-48 missing — presumably the 'fragments' list init and
# the guards around the appends; confirm against upstream.)
49 fragments
.append(bug_summary(args
.output
, bug_counter
))
50 fragments
.append(bug_report(args
.output
, prefix
))
52 fragments
.append(crash_report(args
.output
, prefix
))
53 assemble_cover(args
, prefix
, fragments
)
54 # copy additional files to the report
55 copy_resource_files(args
.output
)
# (original line 56 missing — presumably an 'if use_cdb:' guard before the
# compilation database copy; confirm.)
57 shutil
.copy(args
.cdb
, args
.output
)
# Cleanup loop over fragment files (body at original lines 60-61 missing —
# presumably os.remove(fragment); confirm).
59 for fragment
in fragments
:
62 if sarif_reports_available
:
63 logging
.debug("merging sarif files")
64 merge_sarif_files(args
.output
)
# Writes 'index.html' by concatenating an HTML head/summary header with the
# already-rendered fragment files. NOTE(review): many of the HTML template
# lines are missing from this dump (numbering gaps) — code kept byte-identical.
69 def assemble_cover(args
, prefix
, fragments
):
70 """Put together the fragments into a final report."""
# Default the page title from the common source prefix.
75 if args
.html_title
is None:
76 args
.html_title
= os
.path
.basename(prefix
) + " - analyzer results"
78 with
open(os
.path
.join(args
.output
, "index.html"), "w") as handle
:
86 | <title>{html_title}</title>
87 | <link type="text/css" rel="stylesheet" href="scanview.css"/>
88 | <script type='text/javascript' src="sorttable.js"></script>
89 | <script type='text/javascript' src='selectable.js'></script>
92 ).format(html_title
=args
.html_title
)
# Marker comment consumed by downstream tooling.
94 handle
.write(comment("SUMMARYENDHEAD"))
99 | <h1>{html_title}</h1>
101 | <tr><th>User:</th><td>{user_name}@{host_name}</td></tr>
102 | <tr><th>Working Directory:</th><td>{current_dir}</td></tr>
103 | <tr><th>Command Line:</th><td>{cmd_args}</td></tr>
104 | <tr><th>Clang Version:</th><td>{clang_version}</td></tr>
105 | <tr><th>Date:</th><td>{date}</td></tr>
109 html_title
=args
.html_title
,
110 user_name
=getpass
.getuser(),
111 host_name
=socket
.gethostname(),
113 cmd_args
=" ".join(sys
.argv
),
114 clang_version
=get_version(args
.clang
),
115 date
=datetime
.datetime
.today().strftime("%c"),
# Append each pre-rendered fragment file verbatim.
118 for fragment
in fragments
:
119 # copy the content of fragments
120 with
open(fragment
, "r") as input_handle
:
121 shutil
.copyfileobj(input_handle
, handle
)
# Renders 'summary.html.fragment': an overview table of bug counts by
# category/type. NOTE(review): most of the HTML template lines are missing
# from this dump (numbering gaps) — code kept byte-identical.
132 def bug_summary(output_dir
, bug_counter
):
133 """Bug summary is a HTML table to give a better overview of the bugs."""
135 name
= os
.path
.join(output_dir
, "summary.html.fragment")
136 with
open(name
, "w") as handle
:
141 |<h2>Bug Summary</h2>
147 | <td class="sorttable_nosort">Display?</td>
157 | <tr style="font-weight:bold">
158 | <td class="SUMM_DESC">All Bugs</td>
159 | <td class="Q">{0}</td>
162 | <input checked type="checkbox" id="AllBugsCheck"
163 | onClick="CopyCheckedStateToCheckButtons(this);"/>
168 ).format(bug_counter
.total
)
# One table section per bug category, one row per bug type within it.
170 for category
, types
in bug_counter
.categories
.items():
175 | <th>{0}</th><th colspan=2></th>
180 for bug_type
in types
.values():
185 | <td class="SUMM_DESC">{bug_type}</td>
186 | <td class="Q">{bug_count}</td>
189 | <input checked type="checkbox"
190 | onClick="ToggleDisplay(this,'{bug_type_class}');"/>
# Marker comment consumed by downstream tooling.
205 handle
.write(comment("SUMMARYBUGEND"))
# Renders 'bugs.html.fragment': one sortable table row per de-duplicated bug.
# NOTE(review): template lines are missing from this dump (numbering gaps);
# the loop over 'bugs' binding 'current' (used below) is among the missing
# lines — code kept byte-identical.
209 def bug_report(output_dir
, prefix
):
210 """Creates a fragment from the analyzer reports."""
# 'pretty' HTML-escapes and path-relativizes each bug record.
212 pretty
= prettify_bug(prefix
, output_dir
)
213 bugs
= (pretty(bug
) for bug
in read_bugs(output_dir
, True))
215 name
= os
.path
.join(output_dir
, "bugs.html.fragment")
216 with
open(name
, "w") as handle
:
222 |<table class="sortable" style="table-layout:automatic">
226 | <td class="sorttable_sorted">
228 | <span id="sorttable_sortfwdind"> ▾</span>
231 | <td>Function/Method</td>
232 | <td class="Q">Line</td>
233 | <td class="Q">Path Length</td>
234 | <td class="sorttable_nosort"></td>
241 handle
.write(comment("REPORTBUGCOL"))
246 | <tr class="{bug_type_class}">
247 | <td class="DESC">{bug_category}</td>
248 | <td class="DESC">{bug_type}</td>
249 | <td>{bug_file}</td>
250 | <td class="DESC">{bug_function}</td>
251 | <td class="Q">{bug_line}</td>
252 | <td class="Q">{bug_path_length}</td>
253 | <td><a href="{report_file}#EndPath">View Report</a></td>
258 handle
.write(comment("REPORTBUG", {"id": current
["report_file"]}))
267 handle
.write(comment("REPORTBUGEND"))
# Renders 'crashes.html.fragment': one table row per analyzer failure.
# NOTE(review): template lines are missing from this dump (numbering gaps) —
# code kept byte-identical.
271 def crash_report(output_dir
, prefix
):
272 """Creates a fragment from the compiler crashes."""
274 pretty
= prettify_crash(prefix
, output_dir
)
275 crashes
= (pretty(crash
) for crash
in read_crashes(output_dir
))
277 name
= os
.path
.join(output_dir
, "crashes.html.fragment")
278 with
open(name
, "w") as handle
:
283 |<h2>Analyzer Failures</h2>
284 |<p>The analyzer had problems processing the following files:</p>
289 | <td>Source File</td>
290 | <td>Preprocessed File</td>
291 | <td>STDERR Output</td>
298 for current
in crashes
:
305 | <td><a href="{file}">preprocessor output</a></td>
306 | <td><a href="{stderr}">analyzer std err</a></td>
311 handle
.write(comment("REPORTPROBLEM", current
))
320 handle
.write(comment("REPORTCRASHES"))
def read_crashes(output_dir):
    """Generate a unique sequence of crashes from given output directory.

    Lazily yields one parsed crash record per '*.info.txt' file found
    under the 'failures' sub-directory."""
    failure_pattern = os.path.join(output_dir, "failures", "*.info.txt")
    return (parse_crash(report) for report in glob.iglob(failure_pattern))
def read_bugs(output_dir, html):
    # type: (str, bool) -> Generator[Dict[str, Any], None, None]
    """Generate a unique sequence of bugs from given output directory.

    Duplicates can be in a project if the same module was compiled multiple
    times with different compiler options. These would be better to show in
    the final report (cover) only once."""

    def non_empty(path):
        # Empty report files carry no diagnostics; skip them.
        return os.stat(path).st_size != 0

    # De-duplication key: line, path length and file identify a bug.
    seen_before = duplicate_check(
        lambda bug: "{bug_line}.{bug_path_length}:{bug_file}".format(**bug)
    )

    # HTML and plist reports need different parsers and glob patterns.
    parser = parse_bug_html if html else parse_bug_plist
    pattern = os.path.join(output_dir, "*.html" if html else "*.plist")

    for report_file in glob.iglob(pattern):
        if non_empty(report_file):
            for bug in parser(report_file):
                if not seen_before(bug):
                    yield bug
# Merges all per-TU SARIF files into 'results-merged.sarif'. NOTE(review):
# several lines are missing from this dump (numbering gaps), including the
# 'sarif_files' generator assignment around line 429, the 'merged'/'runs_count'
# initialisation (lines 447-449) and early-exit/sort handling — code kept
# byte-identical.
360 def merge_sarif_files(output_dir
, sort_files
=False):
361 """Reads and merges all .sarif files in the given output directory.
363 Each sarif file in the output directory is understood as a single run
364 and thus appear separate in the top level runs array. This requires
365 modifying the run index of any embedded links in messages.
368 def empty(file_name
):
369 return os
.stat(file_name
).st_size
== 0
# Recursive fixer: shifts embedded 'sarif:/runs/<i>' links by the offset.
371 def update_sarif_object(sarif_object
, runs_count_offset
):
373 Given a SARIF object, checks its dictionary entries for a 'message' property.
374 If it exists, updates the message index of embedded links in the run index.
376 Recursively looks through entries in the dictionary.
378 if not isinstance(sarif_object
, dict):
381 if "message" in sarif_object
:
382 sarif_object
["message"] = match_and_update_run(
383 sarif_object
["message"], runs_count_offset
386 for key
in sarif_object
:
387 if isinstance(sarif_object
[key
], list):
388 # iterate through subobjects and update it.
390 update_sarif_object(entry
, runs_count_offset
)
391 for entry
in sarif_object
[key
]
393 sarif_object
[key
] = arr
394 elif isinstance(sarif_object
[key
], dict):
395 sarif_object
[key
] = update_sarif_object(
396 sarif_object
[key
], runs_count_offset
404 def match_and_update_run(message
, runs_count_offset
):
406 Given a SARIF message object, checks if the text property contains an embedded link and
407 updates the run index if necessary.
409 if "text" not in message
:
412 # we only merge runs, so we only need to update the run index
413 pattern
= re
.compile(r
"sarif:/runs/(\d+)")
415 text
= message
["text"]
416 matches
= re
.finditer(pattern
, text
)
417 matches_list
= list(matches
)
419 # update matches from right to left to make increasing character length (9->10) smoother
420 for idx
in range(len(matches_list
) - 1, -1, -1):
421 match
= matches_list
[idx
]
422 new_run_count
= str(runs_count_offset
+ int(match
.group(1)))
423 text
= text
[0 : match
.start(1)] + new_run_count
+ text
[match
.end(1) :]
425 message
["text"] = text
430 for file in glob
.iglob(os
.path
.join(output_dir
, "*.sarif"))
433 # exposed for testing since the order of files returned by glob is not guaranteed to be sorted
435 sarif_files
= list(sarif_files
)
440 for sarif_file
in sarif_files
:
441 with
open(sarif_file
) as fp
:
442 sarif
= json
.load(fp
)
443 if "runs" not in sarif
:
446 # start with the first file
450 # extract the run and append it to the merged output
451 for run
in sarif
["runs"]:
452 new_run
= update_sarif_object(run
, runs_count
)
453 merged
["runs"].append(new_run
)
455 runs_count
+= len(sarif
["runs"])
457 with
open(os
.path
.join(output_dir
, "results-merged.sarif"), "w") as out
:
458 json
.dump(merged
, out
, indent
=4, sort_keys
=True)
# Yields one normalized bug dict per diagnostic in a .plist report.
# NOTE(review): lines are missing from this dump (numbering gaps), including
# the 'continue' after the warning and the 'yield {' opener plus possibly
# further dict keys (original lines 470-473, 479-481) — code kept
# byte-identical.
461 def parse_bug_plist(filename
):
462 """Returns the generator of bugs from a single .plist file."""
464 with
open(filename
, "rb") as fp
:
465 content
= plistlib
.load(fp
)
466 files
= content
.get("files")
467 for bug
in content
.get("diagnostics", []):
# Guard against a file index that points outside the file table.
468 if len(files
) <= int(bug
["location"]["file"]):
469 logging
.warning('Parsing bug from "%s" failed', filename
)
474 "bug_type": bug
["type"],
475 "bug_category": bug
["category"],
476 "bug_line": int(bug
["location"]["line"]),
477 "bug_path_length": int(bug
["location"]["col"]),
478 "bug_file": files
[int(bug
["location"]["file"])],
# Extracts bug metadata from the '<!-- BUG... -->' comments that the analyzer
# embeds in its HTML reports. NOTE(review): lines are missing from this dump
# (numbering gaps), including the 'patterns = [' list opener, the initial
# 'bug = {' dict (lines 495-503 partly), the loop 'break'/'if match:' guards
# and the final yield — code kept byte-identical.
482 def parse_bug_html(filename
):
483 """Parse out the bug information from HTML output."""
486 re
.compile(r
"<!-- BUGTYPE (?P<bug_type>.*) -->$"),
487 re
.compile(r
"<!-- BUGFILE (?P<bug_file>.*) -->$"),
488 re
.compile(r
"<!-- BUGPATHLENGTH (?P<bug_path_length>.*) -->$"),
489 re
.compile(r
"<!-- BUGLINE (?P<bug_line>.*) -->$"),
490 re
.compile(r
"<!-- BUGCATEGORY (?P<bug_category>.*) -->$"),
491 re
.compile(r
"<!-- BUGDESC (?P<bug_description>.*) -->$"),
492 re
.compile(r
"<!-- FUNCTIONNAME (?P<bug_function>.*) -->$"),
# Sentinel marking the end of the metadata prologue.
494 endsign
= re
.compile(r
"<!-- BUGMETAEND -->")
497 "report_file": filename
,
498 "bug_function": "n/a", # compatibility with < clang-3.5
499 "bug_category": "Other",
501 "bug_path_length": 1,
504 with
open(filename
, encoding
="utf-8") as handler
:
505 for line
in handler
.readlines():
506 # do not read the file further
507 if endsign
.match(line
):
509 # search for the right lines
510 for regex
in patterns
:
511 match
= regex
.match(line
.strip())
513 bug
.update(match
.groupdict())
# Numeric fields arrive as strings from the regex; coerce them.
516 encode_value(bug
, "bug_line", int)
517 encode_value(bug
, "bug_path_length", int)
# Parses a 'failures/*.info.txt' crash record. NOTE(review): the 'return {'
# opener and several dict keys (original lines 530-533 — presumably source/
# problem/file entries built from 'lines') are missing from this dump —
# code kept byte-identical.
522 def parse_crash(filename
):
523 """Parse out the crash information from the report file."""
# Derive the report base name by stripping the '.info.txt' suffix.
525 match
= re
.match(r
"(.*)\.info\.txt", filename
)
526 name
= match
.group(1) if match
else None
527 with
open(filename
, mode
="rb") as handler
:
528 # this is a workaround to fix windows read '\r\n' as new lines.
529 lines
= [line
.decode().rstrip() for line
in handler
.readlines()]
534 "info": name
+ ".info.txt",
535 "stderr": name
+ ".stderr.txt",
def category_type_name(bug):
    """Create a new bug attribute from bug by category and type.

    The result will be used as CSS class selector in the final report."""
    # NOTE(review): the inner helper's 'def' line is absent from this dump;
    # it is restored here (renamed) from its visible body and call sites.

    def attr_value(key):
        """Make value ready to be HTML attribute value."""
        raw = bug.get(key, "")
        return raw.lower().replace(" ", "_").replace("'", "")

    return escape("_".join(["bt", attr_value("bug_category"), attr_value("bug_type")]))
# Builds a stateful counter callable with 'total' and 'categories' attributes.
# NOTE(review): lines are missing from this dump (numbering gaps), including
# the inner 'def predicate(bug):' line, the default dict passed to
# current_category.get (lines 566-572 partly), the 'total' bookkeeping and
# the final 'return predicate' — code kept byte-identical.
552 def create_counters():
553 """Create counters for bug statistics.
555 Two entries are maintained: 'total' is an integer, represents the
556 number of bugs. The 'categories' is a two level categorisation of bug
557 counters. The first level is 'bug category' the second is 'bug type'.
558 Each entry in this classification is a dictionary of 'count', 'type'
562 bug_category
= bug
["bug_category"]
563 bug_type
= bug
["bug_type"]
# Look up (or create) the nested category/type counter entries.
564 current_category
= predicate
.categories
.get(bug_category
, dict())
565 current_type
= current_category
.get(
568 "bug_type": bug_type
,
569 "bug_type_class": category_type_name(bug
),
573 current_type
.update({"bug_count": current_type
["bug_count"] + 1})
574 current_category
.update({bug_type
: current_type
})
575 predicate
.categories
.update({bug_category
: current_category
})
# State is carried on the function object itself.
579 predicate
.categories
= dict()
def prettify_bug(prefix, output_dir):
    """Return a callable that makes a bug record safe to embed into HTML."""
    # NOTE(review): the inner 'def predicate(bug):' line and the trailing
    # 'return bug' / 'return predicate' are absent from this dump; restored
    # here — grounded by bug_report(), which maps the returned callable over
    # every bug record.

    def predicate(bug):
        """Make safe this values to embed into HTML."""
        relative_to_prefix = lambda x: escape(chop(prefix, x))
        relative_to_output = lambda x: escape(chop(output_dir, x))
        bug["bug_type_class"] = category_type_name(bug)
        encode_value(bug, "bug_file", relative_to_prefix)
        encode_value(bug, "bug_category", escape)
        encode_value(bug, "bug_type", escape)
        encode_value(bug, "report_file", relative_to_output)
        return bug

    return predicate
def prettify_crash(prefix, output_dir):
    """Return a callable that makes a crash record safe to embed into HTML."""
    # NOTE(review): the trailing 'return crash' / 'return predicate' are
    # absent from this dump; restored here — grounded by crash_report(),
    # which maps the returned callable over every crash record.

    def predicate(crash):
        """Make safe this values to embed into HTML."""
        relative_to_output = lambda x: escape(chop(output_dir, x))
        encode_value(crash, "source", lambda x: escape(chop(prefix, x)))
        encode_value(crash, "problem", escape)
        for key in ("file", "info", "stderr"):
            encode_value(crash, key, relative_to_output)
        return crash

    return predicate
def copy_resource_files(output_dir):
    """Copy the javascript and css files to the report directory."""
    # Resources ship alongside this module in a 'resources' sub-directory.
    resource_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "resources")
    for entry in os.listdir(resource_dir):
        shutil.copy(os.path.join(resource_dir, entry), output_dir)
def encode_value(container, key, encode):
    """Run 'encode' on 'container[key]' value and update it.

    Entries that are not present are left untouched."""
    # NOTE(review): the key-presence guard line is absent from this dump
    # (numbering gap at original lines 622-623); a guard is restored here —
    # confirm against upstream.
    if key not in container:
        return
    container[key] = encode(container[key])
def chop(prefix, filename):
    """Create 'filename' from '/prefix/filename'"""
    # An empty prefix means: leave the path as it is.
    if prefix:
        return os.path.relpath(filename, prefix)
    return filename
def escape(text):
    """Paranoid HTML escape method. (Python version independent)"""
    # NOTE(review): the 'def' line and the escape-table literal are absent
    # from this dump; restored here with the five standard HTML entities —
    # confirm against upstream.
    table = str.maketrans(
        {"&": "&amp;", '"': "&quot;", "'": "&apos;", ">": "&gt;", "<": "&lt;"}
    )
    return text.translate(table)
def reindent(text, indent):
    """Utility function to format html output and keep indentation.

    Keeps everything after the '|' marker on each non-blank line,
    re-indented by 'indent' spaces, one os.linesep-terminated line each."""
    pad = " " * indent
    kept = [
        pad + line.split("|")[1] + os.linesep
        for line in text.splitlines()
        if line.strip()
    ]
    return "".join(kept)
def comment(name, opts=None):
    """Utility function to format meta information as comment.

    :param name: the marker/tag name of the comment.
    :param opts: optional mapping rendered as ' key="value"' attributes.
    :return: one HTML comment line terminated by os.linesep."""
    # Fix: the original signature used a mutable default argument
    # (opts=dict()); a None sentinel is the safe, equivalent form.
    attributes = "".join(
        ' {0}="{1}"'.format(key, value) for key, value in (opts or {}).items()
    )
    return "<!-- {0}{1} -->{2}".format(name, attributes, os.linesep)
def commonprefix_from(filename):
    """Create file prefix from a compilation database entries."""
    # Load the compilation database first, then fold the 'file' fields.
    with open(filename, "r") as handle:
        entries = json.load(handle)
    return commonprefix(entry["file"] for entry in entries)
def commonprefix(files):
    """Fixed version of os.path.commonprefix.

    :param files: list of file names.
    :return: the longest path prefix that is a prefix of all files."""
    # NOTE(review): the loop's else-branch and the empty-input return are
    # absent from this dump (numbering gaps); restored here — the first
    # element seeds the fold, and no input falls back to the current
    # working directory. Confirm against upstream.
    result = None
    for current in files:
        result = current if result is None else os.path.commonprefix([result, current])

    if result is None:
        return os.getcwd()
    if not os.path.isdir(result):
        # A character-wise common prefix may end mid-filename; trim to a dir.
        return os.path.dirname(result)
    return os.path.abspath(result)