clang/tools/scan-build-py/lib/libscanbuild/report.py
# -*- coding: utf-8 -*-
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
""" This module is responsible for generating 'index.html' for the report.

The input for this step is the output directory, where individual reports
could be found. It parses those reports and generates 'index.html'. """

import re
import os
import os.path
import sys
import shutil
import plistlib
import glob
import json
import logging
import datetime
from libscanbuild import duplicate_check
from libscanbuild.clang import get_version

__all__ = ["document"]


def document(args):
    """Generates cover report and returns the number of bugs/crashes."""

    html_reports_available = args.output_format in {"html", "plist-html", "sarif-html"}
    sarif_reports_available = args.output_format in {"sarif", "sarif-html"}

    logging.debug("count crashes and bugs")
    crash_count = sum(1 for _ in read_crashes(args.output))
    bug_counter = create_counters()
    for bug in read_bugs(args.output, html_reports_available):
        bug_counter(bug)
    result = crash_count + bug_counter.total

    if html_reports_available and result:
        use_cdb = os.path.exists(args.cdb)

        logging.debug("generate index.html file")
        # common prefix for source files, to get shorter paths
        prefix = commonprefix_from(args.cdb) if use_cdb else os.getcwd()
        # assemble the cover from multiple fragments
        fragments = []
        try:
            if bug_counter.total:
                fragments.append(bug_summary(args.output, bug_counter))
                fragments.append(bug_report(args.output, prefix))
            if crash_count:
                fragments.append(crash_report(args.output, prefix))
            assemble_cover(args, prefix, fragments)
            # copy additional files to the report
            copy_resource_files(args.output)
            if use_cdb:
                shutil.copy(args.cdb, args.output)
        finally:
            for fragment in fragments:
                os.remove(fragment)

    if sarif_reports_available:
        logging.debug("merging sarif files")
        merge_sarif_files(args.output)

    return result


def assemble_cover(args, prefix, fragments):
    """Put together the fragments into a final report."""

    import getpass
    import socket

    if args.html_title is None:
        args.html_title = os.path.basename(prefix) + " - analyzer results"

    with open(os.path.join(args.output, "index.html"), "w") as handle:
        indent = 0
        handle.write(
            reindent(
                """
        |<!DOCTYPE html>
        |<html>
        |  <head>
        |    <title>{html_title}</title>
        |    <link type="text/css" rel="stylesheet" href="scanview.css"/>
        |    <script type='text/javascript' src="sorttable.js"></script>
        |    <script type='text/javascript' src='selectable.js'></script>
        |  </head>""",
                indent,
            ).format(html_title=args.html_title)
        )
        handle.write(comment("SUMMARYENDHEAD"))
        handle.write(
            reindent(
                """
        |  <body>
        |    <h1>{html_title}</h1>
        |    <table>
        |      <tr><th>User:</th><td>{user_name}@{host_name}</td></tr>
        |      <tr><th>Working Directory:</th><td>{current_dir}</td></tr>
        |      <tr><th>Command Line:</th><td>{cmd_args}</td></tr>
        |      <tr><th>Clang Version:</th><td>{clang_version}</td></tr>
        |      <tr><th>Date:</th><td>{date}</td></tr>
        |    </table>""",
                indent,
            ).format(
                html_title=args.html_title,
                user_name=getpass.getuser(),
                host_name=socket.gethostname(),
                current_dir=prefix,
                cmd_args=" ".join(sys.argv),
                clang_version=get_version(args.clang),
                date=datetime.datetime.today().strftime("%c"),
            )
        )
        for fragment in fragments:
            # copy the content of fragments
            with open(fragment, "r") as input_handle:
                shutil.copyfileobj(input_handle, handle)
        handle.write(
            reindent(
                """
        |  </body>
        |</html>""",
                indent,
            )
        )


def bug_summary(output_dir, bug_counter):
    """Bug summary is an HTML table to give a better overview of the bugs."""

    name = os.path.join(output_dir, "summary.html.fragment")
    with open(name, "w") as handle:
        indent = 4
        handle.write(
            reindent(
                """
        |<h2>Bug Summary</h2>
        |<table>
        |  <thead>
        |    <tr>
        |      <td>Bug Type</td>
        |      <td>Quantity</td>
        |      <td class="sorttable_nosort">Display?</td>
        |    </tr>
        |  </thead>
        |  <tbody>""",
                indent,
            )
        )
        handle.write(
            reindent(
                """
        |    <tr style="font-weight:bold">
        |      <td class="SUMM_DESC">All Bugs</td>
        |      <td class="Q">{0}</td>
        |      <td>
        |        <center>
        |          <input checked type="checkbox" id="AllBugsCheck"
        |                 onClick="CopyCheckedStateToCheckButtons(this);"/>
        |        </center>
        |      </td>
        |    </tr>""",
                indent,
            ).format(bug_counter.total)
        )
        for category, types in bug_counter.categories.items():
            handle.write(
                reindent(
                    """
        |    <tr>
        |      <th>{0}</th><th colspan=2></th>
        |    </tr>""",
                    indent,
                ).format(category)
            )
            for bug_type in types.values():
                handle.write(
                    reindent(
                        """
        |    <tr>
        |      <td class="SUMM_DESC">{bug_type}</td>
        |      <td class="Q">{bug_count}</td>
        |      <td>
        |        <center>
        |          <input checked type="checkbox"
        |                 onClick="ToggleDisplay(this,'{bug_type_class}');"/>
        |        </center>
        |      </td>
        |    </tr>""",
                        indent,
                    ).format(**bug_type)
                )
        handle.write(
            reindent(
                """
        |  </tbody>
        |</table>""",
                indent,
            )
        )
        handle.write(comment("SUMMARYBUGEND"))
    return name


def bug_report(output_dir, prefix):
    """Creates a fragment from the analyzer reports."""

    pretty = prettify_bug(prefix, output_dir)
    bugs = (pretty(bug) for bug in read_bugs(output_dir, True))

    name = os.path.join(output_dir, "bugs.html.fragment")
    with open(name, "w") as handle:
        indent = 4
        handle.write(
            reindent(
                """
        |<h2>Reports</h2>
        |<table class="sortable" style="table-layout:automatic">
        |  <thead>
        |    <tr>
        |      <td>Bug Group</td>
        |      <td class="sorttable_sorted">
        |        Bug Type
        |        <span id="sorttable_sortfwdind">&nbsp;&#x25BE;</span>
        |      </td>
        |      <td>File</td>
        |      <td>Function/Method</td>
        |      <td class="Q">Line</td>
        |      <td class="Q">Path Length</td>
        |      <td class="sorttable_nosort"></td>
        |    </tr>
        |  </thead>
        |  <tbody>""",
                indent,
            )
        )
        handle.write(comment("REPORTBUGCOL"))
        for current in bugs:
            handle.write(
                reindent(
                    """
        |    <tr class="{bug_type_class}">
        |      <td class="DESC">{bug_category}</td>
        |      <td class="DESC">{bug_type}</td>
        |      <td>{bug_file}</td>
        |      <td class="DESC">{bug_function}</td>
        |      <td class="Q">{bug_line}</td>
        |      <td class="Q">{bug_path_length}</td>
        |      <td><a href="{report_file}#EndPath">View Report</a></td>
        |    </tr>""",
                    indent,
                ).format(**current)
            )
            handle.write(comment("REPORTBUG", {"id": current["report_file"]}))
        handle.write(
            reindent(
                """
        |  </tbody>
        |</table>""",
                indent,
            )
        )
        handle.write(comment("REPORTBUGEND"))
    return name


def crash_report(output_dir, prefix):
    """Creates a fragment from the compiler crashes."""

    pretty = prettify_crash(prefix, output_dir)
    crashes = (pretty(crash) for crash in read_crashes(output_dir))

    name = os.path.join(output_dir, "crashes.html.fragment")
    with open(name, "w") as handle:
        indent = 4
        handle.write(
            reindent(
                """
        |<h2>Analyzer Failures</h2>
        |<p>The analyzer had problems processing the following files:</p>
        |<table>
        |  <thead>
        |    <tr>
        |      <td>Problem</td>
        |      <td>Source File</td>
        |      <td>Preprocessed File</td>
        |      <td>STDERR Output</td>
        |    </tr>
        |  </thead>
        |  <tbody>""",
                indent,
            )
        )
        for current in crashes:
            handle.write(
                reindent(
                    """
        |    <tr>
        |      <td>{problem}</td>
        |      <td>{source}</td>
        |      <td><a href="{file}">preprocessor output</a></td>
        |      <td><a href="{stderr}">analyzer std err</a></td>
        |    </tr>""",
                    indent,
                ).format(**current)
            )
            handle.write(comment("REPORTPROBLEM", current))
        handle.write(
            reindent(
                """
        |  </tbody>
        |</table>""",
                indent,
            )
        )
        handle.write(comment("REPORTCRASHES"))
    return name


def read_crashes(output_dir):
    """Generate a unique sequence of crashes from given output directory."""

    return (
        parse_crash(filename)
        for filename in glob.iglob(os.path.join(output_dir, "failures", "*.info.txt"))
    )


def read_bugs(output_dir, html):
    # type: (str, bool) -> Generator[Dict[str, Any], None, None]
    """Generate a unique sequence of bugs from given output directory.

    Duplicates can occur in a project if the same module was compiled multiple
    times with different compiler options. Such bugs are shown in the final
    report (cover) only once."""

    def empty(file_name):
        return os.stat(file_name).st_size == 0

    duplicate = duplicate_check(
        lambda bug: "{bug_line}.{bug_path_length}:{bug_file}".format(**bug)
    )

    # get the right parser for the job.
    parser = parse_bug_html if html else parse_bug_plist
    # get the input files, which are not empty.
    pattern = os.path.join(output_dir, "*.html" if html else "*.plist")
    bug_files = (file for file in glob.iglob(pattern) if not empty(file))

    for bug_file in bug_files:
        for bug in parser(bug_file):
            if not duplicate(bug):
                yield bug
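
# Illustrative note (not part of the original module): duplicate_check keys each
# bug by line, path length and file, so the same diagnostic reported by several
# compilations of one module collapses into a single entry. For example, a bug
# such as
#
#     {"bug_line": 42, "bug_path_length": 3, "bug_file": "src/a.c", ...}
#
# is identified by the key "42.3:src/a.c".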


def merge_sarif_files(output_dir, sort_files=False):
    """Reads and merges all .sarif files in the given output directory.

    Each sarif file in the output directory is understood as a single run
    and thus appears as a separate entry in the top level runs array. This
    requires modifying the run index of any embedded links in messages.
    """

    def empty(file_name):
        return os.stat(file_name).st_size == 0

    def update_sarif_object(sarif_object, runs_count_offset):
        """
        Given a SARIF object, checks its dictionary entries for a 'message' property.
        If it exists, updates the run index of any embedded links in the message.

        Recursively looks through entries in the dictionary.
        """
        if not isinstance(sarif_object, dict):
            return sarif_object

        if "message" in sarif_object:
            sarif_object["message"] = match_and_update_run(
                sarif_object["message"], runs_count_offset
            )

        for key in sarif_object:
            if isinstance(sarif_object[key], list):
                # iterate through subobjects and update them.
                arr = [
                    update_sarif_object(entry, runs_count_offset)
                    for entry in sarif_object[key]
                ]
                sarif_object[key] = arr
            elif isinstance(sarif_object[key], dict):
                sarif_object[key] = update_sarif_object(
                    sarif_object[key], runs_count_offset
                )
            else:
                # do nothing
                pass

        return sarif_object

    def match_and_update_run(message, runs_count_offset):
        """
        Given a SARIF message object, checks if the text property contains an
        embedded link and updates the run index if necessary.
        """
        if "text" not in message:
            return message

        # we only merge runs, so we only need to update the run index
        pattern = re.compile(r"sarif:/runs/(\d+)")

        text = message["text"]
        matches = re.finditer(pattern, text)
        matches_list = list(matches)

        # update matches from right to left so that a growing digit count
        # (9 -> 10) does not invalidate the positions of earlier matches
        for idx in range(len(matches_list) - 1, -1, -1):
            match = matches_list[idx]
            new_run_count = str(runs_count_offset + int(match.group(1)))
            text = text[0 : match.start(1)] + new_run_count + text[match.end(1) :]

        message["text"] = text
        return message

    sarif_files = (
        file
        for file in glob.iglob(os.path.join(output_dir, "*.sarif"))
        if not empty(file)
    )
    # exposed for testing since the order of files returned by glob is not
    # guaranteed to be sorted
    if sort_files:
        sarif_files = list(sarif_files)
        sarif_files.sort()

    runs_count = 0
    merged = {}
    for sarif_file in sarif_files:
        with open(sarif_file) as fp:
            sarif = json.load(fp)
        if "runs" not in sarif:
            continue

        # start with the first file
        if not merged:
            merged = sarif
        else:
            # extract the runs and append them to the merged output
            for run in sarif["runs"]:
                new_run = update_sarif_object(run, runs_count)
                merged["runs"].append(new_run)

        runs_count += len(sarif["runs"])

    with open(os.path.join(output_dir, "results-merged.sarif"), "w") as out:
        json.dump(merged, out, indent=4, sort_keys=True)
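
# Illustrative sketch (not part of the original module): when a .sarif file is
# merged after two runs have already been collected, runs_count_offset is 2 and
# an embedded link in one of its messages is rewritten like this:
#
#     message = {"text": "see sarif:/runs/0/results/3"}
#     # after match_and_update_run(message, 2) inside the merge:
#     # message["text"] == "see sarif:/runs/2/results/3"
#
# match_and_update_run is a helper local to merge_sarif_files, so the call above
# is conceptual rather than a public API.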


def parse_bug_plist(filename):
    """Returns the generator of bugs from a single .plist file."""

    with open(filename, "rb") as fp:
        content = plistlib.load(fp)
        files = content.get("files")
        for bug in content.get("diagnostics", []):
            if len(files) <= int(bug["location"]["file"]):
                logging.warning('Parsing bug from "%s" failed', filename)
                continue

            yield {
                "result": filename,
                "bug_type": bug["type"],
                "bug_category": bug["category"],
                "bug_line": int(bug["location"]["line"]),
                "bug_path_length": int(bug["location"]["col"]),
                "bug_file": files[int(bug["location"]["file"])],
            }


def parse_bug_html(filename):
    """Parse out the bug information from HTML output."""

    patterns = [
        re.compile(r"<!-- BUGTYPE (?P<bug_type>.*) -->$"),
        re.compile(r"<!-- BUGFILE (?P<bug_file>.*) -->$"),
        re.compile(r"<!-- BUGPATHLENGTH (?P<bug_path_length>.*) -->$"),
        re.compile(r"<!-- BUGLINE (?P<bug_line>.*) -->$"),
        re.compile(r"<!-- BUGCATEGORY (?P<bug_category>.*) -->$"),
        re.compile(r"<!-- BUGDESC (?P<bug_description>.*) -->$"),
        re.compile(r"<!-- FUNCTIONNAME (?P<bug_function>.*) -->$"),
    ]
    endsign = re.compile(r"<!-- BUGMETAEND -->")

    bug = {
        "report_file": filename,
        "bug_function": "n/a",  # compatibility with < clang-3.5
        "bug_category": "Other",
        "bug_line": 0,
        "bug_path_length": 1,
    }

    with open(filename, encoding="utf-8") as handler:
        for line in handler.readlines():
            # do not read the file further
            if endsign.match(line):
                break
            # search for the right lines
            for regex in patterns:
                match = regex.match(line.strip())
                if match:
                    bug.update(match.groupdict())
                    break

    encode_value(bug, "bug_line", int)
    encode_value(bug, "bug_path_length", int)

    yield bug


def parse_crash(filename):
    """Parse out the crash information from the report file."""

    match = re.match(r"(.*)\.info\.txt", filename)
    name = match.group(1) if match else None
    with open(filename, mode="rb") as handler:
        # workaround: read as bytes and strip trailing '\r\n' so Windows line
        # endings do not leak into the report
        lines = [line.decode().rstrip() for line in handler.readlines()]
        return {
            "source": lines[0],
            "problem": lines[1],
            "file": name,
            "info": name + ".info.txt",
            "stderr": name + ".stderr.txt",
        }


def category_type_name(bug):
    """Create a new bug attribute from bug by category and type.

    The result will be used as CSS class selector in the final report."""

    def smash(key):
        """Make value ready to be HTML attribute value."""

        return bug.get(key, "").lower().replace(" ", "_").replace("'", "")

    return escape("bt_" + smash("bug_category") + "_" + smash("bug_type"))
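
# Illustrative note (not part of the original module): for a bug with
# bug_category "Logic error" and bug_type "Division by zero" the generated CSS
# class is "bt_logic_error_division_by_zero" (lower-cased, spaces replaced by
# underscores, apostrophes dropped, then HTML-escaped).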


def create_counters():
    """Create counters for bug statistics.

    Two entries are maintained: 'total' is an integer that represents the
    number of bugs. 'categories' is a two level categorisation of bug
    counters: the first level is the bug category, the second is the bug type.
    Each entry in this classification is a dictionary of 'bug_count',
    'bug_type' and 'bug_type_class'."""

    def predicate(bug):
        bug_category = bug["bug_category"]
        bug_type = bug["bug_type"]
        current_category = predicate.categories.get(bug_category, dict())
        current_type = current_category.get(
            bug_type,
            {
                "bug_type": bug_type,
                "bug_type_class": category_type_name(bug),
                "bug_count": 0,
            },
        )
        current_type.update({"bug_count": current_type["bug_count"] + 1})
        current_category.update({bug_type: current_type})
        predicate.categories.update({bug_category: current_category})
        predicate.total += 1

    predicate.total = 0
    predicate.categories = dict()
    return predicate
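
# Illustrative sketch (not part of the original module): counting one bug
# produces the structure documented above.
#
#     counter = create_counters()
#     counter({"bug_category": "Logic error", "bug_type": "Division by zero"})
#     # counter.total == 1
#     # counter.categories == {
#     #     "Logic error": {
#     #         "Division by zero": {
#     #             "bug_type": "Division by zero",
#     #             "bug_type_class": "bt_logic_error_division_by_zero",
#     #             "bug_count": 1,
#     #         }
#     #     }
#     # }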


def prettify_bug(prefix, output_dir):
    def predicate(bug):
        """Make these values safe to embed into HTML."""

        bug["bug_type_class"] = category_type_name(bug)

        encode_value(bug, "bug_file", lambda x: escape(chop(prefix, x)))
        encode_value(bug, "bug_category", escape)
        encode_value(bug, "bug_type", escape)
        encode_value(bug, "report_file", lambda x: escape(chop(output_dir, x)))
        return bug

    return predicate


def prettify_crash(prefix, output_dir):
    def predicate(crash):
        """Make these values safe to embed into HTML."""

        encode_value(crash, "source", lambda x: escape(chop(prefix, x)))
        encode_value(crash, "problem", escape)
        encode_value(crash, "file", lambda x: escape(chop(output_dir, x)))
        encode_value(crash, "info", lambda x: escape(chop(output_dir, x)))
        encode_value(crash, "stderr", lambda x: escape(chop(output_dir, x)))
        return crash

    return predicate


def copy_resource_files(output_dir):
    """Copy the javascript and css files to the report directory."""

    this_dir = os.path.dirname(os.path.realpath(__file__))
    for resource in os.listdir(os.path.join(this_dir, "resources")):
        shutil.copy(os.path.join(this_dir, "resources", resource), output_dir)


def encode_value(container, key, encode):
    """Run 'encode' on 'container[key]' value and update it."""

    if key in container:
        value = encode(container[key])
        container.update({key: value})


def chop(prefix, filename):
    """Create 'filename' from '/prefix/filename'"""

    return filename if not len(prefix) else os.path.relpath(filename, prefix)
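
# Illustrative note (not part of the original module):
# chop("/path/to/project", "/path/to/project/src/main.c") == "src/main.c",
# while an empty prefix leaves the filename untouched.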


def escape(text):
    """Paranoid HTML escape method. (Python version independent)"""

    escape_table = {
        "&": "&amp;",
        '"': "&quot;",
        "'": "&apos;",
        ">": "&gt;",
        "<": "&lt;",
    }
    return "".join(escape_table.get(c, c) for c in text)


def reindent(text, indent):
    """Utility function to format html output and keep indentation."""

    result = ""
    for line in text.splitlines():
        if len(line.strip()):
            result += " " * indent + line.split("|")[1] + os.linesep
    return result
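
# Illustrative note (not part of the original module): only the text after the
# first '|' of each non-blank line is kept, prefixed by 'indent' spaces, e.g.
#
#     reindent("\n        |<table>\n        |  <tr/>", 4)
#
# yields "    <table>" and "      <tr/>", each followed by os.linesep.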


def comment(name, opts=dict()):
    """Utility function to format meta information as comment."""

    attributes = ""
    for key, value in opts.items():
        attributes += ' {0}="{1}"'.format(key, value)

    return "<!-- {0}{1} -->{2}".format(name, attributes, os.linesep)
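
# Illustrative note (not part of the original module):
# comment("REPORTBUG", {"id": "report-1.html"}) returns
# '<!-- REPORTBUG id="report-1.html" -->' followed by os.linesep.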


def commonprefix_from(filename):
    """Create file prefix from compilation database entries."""

    with open(filename, "r") as handle:
        return commonprefix(item["file"] for item in json.load(handle))


def commonprefix(files):
    """Fixed version of os.path.commonprefix.

    :param files: list of file names.
    :return: the longest path prefix that is a prefix of all files."""
    result = None
    for current in files:
        if result is not None:
            result = os.path.commonprefix([result, current])
        else:
            result = current

    if result is None:
        return ""
    elif not os.path.isdir(result):
        return os.path.dirname(result)
    else:
        return os.path.abspath(result)
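
# Illustrative note (not part of the original module): os.path.commonprefix is a
# character-wise prefix, so for ["/usr/lib/a.c", "/usr/local/b.c"] it returns
# "/usr/l". On a system where "/usr/l" is not a directory, the wrapper above
# falls back to os.path.dirname and returns "/usr", so the prefix never cuts
# into a path component.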