1 # -*- coding: utf-8 -*-
2 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3 # See https://llvm.org/LICENSE.txt for license information.
4 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5 """ This module is responsible to generate 'index.html' for the report.
7 The input for this step is the output directory, where individual reports
8 could be found. It parses those reports and generates 'index.html'. """
20 from libscanbuild
import duplicate_check
21 from libscanbuild
.clang
import get_version
# Public API of this module: only 'document' is exported.
23 __all__
= ["document"]
# NOTE(review): fragmented dump — the embedded numbers (27, 29, ...) are
# original-file line numbers, and every gap in them marks lines missing from
# this view (including the 'def document(args):' line itself, original line
# 26). Code tokens below are kept byte-identical; only comments are added.
27 """Generates cover report and returns the number of bugs/crashes."""
# Which report flavours were requested, decided from the output format flag.
29 html_reports_available
= args
.output_format
in {"html", "plist-html", "sarif-html"}
30 sarif_reports_available
= args
.output_format
in {"sarif", "sarif-html"}
32 logging
.debug("count crashes and bugs")
# Count analyzer crashes recorded in the output directory.
33 crash_count
= sum(1 for _
in read_crashes(args
.output
))
34 bug_counter
= create_counters()
# Feed every parsed bug into the counter (the loop body, original line 36,
# is missing here — presumably 'bug_counter(bug)'; confirm before use).
35 for bug
in read_bugs(args
.output
, html_reports_available
):
37 result
= crash_count
+ bug_counter
.total
# Only assemble index.html when HTML reports exist and something was found.
39 if html_reports_available
and result
:
40 use_cdb
= os
.path
.exists(args
.cdb
)
42 logging
.debug("generate index.html file")
43 # common prefix for source files to have shorter path
44 prefix
= commonprefix_from(args
.cdb
) if use_cdb
else os
.getcwd()
45 # assemble the cover from multiple fragments
# (original lines 46-48 missing — presumably the 'fragments' list init and
# the guards around the appends; confirm against upstream.)
49 fragments
.append(bug_summary(args
.output
, bug_counter
))
50 fragments
.append(bug_report(args
.output
, prefix
))
52 fragments
.append(crash_report(args
.output
, prefix
))
53 assemble_cover(args
, prefix
, fragments
)
54 # copy additional files to the report
55 copy_resource_files(args
.output
)
# (original line 56 missing — presumably an 'if use_cdb:' guard before the
# compilation database copy; confirm.)
57 shutil
.copy(args
.cdb
, args
.output
)
# Cleanup loop over fragment files (body at original lines 60-61 missing —
# presumably os.remove(fragment); confirm).
59 for fragment
in fragments
:
62 if sarif_reports_available
:
63 logging
.debug("merging sarif files")
64 merge_sarif_files(args
.output
)
# Writes 'index.html' by concatenating an HTML head/summary header with the
# already-rendered fragment files. NOTE(review): many of the HTML template
# lines are missing from this dump (numbering gaps) — code kept byte-identical.
69 def assemble_cover(args
, prefix
, fragments
):
70 """Put together the fragments into a final report."""
# Default the page title from the common source prefix.
75 if args
.html_title
is None:
76 args
.html_title
= os
.path
.basename(prefix
) + " - analyzer results"
78 with
open(os
.path
.join(args
.output
, "index.html"), "w") as handle
:
86 | <title>{html_title}</title>
87 | <link type="text/css" rel="stylesheet" href="scanview.css"/>
88 | <script type='text/javascript' src="sorttable.js"></script>
89 | <script type='text/javascript' src='selectable.js'></script>
92 ).format(html_title
=args
.html_title
)
# Marker comment consumed by downstream tooling.
94 handle
.write(comment("SUMMARYENDHEAD"))
99 | <h1>{html_title}</h1>
101 | <tr><th>User:</th><td>{user_name}@{host_name}</td></tr>
102 | <tr><th>Working Directory:</th><td>{current_dir}</td></tr>
103 | <tr><th>Command Line:</th><td>{cmd_args}</td></tr>
104 | <tr><th>Clang Version:</th><td>{clang_version}</td></tr>
105 | <tr><th>Date:</th><td>{date}</td></tr>
109 html_title
=args
.html_title
,
110 user_name
=getpass
.getuser(),
111 host_name
=socket
.gethostname(),
113 cmd_args
=" ".join(sys
.argv
),
114 clang_version
=get_version(args
.clang
),
115 date
=datetime
.datetime
.today().strftime("%c"),
# Append each pre-rendered fragment file verbatim.
118 for fragment
in fragments
:
119 # copy the content of fragments
120 with
open(fragment
, "r") as input_handle
:
121 shutil
.copyfileobj(input_handle
, handle
)
# Renders 'summary.html.fragment': an overview table of bug counts by
# category/type. NOTE(review): most of the HTML template lines are missing
# from this dump (numbering gaps) — code kept byte-identical.
132 def bug_summary(output_dir
, bug_counter
):
133 """Bug summary is a HTML table to give a better overview of the bugs."""
135 name
= os
.path
.join(output_dir
, "summary.html.fragment")
136 with
open(name
, "w") as handle
:
141 |<h2>Bug Summary</h2>
147 | <td class="sorttable_nosort">Display?</td>
157 | <tr style="font-weight:bold">
158 | <td class="SUMM_DESC">All Bugs</td>
159 | <td class="Q">{0}</td>
162 | <input checked type="checkbox" id="AllBugsCheck"
163 | onClick="CopyCheckedStateToCheckButtons(this);"/>
168 ).format(bug_counter
.total
)
# One table section per bug category, one row per bug type within it.
170 for category
, types
in bug_counter
.categories
.items():
175 | <th>{0}</th><th colspan=2></th>
180 for bug_type
in types
.values():
185 | <td class="SUMM_DESC">{bug_type}</td>
186 | <td class="Q">{bug_count}</td>
189 | <input checked type="checkbox"
190 | onClick="ToggleDisplay(this,'{bug_type_class}');"/>
# Marker comment consumed by downstream tooling.
205 handle
.write(comment("SUMMARYBUGEND"))
# Renders 'bugs.html.fragment': one sortable table row per de-duplicated bug.
# NOTE(review): template lines are missing from this dump (numbering gaps);
# the loop over 'bugs' binding 'current' (used below) is among the missing
# lines — code kept byte-identical.
209 def bug_report(output_dir
, prefix
):
210 """Creates a fragment from the analyzer reports."""
# 'pretty' HTML-escapes and path-relativizes each bug record.
212 pretty
= prettify_bug(prefix
, output_dir
)
213 bugs
= (pretty(bug
) for bug
in read_bugs(output_dir
, True))
215 name
= os
.path
.join(output_dir
, "bugs.html.fragment")
216 with
open(name
, "w") as handle
:
222 |<table class="sortable" style="table-layout:automatic">
226 | <td class="sorttable_sorted">
228 | <span id="sorttable_sortfwdind"> ▾</span>
231 | <td>Function/Method</td>
232 | <td class="Q">Line</td>
233 | <td class="Q">Path Length</td>
234 | <td class="sorttable_nosort"></td>
241 handle
.write(comment("REPORTBUGCOL"))
246 | <tr class="{bug_type_class}">
247 | <td class="DESC">{bug_category}</td>
248 | <td class="DESC">{bug_type}</td>
249 | <td>{bug_file}</td>
250 | <td class="DESC">{bug_function}</td>
251 | <td class="Q">{bug_line}</td>
252 | <td class="Q">{bug_path_length}</td>
253 | <td><a href="{report_file}#EndPath">View Report</a></td>
258 handle
.write(comment("REPORTBUG", {"id": current
["report_file"]}))
267 handle
.write(comment("REPORTBUGEND"))
# Renders 'crashes.html.fragment': one table row per analyzer failure.
# NOTE(review): template lines are missing from this dump (numbering gaps) —
# code kept byte-identical.
271 def crash_report(output_dir
, prefix
):
272 """Creates a fragment from the compiler crashes."""
274 pretty
= prettify_crash(prefix
, output_dir
)
275 crashes
= (pretty(crash
) for crash
in read_crashes(output_dir
))
277 name
= os
.path
.join(output_dir
, "crashes.html.fragment")
278 with
open(name
, "w") as handle
:
283 |<h2>Analyzer Failures</h2>
284 |<p>The analyzer had problems processing the following files:</p>
289 | <td>Source File</td>
290 | <td>Preprocessed File</td>
291 | <td>STDERR Output</td>
298 for current
in crashes
:
305 | <td><a href="{file}">preprocessor output</a></td>
306 | <td><a href="{stderr}">analyzer std err</a></td>
311 handle
.write(comment("REPORTPROBLEM", current
))
320 handle
.write(comment("REPORTCRASHES"))
def read_crashes(output_dir):
    """Generate a unique sequence of crashes from given output directory.

    Lazily yields one parsed crash record per '*.info.txt' file found
    under the 'failures' sub-directory."""
    failure_pattern = os.path.join(output_dir, "failures", "*.info.txt")
    return (parse_crash(report) for report in glob.iglob(failure_pattern))
def read_bugs(output_dir, html):
    # type: (str, bool) -> Generator[Dict[str, Any], None, None]
    """Generate a unique sequence of bugs from given output directory.

    Duplicates can be in a project if the same module was compiled multiple
    times with different compiler options. These would be better to show in
    the final report (cover) only once."""

    def non_empty(path):
        # Empty report files carry no diagnostics; skip them.
        return os.stat(path).st_size != 0

    # De-duplication key: line, path length and file identify a bug.
    seen_before = duplicate_check(
        lambda bug: "{bug_line}.{bug_path_length}:{bug_file}".format(**bug)
    )

    # HTML and plist reports need different parsers and glob patterns.
    parser = parse_bug_html if html else parse_bug_plist
    pattern = os.path.join(output_dir, "*.html" if html else "*.plist")

    for report_file in glob.iglob(pattern):
        if non_empty(report_file):
            for bug in parser(report_file):
                if not seen_before(bug):
                    yield bug
# Merges all per-TU SARIF files into 'results-merged.sarif'. NOTE(review):
# several lines are missing from this dump (numbering gaps), including the
# 'sarif_files' generator assignment around line 429, the 'merged'/'runs_count'
# initialisation (lines 447-449) and early-exit/sort handling — code kept
# byte-identical.
360 def merge_sarif_files(output_dir
, sort_files
=False):
361 """Reads and merges all .sarif files in the given output directory.
363 Each sarif file in the output directory is understood as a single run
364 and thus appear separate in the top level runs array. This requires
365 modifying the run index of any embedded links in messages.
368 def empty(file_name
):
369 return os
.stat(file_name
).st_size
== 0
# Recursive fixer: shifts embedded 'sarif:/runs/<i>' links by the offset.
371 def update_sarif_object(sarif_object
, runs_count_offset
):
373 Given a SARIF object, checks its dictionary entries for a 'message' property.
374 If it exists, updates the message index of embedded links in the run index.
376 Recursively looks through entries in the dictionary.
378 if not isinstance(sarif_object
, dict):
381 if "message" in sarif_object
:
382 sarif_object
["message"] = match_and_update_run(
383 sarif_object
["message"], runs_count_offset
386 for key
in sarif_object
:
387 if isinstance(sarif_object
[key
], list):
388 # iterate through subobjects and update it.
390 update_sarif_object(entry
, runs_count_offset
)
391 for entry
in sarif_object
[key
]
393 sarif_object
[key
] = arr
394 elif isinstance(sarif_object
[key
], dict):
395 sarif_object
[key
] = update_sarif_object(
396 sarif_object
[key
], runs_count_offset
404 def match_and_update_run(message
, runs_count_offset
):
406 Given a SARIF message object, checks if the text property contains an embedded link and
407 updates the run index if necessary.
409 if "text" not in message
:
412 # we only merge runs, so we only need to update the run index
413 pattern
= re
.compile(r
"sarif:/runs/(\d+)")
415 text
= message
["text"]
416 matches
= re
.finditer(pattern
, text
)
417 matches_list
= list(matches
)
419 # update matches from right to left to make increasing character length (9->10) smoother
420 for idx
in range(len(matches_list
) - 1, -1, -1):
421 match
= matches_list
[idx
]
422 new_run_count
= str(runs_count_offset
+ int(match
.group(1)))
423 text
= text
[0 : match
.start(1)] + new_run_count
+ text
[match
.end(1) :]
425 message
["text"] = text
430 for file in glob
.iglob(os
.path
.join(output_dir
, "*.sarif"))
433 # exposed for testing since the order of files returned by glob is not guaranteed to be sorted
435 sarif_files
= list(sarif_files
)
440 for sarif_file
in sarif_files
:
441 with
open(sarif_file
) as fp
:
442 sarif
= json
.load(fp
)
443 if "runs" not in sarif
:
446 # start with the first file
450 # extract the run and append it to the merged output
451 for run
in sarif
["runs"]:
452 new_run
= update_sarif_object(run
, runs_count
)
453 merged
["runs"].append(new_run
)
455 runs_count
+= len(sarif
["runs"])
457 with
open(os
.path
.join(output_dir
, "results-merged.sarif"), "w") as out
:
458 json
.dump(merged
, out
, indent
=4, sort_keys
=True)
# Yields one normalized bug dict per diagnostic in a .plist report.
# NOTE(review): lines are missing from this dump (numbering gaps), including
# the 'continue' after the warning and the 'yield {' opener plus possibly
# further dict keys (original lines 470-473, 479-481) — code kept
# byte-identical.
461 def parse_bug_plist(filename
):
462 """Returns the generator of bugs from a single .plist file."""
464 with
open(filename
, "rb") as fp
:
465 content
= plistlib
.load(fp
)
466 files
= content
.get("files")
467 for bug
in content
.get("diagnostics", []):
# Guard against a file index that points outside the file table.
468 if len(files
) <= int(bug
["location"]["file"]):
469 logging
.warning('Parsing bug from "%s" failed', filename
)
474 "bug_type": bug
["type"],
475 "bug_category": bug
["category"],
476 "bug_line": int(bug
["location"]["line"]),
477 "bug_path_length": int(bug
["location"]["col"]),
478 "bug_file": files
[int(bug
["location"]["file"])],
# Extracts bug metadata from the '<!-- BUG... -->' comments that the analyzer
# embeds in its HTML reports. NOTE(review): lines are missing from this dump
# (numbering gaps), including the 'patterns = [' list opener, the initial
# 'bug = {' dict (lines 495-503 partly), the loop 'break'/'if match:' guards
# and the final yield — code kept byte-identical.
482 def parse_bug_html(filename
):
483 """Parse out the bug information from HTML output."""
486 re
.compile(r
"<!-- BUGTYPE (?P<bug_type>.*) -->$"),
487 re
.compile(r
"<!-- BUGFILE (?P<bug_file>.*) -->$"),
488 re
.compile(r
"<!-- BUGPATHLENGTH (?P<bug_path_length>.*) -->$"),
489 re
.compile(r
"<!-- BUGLINE (?P<bug_line>.*) -->$"),
490 re
.compile(r
"<!-- BUGCATEGORY (?P<bug_category>.*) -->$"),
491 re
.compile(r
"<!-- BUGDESC (?P<bug_description>.*) -->$"),
492 re
.compile(r
"<!-- FUNCTIONNAME (?P<bug_function>.*) -->$"),
# Sentinel marking the end of the metadata prologue.
494 endsign
= re
.compile(r
"<!-- BUGMETAEND -->")
497 "report_file": filename
,
498 "bug_function": "n/a", # compatibility with < clang-3.5
499 "bug_category": "Other",
501 "bug_path_length": 1,
504 with
open(filename
, encoding
="utf-8") as handler
:
505 for line
in handler
.readlines():
506 # do not read the file further
507 if endsign
.match(line
):
509 # search for the right lines
510 for regex
in patterns
:
511 match
= regex
.match(line
.strip())
513 bug
.update(match
.groupdict())
# Numeric fields arrive as strings from the regex; coerce them.
516 encode_value(bug
, "bug_line", int)
517 encode_value(bug
, "bug_path_length", int)
# Parses a 'failures/*.info.txt' crash record. NOTE(review): the 'return {'
# opener and several dict keys (original lines 530-533 — presumably source/
# problem/file entries built from 'lines') are missing from this dump —
# code kept byte-identical.
522 def parse_crash(filename
):
523 """Parse out the crash information from the report file."""
# Derive the report base name by stripping the '.info.txt' suffix.
525 match
= re
.match(r
"(.*)\.info\.txt", filename
)
526 name
= match
.group(1) if match
else None
527 with
open(filename
, mode
="rb") as handler
:
528 # this is a workaround to fix windows read '\r\n' as new lines.
529 lines
= [line
.decode().rstrip() for line
in handler
.readlines()]
534 "info": name
+ ".info.txt",
535 "stderr": name
+ ".stderr.txt",
def category_type_name(bug):
    """Create a new bug attribute from bug by category and type.

    The result will be used as CSS class selector in the final report."""
    # NOTE(review): the inner helper's 'def' line is absent from this dump;
    # it is restored here (renamed) from its visible body and call sites.

    def attr_value(key):
        """Make value ready to be HTML attribute value."""
        raw = bug.get(key, "")
        return raw.lower().replace(" ", "_").replace("'", "")

    return escape("_".join(["bt", attr_value("bug_category"), attr_value("bug_type")]))
# Builds a stateful counter callable with 'total' and 'categories' attributes.
# NOTE(review): lines are missing from this dump (numbering gaps), including
# the inner 'def predicate(bug):' line, the default dict passed to
# current_category.get (lines 566-572 partly), the 'total' bookkeeping and
# the final 'return predicate' — code kept byte-identical.
552 def create_counters():
553 """Create counters for bug statistics.
555 Two entries are maintained: 'total' is an integer, represents the
556 number of bugs. The 'categories' is a two level categorisation of bug
557 counters. The first level is 'bug category' the second is 'bug type'.
558 Each entry in this classification is a dictionary of 'count', 'type'
562 bug_category
= bug
["bug_category"]
563 bug_type
= bug
["bug_type"]
# Look up (or create) the nested category/type counter entries.
564 current_category
= predicate
.categories
.get(bug_category
, dict())
565 current_type
= current_category
.get(
568 "bug_type": bug_type
,
569 "bug_type_class": category_type_name(bug
),
573 current_type
.update({"bug_count": current_type
["bug_count"] + 1})
574 current_category
.update({bug_type
: current_type
})
575 predicate
.categories
.update({bug_category
: current_category
})
# State is carried on the function object itself.
579 predicate
.categories
= dict()
def prettify_bug(prefix, output_dir):
    """Return a callable that makes a bug record safe to embed into HTML."""
    # NOTE(review): the inner 'def predicate(bug):' line and the trailing
    # 'return bug' / 'return predicate' are absent from this dump; restored
    # here — grounded by bug_report(), which maps the returned callable over
    # every bug record.

    def predicate(bug):
        """Make safe this values to embed into HTML."""
        relative_to_prefix = lambda x: escape(chop(prefix, x))
        relative_to_output = lambda x: escape(chop(output_dir, x))
        bug["bug_type_class"] = category_type_name(bug)
        encode_value(bug, "bug_file", relative_to_prefix)
        encode_value(bug, "bug_category", escape)
        encode_value(bug, "bug_type", escape)
        encode_value(bug, "report_file", relative_to_output)
        return bug

    return predicate
def prettify_crash(prefix, output_dir):
    """Return a callable that makes a crash record safe to embed into HTML."""
    # NOTE(review): the trailing 'return crash' / 'return predicate' are
    # absent from this dump; restored here — grounded by crash_report(),
    # which maps the returned callable over every crash record.

    def predicate(crash):
        """Make safe this values to embed into HTML."""
        relative_to_output = lambda x: escape(chop(output_dir, x))
        encode_value(crash, "source", lambda x: escape(chop(prefix, x)))
        encode_value(crash, "problem", escape)
        for key in ("file", "info", "stderr"):
            encode_value(crash, key, relative_to_output)
        return crash

    return predicate
def copy_resource_files(output_dir):
    """Copy the javascript and css files to the report directory."""
    # Resources ship alongside this module in a 'resources' sub-directory.
    resource_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "resources")
    for entry in os.listdir(resource_dir):
        shutil.copy(os.path.join(resource_dir, entry), output_dir)
def encode_value(container, key, encode):
    """Run 'encode' on 'container[key]' value and update it.

    Entries that are not present are left untouched."""
    # NOTE(review): the key-presence guard line is absent from this dump
    # (numbering gap at original lines 622-623); a guard is restored here —
    # confirm against upstream.
    if key not in container:
        return
    container[key] = encode(container[key])
def chop(prefix, filename):
    """Create 'filename' from '/prefix/filename'"""
    # An empty prefix means: leave the path as it is.
    if prefix:
        return os.path.relpath(filename, prefix)
    return filename
def escape(text):
    """Paranoid HTML escape method. (Python version independent)"""
    # NOTE(review): the 'def' line and the escape-table literal are absent
    # from this dump; restored here with the five standard HTML entities —
    # confirm against upstream.
    table = str.maketrans(
        {"&": "&amp;", '"': "&quot;", "'": "&apos;", ">": "&gt;", "<": "&lt;"}
    )
    return text.translate(table)
def reindent(text, indent):
    """Utility function to format html output and keep indentation.

    Keeps everything after the '|' marker on each non-blank line,
    re-indented by 'indent' spaces, one os.linesep-terminated line each."""
    pad = " " * indent
    kept = [
        pad + line.split("|")[1] + os.linesep
        for line in text.splitlines()
        if line.strip()
    ]
    return "".join(kept)
def comment(name, opts=None):
    """Utility function to format meta information as comment.

    :param name: the marker/tag name of the comment.
    :param opts: optional mapping rendered as ' key="value"' attributes.
    :return: one HTML comment line terminated by os.linesep."""
    # Fix: the original signature used a mutable default argument
    # (opts=dict()); a None sentinel is the safe, equivalent form.
    attributes = "".join(
        ' {0}="{1}"'.format(key, value) for key, value in (opts or {}).items()
    )
    return "<!-- {0}{1} -->{2}".format(name, attributes, os.linesep)
def commonprefix_from(filename):
    """Create file prefix from a compilation database entries."""
    # Load the compilation database first, then fold the 'file' fields.
    with open(filename, "r") as handle:
        entries = json.load(handle)
    return commonprefix(entry["file"] for entry in entries)
def commonprefix(files):
    """Fixed version of os.path.commonprefix.

    :param files: list of file names.
    :return: the longest path prefix that is a prefix of all files."""
    # NOTE(review): the loop's else-branch and the empty-input return are
    # absent from this dump (numbering gaps); restored here — the first
    # element seeds the fold, and no input falls back to the current
    # working directory. Confirm against upstream.
    result = None
    for current in files:
        result = current if result is None else os.path.commonprefix([result, current])

    if result is None:
        return os.getcwd()
    if not os.path.isdir(result):
        # A character-wise common prefix may end mid-filename; trim to a dir.
        return os.path.dirname(result)
    return os.path.abspath(result)