1 //===-- HTMLLogger.cpp ----------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the HTML logger. Given a directory dir/, we write
10 // dir/0.html for the first analysis, etc.
11 // These files contain a visualization that allows inspecting the CFG and the
12 // state of the analysis at each point.
13 // Static assets (HTMLLogger.js, HTMLLogger.css) and SVG graphs etc are embedded
14 // so each output file is self-contained.
18 // The timeline and function view are always shown. These allow selecting basic
19 // blocks, statements within them, and processing iterations (BBs are visited
20 // multiple times when e.g. loops are involved).
21 // These are written directly into the HTML body.
23 // There are also listings of particular basic blocks, and dumps of the state
24 // at particular analysis points (i.e. BB2 iteration 3 statement 2).
25 // These are only shown when the relevant BB/analysis point is *selected*.
29 // The HTML proper is mostly static.
30 // The analysis data is in a JSON object HTMLLoggerData which is embedded as
31 // a <script> in the <head>.
32 // This gets rendered into DOM by a simple template processor which substitutes
33 // the data into <template> tags embedded in the HTML. (see inflate() in JS).
37 // This is the only real interactive mechanism.
39 // At any given time, there are several named selections, e.g.:
40 // bb: B2 (basic block 2 is selected)
41 // elt: B2.4 (statement 4 is selected)
42 // iter: B2:1 (iteration 1 of the basic block is selected)
43 // hover: B3 (hovering over basic block 3)
45 // The selection is updated by mouse events: hover by moving the mouse and
46 // others by clicking. Elements that are click targets generally have attributes
47 // (id or data-foo) that define what they should select.
48 // See watchSelection() in JS for the exact logic.
50 // When the "bb" selection is set to "B2":
51 // - sections <section data-selection="bb"> get shown
52 // - templates under such sections get re-rendered
53 // - elements with class/id "B2" get class "bb-select"
55 //===----------------------------------------------------------------------===//
57 #include "clang/Analysis/FlowSensitive/AdornedCFG.h"
58 #include "clang/Analysis/FlowSensitive/DebugSupport.h"
59 #include "clang/Analysis/FlowSensitive/Logger.h"
60 #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
61 #include "clang/Analysis/FlowSensitive/Value.h"
62 #include "clang/Basic/SourceManager.h"
63 #include "clang/Lex/Lexer.h"
64 #include "llvm/ADT/DenseMap.h"
65 #include "llvm/ADT/ScopeExit.h"
66 #include "llvm/Support/Error.h"
67 #include "llvm/Support/FormatVariadic.h"
68 #include "llvm/Support/JSON.h"
69 #include "llvm/Support/Program.h"
70 #include "llvm/Support/ScopedPrinter.h"
71 #include "llvm/Support/raw_ostream.h"
72 // Defines assets: HTMLLogger_{html,js,css}
73 #include "HTMLLogger.inc"
75 namespace clang::dataflow
{
78 // Render a graphviz graph specification to SVG using the `dot` tool.
// Forward declaration; the definition appears later in this file.
// DotGraph is the graphviz description text. On success the result is the
// `dot` output starting at its "<svg" tag; on failure an llvm::Error is
// returned instead (e.g. "'dot' not found on PATH").
79 llvm::Expected
<std::string
> renderSVG(llvm::StringRef DotGraph
);
// Callable that takes no arguments and returns an owned llvm::raw_ostream.
// HTMLLogger stores one of these in its `Streams` member.
81 using StreamFactory
= std::function
<std::unique_ptr
<llvm::raw_ostream
>()>;
83 // Recursively dumps Values/StorageLocations as JSON
86 ModelDumper(llvm::json::OStream
&JOS
, const Environment
&Env
)
87 : JOS(JOS
), Env(Env
) {}
90 JOS
.attribute("value_id", llvm::to_string(&V
));
91 if (!Visited
.insert(&V
).second
)
94 JOS
.attribute("kind", debugString(V
.getKind()));
96 switch (V
.getKind()) {
97 case Value::Kind::Integer
:
98 case Value::Kind::TopBool
:
99 case Value::Kind::AtomicBool
:
100 case Value::Kind::FormulaBool
:
102 case Value::Kind::Pointer
:
104 "pointee", [&] { dump(cast
<PointerValue
>(V
).getPointeeLoc()); });
108 for (const auto& Prop
: V
.properties())
109 JOS
.attributeObject(("p:" + Prop
.first()).str(),
110 [&] { dump(*Prop
.second
); });
112 // Running the SAT solver is expensive, but knowing which booleans are
113 // guaranteed true/false here is valuable and hard to determine by hand.
114 if (auto *B
= llvm::dyn_cast
<BoolValue
>(&V
)) {
115 JOS
.attribute("formula", llvm::to_string(B
->formula()));
116 JOS
.attribute("truth", Env
.proves(B
->formula()) ? "true"
117 : Env
.proves(Env
.arena().makeNot(B
->formula()))
122 void dump(const StorageLocation
&L
) {
123 JOS
.attribute("location", llvm::to_string(&L
));
124 if (!Visited
.insert(&L
).second
)
127 JOS
.attribute("type", L
.getType().getAsString());
128 if (!L
.getType()->isRecordType())
129 if (auto *V
= Env
.getValue(L
))
132 if (auto *RLoc
= dyn_cast
<RecordStorageLocation
>(&L
)) {
133 for (const auto &Child
: RLoc
->children())
134 JOS
.attributeObject("f:" + Child
.first
->getNameAsString(), [&] {
139 for (const auto &SyntheticField
: RLoc
->synthetic_fields())
140 JOS
.attributeObject(("sf:" + SyntheticField
.first()).str(),
141 [&] { dump(*SyntheticField
.second
); });
145 llvm::DenseSet
<const void*> Visited
;
146 llvm::json::OStream
&JOS
;
147 const Environment
&Env
;
150 class HTMLLogger
: public Logger
{
152 const CFGBlock
*Block
;
158 StreamFactory Streams
;
159 std::unique_ptr
<llvm::raw_ostream
> OS
;
161 llvm::raw_string_ostream JStringStream
{JSON
};
162 llvm::json::OStream JOS
{JStringStream
, /*Indent=*/2};
164 const AdornedCFG
*ACFG
;
165 // Timeline of iterations of CFG block visitation.
166 std::vector
<Iteration
> Iters
;
167 // Indexes in `Iters` of the iterations for each block.
168 llvm::DenseMap
<const CFGBlock
*, llvm::SmallVector
<size_t>> BlockIters
;
169 // For a given block ID, did the block converge (on the last iteration)?
170 llvm::BitVector BlockConverged
;
171 // The messages logged in the current context but not yet written.
172 std::string ContextLogs
;
173 // The number of elements we have visited within the current CFG block.
174 unsigned ElementIndex
;
177 explicit HTMLLogger(StreamFactory Streams
) : Streams(std::move(Streams
)) {}
178 void beginAnalysis(const AdornedCFG
&ACFG
,
179 TypeErasedDataflowAnalysis
&A
) override
{
182 *OS
<< llvm::StringRef(HTMLLogger_html
).split("<?INJECT?>").first
;
184 BlockConverged
.resize(ACFG
.getCFG().getNumBlockIDs());
186 const auto &D
= ACFG
.getDecl();
187 const auto &SM
= A
.getASTContext().getSourceManager();
189 if (const auto *ND
= dyn_cast
<NamedDecl
>(&D
))
190 *OS
<< ND
->getNameAsString() << " at ";
191 *OS
<< SM
.getFilename(D
.getLocation()) << ":"
192 << SM
.getSpellingLineNumber(D
.getLocation());
195 *OS
<< "<style>" << HTMLLogger_css
<< "</style>\n";
196 *OS
<< "<script>" << HTMLLogger_js
<< "</script>\n";
200 JOS
.attributeBegin("states");
203 // Between beginAnalysis() and endAnalysis() we write all the states for
204 // particular analysis points into the `timeline` array.
205 void endAnalysis() override
{
209 JOS
.attributeArray("timeline", [&] {
210 for (const auto &E
: Iters
) {
212 JOS
.attribute("block", blockID(E
.Block
->getBlockID()));
213 JOS
.attribute("iter", E
.Iter
);
214 JOS
.attribute("post_visit", E
.PostVisit
);
215 JOS
.attribute("converged", E
.Converged
);
219 JOS
.attributeObject("cfg", [&] {
220 for (const auto &E
: BlockIters
)
221 writeBlock(*E
.first
, E
.second
);
228 *OS
<< "<script>var HTMLLoggerData = \n";
230 *OS
<< ";\n</script>\n";
231 *OS
<< llvm::StringRef(HTMLLogger_html
).split("<?INJECT?>").second
;
234 void enterBlock(const CFGBlock
&B
, bool PostVisit
) override
{
235 llvm::SmallVector
<size_t> &BIter
= BlockIters
[&B
];
236 unsigned IterNum
= BIter
.size() + 1;
237 BIter
.push_back(Iters
.size());
238 Iters
.push_back({&B
, IterNum
, PostVisit
, /*Converged=*/false});
240 BlockConverged
[B
.getBlockID()] = false;
243 void enterElement(const CFGElement
&E
) override
{
// Formats the ID string of a basic block: "B" followed by the block number,
// e.g. blockID(2) yields "B2".
247 static std::string
blockID(unsigned Block
) {
248 return llvm::formatv("B{0}", Block
);
// Formats the ID string of an element within a basic block as
// "B<Block>.<Element>", e.g. eltID(2, 4) yields "B2.4".
250 static std::string
eltID(unsigned Block
, unsigned Element
) {
251 return llvm::formatv("B{0}.{1}", Block
, Element
);
// Formats the ID string of one iteration of a basic block as
// "B<Block>:<Iter>", e.g. iterID(2, 1) yields "B2:1".
253 static std::string
iterID(unsigned Block
, unsigned Iter
) {
254 return llvm::formatv("B{0}:{1}", Block
, Iter
);
// Formats the ID of an (iteration, element) pair as
// "B<Block>:<Iter>_B<Block>.<Element>", e.g. "B2:1_B2.4"; i.e. the same
// shapes produced by iterID and eltID, joined by '_'.
// recordState uses this string as the attribute key for each state it writes.
256 static std::string
elementIterID(unsigned Block
, unsigned Iter
,
258 return llvm::formatv("B{0}:{1}_B{0}.{2}", Block
, Iter
, Element
);
261 // Write the analysis state associated with a particular analysis point.
262 // FIXME: this dump is fairly opaque. We should show:
263 // - values associated with the current Stmt
264 // - values associated with its children
265 // - meaningful names for values
266 // - which boolean values are implied true/false by the flow condition
267 void recordState(TypeErasedDataflowAnalysisState
&State
) override
{
268 unsigned Block
= Iters
.back().Block
->getBlockID();
269 unsigned Iter
= Iters
.back().Iter
;
270 bool PostVisit
= Iters
.back().PostVisit
;
271 JOS
.attributeObject(elementIterID(Block
, Iter
, ElementIndex
), [&] {
272 JOS
.attribute("block", blockID(Block
));
273 JOS
.attribute("iter", Iter
);
274 JOS
.attribute("post_visit", PostVisit
);
275 JOS
.attribute("element", ElementIndex
);
277 // If this state immediately follows an Expr, show its built-in model.
278 if (ElementIndex
> 0) {
280 Iters
.back().Block
->Elements
[ElementIndex
- 1].getAs
<CFGStmt
>();
281 if (const Expr
*E
= S
? llvm::dyn_cast
<Expr
>(S
->getStmt()) : nullptr) {
282 if (E
->isPRValue()) {
283 if (!E
->getType()->isRecordType())
284 if (auto *V
= State
.Env
.getValue(*E
))
286 "value", [&] { ModelDumper(JOS
, State
.Env
).dump(*V
); });
288 if (auto *Loc
= State
.Env
.getStorageLocation(*E
))
290 "value", [&] { ModelDumper(JOS
, State
.Env
).dump(*Loc
); });
294 if (!ContextLogs
.empty()) {
295 JOS
.attribute("logs", ContextLogs
);
299 std::string BuiltinLattice
;
300 llvm::raw_string_ostream
BuiltinLatticeS(BuiltinLattice
);
301 State
.Env
.dump(BuiltinLatticeS
);
302 JOS
.attribute("builtinLattice", BuiltinLattice
);
// Logger callback: records convergence for the most recent timeline entry.
// Sets the Converged flag on Iters.back() and sets the BlockConverged bit
// for that entry's block ID.
// Iters.back() is used unchecked, so Iters must be non-empty when called.
306 void blockConverged() override
{
307 Iters
.back().Converged
= true;
308 BlockConverged
[Iters
.back().Block
->getBlockID()] = true;
// Logger callback: buffers a log message.
// Appends S followed by a '\n' to ContextLogs; nothing is written to the
// output here (recordState emits ContextLogs as the "logs" attribute).
311 void logText(llvm::StringRef S
) override
{
312 ContextLogs
.append(S
.begin(), S
.end());
313 ContextLogs
.push_back('\n');
317 // Write the CFG block details.
318 // Currently this is just the list of elements in execution order.
319 // FIXME: an AST dump would be a useful view, too.
320 void writeBlock(const CFGBlock
&B
, llvm::ArrayRef
<size_t> ItersForB
) {
321 JOS
.attributeObject(blockID(B
.getBlockID()), [&] {
322 JOS
.attributeArray("iters", [&] {
323 for (size_t IterIdx
: ItersForB
) {
324 const Iteration
&Iter
= Iters
[IterIdx
];
326 JOS
.attribute("iter", Iter
.Iter
);
327 JOS
.attribute("post_visit", Iter
.PostVisit
);
328 JOS
.attribute("converged", Iter
.Converged
);
332 JOS
.attributeArray("elements", [&] {
333 for (const auto &Elt
: B
.Elements
) {
335 llvm::raw_string_ostream
DumpS(Dump
);
336 Elt
.dumpToStream(DumpS
);
343 // Write the code of function being examined.
344 // We want to overlay the code with <span>s that mark which BB particular
345 // tokens are associated with, and even which BB element (so that clicking
346 // can select the right element).
348 const auto &AST
= ACFG
->getDecl().getASTContext();
349 bool Invalid
= false;
351 // Extract the source code from the original file.
352 // Pretty-printing from the AST would probably be nicer (no macros or
353 // indentation to worry about), but we need the boundaries of particular
354 // AST nodes and the printer doesn't provide this.
355 auto Range
= clang::Lexer::makeFileCharRange(
356 CharSourceRange::getTokenRange(ACFG
->getDecl().getSourceRange()),
357 AST
.getSourceManager(), AST
.getLangOpts());
358 if (Range
.isInvalid())
360 llvm::StringRef Code
= clang::Lexer::getSourceText(
361 Range
, AST
.getSourceManager(), AST
.getLangOpts(), &Invalid
);
365 // TokenInfo stores the BB and set of elements that a token is part of.
367 enum : unsigned { Missing
= static_cast<unsigned>(-1) };
369 // The basic block this is part of.
370 // This is the BB of the stmt with the smallest containing range.
371 unsigned BB
= Missing
;
372 unsigned BBPriority
= 0;
373 // The most specific stmt this is part of (smallest range).
374 unsigned Elt
= Missing
;
375 unsigned EltPriority
= 0;
376 // All stmts this is part of.
377 SmallVector
<unsigned> Elts
;
379 // Mark this token as being part of BB.Elt.
380 // RangeLen is the character length of the element's range, used to
381 // distinguish inner vs outer statements.
382 // For example in `a==0`, token "a" is part of the stmts "a" and "a==0".
383 // However "a" has a smaller range, so is more specific. Clicking on the
384 // token "a" should select the stmt "a".
// BBPriority tracks the smallest RangeLen seen for the currently assigned
// BB (see the std::min update below); a smaller value means more specific.
385 void assign(unsigned BB
, unsigned Elt
, unsigned RangeLen
) {
386 // A worse BB (larger range) => ignore.
387 if (this->BB
!= Missing
&& BB
!= this->BB
&& BBPriority
<= RangeLen
)
// Switching to a different BB: its RangeLen becomes the new BBPriority.
389 if (BB
!= this->BB
) {
392 BBPriority
= RangeLen
;
394 BBPriority
= std::min(BBPriority
, RangeLen
);
// NOTE(review): the statement guarded by the condition below is not visible
// in this listing. EltPriority is initialised to 0 and is unsigned, so
// `EltPriority > RangeLen` can never be true unless EltPriority is assigned
// somewhere; confirm it is updated together with this->Elt.
396 if (this->Elt
== Missing
|| EltPriority
> RangeLen
)
399 bool operator==(const TokenInfo
&Other
) const {
400 return std::tie(BB
, Elt
, Elts
) ==
401 std::tie(Other
.BB
, Other
.Elt
, Other
.Elts
);
403 // Write the attributes for the <span> on this token.
404 void write(llvm::raw_ostream
&OS
) const {
407 OS
<< " " << blockID(BB
);
408 for (unsigned Elt
: Elts
)
409 OS
<< " " << eltID(BB
, Elt
);
413 OS
<< " data-elt='" << eltID(BB
, Elt
) << "'";
415 OS
<< " data-bb='" << blockID(BB
) << "'";
419 // Construct one TokenInfo per character in a flat array.
420 // This is inefficient (chars in a token all have the same info) but simple.
421 std::vector
<TokenInfo
> State(Code
.size());
// Walk every element of every CFG block and mark the characters it covers.
422 for (const auto *Block
: ACFG
->getCFG()) {
423 unsigned EltIndex
= 0;
424 for (const auto& Elt
: *Block
) {
// Only elements that are CFGStmts are mapped onto source text here.
426 if (const auto S
= Elt
.getAs
<CFGStmt
>()) {
// Convert the statement's token range to a file character range, the same
// way Range was computed for the whole declaration.
427 auto EltRange
= clang::Lexer::makeFileCharRange(
428 CharSourceRange::getTokenRange(S
->getStmt()->getSourceRange()),
429 AST
.getSourceManager(), AST
.getLangOpts());
430 if (EltRange
.isInvalid())
// Bounds check of the element's range against the declaration's Range.
// NOTE(review): the 2nd and 4th conditions below are textually identical
// (EltRange.getEnd() >= Range.getEnd()); the 2nd was possibly meant to test
// EltRange.getBegin() -- confirm. As written the duplicate is redundant.
432 if (EltRange
.getBegin() < Range
.getBegin() ||
433 EltRange
.getEnd() >= Range
.getEnd() ||
434 EltRange
.getEnd() < Range
.getBegin() ||
435 EltRange
.getEnd() >= Range
.getEnd())
// Off: distance from the start of Range to the start of the element.
// Len: length of the element. Both are differences of raw SourceLocation
// encodings. Off + I is used directly as an index into State (sized to
// Code.size() above).
437 unsigned Off
= EltRange
.getBegin().getRawEncoding() -
438 Range
.getBegin().getRawEncoding();
439 unsigned Len
= EltRange
.getEnd().getRawEncoding() -
440 EltRange
.getBegin().getRawEncoding();
// Len is also passed as RangeLen, so shorter elements count as more specific.
441 for (unsigned I
= 0; I
< Len
; ++I
)
442 State
[Off
+ I
].assign(Block
->getBlockID(), EltIndex
, Len
);
447 // Finally, write the code with the correct <span>s.
449 AST
.getSourceManager().getSpellingLineNumber(Range
.getBegin());
450 *OS
<< "<template data-copy='code'>\n";
451 *OS
<< "<code class='filename'>";
452 llvm::printHTMLEscaped(
453 llvm::sys::path::filename(
454 AST
.getSourceManager().getFilename(Range
.getBegin())),
457 *OS
<< "<code class='line' data-line='" << Line
++ << "'>";
458 for (unsigned I
= 0; I
< Code
.size(); ++I
) {
459 // Don't actually write a <span> around each character, only break spans
460 // when the TokenInfo changes.
461 bool NeedOpen
= I
== 0 || !(State
[I
] == State
[I
-1]);
462 bool NeedClose
= I
+ 1 == Code
.size() || !(State
[I
] == State
[I
+ 1]);
469 *OS
<< "</code>\n<code class='line' data-line='" << Line
++ << "'>";
471 llvm::printHTMLEscaped(Code
.substr(I
, 1), *OS
);
472 if (NeedClose
) *OS
<< "</span>";
475 *OS
<< "</template>";
478 // Write the CFG diagram, a graph of basic blocks.
479 // Laying out graphs is hard, so we construct a graphviz description and shell
480 // out to `dot` to turn it into an SVG.
482 *OS
<< "<template data-copy='cfg'>\n";
483 if (auto SVG
= renderSVG(buildCFGDot(ACFG
->getCFG())))
486 *OS
<< "Can't draw CFG: " << toString(SVG
.takeError());
487 *OS
<< "</template>\n";
490 // Produce a graphviz description of a CFG.
491 std::string
buildCFGDot(const clang::CFG
&CFG
) {
493 llvm::raw_string_ostream
GraphS(Graph
);
494 // Graphviz likes to add unhelpful tooltips everywhere, " " suppresses.
495 GraphS
<< R
"(digraph {
497 node[class=bb, shape=square, fontname="sans
-serif
", tooltip=" "]
500 for (unsigned I
= 0; I
< CFG
.getNumBlockIDs(); ++I
) {
501 std::string Name
= blockID(I
);
502 // Rightwards arrow, vertical line
503 const char *ConvergenceMarker
= (const char *)u8
"\\n\u2192\u007c";
504 if (BlockConverged
[I
])
505 Name
+= ConvergenceMarker
;
506 GraphS
<< " " << blockID(I
) << " [id=" << blockID(I
) << " label=\""
509 for (const auto *Block
: CFG
) {
510 for (const auto &Succ
: Block
->succs()) {
511 if (Succ
.getReachableBlock())
512 GraphS
<< " " << blockID(Block
->getBlockID()) << " -> "
513 << blockID(Succ
.getReachableBlock()->getBlockID()) << "\n";
521 // Nothing interesting here, just subprocess/temp-file plumbing.
522 llvm::Expected
<std::string
> renderSVG(llvm::StringRef DotGraph
) {
524 if (const auto *FromEnv
= ::getenv("GRAPHVIZ_DOT"))
527 auto FromPath
= llvm::sys::findProgramByName("dot");
529 return llvm::createStringError(FromPath
.getError(),
530 "'dot' not found on PATH");
531 DotPath
= FromPath
.get();
534 // Create input and output files for `dot` subprocess.
535 // (We create the output file as empty, to reserve the temp filename).
536 llvm::SmallString
<256> Input
, Output
;
538 if (auto EC
= llvm::sys::fs::createTemporaryFile("analysis", ".dot", InputFD
,
540 return llvm::createStringError(EC
, "failed to create `dot` temp input");
541 llvm::raw_fd_ostream(InputFD
, /*shouldClose=*/true) << DotGraph
;
543 llvm::make_scope_exit([&] { llvm::sys::fs::remove(Input
); });
544 if (auto EC
= llvm::sys::fs::createTemporaryFile("analysis", ".svg", Output
))
545 return llvm::createStringError(EC
, "failed to create `dot` temp output");
547 llvm::make_scope_exit([&] { llvm::sys::fs::remove(Output
); });
549 std::vector
<std::optional
<llvm::StringRef
>> Redirects
= {
551 /*stderr=*/std::nullopt
};
553 int Code
= llvm::sys::ExecuteAndWait(
554 DotPath
, {"dot", "-Tsvg"}, /*Env=*/std::nullopt
, Redirects
,
555 /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg
);
557 return llvm::createStringError(llvm::inconvertibleErrorCode(),
558 "'dot' failed: " + ErrMsg
);
560 return llvm::createStringError(llvm::inconvertibleErrorCode(),
561 "'dot' failed (" + llvm::Twine(Code
) + ")");
563 auto Buf
= llvm::MemoryBuffer::getFile(Output
);
565 return llvm::createStringError(Buf
.getError(), "Can't read `dot` output");
567 // Output has <?xml> prefix we don't want. Skip to <svg> tag.
568 llvm::StringRef Result
= Buf
.get()->getBuffer();
569 auto Pos
= Result
.find("<svg");
570 if (Pos
== llvm::StringRef::npos
)
571 return llvm::createStringError(llvm::inconvertibleErrorCode(),
572 "Can't find <svg> tag in `dot` output");
573 return Result
.substr(Pos
).str();
// Factory for the HTML logger: constructs an HTMLLogger, moving Streams into
// it, and returns it through the Logger base-class pointer.
578 std::unique_ptr
<Logger
>
579 Logger::html(std::function
<std::unique_ptr
<llvm::raw_ostream
>()> Streams
) {
580 return std::make_unique
<HTMLLogger
>(std::move(Streams
));
583 } // namespace clang::dataflow