[Flang] remove whole-archive option for AIX linker (#76039)
[llvm-project.git] / clang / lib / Analysis / FlowSensitive / HTMLLogger.cpp
blob2a7bfce5350159dfd6503f041b679a9533a3fb97
1 //===-- HTMLLogger.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the HTML logger. Given a directory dir/, we write
10 // dir/0.html for the first analysis, etc.
11 // These files contain a visualization that allows inspecting the CFG and the
12 // state of the analysis at each point.
13 // Static assets (HTMLLogger.js, HTMLLogger.css) and SVG graphs etc are embedded
14 // so each output file is self-contained.
16 // VIEWS
18 // The timeline and function view are always shown. These allow selecting basic
19 // blocks, statements within them, and processing iterations (BBs are visited
20 // multiple times when e.g. loops are involved).
21 // These are written directly into the HTML body.
23 // There are also listings of particular basic blocks, and dumps of the state
24 // at particular analysis points (i.e. BB2 iteration 3 statement 2).
25 // These are only shown when the relevant BB/analysis point is *selected*.
27 // DATA AND TEMPLATES
29 // The HTML proper is mostly static.
30 // The analysis data is in a JSON object HTMLLoggerData which is embedded as
31 // a <script> in the <head>.
32 // This gets rendered into DOM by a simple template processor which substitutes
33 // the data into <template> tags embedded in the HTML. (see inflate() in JS).
35 // SELECTION
37 // This is the only real interactive mechanism.
39 // At any given time, there are several named selections, e.g.:
40 // bb: B2 (basic block 0 is selected)
41 // elt: B2.4 (statement 4 is selected)
42 // iter: B2:1 (iteration 1 of the basic block is selected)
43 // hover: B3 (hovering over basic block 3)
45 // The selection is updated by mouse events: hover by moving the mouse and
46 // others by clicking. Elements that are click targets generally have attributes
47 // (id or data-foo) that define what they should select.
48 // See watchSelection() in JS for the exact logic.
50 // When the "bb" selection is set to "B2":
51 // - sections <section data-selection="bb"> get shown
52 // - templates under such sections get re-rendered
53 // - elements with class/id "B2" get class "bb-select"
55 //===----------------------------------------------------------------------===//
57 #include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
58 #include "clang/Analysis/FlowSensitive/DebugSupport.h"
59 #include "clang/Analysis/FlowSensitive/Logger.h"
60 #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
61 #include "clang/Analysis/FlowSensitive/Value.h"
62 #include "clang/Basic/SourceManager.h"
63 #include "clang/Lex/Lexer.h"
64 #include "llvm/ADT/DenseMap.h"
65 #include "llvm/ADT/ScopeExit.h"
66 #include "llvm/Support/Error.h"
67 #include "llvm/Support/FormatVariadic.h"
68 #include "llvm/Support/JSON.h"
69 #include "llvm/Support/Program.h"
70 #include "llvm/Support/ScopedPrinter.h"
71 #include "llvm/Support/raw_ostream.h"
72 // Defines assets: HTMLLogger_{html_js,css}
73 #include "HTMLLogger.inc"
75 namespace clang::dataflow {
76 namespace {
78 // Render a graphviz graph specification to SVG using the `dot` tool.
79 llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph);
81 using StreamFactory = std::function<std::unique_ptr<llvm::raw_ostream>()>;
83 // Recursively dumps Values/StorageLocations as JSON
84 class ModelDumper {
85 public:
86 ModelDumper(llvm::json::OStream &JOS, const Environment &Env)
87 : JOS(JOS), Env(Env) {}
89 void dump(Value &V) {
90 JOS.attribute("value_id", llvm::to_string(&V));
91 if (!Visited.insert(&V).second)
92 return;
94 JOS.attribute("kind", debugString(V.getKind()));
96 switch (V.getKind()) {
97 case Value::Kind::Integer:
98 case Value::Kind::Record:
99 case Value::Kind::TopBool:
100 case Value::Kind::AtomicBool:
101 case Value::Kind::FormulaBool:
102 break;
103 case Value::Kind::Pointer:
104 JOS.attributeObject(
105 "pointee", [&] { dump(cast<PointerValue>(V).getPointeeLoc()); });
106 break;
109 for (const auto& Prop : V.properties())
110 JOS.attributeObject(("p:" + Prop.first()).str(),
111 [&] { dump(*Prop.second); });
113 // Running the SAT solver is expensive, but knowing which booleans are
114 // guaranteed true/false here is valuable and hard to determine by hand.
115 if (auto *B = llvm::dyn_cast<BoolValue>(&V)) {
116 JOS.attribute("formula", llvm::to_string(B->formula()));
117 JOS.attribute("truth", Env.proves(B->formula()) ? "true"
118 : Env.proves(Env.arena().makeNot(B->formula()))
119 ? "false"
120 : "unknown");
123 void dump(const StorageLocation &L) {
124 JOS.attribute("location", llvm::to_string(&L));
125 if (!Visited.insert(&L).second)
126 return;
128 JOS.attribute("type", L.getType().getAsString());
129 if (auto *V = Env.getValue(L))
130 dump(*V);
132 if (auto *RLoc = dyn_cast<RecordStorageLocation>(&L)) {
133 for (const auto &Child : RLoc->children())
134 JOS.attributeObject("f:" + Child.first->getNameAsString(), [&] {
135 if (Child.second)
136 if (Value *Val = Env.getValue(*Child.second))
137 dump(*Val);
140 for (const auto &SyntheticField : RLoc->synthetic_fields())
141 JOS.attributeObject(("sf:" + SyntheticField.first()).str(),
142 [&] { dump(*SyntheticField.second); });
146 llvm::DenseSet<const void*> Visited;
147 llvm::json::OStream &JOS;
148 const Environment &Env;
151 class HTMLLogger : public Logger {
152 struct Iteration {
153 const CFGBlock *Block;
154 unsigned Iter;
155 bool PostVisit;
156 bool Converged;
159 StreamFactory Streams;
160 std::unique_ptr<llvm::raw_ostream> OS;
161 std::optional<llvm::json::OStream> JOS;
163 const ControlFlowContext *CFG;
164 // Timeline of iterations of CFG block visitation.
165 std::vector<Iteration> Iters;
166 // Indexes in `Iters` of the iterations for each block.
167 llvm::DenseMap<const CFGBlock *, llvm::SmallVector<size_t>> BlockIters;
168 // The messages logged in the current context but not yet written.
169 std::string ContextLogs;
170 // The number of elements we have visited within the current CFG block.
171 unsigned ElementIndex;
173 public:
174 explicit HTMLLogger(StreamFactory Streams) : Streams(std::move(Streams)) {}
175 void beginAnalysis(const ControlFlowContext &CFG,
176 TypeErasedDataflowAnalysis &A) override {
177 OS = Streams();
178 this->CFG = &CFG;
179 *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").first;
181 const auto &D = CFG.getDecl();
182 const auto &SM = A.getASTContext().getSourceManager();
183 *OS << "<title>";
184 if (const auto *ND = dyn_cast<NamedDecl>(&D))
185 *OS << ND->getNameAsString() << " at ";
186 *OS << SM.getFilename(D.getLocation()) << ":"
187 << SM.getSpellingLineNumber(D.getLocation());
188 *OS << "</title>\n";
190 *OS << "<style>" << HTMLLogger_css << "</style>\n";
191 *OS << "<script>" << HTMLLogger_js << "</script>\n";
193 writeCode();
194 writeCFG();
196 *OS << "<script>var HTMLLoggerData = \n";
197 JOS.emplace(*OS, /*Indent=*/2);
198 JOS->objectBegin();
199 JOS->attributeBegin("states");
200 JOS->objectBegin();
202 // Between beginAnalysis() and endAnalysis() we write all the states for
203 // particular analysis points into the `timeline` array.
204 void endAnalysis() override {
205 JOS->objectEnd();
206 JOS->attributeEnd();
208 JOS->attributeArray("timeline", [&] {
209 for (const auto &E : Iters) {
210 JOS->object([&] {
211 JOS->attribute("block", blockID(E.Block->getBlockID()));
212 JOS->attribute("iter", E.Iter);
213 JOS->attribute("post_visit", E.PostVisit);
214 JOS->attribute("converged", E.Converged);
218 JOS->attributeObject("cfg", [&] {
219 for (const auto &E : BlockIters)
220 writeBlock(*E.first, E.second);
223 JOS->objectEnd();
224 JOS.reset();
225 *OS << ";\n</script>\n";
226 *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").second;
229 void enterBlock(const CFGBlock &B, bool PostVisit) override {
230 llvm::SmallVector<size_t> &BIter = BlockIters[&B];
231 unsigned IterNum = BIter.size() + 1;
232 BIter.push_back(Iters.size());
233 Iters.push_back({&B, IterNum, PostVisit, /*Converged=*/false});
234 ElementIndex = 0;
236 void enterElement(const CFGElement &E) override {
237 ++ElementIndex;
240 static std::string blockID(unsigned Block) {
241 return llvm::formatv("B{0}", Block);
243 static std::string eltID(unsigned Block, unsigned Element) {
244 return llvm::formatv("B{0}.{1}", Block, Element);
246 static std::string iterID(unsigned Block, unsigned Iter) {
247 return llvm::formatv("B{0}:{1}", Block, Iter);
249 static std::string elementIterID(unsigned Block, unsigned Iter,
250 unsigned Element) {
251 return llvm::formatv("B{0}:{1}_B{0}.{2}", Block, Iter, Element);
254 // Write the analysis state associated with a particular analysis point.
255 // FIXME: this dump is fairly opaque. We should show:
256 // - values associated with the current Stmt
257 // - values associated with its children
258 // - meaningful names for values
259 // - which boolean values are implied true/false by the flow condition
260 void recordState(TypeErasedDataflowAnalysisState &State) override {
261 unsigned Block = Iters.back().Block->getBlockID();
262 unsigned Iter = Iters.back().Iter;
263 bool PostVisit = Iters.back().PostVisit;
264 JOS->attributeObject(elementIterID(Block, Iter, ElementIndex), [&] {
265 JOS->attribute("block", blockID(Block));
266 JOS->attribute("iter", Iter);
267 JOS->attribute("post_visit", PostVisit);
268 JOS->attribute("element", ElementIndex);
270 // If this state immediately follows an Expr, show its built-in model.
271 if (ElementIndex > 0) {
272 auto S =
273 Iters.back().Block->Elements[ElementIndex - 1].getAs<CFGStmt>();
274 if (const Expr *E = S ? llvm::dyn_cast<Expr>(S->getStmt()) : nullptr) {
275 if (E->isPRValue()) {
276 if (auto *V = State.Env.getValue(*E))
277 JOS->attributeObject(
278 "value", [&] { ModelDumper(*JOS, State.Env).dump(*V); });
279 } else {
280 if (auto *Loc = State.Env.getStorageLocation(*E))
281 JOS->attributeObject(
282 "value", [&] { ModelDumper(*JOS, State.Env).dump(*Loc); });
286 if (!ContextLogs.empty()) {
287 JOS->attribute("logs", ContextLogs);
288 ContextLogs.clear();
291 std::string BuiltinLattice;
292 llvm::raw_string_ostream BuiltinLatticeS(BuiltinLattice);
293 State.Env.dump(BuiltinLatticeS);
294 JOS->attribute("builtinLattice", BuiltinLattice);
298 void blockConverged() override { Iters.back().Converged = true; }
300 void logText(llvm::StringRef S) override {
301 ContextLogs.append(S.begin(), S.end());
302 ContextLogs.push_back('\n');
305 private:
306 // Write the CFG block details.
307 // Currently this is just the list of elements in execution order.
308 // FIXME: an AST dump would be a useful view, too.
309 void writeBlock(const CFGBlock &B, llvm::ArrayRef<size_t> ItersForB) {
310 JOS->attributeObject(blockID(B.getBlockID()), [&] {
311 JOS->attributeArray("iters", [&] {
312 for (size_t IterIdx : ItersForB) {
313 const Iteration &Iter = Iters[IterIdx];
314 JOS->object([&] {
315 JOS->attribute("iter", Iter.Iter);
316 JOS->attribute("post_visit", Iter.PostVisit);
317 JOS->attribute("converged", Iter.Converged);
321 JOS->attributeArray("elements", [&] {
322 for (const auto &Elt : B.Elements) {
323 std::string Dump;
324 llvm::raw_string_ostream DumpS(Dump);
325 Elt.dumpToStream(DumpS);
326 JOS->value(Dump);
332 // Write the code of function being examined.
333 // We want to overlay the code with <span>s that mark which BB particular
334 // tokens are associated with, and even which BB element (so that clicking
335 // can select the right element).
336 void writeCode() {
337 const auto &AST = CFG->getDecl().getASTContext();
338 bool Invalid = false;
340 // Extract the source code from the original file.
341 // Pretty-printing from the AST would probably be nicer (no macros or
342 // indentation to worry about), but we need the boundaries of particular
343 // AST nodes and the printer doesn't provide this.
344 auto Range = clang::Lexer::makeFileCharRange(
345 CharSourceRange::getTokenRange(CFG->getDecl().getSourceRange()),
346 AST.getSourceManager(), AST.getLangOpts());
347 if (Range.isInvalid())
348 return;
349 llvm::StringRef Code = clang::Lexer::getSourceText(
350 Range, AST.getSourceManager(), AST.getLangOpts(), &Invalid);
351 if (Invalid)
352 return;
354 // TokenInfo stores the BB and set of elements that a token is part of.
355 struct TokenInfo {
356 enum : unsigned { Missing = static_cast<unsigned>(-1) };
358 // The basic block this is part of.
359 // This is the BB of the stmt with the smallest containing range.
360 unsigned BB = Missing;
361 unsigned BBPriority = 0;
362 // The most specific stmt this is part of (smallest range).
363 unsigned Elt = Missing;
364 unsigned EltPriority = 0;
365 // All stmts this is part of.
366 SmallVector<unsigned> Elts;
368 // Mark this token as being part of BB.Elt.
369 // RangeLen is the character length of the element's range, used to
370 // distinguish inner vs outer statements.
371 // For example in `a==0`, token "a" is part of the stmts "a" and "a==0".
372 // However "a" has a smaller range, so is more specific. Clicking on the
373 // token "a" should select the stmt "a".
374 void assign(unsigned BB, unsigned Elt, unsigned RangeLen) {
375 // A worse BB (larger range) => ignore.
376 if (this->BB != Missing && BB != this->BB && BBPriority <= RangeLen)
377 return;
378 if (BB != this->BB) {
379 this->BB = BB;
380 Elts.clear();
381 BBPriority = RangeLen;
383 BBPriority = std::min(BBPriority, RangeLen);
384 Elts.push_back(Elt);
385 if (this->Elt == Missing || EltPriority > RangeLen)
386 this->Elt = Elt;
388 bool operator==(const TokenInfo &Other) const {
389 return std::tie(BB, Elt, Elts) ==
390 std::tie(Other.BB, Other.Elt, Other.Elts);
392 // Write the attributes for the <span> on this token.
393 void write(llvm::raw_ostream &OS) const {
394 OS << "class='c";
395 if (BB != Missing)
396 OS << " " << blockID(BB);
397 for (unsigned Elt : Elts)
398 OS << " " << eltID(BB, Elt);
399 OS << "'";
401 if (Elt != Missing)
402 OS << " data-elt='" << eltID(BB, Elt) << "'";
403 if (BB != Missing)
404 OS << " data-bb='" << blockID(BB) << "'";
408 // Construct one TokenInfo per character in a flat array.
409 // This is inefficient (chars in a token all have the same info) but simple.
410 std::vector<TokenInfo> State(Code.size());
411 for (const auto *Block : CFG->getCFG()) {
412 unsigned EltIndex = 0;
413 for (const auto& Elt : *Block) {
414 ++EltIndex;
415 if (const auto S = Elt.getAs<CFGStmt>()) {
416 auto EltRange = clang::Lexer::makeFileCharRange(
417 CharSourceRange::getTokenRange(S->getStmt()->getSourceRange()),
418 AST.getSourceManager(), AST.getLangOpts());
419 if (EltRange.isInvalid())
420 continue;
421 if (EltRange.getBegin() < Range.getBegin() ||
422 EltRange.getEnd() >= Range.getEnd() ||
423 EltRange.getEnd() < Range.getBegin() ||
424 EltRange.getEnd() >= Range.getEnd())
425 continue;
426 unsigned Off = EltRange.getBegin().getRawEncoding() -
427 Range.getBegin().getRawEncoding();
428 unsigned Len = EltRange.getEnd().getRawEncoding() -
429 EltRange.getBegin().getRawEncoding();
430 for (unsigned I = 0; I < Len; ++I)
431 State[Off + I].assign(Block->getBlockID(), EltIndex, Len);
436 // Finally, write the code with the correct <span>s.
437 unsigned Line =
438 AST.getSourceManager().getSpellingLineNumber(Range.getBegin());
439 *OS << "<template data-copy='code'>\n";
440 *OS << "<code class='filename'>";
441 llvm::printHTMLEscaped(
442 llvm::sys::path::filename(
443 AST.getSourceManager().getFilename(Range.getBegin())),
444 *OS);
445 *OS << "</code>";
446 *OS << "<code class='line' data-line='" << Line++ << "'>";
447 for (unsigned I = 0; I < Code.size(); ++I) {
448 // Don't actually write a <span> around each character, only break spans
449 // when the TokenInfo changes.
450 bool NeedOpen = I == 0 || !(State[I] == State[I-1]);
451 bool NeedClose = I + 1 == Code.size() || !(State[I] == State[I + 1]);
452 if (NeedOpen) {
453 *OS << "<span ";
454 State[I].write(*OS);
455 *OS << ">";
457 if (Code[I] == '\n')
458 *OS << "</code>\n<code class='line' data-line='" << Line++ << "'>";
459 else
460 llvm::printHTMLEscaped(Code.substr(I, 1), *OS);
461 if (NeedClose) *OS << "</span>";
463 *OS << "</code>\n";
464 *OS << "</template>";
467 // Write the CFG diagram, a graph of basic blocks.
468 // Laying out graphs is hard, so we construct a graphviz description and shell
469 // out to `dot` to turn it into an SVG.
470 void writeCFG() {
471 *OS << "<template data-copy='cfg'>\n";
472 if (auto SVG = renderSVG(buildCFGDot(CFG->getCFG())))
473 *OS << *SVG;
474 else
475 *OS << "Can't draw CFG: " << toString(SVG.takeError());
476 *OS << "</template>\n";
479 // Produce a graphviz description of a CFG.
480 static std::string buildCFGDot(const clang::CFG &CFG) {
481 std::string Graph;
482 llvm::raw_string_ostream GraphS(Graph);
483 // Graphviz likes to add unhelpful tooltips everywhere, " " suppresses.
484 GraphS << R"(digraph {
485 tooltip=" "
486 node[class=bb, shape=square, fontname="sans-serif", tooltip=" "]
487 edge[tooltip = " "]
489 for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I)
490 GraphS << " " << blockID(I) << " [id=" << blockID(I) << "]\n";
491 for (const auto *Block : CFG) {
492 for (const auto &Succ : Block->succs()) {
493 if (Succ.getReachableBlock())
494 GraphS << " " << blockID(Block->getBlockID()) << " -> "
495 << blockID(Succ.getReachableBlock()->getBlockID()) << "\n";
498 GraphS << "}\n";
499 return Graph;
503 // Nothing interesting here, just subprocess/temp-file plumbing.
504 llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph) {
505 std::string DotPath;
506 if (const auto *FromEnv = ::getenv("GRAPHVIZ_DOT"))
507 DotPath = FromEnv;
508 else {
509 auto FromPath = llvm::sys::findProgramByName("dot");
510 if (!FromPath)
511 return llvm::createStringError(FromPath.getError(),
512 "'dot' not found on PATH");
513 DotPath = FromPath.get();
516 // Create input and output files for `dot` subprocess.
517 // (We create the output file as empty, to reserve the temp filename).
518 llvm::SmallString<256> Input, Output;
519 int InputFD;
520 if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".dot", InputFD,
521 Input))
522 return llvm::createStringError(EC, "failed to create `dot` temp input");
523 llvm::raw_fd_ostream(InputFD, /*shouldClose=*/true) << DotGraph;
524 auto DeleteInput =
525 llvm::make_scope_exit([&] { llvm::sys::fs::remove(Input); });
526 if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".svg", Output))
527 return llvm::createStringError(EC, "failed to create `dot` temp output");
528 auto DeleteOutput =
529 llvm::make_scope_exit([&] { llvm::sys::fs::remove(Output); });
531 std::vector<std::optional<llvm::StringRef>> Redirects = {
532 Input, Output,
533 /*stderr=*/std::nullopt};
534 std::string ErrMsg;
535 int Code = llvm::sys::ExecuteAndWait(
536 DotPath, {"dot", "-Tsvg"}, /*Env=*/std::nullopt, Redirects,
537 /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg);
538 if (!ErrMsg.empty())
539 return llvm::createStringError(llvm::inconvertibleErrorCode(),
540 "'dot' failed: " + ErrMsg);
541 if (Code != 0)
542 return llvm::createStringError(llvm::inconvertibleErrorCode(),
543 "'dot' failed (" + llvm::Twine(Code) + ")");
545 auto Buf = llvm::MemoryBuffer::getFile(Output);
546 if (!Buf)
547 return llvm::createStringError(Buf.getError(), "Can't read `dot` output");
549 // Output has <?xml> prefix we don't want. Skip to <svg> tag.
550 llvm::StringRef Result = Buf.get()->getBuffer();
551 auto Pos = Result.find("<svg");
552 if (Pos == llvm::StringRef::npos)
553 return llvm::createStringError(llvm::inconvertibleErrorCode(),
554 "Can't find <svg> tag in `dot` output");
555 return Result.substr(Pos).str();
558 } // namespace
560 std::unique_ptr<Logger>
561 Logger::html(std::function<std::unique_ptr<llvm::raw_ostream>()> Streams) {
562 return std::make_unique<HTMLLogger>(std::move(Streams));
565 } // namespace clang::dataflow