1 //===-- HTMLForest.cpp - browser-based parse forest explorer
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 //===----------------------------------------------------------------------===//
10 // The plain text forest node dump (clang-pseudo -print-forest) is useful but
11 // hard to reconcile with the code being examined, especially when it is large.
13 // HTMLForest produces a self-contained HTML file containing both the code and
14 // the forest representation, linking them interactively with javascript.
15 // At any given time, a single parse tree is shown (ambiguities resolved).
16 // The user can switch between ambiguous alternatives.
18 // +-------+---------------+
20 // | #tree | #code |#info||
23 // +-------+---------------+
25 // #tree is a hierarchical view of the nodes (nested <ul>s), like -print-forest.
26 // (It is a simple tree, not a DAG, because ambiguities have been resolved).
27 // Like -print-forest, trivial sequences are collapsed (expression~IDENTIFIER).
29 // #code is the source code, annotated with <span>s marking the node ranges.
30 // These spans are usually invisible (exception: ambiguities are marked), but
31 // they are used to show and change the selection.
33 // #info is a floating box that shows details of the currently selected node:
34 // - rule (for sequence nodes). Abbreviated rules are also shown.
35 // - alternatives (for ambiguous nodes). The user can choose an alternative.
36 // - ancestors. The parent nodes show how this node fits in translation-unit.
38 // There are two types of 'active' node:
39 // - *highlight* is what the cursor is over, and is colored blue.
40 // Near ancestors are shaded faintly (onion-skin) to show local structure.
41 // - *selection* is set by clicking.
42 // The #info box shows the selection, and selected nodes have a dashed ring.
44 //===----------------------------------------------------------------------===//
46 #include "clang-pseudo/Disambiguate.h"
47 #include "clang-pseudo/Forest.h"
48 #include "clang-pseudo/grammar/Grammar.h"
49 #include "llvm/ADT/StringExtras.h"
50 #include "llvm/Support/JSON.h"
51 #include "llvm/Support/raw_ostream.h"
56 // Defines const char HTMLForest_css[] = "...contents of HTMLForest.css..."; etc
57 #include "HTMLForestResources.inc"
// Destination stream: all generated HTML is written here.
60 llvm::raw_ostream
&Out
;
// Root of the parse forest; writeForestJSON() registers it as the first node.
62 const ForestNode
&Root
;
// Token stream whose tokens writeCode() emits as <span> elements.
63 const TokenStream
&Stream
;
// Consulted via lookup() to pick the selected child of each ambiguous node.
64 const Disambiguation
&Disambig
;
// Document prologue.
67 Out
<< "<!doctype html>\n";
// Page title, followed by the embedded script and stylesheet. HTMLForest_js
// and HTMLForest_css are the string constants pulled in through
// HTMLForestResources.inc.
70 tag("title", [&] { Out
<< "HTMLForest"; });
71 tag("script", [&] { Out
<< HTMLForest_js
; });
72 tag("style", [&] { Out
<< HTMLForest_css
; });
// The token spans produced by writeCode() go into a <pre> that is marked
// hidden.
// NOTE(review): presumably the script relocates this content into the visible
// #code pane -- confirm against HTMLForest.js.
78 tag("pre id='hidden-code' hidden", [&] { writeCode(); });
// The page body is the static markup held in HTMLForest_html.
80 tag("body", [&] { Out
<< HTMLForest_html
; });
// Declaration only; the definition is Writer::writeForestJSON() later in this
// file.
85 void writeForestJSON();
// Writes an HTML element: the opening tag "<Opener>" and a matching closing
// tag followed by a newline.
// Opener may include attributes (e.g. "pre id='hidden-code' hidden"); only the
// text before the first space is used as the element name in the closing tag.
// NOTE(review): Body is expected to emit the element's content between the two
// writes below; its invocation is not among the lines visible here -- confirm.
86 void tag(llvm::StringRef Opener
, llvm::function_ref
<void()> Body
) {
87 Out
<< "<" << Opener
<< ">";
// split(' ').first drops any attributes so that only the tag name is closed.
89 Out
<< "</" << Opener
.split(' ').first
<< ">\n";
// Writes the tokens of Stream as HTML. Each token becomes a
// <span class='token'> whose id is "t" followed by the token's index in
// Stream, and whose content is the HTML-escaped token text.
93 void Writer::writeCode() {
94 // This loop (whitespace logic) is cribbed from TokenStream::Print.
95 bool FirstToken
= true;
// -1 converts to the largest unsigned value; it serves as the initial
// "no line seen yet" value for the same-line comparison below.
96 unsigned LastLine
= -1;
98 for (const auto &T
: Stream
.tokens()) {
99 StringRef Text
= T
.text();
// Token on the same line as the previous one.
102 } else if (T
.Line
== LastLine
) {
// Pointer comparison: does this token's text begin exactly where the previous
// token's text ended in the underlying buffer? If not, there is a gap.
// NOTE(review): the statement executed for a gap is not among the lines
// visible here (presumably it writes a separating space) -- confirm.
103 if (LastText
.data() + LastText
.size() != Text
.data())
// Start a new output line and indent it by the token's recorded Indent.
106 Out
<< " \n"; // Extra space aids selection.
107 Out
.indent(T
.Indent
);
// The id "t<index>" matches the format produced by TokenID in
// writeForestJSON().
109 Out
<< "<span class='token' id='t" << Stream
.index(T
) << "'>";
// Escape the token text so that characters such as '<' and '&' are safe in
// HTML.
110 llvm::printHTMLEscaped(Text
, Out
);
119 // Writes a JSON array of forest nodes. Items are e.g.:
120 // {kind:'sequence', symbol:'compound-stmt', children:[5,8,33],
121 // rule:'compound-stmt := ...'} {kind:'terminal', symbol:'VOID', token:'t52'}
122 // {kind:'ambiguous', symbol:'type-specifier', children:[3,100], selected:3}
123 // {kind:'opaque', symbol:'statement-seq', firstToken:'t5', lastToken:'t6'}
// Serializes forest nodes as JSON objects through an llvm::json::OStream.
// Nodes are numbered in discovery order: Root is registered first (ID 0), and
// the main loop walks Sequence by index while AssignID appends the nodes it
// encounters, so Sequence.size() is re-read on every iteration.
124 void Writer::writeForestJSON() {
125 // This is the flat array of nodes: the index into this array is the node ID.
// Each entry pairs a node with its End token index. End is one past the
// node's last token: the root gets tokens().size(), and an opaque node's
// closed-range lastToken is written as End - 1.
126 std::vector
<std::pair
<const ForestNode
*, /*End*/ Token::Index
>> Sequence
;
// Maps a node to the ID it was given.
127 llvm::DenseMap
<const ForestNode
*, unsigned> Index
;
// Returns the ID of N. try_emplace leaves an existing entry untouched, so a
// node that was seen before keeps its original ID; a new node receives the
// current size of Sequence as its ID.
// NOTE(review): the push_back below is presumably guarded so it only runs
// for newly inserted nodes; that guard is not among the lines visible here.
128 auto AssignID
= [&](const ForestNode
*N
, Token::Index End
) -> unsigned {
129 auto R
= Index
.try_emplace(N
, Sequence
.size());
131 Sequence
.push_back({N
, End
});
132 return R
.first
->second
;
// Seed the worklist with the root, which extends to the end of the stream.
134 AssignID(&Root
, Stream
.tokens().size());
// Formats a token index as an element id, e.g. 5 -> "t5". This matches the
// span ids written by writeCode().
135 auto TokenID
= [](Token::Index I
) { return ("t" + llvm::Twine(I
)).str(); };
// Local JSON stream wrapping the member stream. Within this function the
// name Out refers to this object, not to the raw_ostream member.
// NOTE(review): the second argument (2) is presumably the pretty-printing
// indent width -- confirm against the llvm::json::OStream constructor.
137 llvm::json::OStream
Out(this->Out
, 2);
// I is the ID of the node being written.
139 for (unsigned I
= 0; I
< Sequence
.size(); ++I
) {
140 const ForestNode
*N
= Sequence
[I
].first
;
141 Token::Index End
= Sequence
[I
].second
;
// Every node records the name of its grammar symbol.
143 Out
.attribute("symbol", G
.symbolName(N
->symbol()));
// Terminal: identifies the single token it covers.
145 case ForestNode::Terminal
:
146 Out
.attribute("kind", "terminal");
147 Out
.attribute("token", TokenID(N
->startTokenIndex()));
// Sequence: records the textual form of the grammar rule.
149 case ForestNode::Sequence
:
150 Out
.attribute("kind", "sequence");
151 Out
.attribute("rule", G
.dumpRule(N
->rule()));
// Ambiguous: "selected" is the ID of the child at the index returned by
// Disambig.lookup(N). That child is registered with the parent's End.
153 case ForestNode::Ambiguous
:
154 Out
.attribute("kind", "ambiguous");
155 Out
.attribute("selected",
156 AssignID(N
->children()[Disambig
.lookup(N
)], End
));
// Opaque: records the token range it covers.
158 case ForestNode::Opaque
:
159 Out
.attribute("kind", "opaque");
160 Out
.attribute("firstToken", TokenID(N
->startTokenIndex()));
161 // [firstToken, lastToken] is a closed range.
162 // If empty, lastToken is omitted.
163 if (N
->startTokenIndex() != End
)
164 Out
.attribute("lastToken", TokenID(End
- 1));
// Children are written as an array of node IDs; the attribute is omitted
// when there are none.
167 auto Children
= N
->children();
168 if (!Children
.empty())
169 Out
.attributeArray("children", [&] {
// This I is a child position and shadows the node-ID I of the outer loop.
170 for (unsigned I
= 0; I
< Children
.size(); ++I
)
// A child's end is the start token of its next sibling.
// NOTE(review): the value used for the last child is on a line not visible
// here (presumably the parent's End) -- confirm.
171 Out
.value(AssignID(Children
[I
],
172 I
+ 1 == Children
.size()
174 : Children
[I
+ 1]->startTokenIndex()));
// Entry point: builds a temporary Writer from the arguments and calls its
// write() method; output goes to OS.
183 // We only accept the derived stream here.
184 // FIXME: allow the original stream instead?
185 void writeHTMLForest(llvm::raw_ostream
&OS
, const Grammar
&G
,
186 const ForestNode
&Root
, const Disambiguation
&Disambig
,
187 const TokenStream
&Stream
) {
// Note the initializer order (Stream before Disambig) differs from this
// function's parameter order (Disambig before Stream).
188 Writer
{OS
, G
, Root
, Stream
, Disambig
}.write();
191 } // namespace pseudo