[docs] Fix build-docs.sh
[llvm-project.git] / clang / lib / Rewrite / HTMLRewrite.cpp
blob083a9c09297e1e6a9af482d201cb5f95621c9e4f
1 //== HTMLRewrite.cpp - Translate source code into prettified HTML --*- C++ -*-//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the HTMLRewriter class, which is used to translate the
10 // text of a source file into prettified HTML.
12 //===----------------------------------------------------------------------===//
14 #include "clang/Rewrite/Core/HTMLRewrite.h"
15 #include "clang/Basic/SourceManager.h"
16 #include "clang/Lex/Preprocessor.h"
17 #include "clang/Lex/TokenConcatenation.h"
18 #include "clang/Rewrite/Core/Rewriter.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/MemoryBuffer.h"
22 #include "llvm/Support/raw_ostream.h"
23 #include <memory>
24 using namespace clang;
27 /// HighlightRange - Highlight a range in the source code with the specified
28 /// start/end tags. B/E must be in the same file. This ensures that
29 /// start/end tags are placed at the start/end of each line if the range is
30 /// multiline.
31 void html::HighlightRange(Rewriter &R, SourceLocation B, SourceLocation E,
32 const char *StartTag, const char *EndTag,
33 bool IsTokenRange) {
34 SourceManager &SM = R.getSourceMgr();
35 B = SM.getExpansionLoc(B);
36 E = SM.getExpansionLoc(E);
37 FileID FID = SM.getFileID(B);
38 assert(SM.getFileID(E) == FID && "B/E not in the same file!");
40 unsigned BOffset = SM.getFileOffset(B);
41 unsigned EOffset = SM.getFileOffset(E);
43 // Include the whole end token in the range.
44 if (IsTokenRange)
45 EOffset += Lexer::MeasureTokenLength(E, R.getSourceMgr(), R.getLangOpts());
47 bool Invalid = false;
48 const char *BufferStart = SM.getBufferData(FID, &Invalid).data();
49 if (Invalid)
50 return;
52 HighlightRange(R.getEditBuffer(FID), BOffset, EOffset,
53 BufferStart, StartTag, EndTag);
56 /// HighlightRange - This is the same as the above method, but takes
57 /// decomposed file locations.
58 void html::HighlightRange(RewriteBuffer &RB, unsigned B, unsigned E,
59 const char *BufferStart,
60 const char *StartTag, const char *EndTag) {
61 // Insert the tag at the absolute start/end of the range.
62 RB.InsertTextAfter(B, StartTag);
63 RB.InsertTextBefore(E, EndTag);
65 // Scan the range to see if there is a \r or \n. If so, and if the line is
66 // not blank, insert tags on that line as well.
67 bool HadOpenTag = true;
69 unsigned LastNonWhiteSpace = B;
70 for (unsigned i = B; i != E; ++i) {
71 switch (BufferStart[i]) {
72 case '\r':
73 case '\n':
74 // Okay, we found a newline in the range. If we have an open tag, we need
75 // to insert a close tag at the first non-whitespace before the newline.
76 if (HadOpenTag)
77 RB.InsertTextBefore(LastNonWhiteSpace+1, EndTag);
79 // Instead of inserting an open tag immediately after the newline, we
80 // wait until we see a non-whitespace character. This prevents us from
81 // inserting tags around blank lines, and also allows the open tag to
82 // be put *after* whitespace on a non-blank line.
83 HadOpenTag = false;
84 break;
85 case '\0':
86 case ' ':
87 case '\t':
88 case '\f':
89 case '\v':
90 // Ignore whitespace.
91 break;
93 default:
94 // If there is no tag open, do it now.
95 if (!HadOpenTag) {
96 RB.InsertTextAfter(i, StartTag);
97 HadOpenTag = true;
100 // Remember this character.
101 LastNonWhiteSpace = i;
102 break;
107 void html::EscapeText(Rewriter &R, FileID FID,
108 bool EscapeSpaces, bool ReplaceTabs) {
110 llvm::MemoryBufferRef Buf = R.getSourceMgr().getBufferOrFake(FID);
111 const char* C = Buf.getBufferStart();
112 const char* FileEnd = Buf.getBufferEnd();
114 assert (C <= FileEnd);
116 RewriteBuffer &RB = R.getEditBuffer(FID);
118 unsigned ColNo = 0;
119 for (unsigned FilePos = 0; C != FileEnd ; ++C, ++FilePos) {
120 switch (*C) {
121 default: ++ColNo; break;
122 case '\n':
123 case '\r':
124 ColNo = 0;
125 break;
127 case ' ':
128 if (EscapeSpaces)
129 RB.ReplaceText(FilePos, 1, "&nbsp;");
130 ++ColNo;
131 break;
132 case '\f':
133 RB.ReplaceText(FilePos, 1, "<hr>");
134 ColNo = 0;
135 break;
137 case '\t': {
138 if (!ReplaceTabs)
139 break;
140 unsigned NumSpaces = 8-(ColNo&7);
141 if (EscapeSpaces)
142 RB.ReplaceText(FilePos, 1,
143 StringRef("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;"
144 "&nbsp;&nbsp;&nbsp;", 6*NumSpaces));
145 else
146 RB.ReplaceText(FilePos, 1, StringRef(" ", NumSpaces));
147 ColNo += NumSpaces;
148 break;
150 case '<':
151 RB.ReplaceText(FilePos, 1, "&lt;");
152 ++ColNo;
153 break;
155 case '>':
156 RB.ReplaceText(FilePos, 1, "&gt;");
157 ++ColNo;
158 break;
160 case '&':
161 RB.ReplaceText(FilePos, 1, "&amp;");
162 ++ColNo;
163 break;
168 std::string html::EscapeText(StringRef s, bool EscapeSpaces, bool ReplaceTabs) {
170 unsigned len = s.size();
171 std::string Str;
172 llvm::raw_string_ostream os(Str);
174 for (unsigned i = 0 ; i < len; ++i) {
176 char c = s[i];
177 switch (c) {
178 default:
179 os << c; break;
181 case ' ':
182 if (EscapeSpaces) os << "&nbsp;";
183 else os << ' ';
184 break;
186 case '\t':
187 if (ReplaceTabs) {
188 if (EscapeSpaces)
189 for (unsigned i = 0; i < 4; ++i)
190 os << "&nbsp;";
191 else
192 for (unsigned i = 0; i < 4; ++i)
193 os << " ";
195 else
196 os << c;
198 break;
200 case '<': os << "&lt;"; break;
201 case '>': os << "&gt;"; break;
202 case '&': os << "&amp;"; break;
206 return Str;
209 static void AddLineNumber(RewriteBuffer &RB, unsigned LineNo,
210 unsigned B, unsigned E) {
211 SmallString<256> Str;
212 llvm::raw_svector_ostream OS(Str);
214 OS << "<tr class=\"codeline\" data-linenumber=\"" << LineNo << "\">"
215 << "<td class=\"num\" id=\"LN" << LineNo << "\">" << LineNo
216 << "</td><td class=\"line\">";
218 if (B == E) { // Handle empty lines.
219 OS << " </td></tr>";
220 RB.InsertTextBefore(B, OS.str());
221 } else {
222 RB.InsertTextBefore(B, OS.str());
223 RB.InsertTextBefore(E, "</td></tr>");
227 void html::AddLineNumbers(Rewriter& R, FileID FID) {
229 llvm::MemoryBufferRef Buf = R.getSourceMgr().getBufferOrFake(FID);
230 const char* FileBeg = Buf.getBufferStart();
231 const char* FileEnd = Buf.getBufferEnd();
232 const char* C = FileBeg;
233 RewriteBuffer &RB = R.getEditBuffer(FID);
235 assert (C <= FileEnd);
237 unsigned LineNo = 0;
238 unsigned FilePos = 0;
240 while (C != FileEnd) {
242 ++LineNo;
243 unsigned LineStartPos = FilePos;
244 unsigned LineEndPos = FileEnd - FileBeg;
246 assert (FilePos <= LineEndPos);
247 assert (C < FileEnd);
249 // Scan until the newline (or end-of-file).
251 while (C != FileEnd) {
252 char c = *C;
253 ++C;
255 if (c == '\n') {
256 LineEndPos = FilePos++;
257 break;
260 ++FilePos;
263 AddLineNumber(RB, LineNo, LineStartPos, LineEndPos);
266 // Add one big table tag that surrounds all of the code.
267 std::string s;
268 llvm::raw_string_ostream os(s);
269 os << "<table class=\"code\" data-fileid=\"" << FID.getHashValue() << "\">\n";
270 RB.InsertTextBefore(0, os.str());
271 RB.InsertTextAfter(FileEnd - FileBeg, "</table>");
/// AddHeaderFooterInternalBuiltinCSS - Turn the rewritten contents of the
/// file FID into a complete HTML document.  A header consisting of the
/// doctype, the <head> element (with an optional <title> and the built-in
/// stylesheet) and the opening <body> tag is inserted before the start of the
/// file, and "</body></html>" is inserted after its end.  If title is empty,
/// no <title> element is emitted.
274 void html::AddHeaderFooterInternalBuiltinCSS(Rewriter &R, FileID FID,
275 StringRef title) {
277 llvm::MemoryBufferRef Buf = R.getSourceMgr().getBufferOrFake(FID);
278 const char* FileStart = Buf.getBufferStart();
279 const char* FileEnd = Buf.getBufferEnd();
// StartLoc is the location of the start of the file; EndLoc is StartLoc
// advanced by the size of the buffer.
281 SourceLocation StartLoc = R.getSourceMgr().getLocForStartOfFile(FID);
282 SourceLocation EndLoc = StartLoc.getLocWithOffset(FileEnd-FileStart);
// The header is accumulated in `s` through the stream `os`.
284 std::string s;
285 llvm::raw_string_ostream os(s);
286 os << "<!doctype html>\n" // Use HTML 5 doctype
287 "<html>\n<head>\n";
// The title text is HTML-escaped before being embedded in the document.
289 if (!title.empty())
290 os << "<title>" << html::EscapeText(title) << "</title>\n";
// The built-in stylesheet, the end of <head> and the opening <body> tag are
// emitted verbatim from the raw string literal below.
// NOTE(review): the `.msgT` rule uses `0x` lengths and a `spacing` property;
// these look like typos (e.g. for `0px`) -- confirm before changing, since
// the literal is emitted verbatim into every generated report.
292 os << R"<<<(
293 <style type="text/css">
294 body { color:#000000; background-color:#ffffff }
295 body { font-family:Helvetica, sans-serif; font-size:10pt }
296 h1 { font-size:14pt }
297 .FileName { margin-top: 5px; margin-bottom: 5px; display: inline; }
298 .FileNav { margin-left: 5px; margin-right: 5px; display: inline; }
299 .FileNav a { text-decoration:none; font-size: larger; }
300 .divider { margin-top: 30px; margin-bottom: 30px; height: 15px; }
301 .divider { background-color: gray; }
302 .code { border-collapse:collapse; width:100%; }
303 .code { font-family: "Monospace", monospace; font-size:10pt }
304 .code { line-height: 1.2em }
305 .comment { color: green; font-style: oblique }
306 .keyword { color: blue }
307 .string_literal { color: red }
308 .directive { color: darkmagenta }
310 /* Macros and variables could have pop-up notes hidden by default.
311 - Macro pop-up: expansion of the macro
312 - Variable pop-up: value (table) of the variable */
313 .macro_popup, .variable_popup { display: none; }
315 /* Pop-up appears on mouse-hover event. */
316 .macro:hover .macro_popup, .variable:hover .variable_popup {
317 display: block;
318 padding: 2px;
319 -webkit-border-radius:5px;
320 -webkit-box-shadow:1px 1px 7px #000;
321 border-radius:5px;
322 box-shadow:1px 1px 7px #000;
323 position: absolute;
324 top: -1em;
325 left:10em;
326 z-index: 1
329 .macro_popup {
330 border: 2px solid red;
331 background-color:#FFF0F0;
332 font-weight: normal;
335 .variable_popup {
336 border: 2px solid blue;
337 background-color:#F0F0FF;
338 font-weight: bold;
339 font-family: Helvetica, sans-serif;
340 font-size: 9pt;
343 /* Pop-up notes needs a relative position as a base where they pops up. */
344 .macro, .variable {
345 background-color: PaleGoldenRod;
346 position: relative;
348 .macro { color: DarkMagenta; }
350 #tooltiphint {
351 position: fixed;
352 width: 50em;
353 margin-left: -25em;
354 left: 50%;
355 padding: 10px;
356 border: 1px solid #b0b0b0;
357 border-radius: 2px;
358 box-shadow: 1px 1px 7px black;
359 background-color: #c0c0c0;
360 z-index: 2;
363 .num { width:2.5em; padding-right:2ex; background-color:#eeeeee }
364 .num { text-align:right; font-size:8pt }
365 .num { color:#444444 }
366 .line { padding-left: 1ex; border-left: 3px solid #ccc }
367 .line { white-space: pre }
368 .msg { -webkit-box-shadow:1px 1px 7px #000 }
369 .msg { box-shadow:1px 1px 7px #000 }
370 .msg { -webkit-border-radius:5px }
371 .msg { border-radius:5px }
372 .msg { font-family:Helvetica, sans-serif; font-size:8pt }
373 .msg { float:left }
374 .msg { position:relative }
375 .msg { padding:0.25em 1ex 0.25em 1ex }
376 .msg { margin-top:10px; margin-bottom:10px }
377 .msg { font-weight:bold }
378 .msg { max-width:60em; word-wrap: break-word; white-space: pre-wrap }
379 .msgT { padding:0x; spacing:0x }
380 .msgEvent { background-color:#fff8b4; color:#000000 }
381 .msgControl { background-color:#bbbbbb; color:#000000 }
382 .msgNote { background-color:#ddeeff; color:#000000 }
383 .mrange { background-color:#dfddf3 }
384 .mrange { border-bottom:1px solid #6F9DBE }
385 .PathIndex { font-weight: bold; padding:0px 5px; margin-right:5px; }
386 .PathIndex { -webkit-border-radius:8px }
387 .PathIndex { border-radius:8px }
388 .PathIndexEvent { background-color:#bfba87 }
389 .PathIndexControl { background-color:#8c8c8c }
390 .PathIndexPopUp { background-color: #879abc; }
391 .PathNav a { text-decoration:none; font-size: larger }
392 .CodeInsertionHint { font-weight: bold; background-color: #10dd10 }
393 .CodeRemovalHint { background-color:#de1010 }
394 .CodeRemovalHint { border-bottom:1px solid #6F9DBE }
395 .msg.selected{ background-color:orange !important; }
397 table.simpletable {
398 padding: 5px;
399 font-size:12pt;
400 margin:20px;
401 border-collapse: collapse; border-spacing: 0px;
403 td.rowname {
404 text-align: right;
405 vertical-align: top;
406 font-weight: bold;
407 color:#444444;
408 padding-right:2ex;
411 /* Hidden text. */
412 input.spoilerhider + label {
413 cursor: pointer;
414 text-decoration: underline;
415 display: block;
417 input.spoilerhider {
418 display: none;
420 input.spoilerhider ~ .spoiler {
421 overflow: hidden;
422 margin: 10px auto 0;
423 height: 0;
424 opacity: 0;
426 input.spoilerhider:checked + label + .spoiler{
427 height: auto;
428 opacity: 1;
430 </style>
431 </head>
432 <body>)<<<";
434 // Generate header
435 R.InsertTextBefore(StartLoc, os.str());
436 // Generate footer
438 R.InsertTextAfter(EndLoc, "</body></html>\n");
441 /// SyntaxHighlight - Relex the specified FileID and annotate the HTML with
442 /// information about keywords, macro expansions etc. This uses the macro
443 /// table state from the end of the file, so it won't be perfectly perfect,
444 /// but it will be reasonably close.
445 void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) {
446 RewriteBuffer &RB = R.getEditBuffer(FID);
448 const SourceManager &SM = PP.getSourceManager();
449 llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(FID);
450 Lexer L(FID, FromFile, SM, PP.getLangOpts());
451 const char *BufferStart = L.getBuffer().data();
453 // Inform the preprocessor that we want to retain comments as tokens, so we
454 // can highlight them.
455 L.SetCommentRetentionState(true);
457 // Lex all the tokens in raw mode, to avoid entering #includes or expanding
458 // macros.
459 Token Tok;
460 L.LexFromRawLexer(Tok);
462 while (Tok.isNot(tok::eof)) {
463 // Since we are lexing unexpanded tokens, all tokens are from the main
464 // FileID.
465 unsigned TokOffs = SM.getFileOffset(Tok.getLocation());
466 unsigned TokLen = Tok.getLength();
467 switch (Tok.getKind()) {
468 default: break;
469 case tok::identifier:
470 llvm_unreachable("tok::identifier in raw lexing mode!");
471 case tok::raw_identifier: {
472 // Fill in Result.IdentifierInfo and update the token kind,
473 // looking up the identifier in the identifier table.
474 PP.LookUpIdentifierInfo(Tok);
476 // If this is a pp-identifier, for a keyword, highlight it as such.
477 if (Tok.isNot(tok::identifier))
478 HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
479 "<span class='keyword'>", "</span>");
480 break;
482 case tok::comment:
483 HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
484 "<span class='comment'>", "</span>");
485 break;
486 case tok::utf8_string_literal:
487 // Chop off the u part of u8 prefix
488 ++TokOffs;
489 --TokLen;
490 // FALL THROUGH to chop the 8
491 [[fallthrough]];
492 case tok::wide_string_literal:
493 case tok::utf16_string_literal:
494 case tok::utf32_string_literal:
495 // Chop off the L, u, U or 8 prefix
496 ++TokOffs;
497 --TokLen;
498 [[fallthrough]];
499 case tok::string_literal:
500 // FIXME: Exclude the optional ud-suffix from the highlighted range.
501 HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
502 "<span class='string_literal'>", "</span>");
503 break;
504 case tok::hash: {
505 // If this is a preprocessor directive, all tokens to end of line are too.
506 if (!Tok.isAtStartOfLine())
507 break;
509 // Eat all of the tokens until we get to the next one at the start of
510 // line.
511 unsigned TokEnd = TokOffs+TokLen;
512 L.LexFromRawLexer(Tok);
513 while (!Tok.isAtStartOfLine() && Tok.isNot(tok::eof)) {
514 TokEnd = SM.getFileOffset(Tok.getLocation())+Tok.getLength();
515 L.LexFromRawLexer(Tok);
518 // Find end of line. This is a hack.
519 HighlightRange(RB, TokOffs, TokEnd, BufferStart,
520 "<span class='directive'>", "</span>");
522 // Don't skip the next token.
523 continue;
527 L.LexFromRawLexer(Tok);
531 /// HighlightMacros - This uses the macro table state from the end of the
532 /// file, to re-expand macros and insert (into the HTML) information about the
533 /// macro expansions. This won't be perfectly perfect, but it will be
534 /// reasonably close.
535 void html::HighlightMacros(Rewriter &R, FileID FID, const Preprocessor& PP) {
536 // Re-lex the raw token stream into a token buffer.
537 const SourceManager &SM = PP.getSourceManager();
538 std::vector<Token> TokenStream;
540 llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(FID);
541 Lexer L(FID, FromFile, SM, PP.getLangOpts());
543 // Lex all the tokens in raw mode, to avoid entering #includes or expanding
544 // macros.
545 while (true) {
546 Token Tok;
547 L.LexFromRawLexer(Tok);
549 // If this is a # at the start of a line, discard it from the token stream.
550 // We don't want the re-preprocess step to see #defines, #includes or other
551 // preprocessor directives.
552 if (Tok.is(tok::hash) && Tok.isAtStartOfLine())
553 continue;
555 // If this is a ## token, change its kind to unknown so that repreprocessing
556 // it will not produce an error.
557 if (Tok.is(tok::hashhash))
558 Tok.setKind(tok::unknown);
560 // If this raw token is an identifier, the raw lexer won't have looked up
561 // the corresponding identifier info for it. Do this now so that it will be
562 // macro expanded when we re-preprocess it.
563 if (Tok.is(tok::raw_identifier))
564 PP.LookUpIdentifierInfo(Tok);
566 TokenStream.push_back(Tok);
568 if (Tok.is(tok::eof)) break;
571 // Temporarily change the diagnostics object so that we ignore any generated
572 // diagnostics from this pass.
573 DiagnosticsEngine TmpDiags(PP.getDiagnostics().getDiagnosticIDs(),
574 &PP.getDiagnostics().getDiagnosticOptions(),
575 new IgnoringDiagConsumer);
577 // FIXME: This is a huge hack; we reuse the input preprocessor because we want
578 // its state, but we aren't actually changing it (we hope). This should really
579 // construct a copy of the preprocessor.
580 Preprocessor &TmpPP = const_cast<Preprocessor&>(PP);
581 DiagnosticsEngine *OldDiags = &TmpPP.getDiagnostics();
582 TmpPP.setDiagnostics(TmpDiags);
584 // Inform the preprocessor that we don't want comments.
585 TmpPP.SetCommentRetentionState(false, false);
587 // We don't want pragmas either. Although we filtered out #pragma, removing
588 // _Pragma and __pragma is much harder.
589 bool PragmasPreviouslyEnabled = TmpPP.getPragmasEnabled();
590 TmpPP.setPragmasEnabled(false);
592 // Enter the tokens we just lexed. This will cause them to be macro expanded
593 // but won't enter sub-files (because we removed #'s).
594 TmpPP.EnterTokenStream(TokenStream, false, /*IsReinject=*/false);
596 TokenConcatenation ConcatInfo(TmpPP);
598 // Lex all the tokens.
599 Token Tok;
600 TmpPP.Lex(Tok);
601 while (Tok.isNot(tok::eof)) {
602 // Ignore non-macro tokens.
603 if (!Tok.getLocation().isMacroID()) {
604 TmpPP.Lex(Tok);
605 continue;
608 // Okay, we have the first token of a macro expansion: highlight the
609 // expansion by inserting a start tag before the macro expansion and
610 // end tag after it.
611 CharSourceRange LLoc = SM.getExpansionRange(Tok.getLocation());
613 // Ignore tokens whose instantiation location was not the main file.
614 if (SM.getFileID(LLoc.getBegin()) != FID) {
615 TmpPP.Lex(Tok);
616 continue;
619 assert(SM.getFileID(LLoc.getEnd()) == FID &&
620 "Start and end of expansion must be in the same ultimate file!");
622 std::string Expansion = EscapeText(TmpPP.getSpelling(Tok));
623 unsigned LineLen = Expansion.size();
625 Token PrevPrevTok;
626 Token PrevTok = Tok;
627 // Okay, eat this token, getting the next one.
628 TmpPP.Lex(Tok);
630 // Skip all the rest of the tokens that are part of this macro
631 // instantiation. It would be really nice to pop up a window with all the
632 // spelling of the tokens or something.
633 while (!Tok.is(tok::eof) &&
634 SM.getExpansionLoc(Tok.getLocation()) == LLoc.getBegin()) {
635 // Insert a newline if the macro expansion is getting large.
636 if (LineLen > 60) {
637 Expansion += "<br>";
638 LineLen = 0;
641 LineLen -= Expansion.size();
643 // If the tokens were already space separated, or if they must be to avoid
644 // them being implicitly pasted, add a space between them.
645 if (Tok.hasLeadingSpace() ||
646 ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, Tok))
647 Expansion += ' ';
649 // Escape any special characters in the token text.
650 Expansion += EscapeText(TmpPP.getSpelling(Tok));
651 LineLen += Expansion.size();
653 PrevPrevTok = PrevTok;
654 PrevTok = Tok;
655 TmpPP.Lex(Tok);
658 // Insert the 'macro_popup' as the end tag, so that multi-line macros all
659 // get highlighted.
660 Expansion = "<span class='macro_popup'>" + Expansion + "</span></span>";
662 HighlightRange(R, LLoc.getBegin(), LLoc.getEnd(), "<span class='macro'>",
663 Expansion.c_str(), LLoc.isTokenRange());
666 // Restore the preprocessor's old state.
667 TmpPP.setDiagnostics(*OldDiags);
668 TmpPP.setPragmasEnabled(PragmasPreviouslyEnabled);