1 //== HTMLRewrite.cpp - Translate source code into prettified HTML --*- C++ -*-//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the HTMLRewriter class, which is used to translate the
10 // text of a source file into prettified HTML.
12 //===----------------------------------------------------------------------===//
14 #include "clang/Rewrite/Core/HTMLRewrite.h"
15 #include "clang/Basic/SourceManager.h"
16 #include "clang/Lex/Preprocessor.h"
17 #include "clang/Lex/TokenConcatenation.h"
18 #include "clang/Rewrite/Core/Rewriter.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/MemoryBuffer.h"
22 #include "llvm/Support/raw_ostream.h"
24 using namespace clang
;
// Rewriter-level overload: turns the two source locations into offsets inside
// one file and forwards to the RewriteBuffer overload of HighlightRange.
//
// Steps visible below:
//   - B and E are mapped to their expansion locations.
//   - The FileID is taken from B; an assert checks that E is in the same file.
//   - BOffset/EOffset are the file offsets of B and E.
//   - EOffset is advanced by Lexer::MeasureTokenLength(E, ...) so that the
//     token starting at E is part of the range.
//   - The file's edit buffer, both offsets, the start of the file's buffer data
//     and the two tags are passed to the other overload.
//
// NOTE(review): the parameter list shown here stops after EndTag with a
// trailing comma, and neither the declaration of `Invalid` nor the closing
// brace is visible. HighlightMacros calls this function with a sixth argument
// (LLoc.isTokenRange()), so a trailing parameter appears to be missing from
// this listing - confirm against the complete file.
27 /// HighlightRange - Highlight a range in the source code with the specified
28 /// start/end tags. B/E must be in the same file. This ensures that
29 /// start/end tags are placed at the start/end of each line if the range is
31 void html::HighlightRange(Rewriter
&R
, SourceLocation B
, SourceLocation E
,
32 const char *StartTag
, const char *EndTag
,
34 SourceManager
&SM
= R
.getSourceMgr();
// Replace both endpoints with their expansion locations before asking which
// file they belong to.
35 B
= SM
.getExpansionLoc(B
);
36 E
= SM
.getExpansionLoc(E
);
37 FileID FID
= SM
.getFileID(B
);
38 assert(SM
.getFileID(E
) == FID
&& "B/E not in the same file!");
40 unsigned BOffset
= SM
.getFileOffset(B
);
41 unsigned EOffset
= SM
.getFileOffset(E
);
43 // Include the whole end token in the range.
// NOTE(review): a line between the comment above and this statement seems to
// be absent from the listing, so this adjustment may be conditional - TODO
// confirm.
45 EOffset
+= Lexer::MeasureTokenLength(E
, R
.getSourceMgr(), R
.getLangOpts());
// Pointer to the first character of the file's buffer data; the callee indexes
// it with the offsets computed above.
48 const char *BufferStart
= SM
.getBufferData(FID
, &Invalid
).data();
52 HighlightRange(R
.getEditBuffer(FID
), BOffset
, EOffset
,
53 BufferStart
, StartTag
, EndTag
);
// Offset-based overload. B and E are offsets used both for insertions into RB
// and as indices into BufferStart. StartTag is inserted at B and EndTag at E;
// the loop then walks the characters BufferStart[B] .. BufferStart[E-1] and
// inserts further EndTag/StartTag pairs so that a range covering several lines
// is closed before a line break and re-opened on the next non-blank line.
//
// Local state:
//   HadOpenTag        - starts out true, because StartTag was just inserted
//                       at B.
//   LastNonWhiteSpace - offset recorded by the "Remember this character" step;
//                       the close tag goes right after it.
//
// NOTE(review): the case labels of the switch, the updates of HadOpenTag and
// the closing braces are not visible in this listing - confirm against the
// complete file before relying on the exact control flow.
56 /// HighlightRange - This is the same as the above method, but takes
57 /// decomposed file locations.
58 void html::HighlightRange(RewriteBuffer
&RB
, unsigned B
, unsigned E
,
59 const char *BufferStart
,
60 const char *StartTag
, const char *EndTag
) {
61 // Insert the tag at the absolute start/end of the range.
62 RB
.InsertTextAfter(B
, StartTag
);
63 RB
.InsertTextBefore(E
, EndTag
);
65 // Scan the range to see if there is a \r or \n. If so, and if the line is
66 // not blank, insert tags on that line as well.
67 bool HadOpenTag
= true;
69 unsigned LastNonWhiteSpace
= B
;
// The loop uses != rather than <, so it relies on B <= E.
70 for (unsigned i
= B
; i
!= E
; ++i
) {
71 switch (BufferStart
[i
]) {
74 // Okay, we found a newline in the range. If we have an open tag, we need
75 // to insert a close tag at the first non-whitespace before the newline.
// +1 places the close tag just past the remembered character.
77 RB
.InsertTextBefore(LastNonWhiteSpace
+1, EndTag
);
79 // Instead of inserting an open tag immediately after the newline, we
80 // wait until we see a non-whitespace character. This prevents us from
81 // inserting tags around blank lines, and also allows the open tag to
82 // be put *after* whitespace on a non-blank line.
94 // If there is no tag open, do it now.
96 RB
.InsertTextAfter(i
, StartTag
);
100 // Remember this character.
101 LastNonWhiteSpace
= i
;
/// EscapeText - Walk every character of the buffer for FID and rewrite
/// selected characters in that file's edit buffer. Each rewrite is a
/// RB.ReplaceText(FilePos, 1, ...) call, i.e. exactly one source character is
/// replaced at a time. FilePos is the offset of the current character and is
/// advanced in lock-step with the pointer C.
///
/// EscapeSpaces and ReplaceTabs are the caller-visible switches; the
/// conditions that test them are not visible in this listing.
// NOTE(review): the switch statement, its case labels, the declaration of
// ColNo and the closing braces are missing here. The replacement literals
// (" ", "<", ">", "&") also look as if HTML entities were decoded by whatever
// produced this listing: as written, '<' would be replaced by itself, and the
// StringRef below claims 6*NumSpaces characters from a two-character literal.
// Confirm every literal against the complete file.
107 void html::EscapeText(Rewriter
&R
, FileID FID
,
108 bool EscapeSpaces
, bool ReplaceTabs
) {
110 llvm::MemoryBufferRef Buf
= R
.getSourceMgr().getBufferOrFake(FID
);
111 const char* C
= Buf
.getBufferStart();
112 const char* FileEnd
= Buf
.getBufferEnd();
114 assert (C
<= FileEnd
);
116 RewriteBuffer
&RB
= R
.getEditBuffer(FID
);
119 for (unsigned FilePos
= 0; C
!= FileEnd
; ++C
, ++FilePos
) {
// Characters that need no replacement only advance the column counter.
121 default: ++ColNo
; break;
129 RB
.ReplaceText(FilePos
, 1, " ");
133 RB
.ReplaceText(FilePos
, 1, "<hr>");
// 8-(ColNo&7) is the distance from ColNo to the next multiple of 8, a value
// in the range 1..8. Presumably ColNo is the column within the current line -
// TODO confirm where it is reset.
140 unsigned NumSpaces
= 8-(ColNo
&7);
// The factor 6 implies a six-character replacement per space - TODO confirm
// the literal.
142 RB
.ReplaceText(FilePos
, 1,
143 StringRef(" "
144 " ", 6*NumSpaces
));
146 RB
.ReplaceText(FilePos
, 1, StringRef(" ", NumSpaces
));
151 RB
.ReplaceText(FilePos
, 1, "<");
156 RB
.ReplaceText(FilePos
, 1, ">");
161 RB
.ReplaceText(FilePos
, 1, "&");
/// EscapeText - String-to-string variant. Iterates over the characters of s by
/// index and streams output into a raw_string_ostream that writes to Str.
///
/// \param s             Input text.
/// \param EscapeSpaces  Tested in the space handling below.
/// \param ReplaceTabs   Not tested in any visible line; presumably guards the
///                      two four-iteration loops - TODO confirm.
// NOTE(review): the declaration of Str, the switch header, the ' ' and '\t'
// case labels, the loop bodies and the return statement are missing from this
// listing. As shown, '<', '>' and '&' are written out unchanged, which would
// make this function a no-op for those characters; the literals were most
// likely HTML entities before the listing was produced. Confirm against the
// complete file.
168 std::string
html::EscapeText(StringRef s
, bool EscapeSpaces
, bool ReplaceTabs
) {
170 unsigned len
= s
.size();
172 llvm::raw_string_ostream
os(Str
);
174 for (unsigned i
= 0 ; i
< len
; ++i
) {
182 if (EscapeSpaces
) os
<< " ";
// These inner loops declare their own i, which hides the outer index inside
// the loop body.
189 for (unsigned i
= 0; i
< 4; ++i
)
192 for (unsigned i
= 0; i
< 4; ++i
)
200 case '<': os
<< "<"; break;
201 case '>': os
<< ">"; break;
202 case '&': os
<< "&"; break;
/// AddLineNumber - File-local helper that wraps one source line in an HTML
/// table row carrying its line number.
///
/// \param RB      Edit buffer that receives the insertions.
/// \param LineNo  Number written into the data-linenumber attribute, the
///                "LN<n>" id and the text of the "num" cell.
/// \param B       Offset before which the row/cell opening markup is inserted.
/// \param E       Offset before which "</td></tr>" is inserted for a
///                non-empty line.
// NOTE(review): the `else` between the two branches and the closing braces are
// not visible in this listing. Only one insertion is visible in the empty-line
// branch (B == E); whether the row is closed there cannot be seen - TODO
// confirm.
209 static void AddLineNumber(RewriteBuffer
&RB
, unsigned LineNo
,
210 unsigned B
, unsigned E
) {
// The markup is assembled in a stack-backed string first so that it can be
// inserted with a single call.
211 SmallString
<256> Str
;
212 llvm::raw_svector_ostream
OS(Str
);
214 OS
<< "<tr class=\"codeline\" data-linenumber=\"" << LineNo
<< "\">"
215 << "<td class=\"num\" id=\"LN" << LineNo
<< "\">" << LineNo
216 << "</td><td class=\"line\">";
218 if (B
== E
) { // Handle empty lines.
220 RB
.InsertTextBefore(B
, OS
.str());
// Non-empty line: open the row before the first character and close it before
// offset E.
222 RB
.InsertTextBefore(B
, OS
.str());
223 RB
.InsertTextBefore(E
, "</td></tr>");
/// AddLineNumbers - Walk the buffer for FID line by line, call AddLineNumber
/// for each line, and finally wrap the whole file in a <table class="code">
/// element.
///
/// C is the read pointer into the buffer and FilePos the matching offset.
/// For every line, LineStartPos is the offset at which the line begins and
/// LineEndPos defaults to the file size (FileEnd - FileBeg) until the scan
/// assigns it.
// NOTE(review): the declarations of LineNo and s, the body of the inner scan
// loop (apart from the LineEndPos assignment) and the closing braces are not
// visible in this listing - confirm against the complete file.
227 void html::AddLineNumbers(Rewriter
& R
, FileID FID
) {
229 llvm::MemoryBufferRef Buf
= R
.getSourceMgr().getBufferOrFake(FID
);
230 const char* FileBeg
= Buf
.getBufferStart();
231 const char* FileEnd
= Buf
.getBufferEnd();
232 const char* C
= FileBeg
;
233 RewriteBuffer
&RB
= R
.getEditBuffer(FID
);
235 assert (C
<= FileEnd
);
238 unsigned FilePos
= 0;
240 while (C
!= FileEnd
) {
243 unsigned LineStartPos
= FilePos
;
// Default end position is the end of the file; it is narrowed below.
244 unsigned LineEndPos
= FileEnd
- FileBeg
;
246 assert (FilePos
<= LineEndPos
);
247 assert (C
< FileEnd
);
249 // Scan until the newline (or end-of-file).
251 while (C
!= FileEnd
) {
// Post-increment: LineEndPos receives the offset of the current character and
// FilePos then moves past it.
256 LineEndPos
= FilePos
++;
263 AddLineNumber(RB
, LineNo
, LineStartPos
, LineEndPos
);
266 // Add one big table tag that surrounds all of the code.
268 llvm::raw_string_ostream
os(s
);
269 os
<< "<table class=\"code\" data-fileid=\"" << FID
.getHashValue() << "\">\n";
// The opening tag goes before offset 0, the closing tag after the offset of
// the end of the file.
270 RB
.InsertTextBefore(0, os
.str());
271 RB
.InsertTextAfter(FileEnd
- FileBeg
, "</table>");
/// AddHeaderFooterInternalBuiltinCSS - Put an HTML header in front of the file
/// identified by FID and an HTML footer behind it.
///
/// Visible steps:
///   - StartLoc is the location of the start of the file; EndLoc is StartLoc
///     advanced by the buffer length (FileEnd - FileStart).
///   - The header is streamed into `os`: an HTML 5 doctype, a <title> whose
///     text is passed through html::EscapeText, and a built-in style sheet.
///   - The accumulated header is inserted before StartLoc, and
///     "</body></html>\n" is inserted after EndLoc.
///
/// The style sheet text below is emitted into the generated page, so it is
/// program output and not commentary; the /* ... */ blocks inside it are CSS
/// comments.
// NOTE(review): the parameter list is cut off after FID, the declarations of
// `s` and `title` are not visible, and the statement that starts the style
// sheet literal is missing, as are many CSS lines (selectors and closing
// braces). Restore this function from the complete file rather than editing
// the CSS here.
274 void html::AddHeaderFooterInternalBuiltinCSS(Rewriter
&R
, FileID FID
,
277 llvm::MemoryBufferRef Buf
= R
.getSourceMgr().getBufferOrFake(FID
);
278 const char* FileStart
= Buf
.getBufferStart();
279 const char* FileEnd
= Buf
.getBufferEnd();
281 SourceLocation StartLoc
= R
.getSourceMgr().getLocForStartOfFile(FID
);
282 SourceLocation EndLoc
= StartLoc
.getLocWithOffset(FileEnd
-FileStart
);
285 llvm::raw_string_ostream
os(s
);
286 os
<< "<!doctype html>\n" // Use HTML 5 doctype
290 os
<< "<title>" << html::EscapeText(title
) << "</title>\n";
293 <style type="text
/css
">
294 body { color:#000000; background-color:#ffffff }
295 body { font-family:Helvetica, sans-serif; font-size:10pt }
296 h1 { font-size:14pt }
297 .FileName { margin-top: 5px; margin-bottom: 5px; display: inline; }
298 .FileNav { margin-left: 5px; margin-right: 5px; display: inline; }
299 .FileNav a { text-decoration:none; font-size: larger; }
300 .divider { margin-top: 30px; margin-bottom: 30px; height: 15px; }
301 .divider { background-color: gray; }
302 .code { border-collapse:collapse; width:100%; }
303 .code { font-family: "Monospace
", monospace; font-size:10pt }
304 .code { line-height: 1.2em }
305 .comment { color: green; font-style: oblique }
306 .keyword { color: blue }
307 .string_literal { color: red }
308 .directive { color: darkmagenta }
310 /* Macros and variables could have pop-up notes hidden by default.
311 - Macro pop-up: expansion of the macro
312 - Variable pop-up: value (table) of the variable */
313 .macro_popup, .variable_popup { display: none; }
315 /* Pop-up appears on mouse-hover event. */
316 .macro:hover .macro_popup, .variable:hover .variable_popup {
319 -webkit-border-radius:5px;
320 -webkit-box-shadow:1px 1px 7px #000;
322 box-shadow:1px 1px 7px #000;
330 border: 2px solid red;
331 background-color:#FFF0F0;
336 border: 2px solid blue;
337 background-color:#F0F0FF;
339 font-family: Helvetica, sans-serif;
343 /* Pop-up notes needs a relative position as a base where they pops up. */
345 background-color: PaleGoldenRod;
348 .macro { color: DarkMagenta; }
356 border: 1px solid #b0b0b0;
358 box-shadow: 1px 1px 7px black;
359 background-color: #c0c0c0;
363 .num { width:2.5em; padding-right:2ex; background-color:#eeeeee }
364 .num { text-align:right; font-size:8pt }
365 .num { color:#444444 }
366 .line { padding-left: 1ex; border-left: 3px solid #ccc }
367 .line { white-space: pre }
368 .msg { -webkit-box-shadow:1px 1px 7px #000 }
369 .msg { box-shadow:1px 1px 7px #000 }
370 .msg { -webkit-border-radius:5px }
371 .msg { border-radius:5px }
372 .msg { font-family:Helvetica, sans-serif; font-size:8pt }
374 .msg { position:relative }
375 .msg { padding:0.25em 1ex 0.25em 1ex }
376 .msg { margin-top:10px; margin-bottom:10px }
377 .msg { font-weight:bold }
378 .msg { max-width:60em; word-wrap: break-word; white-space: pre-wrap }
379 .msgT { padding:0x; spacing:0x }
380 .msgEvent { background-color:#fff8b4; color:#000000 }
381 .msgControl { background-color:#bbbbbb; color:#000000 }
382 .msgNote { background-color:#ddeeff; color:#000000 }
383 .mrange { background-color:#dfddf3 }
384 .mrange { border-bottom:1px solid #6F9DBE }
385 .PathIndex { font-weight: bold; padding:0px 5px; margin-right:5px; }
386 .PathIndex { -webkit-border-radius:8px }
387 .PathIndex { border-radius:8px }
388 .PathIndexEvent { background-color:#bfba87 }
389 .PathIndexControl { background-color:#8c8c8c }
390 .PathIndexPopUp { background-color: #879abc; }
391 .PathNav a { text-decoration:none; font-size: larger }
392 .CodeInsertionHint { font-weight: bold; background-color: #10dd10 }
393 .CodeRemovalHint { background-color:#de1010 }
394 .CodeRemovalHint { border-bottom:1px solid #6F9DBE }
395 .msg.selected{ background-color:orange !important; }
401 border-collapse: collapse; border-spacing: 0px;
412 input.spoilerhider + label {
414 text-decoration: underline;
420 input.spoilerhider ~ .spoiler {
426 input.spoilerhider:checked + label + .spoiler{
435 R
.InsertTextBefore(StartLoc
, os
.str());
438 R
.InsertTextAfter(EndLoc
, "</body></html>\n");
// Overview of the visible logic:
//   - A Lexer is built directly over the buffer of FID and used in raw mode,
//     with comment retention switched on so comments arrive as tokens.
//   - For every token up to tok::eof, TokOffs/TokLen give its file offset and
//     length, and a switch on the token kind decides which <span> (if any) is
//     wrapped around it via the RewriteBuffer overload of HighlightRange.
//   - Classes emitted: 'keyword', 'comment', 'string_literal', 'directive'.
//
// NOTE(review): the declaration of Tok, several case labels (comment and the
// one guarding the directive handling), the statements that chop string-literal
// prefixes, and the break/continue statements and closing braces are not
// visible in this listing - confirm against the complete file.
441 /// SyntaxHighlight - Relex the specified FileID and annotate the HTML with
442 /// information about keywords, macro expansions etc. This uses the macro
443 /// table state from the end of the file, so it won't be perfectly perfect,
444 /// but it will be reasonably close.
445 void html::SyntaxHighlight(Rewriter
&R
, FileID FID
, const Preprocessor
&PP
) {
446 RewriteBuffer
&RB
= R
.getEditBuffer(FID
);
448 const SourceManager
&SM
= PP
.getSourceManager();
449 llvm::MemoryBufferRef FromFile
= SM
.getBufferOrFake(FID
);
450 Lexer
L(FID
, FromFile
, SM
, PP
.getLangOpts());
// Token offsets computed below are used as indices into this buffer by
// HighlightRange.
451 const char *BufferStart
= L
.getBuffer().data();
453 // Inform the preprocessor that we want to retain comments as tokens, so we
454 // can highlight them.
455 L
.SetCommentRetentionState(true);
457 // Lex all the tokens in raw mode, to avoid entering #includes or expanding
460 L
.LexFromRawLexer(Tok
);
462 while (Tok
.isNot(tok::eof
)) {
463 // Since we are lexing unexpanded tokens, all tokens are from the main
465 unsigned TokOffs
= SM
.getFileOffset(Tok
.getLocation());
466 unsigned TokLen
= Tok
.getLength();
467 switch (Tok
.getKind()) {
469 case tok::identifier
:
470 llvm_unreachable("tok::identifier in raw lexing mode!");
471 case tok::raw_identifier
: {
472 // Fill in Result.IdentifierInfo and update the token kind,
473 // looking up the identifier in the identifier table.
474 PP
.LookUpIdentifierInfo(Tok
);
476 // If this is a pp-identifier, for a keyword, highlight it as such.
// After the lookup, any kind other than tok::identifier is treated as a
// keyword.
477 if (Tok
.isNot(tok::identifier
))
478 HighlightRange(RB
, TokOffs
, TokOffs
+TokLen
, BufferStart
,
479 "<span class='keyword'>", "</span>");
483 HighlightRange(RB
, TokOffs
, TokOffs
+TokLen
, BufferStart
,
484 "<span class='comment'>", "</span>");
486 case tok::utf8_string_literal
:
487 // Chop off the u part of u8 prefix
490 // FALL THROUGH to chop the 8
492 case tok::wide_string_literal
:
493 case tok::utf16_string_literal
:
494 case tok::utf32_string_literal
:
495 // Chop off the L, u, U or 8 prefix
499 case tok::string_literal
:
500 // FIXME: Exclude the optional ud-suffix from the highlighted range.
501 HighlightRange(RB
, TokOffs
, TokOffs
+TokLen
, BufferStart
,
502 "<span class='string_literal'>", "</span>");
505 // If this is a preprocessor directive, all tokens to end of line are too.
506 if (!Tok
.isAtStartOfLine())
509 // Eat all of the tokens until we get to the next one at the start of
// TokEnd tracks the end offset of the last token consumed on the directive's
// line.
511 unsigned TokEnd
= TokOffs
+TokLen
;
512 L
.LexFromRawLexer(Tok
);
513 while (!Tok
.isAtStartOfLine() && Tok
.isNot(tok::eof
)) {
514 TokEnd
= SM
.getFileOffset(Tok
.getLocation())+Tok
.getLength();
515 L
.LexFromRawLexer(Tok
);
518 // Find end of line. This is a hack.
519 HighlightRange(RB
, TokOffs
, TokEnd
, BufferStart
,
520 "<span class='directive'>", "</span>");
522 // Don't skip the next token.
527 L
.LexFromRawLexer(Tok
);
// Overview of the visible logic:
//   1. Raw-lex the file into TokenStream, looking up identifier info for raw
//      identifiers and turning ## tokens into tok::unknown.
//   2. Temporarily point the preprocessor at a DiagnosticsEngine built with an
//      IgnoringDiagConsumer, switch off comment retention and pragmas, and
//      feed TokenStream back in with EnterTokenStream so macros are expanded.
//   3. For each token whose location is a macro ID and whose expansion range
//      begins in FID, collect the escaped spelling of all tokens that share
//      that expansion location into `Expansion`, then wrap the expansion range
//      in <span class='macro'> ... <span class='macro_popup'>text</span></span>.
//   4. Put back the previous diagnostics object and pragma setting.
//
// NOTE(review): the declarations of Tok, PrevTok and PrevPrevTok, the loop
// around the first lexing phase, the handling inside several `if` statements
// and the closing braces are not visible in this listing. The comment-retention
// change made in step 2 is not undone in any visible line. Confirm against the
// complete file.
531 /// HighlightMacros - This uses the macro table state from the end of the
532 /// file, to re-expand macros and insert (into the HTML) information about the
533 /// macro expansions. This won't be perfectly perfect, but it will be
534 /// reasonably close.
535 void html::HighlightMacros(Rewriter
&R
, FileID FID
, const Preprocessor
& PP
) {
536 // Re-lex the raw token stream into a token buffer.
537 const SourceManager
&SM
= PP
.getSourceManager();
538 std::vector
<Token
> TokenStream
;
540 llvm::MemoryBufferRef FromFile
= SM
.getBufferOrFake(FID
);
541 Lexer
L(FID
, FromFile
, SM
, PP
.getLangOpts());
543 // Lex all the tokens in raw mode, to avoid entering #includes or expanding
547 L
.LexFromRawLexer(Tok
);
549 // If this is a # at the start of a line, discard it from the token stream.
550 // We don't want the re-preprocess step to see #defines, #includes or other
551 // preprocessor directives.
552 if (Tok
.is(tok::hash
) && Tok
.isAtStartOfLine())
555 // If this is a ## token, change its kind to unknown so that repreprocessing
556 // it will not produce an error.
557 if (Tok
.is(tok::hashhash
))
558 Tok
.setKind(tok::unknown
);
560 // If this raw token is an identifier, the raw lexer won't have looked up
561 // the corresponding identifier info for it. Do this now so that it will be
562 // macro expanded when we re-preprocess it.
563 if (Tok
.is(tok::raw_identifier
))
564 PP
.LookUpIdentifierInfo(Tok
);
566 TokenStream
.push_back(Tok
);
// The eof token is pushed before this check, so it ends up in TokenStream.
568 if (Tok
.is(tok::eof
)) break;
571 // Temporarily change the diagnostics object so that we ignore any generated
572 // diagnostics from this pass.
573 DiagnosticsEngine
TmpDiags(PP
.getDiagnostics().getDiagnosticIDs(),
574 &PP
.getDiagnostics().getDiagnosticOptions(),
575 new IgnoringDiagConsumer
);
577 // FIXME: This is a huge hack; we reuse the input preprocessor because we want
578 // its state, but we aren't actually changing it (we hope). This should really
579 // construct a copy of the preprocessor.
580 Preprocessor
&TmpPP
= const_cast<Preprocessor
&>(PP
);
// Remember the current diagnostics object so it can be put back at the end.
581 DiagnosticsEngine
*OldDiags
= &TmpPP
.getDiagnostics();
582 TmpPP
.setDiagnostics(TmpDiags
);
584 // Inform the preprocessor that we don't want comments.
585 TmpPP
.SetCommentRetentionState(false, false);
587 // We don't want pragmas either. Although we filtered out #pragma, removing
588 // _Pragma and __pragma is much harder.
589 bool PragmasPreviouslyEnabled
= TmpPP
.getPragmasEnabled();
590 TmpPP
.setPragmasEnabled(false);
592 // Enter the tokens we just lexed. This will cause them to be macro expanded
593 // but won't enter sub-files (because we removed #'s).
594 TmpPP
.EnterTokenStream(TokenStream
, false, /*IsReinject=*/false);
596 TokenConcatenation
ConcatInfo(TmpPP
);
598 // Lex all the tokens.
601 while (Tok
.isNot(tok::eof
)) {
602 // Ignore non-macro tokens.
603 if (!Tok
.getLocation().isMacroID()) {
608 // Okay, we have the first token of a macro expansion: highlight the
609 // expansion by inserting a start tag before the macro expansion and
611 CharSourceRange LLoc
= SM
.getExpansionRange(Tok
.getLocation());
613 // Ignore tokens whose instantiation location was not the main file.
614 if (SM
.getFileID(LLoc
.getBegin()) != FID
) {
619 assert(SM
.getFileID(LLoc
.getEnd()) == FID
&&
620 "Start and end of expansion must be in the same ultimate file!");
// Expansion accumulates the escaped spelling of the expanded tokens; LineLen
// starts as the length of the first token's escaped text.
622 std::string Expansion
= EscapeText(TmpPP
.getSpelling(Tok
));
623 unsigned LineLen
= Expansion
.size();
627 // Okay, eat this token, getting the next one.
630 // Skip all the rest of the tokens that are part of this macro
631 // instantiation. It would be really nice to pop up a window with all the
632 // spelling of the tokens or something.
633 while (!Tok
.is(tok::eof
) &&
634 SM
.getExpansionLoc(Tok
.getLocation()) == LLoc
.getBegin()) {
635 // Insert a newline if the macro expansion is getting large.
// Subtracting the size before the append and adding it back afterwards
// increases LineLen by exactly the number of characters appended in between.
641 LineLen
-= Expansion
.size();
643 // If the tokens were already space separated, or if they must be to avoid
644 // them being implicitly pasted, add a space between them.
645 if (Tok
.hasLeadingSpace() ||
646 ConcatInfo
.AvoidConcat(PrevPrevTok
, PrevTok
, Tok
))
649 // Escape any special characters in the token text.
650 Expansion
+= EscapeText(TmpPP
.getSpelling(Tok
));
651 LineLen
+= Expansion
.size();
653 PrevPrevTok
= PrevTok
;
658 // Insert the 'macro_popup' as the end tag, so that multi-line macros all
// The end tag closes both the popup span and the enclosing 'macro' span that
// the start tag opens.
660 Expansion
= "<span class='macro_popup'>" + Expansion
+ "</span></span>";
662 HighlightRange(R
, LLoc
.getBegin(), LLoc
.getEnd(), "<span class='macro'>",
663 Expansion
.c_str(), LLoc
.isTokenRange());
666 // Restore the preprocessor's old state.
667 TmpPP
.setDiagnostics(*OldDiags
);
668 TmpPP
.setPragmasEnabled(PragmasPreviouslyEnabled
);