clang/lib/Rewrite/HTMLRewrite.cpp

   1 //== HTMLRewrite.cpp - Translate source code into prettified HTML --*- C++ -*-//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 //  This file defines the HTMLRewriter class, which is used to translate the
  10 //  text of a source file into prettified HTML.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "clang/Rewrite/Core/HTMLRewrite.h"
  15 #include "clang/Basic/SourceManager.h"
  16 #include "clang/Lex/Preprocessor.h"
  17 #include "clang/Lex/TokenConcatenation.h"
  18 #include "clang/Rewrite/Core/Rewriter.h"
  19 #include "llvm/ADT/RewriteBuffer.h"
  20 #include "llvm/ADT/SmallString.h"
  21 #include "llvm/Support/ErrorHandling.h"
  22 #include "llvm/Support/MemoryBuffer.h"
  23 #include "llvm/Support/raw_ostream.h"
  24 #include <memory>
  25
  26 using namespace clang;
  27 using namespace llvm;
  28 using namespace html;
  29
  30 /// HighlightRange - Highlight a range in the source code with the specified
  31 /// start/end tags.  B/E must be in the same file.  This ensures that
  32 /// start/end tags are placed at the start/end of each line if the range is
  33 /// multiline.
  34 void html::HighlightRange(Rewriter &R, SourceLocation B, SourceLocation E,
  35                           const char *StartTag, const char *EndTag,
  36                           bool IsTokenRange) {
  37   SourceManager &SM = R.getSourceMgr();
  38   B = SM.getExpansionLoc(B);
  39   E = SM.getExpansionLoc(E);
  40   FileID FID = SM.getFileID(B);
  41   assert(SM.getFileID(E) == FID && "B/E not in the same file!");
  42
  43   unsigned BOffset = SM.getFileOffset(B);
  44   unsigned EOffset = SM.getFileOffset(E);
  45
  46   // Include the whole end token in the range.
  47   if (IsTokenRange)
  48     EOffset += Lexer::MeasureTokenLength(E, R.getSourceMgr(), R.getLangOpts());
  49
  50   bool Invalid = false;
  51   const char *BufferStart = SM.getBufferData(FID, &Invalid).data();
  52   if (Invalid)
  53     return;
  54
  55   HighlightRange(R.getEditBuffer(FID), BOffset, EOffset,
  56                  BufferStart, StartTag, EndTag);
  57 }
  58
  59 /// HighlightRange - This is the same as the above method, but takes
  60 /// decomposed file locations.
  61 void html::HighlightRange(RewriteBuffer &RB, unsigned B, unsigned E,
  62                           const char *BufferStart,
  63                           const char *StartTag, const char *EndTag) {
  64   // Insert the tag at the absolute start/end of the range.
  65   RB.InsertTextAfter(B, StartTag);
  66   RB.InsertTextBefore(E, EndTag);
  67
  68   // Scan the range to see if there is a \r or \n.  If so, and if the line is
  69   // not blank, insert tags on that line as well.
  70   bool HadOpenTag = true;
  71
  72   unsigned LastNonWhiteSpace = B;
  73   for (unsigned i = B; i != E; ++i) {
  74     switch (BufferStart[i]) {
  75     case '\r':
  76     case '\n':
  77       // Okay, we found a newline in the range.  If we have an open tag, we need
  78       // to insert a close tag at the first non-whitespace before the newline.
  79       if (HadOpenTag)
  80         RB.InsertTextBefore(LastNonWhiteSpace+1, EndTag);
  81
  82       // Instead of inserting an open tag immediately after the newline, we
  83       // wait until we see a non-whitespace character.  This prevents us from
  84       // inserting tags around blank lines, and also allows the open tag to
  85       // be put *after* whitespace on a non-blank line.
  86       HadOpenTag = false;
  87       break;
  88     case '\0':
  89     case ' ':
  90     case '\t':
  91     case '\f':
  92     case '\v':
  93       // Ignore whitespace.
  94       break;
  95
  96     default:
  97       // If there is no tag open, do it now.
  98       if (!HadOpenTag) {
  99         RB.InsertTextAfter(i, StartTag);
 100         HadOpenTag = true;
 101       }
 102
 103       // Remember this character.
 104       LastNonWhiteSpace = i;
 105       break;
 106     }
 107   }
 108 }
 109
namespace clang::html {
/// Per-file record of the highlight requests issued by SyntaxHighlight() and
/// HighlightMacros(), so that a later call for the same FileID can replay
/// them instead of relexing the file.
struct RelexRewriteCache {
  // These structs mimic input arguments of HighlightRange().

  /// Arguments of the SourceLocation-based HighlightRange() overload.
  /// Instances are brace-initialised in member order, so keep the order.
  struct Highlight {
    SourceLocation B, E;
    std::string StartTag, EndTag;
    bool IsTokenRange;
  };
  /// Arguments of the offset-based HighlightRange() overload (minus the
  /// rewrite buffer and the buffer start, which are recomputed on replay).
  /// Instances are brace-initialised in member order, so keep the order.
  struct RawHighlight {
    unsigned B, E;
    std::string StartTag, EndTag;
  };

  // SmallVector isn't appropriate because these vectors are almost never small.
  using HighlightList = std::vector<Highlight>;
  using RawHighlightList = std::vector<RawHighlight>;

  /// Highlights recorded by SyntaxHighlight(), keyed by file.
  DenseMap<FileID, RawHighlightList> SyntaxHighlights;
  /// Highlights recorded by HighlightMacros(), keyed by file.
  DenseMap<FileID, HighlightList> MacroHighlights;
};
} // namespace clang::html
 131
/// Creates a new, empty RelexRewriteCache that can be passed to
/// SyntaxHighlight() and HighlightMacros().
html::RelexRewriteCacheRef html::instantiateRelexRewriteCache() {
  return std::make_shared<RelexRewriteCache>();
}
 135
 136 void html::EscapeText(Rewriter &R, FileID FID,
 137                       bool EscapeSpaces, bool ReplaceTabs) {
 138
 139   llvm::MemoryBufferRef Buf = R.getSourceMgr().getBufferOrFake(FID);
 140   const char* C = Buf.getBufferStart();
 141   const char* FileEnd = Buf.getBufferEnd();
 142
 143   assert (C <= FileEnd);
 144
 145   RewriteBuffer &RB = R.getEditBuffer(FID);
 146
 147   unsigned ColNo = 0;
 148   for (unsigned FilePos = 0; C != FileEnd ; ++C, ++FilePos) {
 149     switch (*C) {
 150     default: ++ColNo; break;
 151     case '\n':
 152     case '\r':
 153       ColNo = 0;
 154       break;
 155
 156     case ' ':
 157       if (EscapeSpaces)
 158         RB.ReplaceText(FilePos, 1, "&nbsp;");
 159       ++ColNo;
 160       break;
 161     case '\f':
 162       RB.ReplaceText(FilePos, 1, "<hr>");
 163       ColNo = 0;
 164       break;
 165
 166     case '\t': {
 167       if (!ReplaceTabs)
 168         break;
 169       unsigned NumSpaces = 8-(ColNo&7);
 170       if (EscapeSpaces)
 171         RB.ReplaceText(FilePos, 1,
 172                        StringRef("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;"
 173                                        "&nbsp;&nbsp;&nbsp;", 6*NumSpaces));
 174       else
 175         RB.ReplaceText(FilePos, 1, StringRef("        ", NumSpaces));
 176       ColNo += NumSpaces;
 177       break;
 178     }
 179     case '<':
 180       RB.ReplaceText(FilePos, 1, "&lt;");
 181       ++ColNo;
 182       break;
 183
 184     case '>':
 185       RB.ReplaceText(FilePos, 1, "&gt;");
 186       ++ColNo;
 187       break;
 188
 189     case '&':
 190       RB.ReplaceText(FilePos, 1, "&amp;");
 191       ++ColNo;
 192       break;
 193     }
 194   }
 195 }
 196
 197 std::string html::EscapeText(StringRef s, bool EscapeSpaces, bool ReplaceTabs) {
 198
 199   unsigned len = s.size();
 200   std::string Str;
 201   llvm::raw_string_ostream os(Str);
 202
 203   for (unsigned i = 0 ; i < len; ++i) {
 204
 205     char c = s[i];
 206     switch (c) {
 207     default:
 208       os << c; break;
 209
 210     case ' ':
 211       if (EscapeSpaces) os << "&nbsp;";
 212       else os << ' ';
 213       break;
 214
 215     case '\t':
 216       if (ReplaceTabs) {
 217         if (EscapeSpaces)
 218           for (unsigned i = 0; i < 4; ++i)
 219             os << "&nbsp;";
 220         else
 221           for (unsigned i = 0; i < 4; ++i)
 222             os << " ";
 223       }
 224       else
 225         os << c;
 226
 227       break;
 228
 229     case '<': os << "&lt;"; break;
 230     case '>': os << "&gt;"; break;
 231     case '&': os << "&amp;"; break;
 232     }
 233   }
 234
 235   return Str;
 236 }
 237
 238 static void AddLineNumber(RewriteBuffer &RB, unsigned LineNo,
 239                           unsigned B, unsigned E) {
 240   SmallString<256> Str;
 241   llvm::raw_svector_ostream OS(Str);
 242
 243   OS << "<tr class=\"codeline\" data-linenumber=\"" << LineNo << "\">"
 244      << "<td class=\"num\" id=\"LN" << LineNo << "\">" << LineNo
 245      << "</td><td class=\"line\">";
 246
 247   if (B == E) { // Handle empty lines.
 248     OS << " </td></tr>";
 249     RB.InsertTextBefore(B, OS.str());
 250   } else {
 251     RB.InsertTextBefore(B, OS.str());
 252     RB.InsertTextBefore(E, "</td></tr>");
 253   }
 254 }
 255
 256 void html::AddLineNumbers(Rewriter& R, FileID FID) {
 257
 258   llvm::MemoryBufferRef Buf = R.getSourceMgr().getBufferOrFake(FID);
 259   const char* FileBeg = Buf.getBufferStart();
 260   const char* FileEnd = Buf.getBufferEnd();
 261   const char* C = FileBeg;
 262   RewriteBuffer &RB = R.getEditBuffer(FID);
 263
 264   assert (C <= FileEnd);
 265
 266   unsigned LineNo = 0;
 267   unsigned FilePos = 0;
 268
 269   while (C != FileEnd) {
 270
 271     ++LineNo;
 272     unsigned LineStartPos = FilePos;
 273     unsigned LineEndPos = FileEnd - FileBeg;
 274
 275     assert (FilePos <= LineEndPos);
 276     assert (C < FileEnd);
 277
 278     // Scan until the newline (or end-of-file).
 279
 280     while (C != FileEnd) {
 281       char c = *C;
 282       ++C;
 283
 284       if (c == '\n') {
 285         LineEndPos = FilePos++;
 286         break;
 287       }
 288
 289       ++FilePos;
 290     }
 291
 292     AddLineNumber(RB, LineNo, LineStartPos, LineEndPos);
 293   }
 294
 295   // Add one big table tag that surrounds all of the code.
 296   std::string s;
 297   llvm::raw_string_ostream os(s);
 298   os << "<table class=\"code\" data-fileid=\"" << FID.getHashValue() << "\">\n";
 299   RB.InsertTextBefore(0, os.str());
 300   RB.InsertTextAfter(FileEnd - FileBeg, "</table>");
 301 }
 302
 303 void html::AddHeaderFooterInternalBuiltinCSS(Rewriter &R, FileID FID,
 304                                              StringRef title) {
 305
 306   llvm::MemoryBufferRef Buf = R.getSourceMgr().getBufferOrFake(FID);
 307   const char* FileStart = Buf.getBufferStart();
 308   const char* FileEnd = Buf.getBufferEnd();
 309
 310   SourceLocation StartLoc = R.getSourceMgr().getLocForStartOfFile(FID);
 311   SourceLocation EndLoc = StartLoc.getLocWithOffset(FileEnd-FileStart);
 312
 313   std::string s;
 314   llvm::raw_string_ostream os(s);
 315   os << "<!doctype html>\n" // Use HTML 5 doctype
 316         "<html>\n<head>\n";
 317
 318   if (!title.empty())
 319     os << "<title>" << html::EscapeText(title) << "</title>\n";
 320
 321   os << R"<<<(
 322 <style type="text/css">
 323 body { color:#000000; background-color:#ffffff }
 324 body { font-family:Helvetica, sans-serif; font-size:10pt }
 325 h1 { font-size:14pt }
 326 .FileName { margin-top: 5px; margin-bottom: 5px; display: inline; }
 327 .FileNav { margin-left: 5px; margin-right: 5px; display: inline; }
 328 .FileNav a { text-decoration:none; font-size: larger; }
 329 .divider { margin-top: 30px; margin-bottom: 30px; height: 15px; }
 330 .divider { background-color: gray; }
 331 .code { border-collapse:collapse; width:100%; }
 332 .code { font-family: "Monospace", monospace; font-size:10pt }
 333 .code { line-height: 1.2em }
 334 .comment { color: green; font-style: oblique }
 335 .keyword { color: blue }
 336 .string_literal { color: red }
 337 .directive { color: darkmagenta }
 338
 339 /* Macros and variables could have pop-up notes hidden by default.
 340   - Macro pop-up:    expansion of the macro
 341   - Variable pop-up: value (table) of the variable */
 342 .macro_popup, .variable_popup { display: none; }
 343
 344 /* Pop-up appears on mouse-hover event. */
 345 .macro:hover .macro_popup, .variable:hover .variable_popup {
 346   display: block;
 347   padding: 2px;
 348   -webkit-border-radius:5px;
 349   -webkit-box-shadow:1px 1px 7px #000;
 350   border-radius:5px;
 351   box-shadow:1px 1px 7px #000;
 352   position: absolute;
 353   top: -1em;
 354   left:10em;
 355   z-index: 1
 356 }
 357
 358 .macro_popup {
 359   border: 2px solid red;
 360   background-color:#FFF0F0;
 361   font-weight: normal;
 362 }
 363
 364 .variable_popup {
 365   border: 2px solid blue;
 366   background-color:#F0F0FF;
 367   font-weight: bold;
 368   font-family: Helvetica, sans-serif;
 369   font-size: 9pt;
 370 }
 371
 372 /* Pop-up notes needs a relative position as a base where they pops up. */
 373 .macro, .variable {
 374   background-color: PaleGoldenRod;
 375   position: relative;
 376 }
 377 .macro { color: DarkMagenta; }
 378
 379 #tooltiphint {
 380   position: fixed;
 381   width: 50em;
 382   margin-left: -25em;
 383   left: 50%;
 384   padding: 10px;
 385   border: 1px solid #b0b0b0;
 386   border-radius: 2px;
 387   box-shadow: 1px 1px 7px black;
 388   background-color: #c0c0c0;
 389   z-index: 2;
 390 }
 391
 392 .num { width:2.5em; padding-right:2ex; background-color:#eeeeee }
 393 .num { text-align:right; font-size:8pt }
 394 .num { color:#444444 }
 395 .line { padding-left: 1ex; border-left: 3px solid #ccc }
 396 .line { white-space: pre }
 397 .msg { -webkit-box-shadow:1px 1px 7px #000 }
 398 .msg { box-shadow:1px 1px 7px #000 }
 399 .msg { -webkit-border-radius:5px }
 400 .msg { border-radius:5px }
 401 .msg { font-family:Helvetica, sans-serif; font-size:8pt }
 402 .msg { float:left }
 403 .msg { position:relative }
 404 .msg { padding:0.25em 1ex 0.25em 1ex }
 405 .msg { margin-top:10px; margin-bottom:10px }
 406 .msg { font-weight:bold }
 407 .msg { max-width:60em; word-wrap: break-word; white-space: pre-wrap }
 408 .msgT { padding:0x; spacing:0x }
 409 .msgEvent { background-color:#fff8b4; color:#000000 }
 410 .msgControl { background-color:#bbbbbb; color:#000000 }
 411 .msgNote { background-color:#ddeeff; color:#000000 }
 412 .mrange { background-color:#dfddf3 }
 413 .mrange { border-bottom:1px solid #6F9DBE }
 414 .PathIndex { font-weight: bold; padding:0px 5px; margin-right:5px; }
 415 .PathIndex { -webkit-border-radius:8px }
 416 .PathIndex { border-radius:8px }
 417 .PathIndexEvent { background-color:#bfba87 }
 418 .PathIndexControl { background-color:#8c8c8c }
 419 .PathIndexPopUp { background-color: #879abc; }
 420 .PathNav a { text-decoration:none; font-size: larger }
 421 .CodeInsertionHint { font-weight: bold; background-color: #10dd10 }
 422 .CodeRemovalHint { background-color:#de1010 }
 423 .CodeRemovalHint { border-bottom:1px solid #6F9DBE }
 424 .msg.selected{ background-color:orange !important; }
 425
 426 table.simpletable {
 427   padding: 5px;
 428   font-size:12pt;
 429   margin:20px;
 430   border-collapse: collapse; border-spacing: 0px;
 431 }
 432 td.rowname {
 433   text-align: right;
 434   vertical-align: top;
 435   font-weight: bold;
 436   color:#444444;
 437   padding-right:2ex;
 438 }
 439
 440 /* Hidden text. */
 441 input.spoilerhider + label {
 442   cursor: pointer;
 443   text-decoration: underline;
 444   display: block;
 445 }
 446 input.spoilerhider {
 447  display: none;
 448 }
 449 input.spoilerhider ~ .spoiler {
 450   overflow: hidden;
 451   margin: 10px auto 0;
 452   height: 0;
 453   opacity: 0;
 454 }
 455 input.spoilerhider:checked + label + .spoiler{
 456   height: auto;
 457   opacity: 1;
 458 }
 459 </style>
 460 </head>
 461 <body>)<<<";
 462
 463   // Generate header
 464   R.InsertTextBefore(StartLoc, os.str());
 465   // Generate footer
 466
 467   R.InsertTextAfter(EndLoc, "</body></html>\n");
 468 }
 469
 470 /// SyntaxHighlight - Relex the specified FileID and annotate the HTML with
 471 /// information about keywords, macro expansions etc.  This uses the macro
 472 /// table state from the end of the file, so it won't be perfectly perfect,
 473 /// but it will be reasonably close.
 474 static void SyntaxHighlightImpl(
 475     Rewriter &R, FileID FID, const Preprocessor &PP,
 476     llvm::function_ref<void(RewriteBuffer &, unsigned, unsigned, const char *,
 477                             const char *, const char *)>
 478         HighlightRangeCallback) {
 479
 480   RewriteBuffer &RB = R.getEditBuffer(FID);
 481   const SourceManager &SM = PP.getSourceManager();
 482   llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(FID);
 483   const char *BufferStart = FromFile.getBuffer().data();
 484
 485   Lexer L(FID, FromFile, SM, PP.getLangOpts());
 486
 487   // Inform the preprocessor that we want to retain comments as tokens, so we
 488   // can highlight them.
 489   L.SetCommentRetentionState(true);
 490
 491   // Lex all the tokens in raw mode, to avoid entering #includes or expanding
 492   // macros.
 493   Token Tok;
 494   L.LexFromRawLexer(Tok);
 495
 496   while (Tok.isNot(tok::eof)) {
 497     // Since we are lexing unexpanded tokens, all tokens are from the main
 498     // FileID.
 499     unsigned TokOffs = SM.getFileOffset(Tok.getLocation());
 500     unsigned TokLen = Tok.getLength();
 501     switch (Tok.getKind()) {
 502     default: break;
 503     case tok::identifier:
 504       llvm_unreachable("tok::identifier in raw lexing mode!");
 505     case tok::raw_identifier: {
 506       // Fill in Result.IdentifierInfo and update the token kind,
 507       // looking up the identifier in the identifier table.
 508       PP.LookUpIdentifierInfo(Tok);
 509
 510       // If this is a pp-identifier, for a keyword, highlight it as such.
 511       if (Tok.isNot(tok::identifier))
 512         HighlightRangeCallback(RB, TokOffs, TokOffs + TokLen, BufferStart,
 513                                "<span class='keyword'>", "</span>");
 514       break;
 515     }
 516     case tok::comment:
 517       HighlightRangeCallback(RB, TokOffs, TokOffs + TokLen, BufferStart,
 518                              "<span class='comment'>", "</span>");
 519       break;
 520     case tok::utf8_string_literal:
 521       // Chop off the u part of u8 prefix
 522       ++TokOffs;
 523       --TokLen;
 524       // FALL THROUGH to chop the 8
 525       [[fallthrough]];
 526     case tok::wide_string_literal:
 527     case tok::utf16_string_literal:
 528     case tok::utf32_string_literal:
 529       // Chop off the L, u, U or 8 prefix
 530       ++TokOffs;
 531       --TokLen;
 532       [[fallthrough]];
 533     case tok::string_literal:
 534       // FIXME: Exclude the optional ud-suffix from the highlighted range.
 535       HighlightRangeCallback(RB, TokOffs, TokOffs + TokLen, BufferStart,
 536                              "<span class='string_literal'>", "</span>");
 537       break;
 538     case tok::hash: {
 539       // If this is a preprocessor directive, all tokens to end of line are too.
 540       if (!Tok.isAtStartOfLine())
 541         break;
 542
 543       // Eat all of the tokens until we get to the next one at the start of
 544       // line.
 545       unsigned TokEnd = TokOffs+TokLen;
 546       L.LexFromRawLexer(Tok);
 547       while (!Tok.isAtStartOfLine() && Tok.isNot(tok::eof)) {
 548         TokEnd = SM.getFileOffset(Tok.getLocation())+Tok.getLength();
 549         L.LexFromRawLexer(Tok);
 550       }
 551
 552       // Find end of line.  This is a hack.
 553       HighlightRangeCallback(RB, TokOffs, TokEnd, BufferStart,
 554                              "<span class='directive'>", "</span>");
 555
 556       // Don't skip the next token.
 557       continue;
 558     }
 559     }
 560
 561     L.LexFromRawLexer(Tok);
 562   }
 563 }
 564 void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP,
 565                            RelexRewriteCacheRef Cache) {
 566   RewriteBuffer &RB = R.getEditBuffer(FID);
 567   const SourceManager &SM = PP.getSourceManager();
 568   llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(FID);
 569   const char *BufferStart = FromFile.getBuffer().data();
 570
 571   if (Cache) {
 572     auto CacheIt = Cache->SyntaxHighlights.find(FID);
 573     if (CacheIt != Cache->SyntaxHighlights.end()) {
 574       for (const RelexRewriteCache::RawHighlight &H : CacheIt->second) {
 575         HighlightRange(RB, H.B, H.E, BufferStart, H.StartTag.data(),
 576                        H.EndTag.data());
 577       }
 578       return;
 579     }
 580   }
 581
 582   // "Every time you would call HighlightRange, cache the inputs as well."
 583   auto HighlightRangeCallback = [&](RewriteBuffer &RB, unsigned B, unsigned E,
 584                                     const char *BufferStart,
 585                                     const char *StartTag, const char *EndTag) {
 586     HighlightRange(RB, B, E, BufferStart, StartTag, EndTag);
 587
 588     if (Cache)
 589       Cache->SyntaxHighlights[FID].push_back({B, E, StartTag, EndTag});
 590   };
 591
 592   SyntaxHighlightImpl(R, FID, PP, HighlightRangeCallback);
 593 }
 594
 595 static void HighlightMacrosImpl(
 596     Rewriter &R, FileID FID, const Preprocessor &PP,
 597     llvm::function_ref<void(Rewriter &, SourceLocation, SourceLocation,
 598                             const char *, const char *, bool)>
 599         HighlightRangeCallback) {
 600
 601   // Re-lex the raw token stream into a token buffer.
 602   const SourceManager &SM = PP.getSourceManager();
 603   std::vector<Token> TokenStream;
 604
 605   llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(FID);
 606   Lexer L(FID, FromFile, SM, PP.getLangOpts());
 607
 608   // Lex all the tokens in raw mode, to avoid entering #includes or expanding
 609   // macros.
 610   while (true) {
 611     Token Tok;
 612     L.LexFromRawLexer(Tok);
 613
 614     // If this is a # at the start of a line, discard it from the token stream.
 615     // We don't want the re-preprocess step to see #defines, #includes or other
 616     // preprocessor directives.
 617     if (Tok.is(tok::hash) && Tok.isAtStartOfLine())
 618       continue;
 619
 620     // If this is a ## token, change its kind to unknown so that repreprocessing
 621     // it will not produce an error.
 622     if (Tok.is(tok::hashhash))
 623       Tok.setKind(tok::unknown);
 624
 625     // If this raw token is an identifier, the raw lexer won't have looked up
 626     // the corresponding identifier info for it.  Do this now so that it will be
 627     // macro expanded when we re-preprocess it.
 628     if (Tok.is(tok::raw_identifier))
 629       PP.LookUpIdentifierInfo(Tok);
 630
 631     TokenStream.push_back(Tok);
 632
 633     if (Tok.is(tok::eof)) break;
 634   }
 635
 636   // Temporarily change the diagnostics object so that we ignore any generated
 637   // diagnostics from this pass.
 638   DiagnosticsEngine TmpDiags(PP.getDiagnostics().getDiagnosticIDs(),
 639                              &PP.getDiagnostics().getDiagnosticOptions(),
 640                       new IgnoringDiagConsumer);
 641
 642   // FIXME: This is a huge hack; we reuse the input preprocessor because we want
 643   // its state, but we aren't actually changing it (we hope). This should really
 644   // construct a copy of the preprocessor.
 645   Preprocessor &TmpPP = const_cast<Preprocessor&>(PP);
 646   DiagnosticsEngine *OldDiags = &TmpPP.getDiagnostics();
 647   TmpPP.setDiagnostics(TmpDiags);
 648
 649   // Inform the preprocessor that we don't want comments.
 650   TmpPP.SetCommentRetentionState(false, false);
 651
 652   // We don't want pragmas either. Although we filtered out #pragma, removing
 653   // _Pragma and __pragma is much harder.
 654   bool PragmasPreviouslyEnabled = TmpPP.getPragmasEnabled();
 655   TmpPP.setPragmasEnabled(false);
 656
 657   // Enter the tokens we just lexed.  This will cause them to be macro expanded
 658   // but won't enter sub-files (because we removed #'s).
 659   TmpPP.EnterTokenStream(TokenStream, false, /*IsReinject=*/false);
 660
 661   TokenConcatenation ConcatInfo(TmpPP);
 662
 663   // Lex all the tokens.
 664   Token Tok;
 665   TmpPP.Lex(Tok);
 666   while (Tok.isNot(tok::eof)) {
 667     // Ignore non-macro tokens.
 668     if (!Tok.getLocation().isMacroID()) {
 669       TmpPP.Lex(Tok);
 670       continue;
 671     }
 672
 673     // Okay, we have the first token of a macro expansion: highlight the
 674     // expansion by inserting a start tag before the macro expansion and
 675     // end tag after it.
 676     CharSourceRange LLoc = SM.getExpansionRange(Tok.getLocation());
 677
 678     // Ignore tokens whose instantiation location was not the main file.
 679     if (SM.getFileID(LLoc.getBegin()) != FID) {
 680       TmpPP.Lex(Tok);
 681       continue;
 682     }
 683
 684     assert(SM.getFileID(LLoc.getEnd()) == FID &&
 685            "Start and end of expansion must be in the same ultimate file!");
 686
 687     std::string Expansion = EscapeText(TmpPP.getSpelling(Tok));
 688     unsigned LineLen = Expansion.size();
 689
 690     Token PrevPrevTok;
 691     Token PrevTok = Tok;
 692     // Okay, eat this token, getting the next one.
 693     TmpPP.Lex(Tok);
 694
 695     // Skip all the rest of the tokens that are part of this macro
 696     // instantiation.  It would be really nice to pop up a window with all the
 697     // spelling of the tokens or something.
 698     while (!Tok.is(tok::eof) &&
 699            SM.getExpansionLoc(Tok.getLocation()) == LLoc.getBegin()) {
 700       // Insert a newline if the macro expansion is getting large.
 701       if (LineLen > 60) {
 702         Expansion += "<br>";
 703         LineLen = 0;
 704       }
 705
 706       LineLen -= Expansion.size();
 707
 708       // If the tokens were already space separated, or if they must be to avoid
 709       // them being implicitly pasted, add a space between them.
 710       if (Tok.hasLeadingSpace() ||
 711           ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, Tok))
 712         Expansion += ' ';
 713
 714       // Escape any special characters in the token text.
 715       Expansion += EscapeText(TmpPP.getSpelling(Tok));
 716       LineLen += Expansion.size();
 717
 718       PrevPrevTok = PrevTok;
 719       PrevTok = Tok;
 720       TmpPP.Lex(Tok);
 721     }
 722
 723     // Insert the 'macro_popup' as the end tag, so that multi-line macros all
 724     // get highlighted.
 725     Expansion = "<span class='macro_popup'>" + Expansion + "</span></span>";
 726
 727     HighlightRangeCallback(R, LLoc.getBegin(), LLoc.getEnd(),
 728                            "<span class='macro'>", Expansion.c_str(),
 729                            LLoc.isTokenRange());
 730   }
 731
 732   // Restore the preprocessor's old state.
 733   TmpPP.setDiagnostics(*OldDiags);
 734   TmpPP.setPragmasEnabled(PragmasPreviouslyEnabled);
 735 }
 736
 737 /// HighlightMacros - This uses the macro table state from the end of the
 738 /// file, to re-expand macros and insert (into the HTML) information about the
 739 /// macro expansions.  This won't be perfectly perfect, but it will be
 740 /// reasonably close.
 741 void html::HighlightMacros(Rewriter &R, FileID FID, const Preprocessor &PP,
 742                            RelexRewriteCacheRef Cache) {
 743   if (Cache) {
 744     auto CacheIt = Cache->MacroHighlights.find(FID);
 745     if (CacheIt != Cache->MacroHighlights.end()) {
 746       for (const RelexRewriteCache::Highlight &H : CacheIt->second) {
 747         HighlightRange(R, H.B, H.E, H.StartTag.data(), H.EndTag.data(),
 748                        H.IsTokenRange);
 749       }
 750       return;
 751     }
 752   }
 753
 754   // "Every time you would call HighlightRange, cache the inputs as well."
 755   auto HighlightRangeCallback = [&](Rewriter &R, SourceLocation B,
 756                                     SourceLocation E, const char *StartTag,
 757                                     const char *EndTag, bool isTokenRange) {
 758     HighlightRange(R, B, E, StartTag, EndTag, isTokenRange);
 759
 760     if (Cache) {
 761       Cache->MacroHighlights[FID].push_back(
 762           {B, E, StartTag, EndTag, isTokenRange});
 763     }
 764   };
 765
 766   HighlightMacrosImpl(R, FID, PP, HighlightRangeCallback);
 767 }