lldb/source/Plugins/Language/ClangCommon/ClangHighlighter.cpp

   1 //===-- ClangHighlighter.cpp ----------------------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #include "ClangHighlighter.h"
  10
  11 #include "lldb/Host/FileSystem.h"
  12 #include "lldb/Target/Language.h"
  13 #include "lldb/Utility/AnsiTerminal.h"
  14 #include "lldb/Utility/StreamString.h"
  15
  16 #include "clang/Basic/FileManager.h"
  17 #include "clang/Basic/SourceManager.h"
  18 #include "clang/Lex/Lexer.h"
  19 #include "llvm/ADT/StringSet.h"
  20 #include "llvm/Support/MemoryBuffer.h"
  21 #include <optional>
  22
  23 using namespace lldb_private;
  24
  25 bool ClangHighlighter::isKeyword(llvm::StringRef token) const {
  26   return keywords.contains(token);
  27 }
  28
  29 ClangHighlighter::ClangHighlighter() {
  30 #define KEYWORD(X, N) keywords.insert(#X);
  31 #include "clang/Basic/TokenKinds.def"
  32 }
  33
  34 /// Determines which style should be applied to the given token.
  35 /// \param highlighter
  36 ///     The current highlighter that should use the style.
  37 /// \param token
  38 ///     The current token.
  39 /// \param tok_str
  40 ///     The string in the source code the token represents.
  41 /// \param options
  42 ///     The style we use for coloring the source code.
  43 /// \param in_pp_directive
  44 ///     If we are currently in a preprocessor directive. NOTE: This is
  45 ///     passed by reference and will be updated if the current token starts
  46 ///     or ends a preprocessor directive.
  47 /// \return
  48 ///     The ColorStyle that should be applied to the token.
  49 static HighlightStyle::ColorStyle
  50 determineClangStyle(const ClangHighlighter &highlighter,
  51                     const clang::Token &token, llvm::StringRef tok_str,
  52                     const HighlightStyle &options, bool &in_pp_directive) {
  53   using namespace clang;
  54
  55   if (token.is(tok::comment)) {
  56     // If we were in a preprocessor directive before, we now left it.
  57     in_pp_directive = false;
  58     return options.comment;
  59   } else if (in_pp_directive || token.getKind() == tok::hash) {
  60     // Let's assume that the rest of the line is a PP directive.
  61     in_pp_directive = true;
  62     // Preprocessor directives are hard to match, so we have to hack this in.
  63     return options.pp_directive;
  64   } else if (tok::isStringLiteral(token.getKind()))
  65     return options.string_literal;
  66   else if (tok::isLiteral(token.getKind()))
  67     return options.scalar_literal;
  68   else if (highlighter.isKeyword(tok_str))
  69     return options.keyword;
  70   else
  71     switch (token.getKind()) {
  72     case tok::raw_identifier:
  73     case tok::identifier:
  74       return options.identifier;
  75     case tok::l_brace:
  76     case tok::r_brace:
  77       return options.braces;
  78     case tok::l_square:
  79     case tok::r_square:
  80       return options.square_brackets;
  81     case tok::l_paren:
  82     case tok::r_paren:
  83       return options.parentheses;
  84     case tok::comma:
  85       return options.comma;
  86     case tok::coloncolon:
  87     case tok::colon:
  88       return options.colon;
  89
  90     case tok::amp:
  91     case tok::ampamp:
  92     case tok::ampequal:
  93     case tok::star:
  94     case tok::starequal:
  95     case tok::plus:
  96     case tok::plusplus:
  97     case tok::plusequal:
  98     case tok::minus:
  99     case tok::arrow:
 100     case tok::minusminus:
 101     case tok::minusequal:
 102     case tok::tilde:
 103     case tok::exclaim:
 104     case tok::exclaimequal:
 105     case tok::slash:
 106     case tok::slashequal:
 107     case tok::percent:
 108     case tok::percentequal:
 109     case tok::less:
 110     case tok::lessless:
 111     case tok::lessequal:
 112     case tok::lesslessequal:
 113     case tok::spaceship:
 114     case tok::greater:
 115     case tok::greatergreater:
 116     case tok::greaterequal:
 117     case tok::greatergreaterequal:
 118     case tok::caret:
 119     case tok::caretequal:
 120     case tok::pipe:
 121     case tok::pipepipe:
 122     case tok::pipeequal:
 123     case tok::question:
 124     case tok::equal:
 125     case tok::equalequal:
 126       return options.operators;
 127     default:
 128       break;
 129     }
 130   return HighlightStyle::ColorStyle();
 131 }
 132
 133 void ClangHighlighter::Highlight(const HighlightStyle &options,
 134                                  llvm::StringRef line,
 135                                  std::optional<size_t> cursor_pos,
 136                                  llvm::StringRef previous_lines,
 137                                  Stream &result) const {
 138   using namespace clang;
 139
 140   FileSystemOptions file_opts;
 141   FileManager file_mgr(file_opts,
 142                        FileSystem::Instance().GetVirtualFileSystem());
 143
 144   // The line might end in a backslash which would cause Clang to drop the
 145   // backslash and the terminating new line. This makes sense when parsing C++,
 146   // but when highlighting we care about preserving the backslash/newline. To
 147   // not lose this information we remove the new line here so that Clang knows
 148   // this is just a single line we are highlighting. We add back the newline
 149   // after tokenizing.
 150   llvm::StringRef line_ending = "";
 151   // There are a few legal line endings Clang recognizes and we need to
 152   // temporarily remove from the string.
 153   if (line.consume_back("\r\n"))
 154     line_ending = "\r\n";
 155   else if (line.consume_back("\n"))
 156     line_ending = "\n";
 157   else if (line.consume_back("\r"))
 158     line_ending = "\r";
 159
 160   unsigned line_number = previous_lines.count('\n') + 1U;
 161
 162   // Let's build the actual source code Clang needs and setup some utility
 163   // objects.
 164   std::string full_source = previous_lines.str() + line.str();
 165   llvm::IntrusiveRefCntPtr<DiagnosticIDs> diag_ids(new DiagnosticIDs());
 166   llvm::IntrusiveRefCntPtr<DiagnosticOptions> diags_opts(
 167       new DiagnosticOptions());
 168   DiagnosticsEngine diags(diag_ids, diags_opts);
 169   clang::SourceManager SM(diags, file_mgr);
 170   auto buf = llvm::MemoryBuffer::getMemBuffer(full_source);
 171
 172   FileID FID = SM.createFileID(buf->getMemBufferRef());
 173
 174   // Let's just enable the latest ObjC and C++ which should get most tokens
 175   // right.
 176   LangOptions Opts;
 177   Opts.ObjC = true;
 178   // FIXME: This should probably set CPlusPlus, CPlusPlus11, ... too
 179   Opts.CPlusPlus17 = true;
 180   Opts.LineComment = true;
 181
 182   Lexer lex(FID, buf->getMemBufferRef(), SM, Opts);
 183   // The lexer should keep whitespace around.
 184   lex.SetKeepWhitespaceMode(true);
 185
 186   // Keeps track if we have entered a PP directive.
 187   bool in_pp_directive = false;
 188
 189   // True once we actually lexed the user provided line.
 190   bool found_user_line = false;
 191
 192   // True if we already highlighted the token under the cursor, false otherwise.
 193   bool highlighted_cursor = false;
 194   Token token;
 195   bool exit = false;
 196   while (!exit) {
 197     // Returns true if this is the last token we get from the lexer.
 198     exit = lex.LexFromRawLexer(token);
 199
 200     bool invalid = false;
 201     unsigned current_line_number =
 202         SM.getSpellingLineNumber(token.getLocation(), &invalid);
 203     if (current_line_number != line_number)
 204       continue;
 205     found_user_line = true;
 206
 207     // We don't need to print any tokens without a spelling line number.
 208     if (invalid)
 209       continue;
 210
 211     // Same as above but with the column number.
 212     invalid = false;
 213     unsigned start = SM.getSpellingColumnNumber(token.getLocation(), &invalid);
 214     if (invalid)
 215       continue;
 216     // Column numbers start at 1, but indexes in our string start at 0.
 217     --start;
 218
 219     // Annotations don't have a length, so let's skip them.
 220     if (token.isAnnotation())
 221       continue;
 222
 223     // Extract the token string from our source code.
 224     llvm::StringRef tok_str = line.substr(start, token.getLength());
 225
 226     // If the token is just an empty string, we can skip all the work below.
 227     if (tok_str.empty())
 228       continue;
 229
 230     // If the cursor is inside this token, we have to apply the 'selected'
 231     // highlight style before applying the actual token color.
 232     llvm::StringRef to_print = tok_str;
 233     StreamString storage;
 234     auto end = start + token.getLength();
 235     if (cursor_pos && end > *cursor_pos && !highlighted_cursor) {
 236       highlighted_cursor = true;
 237       options.selected.Apply(storage, tok_str);
 238       to_print = storage.GetString();
 239     }
 240
 241     // See how we are supposed to highlight this token.
 242     HighlightStyle::ColorStyle color =
 243         determineClangStyle(*this, token, tok_str, options, in_pp_directive);
 244
 245     color.Apply(result, to_print);
 246   }
 247
 248   // Add the line ending we trimmed before tokenizing.
 249   result << line_ending;
 250
 251   // If we went over the whole file but couldn't find our own file, then
 252   // somehow our setup was wrong. When we're in release mode we just give the
 253   // user the normal line and pretend we don't know how to highlight it. In
 254   // debug mode we bail out with an assert as this should never happen.
 255   if (!found_user_line) {
 256     result << line;
 257     assert(false && "We couldn't find the user line in the input file?");
 258   }
 259 }