1 //===-- ClangHighlighter.cpp ----------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "ClangHighlighter.h"
11 #include "lldb/Host/FileSystem.h"
12 #include "lldb/Target/Language.h"
13 #include "lldb/Utility/AnsiTerminal.h"
14 #include "lldb/Utility/StreamString.h"
16 #include "clang/Basic/FileManager.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "clang/Lex/Lexer.h"
19 #include "llvm/ADT/StringSet.h"
20 #include "llvm/Support/MemoryBuffer.h"
23 using namespace lldb_private
;
25 bool ClangHighlighter::isKeyword(llvm::StringRef token
) const {
26 return keywords
.contains(token
);
29 ClangHighlighter::ClangHighlighter() {
30 #define KEYWORD(X, N) keywords.insert(#X);
31 #include "clang/Basic/TokenKinds.def"
/// Determines which style should be applied to the given token.
/// \param highlighter
///     The current highlighter that should use the style.
/// \param token
///     The current token.
/// \param tok_str
///     The string in the source code the token represents.
/// \param options
///     The style we use for coloring the source code.
/// \param in_pp_directive
///     If we are currently in a preprocessor directive. NOTE: This is
///     passed by reference and will be updated if the current token starts
///     or ends a preprocessor directive.
/// \return
///     The ColorStyle that should be applied to the token.
49 static HighlightStyle::ColorStyle
50 determineClangStyle(const ClangHighlighter
&highlighter
,
51 const clang::Token
&token
, llvm::StringRef tok_str
,
52 const HighlightStyle
&options
, bool &in_pp_directive
) {
53 using namespace clang
;
// Classification is attempted in priority order: comment, preprocessor
// directive, string literal, other literal, keyword, and finally the token
// kind itself. The first match decides the returned style.
55 if (token
.is(tok::comment
)) {
56 // If we were in a preprocessor directive before, we now left it.
57 in_pp_directive
= false;
58 return options
.comment
;
// A '#' token starts directive mode; once in_pp_directive is set, every
// following non-comment token is styled as part of the directive.
59 } else if (in_pp_directive
|| token
.getKind() == tok::hash
) {
60 // Let's assume that the rest of the line is a PP directive.
61 in_pp_directive
= true;
62 // Preprocessor directives are hard to match, so we have to hack this in.
63 return options
.pp_directive
;
// String literals are tested before the general literal check below, so they
// get their own style.
64 } else if (tok::isStringLiteral(token
.getKind()))
65 return options
.string_literal
;
66 else if (tok::isLiteral(token
.getKind()))
67 return options
.scalar_literal
;
// Keywords are recognised by their spelling (tok_str), not by the token kind.
68 else if (highlighter
.isKeyword(tok_str
))
69 return options
.keyword
;
// Not a comment, directive, literal or keyword: choose the style from the
// token kind.
71 switch (token
.getKind()) {
72 case tok::raw_identifier
:
74 return options
.identifier
;
// NOTE(review): the case labels selecting the braces, square-bracket and
// parentheses styles are not visible in this copy of the file — confirm
// against the upstream source before relying on the grouping below.
77 return options
.braces
;
80 return options
.square_brackets
;
83 return options
.parentheses
;
// The operator token kinds listed here all share the 'operators' style.
100 case tok::minusminus
:
101 case tok::minusequal
:
104 case tok::exclaimequal
:
106 case tok::slashequal
:
108 case tok::percentequal
:
112 case tok::lesslessequal
:
115 case tok::greatergreater
:
116 case tok::greaterequal
:
117 case tok::greatergreaterequal
:
119 case tok::caretequal
:
125 case tok::equalequal
:
126 return options
.operators
;
// Nothing matched: hand back a default-constructed ColorStyle.
130 return HighlightStyle::ColorStyle();
133 void ClangHighlighter::Highlight(const HighlightStyle
&options
,
134 llvm::StringRef line
,
135 std::optional
<size_t> cursor_pos
,
136 llvm::StringRef previous_lines
,
137 Stream
&result
) const {
138 using namespace clang
;
140 FileSystemOptions file_opts
;
141 FileManager
file_mgr(file_opts
,
142 FileSystem::Instance().GetVirtualFileSystem());
  // The line might end in a backslash which would cause Clang to drop the
  // backslash and the terminating new line. This makes sense when parsing C++,
  // but when highlighting we care about preserving the backslash/newline. To
  // not lose this information we remove the new line here so that Clang knows
  // this is just a single line we are highlighting. We add back the newline
  // after tokenizing.
150 llvm::StringRef line_ending
= "";
151 // There are a few legal line endings Clang recognizes and we need to
152 // temporarily remove from the string.
153 if (line
.consume_back("\r\n"))
154 line_ending
= "\r\n";
155 else if (line
.consume_back("\n"))
157 else if (line
.consume_back("\r"))
160 unsigned line_number
= previous_lines
.count('\n') + 1U;
  // Let's build the actual source code Clang needs and set up some utility
  // objects.
164 std::string full_source
= previous_lines
.str() + line
.str();
165 llvm::IntrusiveRefCntPtr
<DiagnosticIDs
> diag_ids(new DiagnosticIDs());
166 llvm::IntrusiveRefCntPtr
<DiagnosticOptions
> diags_opts(
167 new DiagnosticOptions());
168 DiagnosticsEngine
diags(diag_ids
, diags_opts
);
169 clang::SourceManager
SM(diags
, file_mgr
);
170 auto buf
= llvm::MemoryBuffer::getMemBuffer(full_source
);
172 FileID FID
= SM
.createFileID(buf
->getMemBufferRef());
  // Let's just enable the latest ObjC and C++ which should get most tokens
  // right.
178 // FIXME: This should probably set CPlusPlus, CPlusPlus11, ... too
179 Opts
.CPlusPlus17
= true;
180 Opts
.LineComment
= true;
182 Lexer
lex(FID
, buf
->getMemBufferRef(), SM
, Opts
);
183 // The lexer should keep whitespace around.
184 lex
.SetKeepWhitespaceMode(true);
186 // Keeps track if we have entered a PP directive.
187 bool in_pp_directive
= false;
189 // True once we actually lexed the user provided line.
190 bool found_user_line
= false;
192 // True if we already highlighted the token under the cursor, false otherwise.
193 bool highlighted_cursor
= false;
197 // Returns true if this is the last token we get from the lexer.
198 exit
= lex
.LexFromRawLexer(token
);
200 bool invalid
= false;
201 unsigned current_line_number
=
202 SM
.getSpellingLineNumber(token
.getLocation(), &invalid
);
203 if (current_line_number
!= line_number
)
205 found_user_line
= true;
207 // We don't need to print any tokens without a spelling line number.
211 // Same as above but with the column number.
213 unsigned start
= SM
.getSpellingColumnNumber(token
.getLocation(), &invalid
);
216 // Column numbers start at 1, but indexes in our string start at 0.
219 // Annotations don't have a length, so let's skip them.
220 if (token
.isAnnotation())
223 // Extract the token string from our source code.
224 llvm::StringRef tok_str
= line
.substr(start
, token
.getLength());
226 // If the token is just an empty string, we can skip all the work below.
230 // If the cursor is inside this token, we have to apply the 'selected'
231 // highlight style before applying the actual token color.
232 llvm::StringRef to_print
= tok_str
;
233 StreamString storage
;
234 auto end
= start
+ token
.getLength();
235 if (cursor_pos
&& end
> *cursor_pos
&& !highlighted_cursor
) {
236 highlighted_cursor
= true;
237 options
.selected
.Apply(storage
, tok_str
);
238 to_print
= storage
.GetString();
241 // See how we are supposed to highlight this token.
242 HighlightStyle::ColorStyle color
=
243 determineClangStyle(*this, token
, tok_str
, options
, in_pp_directive
);
245 color
.Apply(result
, to_print
);
248 // Add the line ending we trimmed before tokenizing.
249 result
<< line_ending
;
  // If we went over the whole file but couldn't find the user's line, then
  // somehow our setup was wrong. When we're in release mode we just give the
  // user the normal line and pretend we don't know how to highlight it. In
  // debug mode we bail out with an assert as this should never happen.
255 if (!found_user_line
) {
257 assert(false && "We couldn't find the user line in the input file?");