1 //===- DependencyDirectivesScanner.cpp ------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This is the interface for scanning header and source files to get the
11 /// minimum necessary preprocessor directives for evaluating includes. It
12 /// reduces the source down to #define, #include, #import, @import, and any
13 /// conditional preprocessor logic that contains one of those.
15 //===----------------------------------------------------------------------===//
17 #include "clang/Lex/DependencyDirectivesScanner.h"
18 #include "clang/Basic/CharInfo.h"
19 #include "clang/Basic/Diagnostic.h"
20 #include "clang/Lex/LexDiagnostic.h"
21 #include "clang/Lex/Lexer.h"
22 #include "clang/Lex/Pragma.h"
23 #include "llvm/ADT/ScopeExit.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringMap.h"
26 #include "llvm/ADT/StringSwitch.h"
29 using namespace clang
;
30 using namespace clang::dependency_directives_scan
;
35 struct DirectiveWithTokens
{
39 DirectiveWithTokens(DirectiveKind Kind
, unsigned NumTokens
)
40 : Kind(Kind
), NumTokens(NumTokens
) {}
43 /// Does an efficient "scan" of the sources to detect the presence of
44 /// preprocessor (or module import) directives and collects the raw lexed tokens
45 /// for those directives so that the \p Lexer can "replay" them when the file is
48 /// Note that the behavior of the raw lexer is affected by the language mode,
49 /// while at this point we want to do a scan and collect tokens once,
50 /// irrespective of the language mode that the file will get included in. To
51 /// compensate for that the \p Lexer, while "replaying", will adjust a token
52 /// where appropriate, when it could affect the preprocessor's state.
53 /// For example in a directive like
56 /// #if __has_cpp_attribute(clang::fallthrough)
59 /// The preprocessor needs to see '::' as 'tok::coloncolon' instead of 2
60 /// 'tok::colon'. The \p Lexer will adjust if it sees consecutive 'tok::colon'
61 /// while in C++ mode.
63 Scanner(StringRef Input
,
64 SmallVectorImpl
<dependency_directives_scan::Token
> &Tokens
,
65 DiagnosticsEngine
*Diags
, SourceLocation InputSourceLoc
)
66 : Input(Input
), Tokens(Tokens
), Diags(Diags
),
67 InputSourceLoc(InputSourceLoc
), LangOpts(getLangOptsForDepScanning()),
68 TheLexer(InputSourceLoc
, LangOpts
, Input
.begin(), Input
.begin(),
71 static LangOptions
getLangOptsForDepScanning() {
73 // Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'.
75 LangOpts
.LineComment
= true;
76 // FIXME: we do not enable C11 or C++11, so we are missing u/u8/U"" and
81 /// Lex the provided source and emit the directive tokens.
83 /// \returns True on error.
84 bool scan(SmallVectorImpl
<Directive
> &Directives
);
87 /// Lexes next token and advances \p First and the \p Lexer.
88 [[nodiscard
]] dependency_directives_scan::Token
&
89 lexToken(const char *&First
, const char *const End
);
91 dependency_directives_scan::Token
&lexIncludeFilename(const char *&First
,
92 const char *const End
);
94 void skipLine(const char *&First
, const char *const End
);
95 void skipDirective(StringRef Name
, const char *&First
, const char *const End
);
97 /// Returns the spelling of a string literal or identifier after performing
98 /// any processing needed to handle \c clang::Token::NeedsCleaning.
99 StringRef
cleanStringIfNeeded(const dependency_directives_scan::Token
&Tok
);
101 /// Lexes next token and if it is identifier returns its string, otherwise
102 /// it skips the current line and returns \p std::nullopt.
104 /// In any case (whatever the token kind) \p First and the \p Lexer will
105 /// advance beyond the token.
106 [[nodiscard
]] std::optional
<StringRef
>
107 tryLexIdentifierOrSkipLine(const char *&First
, const char *const End
);
109 /// Used when it is certain that next token is an identifier.
110 [[nodiscard
]] StringRef
lexIdentifier(const char *&First
,
111 const char *const End
);
113 /// Lexes next token and returns true iff it is an identifier that matches \p
114 /// Id, otherwise it skips the current line and returns false.
116 /// In any case (whatever the token kind) \p First and the \p Lexer will
117 /// advance beyond the token.
118 [[nodiscard
]] bool isNextIdentifierOrSkipLine(StringRef Id
,
120 const char *const End
);
122 /// Lexes next token and returns true iff it matches the kind \p K.
123 /// Otherwise it skips the current line and returns false.
125 /// In any case (whatever the token kind) \p First and the \p Lexer will
126 /// advance beyond the token.
127 [[nodiscard
]] bool isNextTokenOrSkipLine(tok::TokenKind K
, const char *&First
,
128 const char *const End
);
130 /// Lexes next token and if it is string literal, returns its string.
131 /// Otherwise, it skips the current line and returns \p std::nullopt.
133 /// In any case (whatever the token kind) \p First and the \p Lexer will
134 /// advance beyond the token.
135 [[nodiscard
]] std::optional
<StringRef
>
136 tryLexStringLiteralOrSkipLine(const char *&First
, const char *const End
);
138 [[nodiscard
]] bool scanImpl(const char *First
, const char *const End
);
139 [[nodiscard
]] bool lexPPLine(const char *&First
, const char *const End
);
140 [[nodiscard
]] bool lexAt(const char *&First
, const char *const End
);
141 [[nodiscard
]] bool lexModule(const char *&First
, const char *const End
);
142 [[nodiscard
]] bool lexDefine(const char *HashLoc
, const char *&First
,
143 const char *const End
);
144 [[nodiscard
]] bool lexPragma(const char *&First
, const char *const End
);
145 [[nodiscard
]] bool lex_Pragma(const char *&First
, const char *const End
);
146 [[nodiscard
]] bool lexEndif(const char *&First
, const char *const End
);
147 [[nodiscard
]] bool lexDefault(DirectiveKind Kind
, const char *&First
,
148 const char *const End
);
149 [[nodiscard
]] bool lexModuleDirectiveBody(DirectiveKind Kind
,
151 const char *const End
);
152 void lexPPDirectiveBody(const char *&First
, const char *const End
);
154 DirectiveWithTokens
&pushDirective(DirectiveKind Kind
) {
155 Tokens
.append(CurDirToks
);
156 DirsWithToks
.emplace_back(Kind
, CurDirToks
.size());
158 return DirsWithToks
.back();
160 void popDirective() {
161 Tokens
.pop_back_n(DirsWithToks
.pop_back_val().NumTokens
);
163 DirectiveKind
topDirective() const {
164 return DirsWithToks
.empty() ? pp_none
: DirsWithToks
.back().Kind
;
167 unsigned getOffsetAt(const char *CurPtr
) const {
168 return CurPtr
- Input
.data();
171 /// Reports a diagnostic if the diagnostic engine is provided. Always returns
173 bool reportError(const char *CurPtr
, unsigned Err
);
175 StringMap
<char> SplitIds
;
177 SmallVectorImpl
<dependency_directives_scan::Token
> &Tokens
;
178 DiagnosticsEngine
*Diags
;
179 SourceLocation InputSourceLoc
;
181 const char *LastTokenPtr
= nullptr;
182 /// Keeps track of the tokens for the currently lexed directive. Once a
183 /// directive is fully lexed and "committed" then the tokens get appended to
184 /// \p Tokens and \p CurDirToks is cleared for the next directive.
185 SmallVector
<dependency_directives_scan::Token
, 32> CurDirToks
;
186 /// The directives that were lexed along with the number of tokens that each
187 /// directive contains. The tokens of all the directives are kept in \p Tokens
188 /// vector, in the same order as the directives order in \p DirsWithToks.
189 SmallVector
<DirectiveWithTokens
, 64> DirsWithToks
;
190 LangOptions LangOpts
;
194 } // end anonymous namespace
196 bool Scanner::reportError(const char *CurPtr
, unsigned Err
) {
199 assert(CurPtr
>= Input
.data() && "invalid buffer ptr");
200 Diags
->Report(InputSourceLoc
.getLocWithOffset(getOffsetAt(CurPtr
)), Err
);
204 static void skipOverSpaces(const char *&First
, const char *const End
) {
205 while (First
!= End
&& isHorizontalWhitespace(*First
))
209 [[nodiscard
]] static bool isRawStringLiteral(const char *First
,
210 const char *Current
) {
211 assert(First
<= Current
);
213 // Check if we can even back up.
214 if (*Current
!= '"' || First
== Current
)
221 if (First
== Current
|| !isAsciiIdentifierContinue(*--Current
))
224 // Check for a prefix of "u", "U", or "L".
225 if (*Current
== 'u' || *Current
== 'U' || *Current
== 'L')
226 return First
== Current
|| !isAsciiIdentifierContinue(*--Current
);
228 // Check for a prefix of "u8".
229 if (*Current
!= '8' || First
== Current
|| *Current
-- != 'u')
231 return First
== Current
|| !isAsciiIdentifierContinue(*--Current
);
234 static void skipRawString(const char *&First
, const char *const End
) {
235 assert(First
[0] == '"');
236 assert(First
[-1] == 'R');
238 const char *Last
= ++First
;
239 while (Last
!= End
&& *Last
!= '(')
242 First
= Last
; // Hit the end... just give up.
246 StringRef
Terminator(First
, Last
- First
);
248 // Move First to just past the next ")".
250 while (First
!= End
&& *First
!= ')')
256 // Look ahead for the terminator sequence.
258 while (Last
!= End
&& size_t(Last
- First
) < Terminator
.size() &&
259 Terminator
[Last
- First
] == *Last
)
262 // Check if we hit it (or the end of the file).
267 if (size_t(Last
- First
) < Terminator
.size())
276 // Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n)
277 static unsigned isEOL(const char *First
, const char *const End
) {
280 if (End
- First
> 1 && isVerticalWhitespace(First
[0]) &&
281 isVerticalWhitespace(First
[1]) && First
[0] != First
[1])
283 return !!isVerticalWhitespace(First
[0]);
286 static void skipString(const char *&First
, const char *const End
) {
287 assert(*First
== '\'' || *First
== '"' || *First
== '<');
288 const char Terminator
= *First
== '<' ? '>' : *First
;
289 for (++First
; First
!= End
&& *First
!= Terminator
; ++First
) {
290 // String and character literals don't extend past the end of the line.
291 if (isVerticalWhitespace(*First
))
295 // Skip past backslash to the next character. This ensures that the
296 // character right after it is skipped as well, which matters if it's
300 if (!isWhitespace(*First
))
302 // Whitespace after the backslash might indicate a line continuation.
303 const char *FirstAfterBackslashPastSpace
= First
;
304 skipOverSpaces(FirstAfterBackslashPastSpace
, End
);
305 if (unsigned NLSize
= isEOL(FirstAfterBackslashPastSpace
, End
)) {
306 // Advance the character pointer to the next line for the next
308 First
= FirstAfterBackslashPastSpace
+ NLSize
- 1;
312 ++First
; // Finish off the string.
315 // Returns the length of the skipped newline
316 static unsigned skipNewline(const char *&First
, const char *End
) {
319 assert(isVerticalWhitespace(*First
));
320 unsigned Len
= isEOL(First
, End
);
321 assert(Len
&& "expected newline");
// Returns true if the character immediately before the just-skipped newline
// (of length \p EOLLen, with \p First pointing past it) was a backslash,
// i.e. the newline was a line continuation.
static bool wasLineContinuation(const char *First, unsigned EOLLen) {
  return *(First - (int)EOLLen - 1) == '\\';
}
330 static void skipToNewlineRaw(const char *&First
, const char *const End
) {
335 unsigned Len
= isEOL(First
, End
);
342 Len
= isEOL(First
, End
);
345 if (First
[-1] != '\\')
349 // Keep skipping lines...
353 static void skipLineComment(const char *&First
, const char *const End
) {
354 assert(First
[0] == '/' && First
[1] == '/');
356 skipToNewlineRaw(First
, End
);
359 static void skipBlockComment(const char *&First
, const char *const End
) {
360 assert(First
[0] == '/' && First
[1] == '*');
361 if (End
- First
< 4) {
365 for (First
+= 3; First
!= End
; ++First
)
366 if (First
[-1] == '*' && First
[0] == '/') {
372 /// \returns True if the current single quotation mark character is a C++ 14
374 static bool isQuoteCppDigitSeparator(const char *const Start
,
375 const char *const Cur
,
376 const char *const End
) {
377 assert(*Cur
== '\'' && "expected quotation character");
378 // skipLine called in places where we don't expect a valid number
379 // body before `start` on the same line, so always return false at the start.
382 // The previous character must be a valid PP number character.
383 // Make sure that the L, u, U, u8 prefixes don't get marked as a
385 char Prev
= *(Cur
- 1);
386 if (Prev
== 'L' || Prev
== 'U' || Prev
== 'u')
388 if (Prev
== '8' && (Cur
- 1 != Start
) && *(Cur
- 2) == 'u')
390 if (!isPreprocessingNumberBody(Prev
))
392 // The next character should be a valid identifier body character.
393 return (Cur
+ 1) < End
&& isAsciiIdentifierContinue(*(Cur
+ 1));
396 void Scanner::skipLine(const char *&First
, const char *const End
) {
398 assert(First
<= End
);
402 if (isVerticalWhitespace(*First
)) {
403 skipNewline(First
, End
);
406 const char *Start
= First
;
407 while (First
!= End
&& !isVerticalWhitespace(*First
)) {
408 // Iterate over strings correctly to avoid comments and newlines.
410 (*First
== '\'' && !isQuoteCppDigitSeparator(Start
, First
, End
))) {
411 LastTokenPtr
= First
;
412 if (isRawStringLiteral(Start
, First
))
413 skipRawString(First
, End
);
415 skipString(First
, End
);
419 // Iterate over comments correctly.
420 if (*First
!= '/' || End
- First
< 2) {
421 LastTokenPtr
= First
;
426 if (First
[1] == '/') {
428 skipLineComment(First
, End
);
432 if (First
[1] != '*') {
433 LastTokenPtr
= First
;
439 skipBlockComment(First
, End
);
444 // Skip over the newline.
445 unsigned Len
= skipNewline(First
, End
);
446 if (!wasLineContinuation(First
, Len
)) // Continue past line-continuations.
451 void Scanner::skipDirective(StringRef Name
, const char *&First
,
452 const char *const End
) {
453 if (llvm::StringSwitch
<bool>(Name
)
454 .Case("warning", true)
457 // Do not process quotes or comments.
458 skipToNewlineRaw(First
, End
);
460 skipLine(First
, End
);
463 static void skipWhitespace(const char *&First
, const char *const End
) {
465 assert(First
<= End
);
466 skipOverSpaces(First
, End
);
471 if (First
[0] == '\\' && isVerticalWhitespace(First
[1])) {
472 skipNewline(++First
, End
);
476 // Check for a non-comment character.
481 if (First
[1] == '/') {
482 skipLineComment(First
, End
);
486 // Cannot be a comment.
491 skipBlockComment(First
, End
);
495 bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind
, const char *&First
,
496 const char *const End
) {
497 const char *DirectiveLoc
= Input
.data() + CurDirToks
.front().Offset
;
499 const dependency_directives_scan::Token
&Tok
= lexToken(First
, End
);
500 if (Tok
.is(tok::eof
))
503 diag::err_dep_source_scanner_missing_semi_after_at_import
);
504 if (Tok
.is(tok::semi
))
508 skipWhitespace(First
, End
);
511 if (!isVerticalWhitespace(*First
))
513 DirectiveLoc
, diag::err_dep_source_scanner_unexpected_tokens_at_import
);
514 skipNewline(First
, End
);
518 dependency_directives_scan::Token
&Scanner::lexToken(const char *&First
,
519 const char *const End
) {
521 TheLexer
.LexFromRawLexer(Tok
);
522 First
= Input
.data() + TheLexer
.getCurrentBufferOffset();
523 assert(First
<= End
);
525 unsigned Offset
= TheLexer
.getCurrentBufferOffset() - Tok
.getLength();
526 CurDirToks
.emplace_back(Offset
, Tok
.getLength(), Tok
.getKind(),
528 return CurDirToks
.back();
531 dependency_directives_scan::Token
&
532 Scanner::lexIncludeFilename(const char *&First
, const char *const End
) {
534 TheLexer
.LexIncludeFilename(Tok
);
535 First
= Input
.data() + TheLexer
.getCurrentBufferOffset();
536 assert(First
<= End
);
538 unsigned Offset
= TheLexer
.getCurrentBufferOffset() - Tok
.getLength();
539 CurDirToks
.emplace_back(Offset
, Tok
.getLength(), Tok
.getKind(),
541 return CurDirToks
.back();
544 void Scanner::lexPPDirectiveBody(const char *&First
, const char *const End
) {
546 const dependency_directives_scan::Token
&Tok
= lexToken(First
, End
);
547 if (Tok
.is(tok::eod
))
553 Scanner::cleanStringIfNeeded(const dependency_directives_scan::Token
&Tok
) {
554 bool NeedsCleaning
= Tok
.Flags
& clang::Token::NeedsCleaning
;
555 if (LLVM_LIKELY(!NeedsCleaning
))
556 return Input
.slice(Tok
.Offset
, Tok
.getEnd());
558 SmallString
<64> Spelling
;
559 Spelling
.resize(Tok
.Length
);
561 // FIXME: C++11 raw string literals need special handling (see getSpellingSlow
562 // in the Lexer). Currently we cannot see them due to our LangOpts.
564 unsigned SpellingLength
= 0;
565 const char *BufPtr
= Input
.begin() + Tok
.Offset
;
566 const char *AfterIdent
= Input
.begin() + Tok
.getEnd();
567 while (BufPtr
< AfterIdent
) {
568 auto [Char
, Size
] = Lexer::getCharAndSizeNoWarn(BufPtr
, LangOpts
);
569 Spelling
[SpellingLength
++] = Char
;
573 return SplitIds
.try_emplace(StringRef(Spelling
.begin(), SpellingLength
), 0)
577 std::optional
<StringRef
>
578 Scanner::tryLexIdentifierOrSkipLine(const char *&First
, const char *const End
) {
579 const dependency_directives_scan::Token
&Tok
= lexToken(First
, End
);
580 if (Tok
.isNot(tok::raw_identifier
)) {
581 if (!Tok
.is(tok::eod
))
582 skipLine(First
, End
);
586 return cleanStringIfNeeded(Tok
);
589 StringRef
Scanner::lexIdentifier(const char *&First
, const char *const End
) {
590 std::optional
<StringRef
> Id
= tryLexIdentifierOrSkipLine(First
, End
);
591 assert(Id
&& "expected identifier token");
595 bool Scanner::isNextIdentifierOrSkipLine(StringRef Id
, const char *&First
,
596 const char *const End
) {
597 if (std::optional
<StringRef
> FoundId
=
598 tryLexIdentifierOrSkipLine(First
, End
)) {
601 skipLine(First
, End
);
606 bool Scanner::isNextTokenOrSkipLine(tok::TokenKind K
, const char *&First
,
607 const char *const End
) {
608 const dependency_directives_scan::Token
&Tok
= lexToken(First
, End
);
611 skipLine(First
, End
);
615 std::optional
<StringRef
>
616 Scanner::tryLexStringLiteralOrSkipLine(const char *&First
,
617 const char *const End
) {
618 const dependency_directives_scan::Token
&Tok
= lexToken(First
, End
);
619 if (!tok::isStringLiteral(Tok
.Kind
)) {
620 if (!Tok
.is(tok::eod
))
621 skipLine(First
, End
);
625 return cleanStringIfNeeded(Tok
);
628 bool Scanner::lexAt(const char *&First
, const char *const End
) {
632 const dependency_directives_scan::Token
&AtTok
= lexToken(First
, End
);
633 assert(AtTok
.is(tok::at
));
636 if (!isNextIdentifierOrSkipLine("import", First
, End
))
638 return lexModuleDirectiveBody(decl_at_import
, First
, End
);
641 bool Scanner::lexModule(const char *&First
, const char *const End
) {
642 StringRef Id
= lexIdentifier(First
, End
);
644 if (Id
== "export") {
646 std::optional
<StringRef
> NextId
= tryLexIdentifierOrSkipLine(First
, End
);
652 if (Id
!= "module" && Id
!= "import") {
653 skipLine(First
, End
);
657 skipWhitespace(First
, End
);
659 // Ignore this as a module directive if the next character can't be part of
668 if (!isAsciiIdentifierContinue(*First
)) {
669 skipLine(First
, End
);
674 TheLexer
.seek(getOffsetAt(First
), /*IsAtStartOfLine*/ false);
678 Kind
= Export
? cxx_export_module_decl
: cxx_module_decl
;
680 Kind
= Export
? cxx_export_import_decl
: cxx_import_decl
;
682 return lexModuleDirectiveBody(Kind
, First
, End
);
685 bool Scanner::lex_Pragma(const char *&First
, const char *const End
) {
686 if (!isNextTokenOrSkipLine(tok::l_paren
, First
, End
))
689 std::optional
<StringRef
> Str
= tryLexStringLiteralOrSkipLine(First
, End
);
691 if (!Str
|| !isNextTokenOrSkipLine(tok::r_paren
, First
, End
))
694 SmallString
<64> Buffer(*Str
);
695 prepare_PragmaString(Buffer
);
697 // Use a new scanner instance since the tokens will be inside the allocated
698 // string. We should already have captured all the relevant tokens in the
700 SmallVector
<dependency_directives_scan::Token
> DiscardTokens
;
701 const char *Begin
= Buffer
.c_str();
702 Scanner PragmaScanner
{StringRef(Begin
, Buffer
.size()), DiscardTokens
, Diags
,
705 PragmaScanner
.TheLexer
.setParsingPreprocessorDirective(true);
706 if (PragmaScanner
.lexPragma(Begin
, Buffer
.end()))
709 DirectiveKind K
= PragmaScanner
.topDirective();
711 skipLine(First
, End
);
715 assert(Begin
== Buffer
.end());
720 bool Scanner::lexPragma(const char *&First
, const char *const End
) {
721 std::optional
<StringRef
> FoundId
= tryLexIdentifierOrSkipLine(First
, End
);
725 StringRef Id
= *FoundId
;
726 auto Kind
= llvm::StringSwitch
<DirectiveKind
>(Id
)
727 .Case("once", pp_pragma_once
)
728 .Case("push_macro", pp_pragma_push_macro
)
729 .Case("pop_macro", pp_pragma_pop_macro
)
730 .Case("include_alias", pp_pragma_include_alias
)
732 if (Kind
!= pp_none
) {
733 lexPPDirectiveBody(First
, End
);
739 skipLine(First
, End
);
743 FoundId
= tryLexIdentifierOrSkipLine(First
, End
);
748 // #pragma clang system_header
749 if (Id
== "system_header") {
750 lexPPDirectiveBody(First
, End
);
751 pushDirective(pp_pragma_system_header
);
755 if (Id
!= "module") {
756 skipLine(First
, End
);
760 // #pragma clang module.
761 if (!isNextIdentifierOrSkipLine("import", First
, End
))
764 // #pragma clang module import.
765 lexPPDirectiveBody(First
, End
);
766 pushDirective(pp_pragma_import
);
770 bool Scanner::lexEndif(const char *&First
, const char *const End
) {
771 // Strip out "#else" if it's empty.
772 if (topDirective() == pp_else
)
775 // If "#ifdef" is empty, strip it and skip the "#endif".
777 // FIXME: Once/if Clang starts disallowing __has_include in macro expansions,
778 // we can skip empty `#if` and `#elif` blocks as well after scanning for a
779 // literal __has_include in the condition. Even without that rule we could
780 // drop the tokens if we scan for identifiers in the condition and find none.
781 if (topDirective() == pp_ifdef
|| topDirective() == pp_ifndef
) {
783 skipLine(First
, End
);
787 return lexDefault(pp_endif
, First
, End
);
790 bool Scanner::lexDefault(DirectiveKind Kind
, const char *&First
,
791 const char *const End
) {
792 lexPPDirectiveBody(First
, End
);
// Fast filter: returns true if a line beginning with \p First could possibly
// start a directive this scanner cares about ('#...', '@import',
// import/export/module, or '_Pragma'). The body was elided in the extraction;
// reconstructed from the dispatch in lexPPLine — confirm against upstream.
static bool isStartOfRelevantLine(char First) {
  switch (First) {
  case '#':
  case '@':
  case 'i':
  case 'e':
  case 'm':
  case '_':
    return true;
  }
  return false;
}
810 bool Scanner::lexPPLine(const char *&First
, const char *const End
) {
811 assert(First
!= End
);
813 skipWhitespace(First
, End
);
814 assert(First
<= End
);
818 if (!isStartOfRelevantLine(*First
)) {
819 skipLine(First
, End
);
820 assert(First
<= End
);
824 LastTokenPtr
= First
;
826 TheLexer
.seek(getOffsetAt(First
), /*IsAtStartOfLine*/ true);
828 auto ScEx1
= make_scope_exit([&]() {
829 /// Clear Scanner's CurDirToks before returning, in case we didn't push a
836 return lexAt(First
, End
);
838 if (*First
== 'i' || *First
== 'e' || *First
== 'm')
839 return lexModule(First
, End
);
842 if (isNextIdentifierOrSkipLine("_Pragma", First
, End
))
843 return lex_Pragma(First
, End
);
847 // Handle preprocessing directives.
849 TheLexer
.setParsingPreprocessorDirective(true);
850 auto ScEx2
= make_scope_exit(
851 [&]() { TheLexer
.setParsingPreprocessorDirective(false); });
854 const dependency_directives_scan::Token
&HashTok
= lexToken(First
, End
);
855 if (HashTok
.is(tok::hashhash
)) {
856 // A \p tok::hashhash at this location is passed by the preprocessor to the
857 // parser to interpret, like any other token. So for dependency scanning
858 // skip it like a normal token not affecting the preprocessor.
859 skipLine(First
, End
);
860 assert(First
<= End
);
863 assert(HashTok
.is(tok::hash
));
866 std::optional
<StringRef
> FoundId
= tryLexIdentifierOrSkipLine(First
, End
);
870 StringRef Id
= *FoundId
;
873 return lexPragma(First
, End
);
875 auto Kind
= llvm::StringSwitch
<DirectiveKind
>(Id
)
876 .Case("include", pp_include
)
877 .Case("__include_macros", pp___include_macros
)
878 .Case("define", pp_define
)
879 .Case("undef", pp_undef
)
880 .Case("import", pp_import
)
881 .Case("include_next", pp_include_next
)
883 .Case("ifdef", pp_ifdef
)
884 .Case("ifndef", pp_ifndef
)
885 .Case("elif", pp_elif
)
886 .Case("elifdef", pp_elifdef
)
887 .Case("elifndef", pp_elifndef
)
888 .Case("else", pp_else
)
889 .Case("endif", pp_endif
)
891 if (Kind
== pp_none
) {
892 skipDirective(Id
, First
, End
);
896 if (Kind
== pp_endif
)
897 return lexEndif(First
, End
);
901 case pp___include_macros
:
902 case pp_include_next
:
904 lexIncludeFilename(First
, End
);
911 return lexDefault(Kind
, First
, End
);
// Advances \p First past a UTF-8 byte order mark (EF BB BF), if present.
static void skipUTF8ByteOrderMark(const char *&First, const char *const End) {
  if ((End - First) >= 3 && First[0] == '\xef' && First[1] == '\xbb' &&
      First[2] == '\xbf')
    First += 3;
}
920 bool Scanner::scanImpl(const char *First
, const char *const End
) {
921 skipUTF8ByteOrderMark(First
, End
);
923 if (lexPPLine(First
, End
))
928 bool Scanner::scan(SmallVectorImpl
<Directive
> &Directives
) {
929 bool Error
= scanImpl(Input
.begin(), Input
.end());
932 // Add an EOF on success.
934 (Tokens
.empty() || LastTokenPtr
> Input
.begin() + Tokens
.back().Offset
))
935 pushDirective(tokens_present_before_eof
);
936 pushDirective(pp_eof
);
939 ArrayRef
<dependency_directives_scan::Token
> RemainingTokens
= Tokens
;
940 for (const DirectiveWithTokens
&DirWithToks
: DirsWithToks
) {
941 assert(RemainingTokens
.size() >= DirWithToks
.NumTokens
);
942 Directives
.emplace_back(DirWithToks
.Kind
,
943 RemainingTokens
.take_front(DirWithToks
.NumTokens
));
944 RemainingTokens
= RemainingTokens
.drop_front(DirWithToks
.NumTokens
);
946 assert(RemainingTokens
.empty());
951 bool clang::scanSourceForDependencyDirectives(
952 StringRef Input
, SmallVectorImpl
<dependency_directives_scan::Token
> &Tokens
,
953 SmallVectorImpl
<Directive
> &Directives
, DiagnosticsEngine
*Diags
,
954 SourceLocation InputSourceLoc
) {
955 return Scanner(Input
, Tokens
, Diags
, InputSourceLoc
).scan(Directives
);
958 void clang::printDependencyDirectivesAsSource(
960 ArrayRef
<dependency_directives_scan::Directive
> Directives
,
961 llvm::raw_ostream
&OS
) {
962 // Add a space separator where it is convenient for testing purposes.
963 auto needsSpaceSeparator
=
964 [](tok::TokenKind Prev
,
965 const dependency_directives_scan::Token
&Tok
) -> bool {
966 if (Prev
== Tok
.Kind
)
967 return !Tok
.isOneOf(tok::l_paren
, tok::r_paren
, tok::l_square
,
969 if (Prev
== tok::raw_identifier
&&
970 Tok
.isOneOf(tok::hash
, tok::numeric_constant
, tok::string_literal
,
971 tok::char_constant
, tok::header_name
))
973 if (Prev
== tok::r_paren
&&
974 Tok
.isOneOf(tok::raw_identifier
, tok::hash
, tok::string_literal
,
975 tok::char_constant
, tok::unknown
))
977 if (Prev
== tok::comma
&&
978 Tok
.isOneOf(tok::l_paren
, tok::string_literal
, tok::less
))
983 for (const dependency_directives_scan::Directive
&Directive
: Directives
) {
984 if (Directive
.Kind
== tokens_present_before_eof
)
985 OS
<< "<TokBeforeEOF>";
986 std::optional
<tok::TokenKind
> PrevTokenKind
;
987 for (const dependency_directives_scan::Token
&Tok
: Directive
.Tokens
) {
988 if (PrevTokenKind
&& needsSpaceSeparator(*PrevTokenKind
, Tok
))
990 PrevTokenKind
= Tok
.Kind
;
991 OS
<< Source
.slice(Tok
.Offset
, Tok
.getEnd());