1 //===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This code simply runs the preprocessor on the input file and prints out the
10 // result. This is the traditional behavior of the -E option.
12 //===----------------------------------------------------------------------===//
14 #include "clang/Frontend/Utils.h"
15 #include "clang/Basic/CharInfo.h"
16 #include "clang/Basic/Diagnostic.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "clang/Frontend/PreprocessorOutputOptions.h"
19 #include "clang/Lex/MacroInfo.h"
20 #include "clang/Lex/PPCallbacks.h"
21 #include "clang/Lex/Pragma.h"
22 #include "clang/Lex/Preprocessor.h"
23 #include "clang/Lex/TokenConcatenation.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallString.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/Support/ErrorHandling.h"
28 #include "llvm/Support/raw_ostream.h"
30 using namespace clang
;
32 /// PrintMacroDefinition - Print a macro definition in a form that will be
33 /// properly accepted back as a definition.
34 static void PrintMacroDefinition(const IdentifierInfo
&II
, const MacroInfo
&MI
,
35 Preprocessor
&PP
, raw_ostream
&OS
) {
36 OS
<< "#define " << II
.getName();
38 if (MI
.isFunctionLike()) {
40 if (!MI
.param_empty()) {
41 MacroInfo::param_iterator AI
= MI
.param_begin(), E
= MI
.param_end();
42 for (; AI
+1 != E
; ++AI
) {
43 OS
<< (*AI
)->getName();
48 if ((*AI
)->getName() == "__VA_ARGS__")
51 OS
<< (*AI
)->getName();
54 if (MI
.isGNUVarargs())
55 OS
<< "..."; // #define foo(x...)
60 // GCC always emits a space, even if the macro body is empty. However, we do not
61 // want to emit two spaces if the first token has a leading space.
62 if (MI
.tokens_empty() || !MI
.tokens_begin()->hasLeadingSpace())
65 SmallString
<128> SpellingBuffer
;
66 for (const auto &T
: MI
.tokens()) {
67 if (T
.hasLeadingSpace())
70 OS
<< PP
.getSpelling(T
, SpellingBuffer
);
74 //===----------------------------------------------------------------------===//
75 // Preprocessed token printer
76 //===----------------------------------------------------------------------===//
79 class PrintPPOutputPPCallbacks
: public PPCallbacks
{
82 TokenConcatenation ConcatInfo
;
88 bool EmittedTokensOnThisLine
;
89 bool EmittedDirectiveOnThisLine
;
90 SrcMgr::CharacteristicKind FileType
;
91 SmallString
<512> CurFilename
;
93 bool DisableLineMarkers
;
95 bool DumpIncludeDirectives
;
96 bool UseLineDirectives
;
97 bool IsFirstFileEntered
;
98 bool MinimizeWhitespace
;
105 PrintPPOutputPPCallbacks(Preprocessor
&pp
, raw_ostream
&os
, bool lineMarkers
,
106 bool defines
, bool DumpIncludeDirectives
,
107 bool UseLineDirectives
, bool MinimizeWhitespace
,
109 : PP(pp
), SM(PP
.getSourceManager()), ConcatInfo(PP
), OS(os
),
110 DisableLineMarkers(lineMarkers
), DumpDefines(defines
),
111 DumpIncludeDirectives(DumpIncludeDirectives
),
112 UseLineDirectives(UseLineDirectives
),
113 MinimizeWhitespace(MinimizeWhitespace
), DirectivesOnly(DirectivesOnly
) {
115 CurFilename
+= "<uninit>";
116 EmittedTokensOnThisLine
= false;
117 EmittedDirectiveOnThisLine
= false;
118 FileType
= SrcMgr::C_User
;
120 IsFirstFileEntered
= false;
122 PrevTok
.startToken();
123 PrevPrevTok
.startToken();
126 bool isMinimizeWhitespace() const { return MinimizeWhitespace
; }
128 void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine
= true; }
129 bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine
; }
131 void setEmittedDirectiveOnThisLine() { EmittedDirectiveOnThisLine
= true; }
132 bool hasEmittedDirectiveOnThisLine() const {
133 return EmittedDirectiveOnThisLine
;
136 /// Ensure that the output stream position is at the beginning of a new line,
137 /// inserting a newline if it is not. It is intended to ensure that directives
138 /// that do not come from the input source (such as #line) start
139 /// in the first column. To insert newlines that represent the input, use
140 /// MoveToLine(/*...*/, /*RequireStartOfLine=*/true).
141 void startNewLineIfNeeded();
143 void FileChanged(SourceLocation Loc
, FileChangeReason Reason
,
144 SrcMgr::CharacteristicKind FileType
,
145 FileID PrevFID
) override
;
146 void InclusionDirective(SourceLocation HashLoc
, const Token
&IncludeTok
,
147 StringRef FileName
, bool IsAngled
,
148 CharSourceRange FilenameRange
,
149 OptionalFileEntryRef File
, StringRef SearchPath
,
150 StringRef RelativePath
, const Module
*Imported
,
151 SrcMgr::CharacteristicKind FileType
) override
;
152 void Ident(SourceLocation Loc
, StringRef str
) override
;
153 void PragmaMessage(SourceLocation Loc
, StringRef Namespace
,
154 PragmaMessageKind Kind
, StringRef Str
) override
;
155 void PragmaDebug(SourceLocation Loc
, StringRef DebugType
) override
;
156 void PragmaDiagnosticPush(SourceLocation Loc
, StringRef Namespace
) override
;
157 void PragmaDiagnosticPop(SourceLocation Loc
, StringRef Namespace
) override
;
158 void PragmaDiagnostic(SourceLocation Loc
, StringRef Namespace
,
159 diag::Severity Map
, StringRef Str
) override
;
160 void PragmaWarning(SourceLocation Loc
, PragmaWarningSpecifier WarningSpec
,
161 ArrayRef
<int> Ids
) override
;
162 void PragmaWarningPush(SourceLocation Loc
, int Level
) override
;
163 void PragmaWarningPop(SourceLocation Loc
) override
;
164 void PragmaExecCharsetPush(SourceLocation Loc
, StringRef Str
) override
;
165 void PragmaExecCharsetPop(SourceLocation Loc
) override
;
166 void PragmaAssumeNonNullBegin(SourceLocation Loc
) override
;
167 void PragmaAssumeNonNullEnd(SourceLocation Loc
) override
;
169 /// Insert whitespace before emitting the next token.
171 /// @param Tok Next token to be emitted.
172 /// @param RequireSpace Ensure at least one whitespace is emitted. Useful
173 /// if non-tokens have been emitted to the stream.
174 /// @param RequireSameLine Never emit newlines. Useful when semantics depend
175 /// on being on the same line, such as directives.
176 void HandleWhitespaceBeforeTok(const Token
&Tok
, bool RequireSpace
,
177 bool RequireSameLine
);
179 /// Move to the line of the provided source location. This will
180 /// return true if a newline was inserted or if
181 /// the requested location is the first token on the first line.
182 /// In these cases the next output will be the first column on the line and
183 /// make it possible to insert indentation. The newline was inserted
184 /// implicitly when at the beginning of the file.
186 /// @param Tok Token where to move to.
187 /// @param RequireStartOfLine Whether the next line depends on being in the
188 /// first column, such as a directive.
190 /// @return Whether column adjustments are necessary.
191 bool MoveToLine(const Token
&Tok
, bool RequireStartOfLine
) {
192 PresumedLoc PLoc
= SM
.getPresumedLoc(Tok
.getLocation());
193 unsigned TargetLine
= PLoc
.isValid() ? PLoc
.getLine() : CurLine
;
195 Tok
.isAtStartOfLine() && PLoc
.isValid() && PLoc
.getLine() == 1;
196 return MoveToLine(TargetLine
, RequireStartOfLine
) || IsFirstInFile
;
199 /// Move to the line of the provided source location. Returns true if a new
200 /// line was inserted.
201 bool MoveToLine(SourceLocation Loc
, bool RequireStartOfLine
) {
202 PresumedLoc PLoc
= SM
.getPresumedLoc(Loc
);
203 unsigned TargetLine
= PLoc
.isValid() ? PLoc
.getLine() : CurLine
;
204 return MoveToLine(TargetLine
, RequireStartOfLine
);
206 bool MoveToLine(unsigned LineNo
, bool RequireStartOfLine
);
208 bool AvoidConcat(const Token
&PrevPrevTok
, const Token
&PrevTok
,
210 return ConcatInfo
.AvoidConcat(PrevPrevTok
, PrevTok
, Tok
);
212 void WriteLineInfo(unsigned LineNo
, const char *Extra
=nullptr,
213 unsigned ExtraLen
=0);
214 bool LineMarkersAreDisabled() const { return DisableLineMarkers
; }
215 void HandleNewlinesInToken(const char *TokStr
, unsigned Len
);
217 /// MacroDefined - This hook is called whenever a macro definition is seen.
218 void MacroDefined(const Token
&MacroNameTok
,
219 const MacroDirective
*MD
) override
;
221 /// MacroUndefined - This hook is called whenever a macro #undef is seen.
222 void MacroUndefined(const Token
&MacroNameTok
,
223 const MacroDefinition
&MD
,
224 const MacroDirective
*Undef
) override
;
226 void BeginModule(const Module
*M
);
227 void EndModule(const Module
*M
);
229 } // end anonymous namespace
231 void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo
,
234 startNewLineIfNeeded();
236 // Emit #line directives or GNU line markers depending on what mode we're in.
237 if (UseLineDirectives
) {
238 OS
<< "#line" << ' ' << LineNo
<< ' ' << '"';
239 OS
.write_escaped(CurFilename
);
242 OS
<< '#' << ' ' << LineNo
<< ' ' << '"';
243 OS
.write_escaped(CurFilename
);
247 OS
.write(Extra
, ExtraLen
);
249 if (FileType
== SrcMgr::C_System
)
251 else if (FileType
== SrcMgr::C_ExternCSystem
)
257 /// MoveToLine - Move the output to the source line specified by the location
258 /// object. We can do this by emitting some number of \n's, or by emitting a
259 /// #line directive. This returns false if already at the specified line, true
260 /// if some newlines were emitted.
261 bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo
,
262 bool RequireStartOfLine
) {
263 // If it is required to start a new line or finish the current, insert
264 // vertical whitespace now and take it into account when moving to the
266 bool StartedNewLine
= false;
267 if ((RequireStartOfLine
&& EmittedTokensOnThisLine
) ||
268 EmittedDirectiveOnThisLine
) {
270 StartedNewLine
= true;
272 EmittedTokensOnThisLine
= false;
273 EmittedDirectiveOnThisLine
= false;
276 // If this line is "close enough" to the original line, just print newlines,
277 // otherwise print a #line directive.
278 if (CurLine
== LineNo
) {
279 // Nothing to do if we are already on the correct line.
280 } else if (MinimizeWhitespace
&& DisableLineMarkers
) {
281 // With -E -P -fminimize-whitespace, don't emit anything if not necessary.
282 } else if (!StartedNewLine
&& LineNo
- CurLine
== 1) {
283 // Printing a single line has priority over printing a #line directive, even
284 // when minimizing whitespace which otherwise would print #line directives
285 // for every single line.
287 StartedNewLine
= true;
288 } else if (!DisableLineMarkers
) {
289 if (LineNo
- CurLine
<= 8) {
290 const char *NewLines
= "\n\n\n\n\n\n\n\n";
291 OS
.write(NewLines
, LineNo
- CurLine
);
293 // Emit a #line or line marker.
294 WriteLineInfo(LineNo
, nullptr, 0);
296 StartedNewLine
= true;
297 } else if (EmittedTokensOnThisLine
) {
298 // If we are not on the correct line and don't need to be line-correct,
299 // at least ensure we start on a new line.
301 StartedNewLine
= true;
304 if (StartedNewLine
) {
305 EmittedTokensOnThisLine
= false;
306 EmittedDirectiveOnThisLine
= false;
310 return StartedNewLine
;
313 void PrintPPOutputPPCallbacks::startNewLineIfNeeded() {
314 if (EmittedTokensOnThisLine
|| EmittedDirectiveOnThisLine
) {
316 EmittedTokensOnThisLine
= false;
317 EmittedDirectiveOnThisLine
= false;
321 /// FileChanged - Whenever the preprocessor enters or exits a #include file
322 /// it invokes this handler. Update our conception of the current source
324 void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc
,
325 FileChangeReason Reason
,
326 SrcMgr::CharacteristicKind NewFileType
,
328 // Unless we are exiting a #include, make sure to skip ahead to the line the
329 // #include directive was at.
330 SourceManager
&SourceMgr
= SM
;
332 PresumedLoc UserLoc
= SourceMgr
.getPresumedLoc(Loc
);
333 if (UserLoc
.isInvalid())
336 unsigned NewLine
= UserLoc
.getLine();
338 if (Reason
== PPCallbacks::EnterFile
) {
339 SourceLocation IncludeLoc
= UserLoc
.getIncludeLoc();
340 if (IncludeLoc
.isValid())
341 MoveToLine(IncludeLoc
, /*RequireStartOfLine=*/false);
342 } else if (Reason
== PPCallbacks::SystemHeaderPragma
) {
343 // GCC emits the # directive for this directive on the line AFTER the
344 // directive and emits a bunch of spaces that aren't needed. This is because
345 // otherwise we will emit a line marker for THIS line, which requires an
346 // extra blank line after the directive to avoid making all following lines
347 // off by one. We can do better by simply incrementing NewLine here.
354 CurFilename
+= UserLoc
.getFilename();
355 FileType
= NewFileType
;
357 if (DisableLineMarkers
) {
358 if (!MinimizeWhitespace
)
359 startNewLineIfNeeded();
364 WriteLineInfo(CurLine
);
368 // Do not emit an enter marker for the main file (which we expect is the first
369 // entered file). This matches gcc, and improves compatibility with some tools
370 // which track the # line markers as a way to determine when the preprocessed
371 // output is in the context of the main file.
372 if (Reason
== PPCallbacks::EnterFile
&& !IsFirstFileEntered
) {
373 IsFirstFileEntered
= true;
378 case PPCallbacks::EnterFile
:
379 WriteLineInfo(CurLine
, " 1", 2);
381 case PPCallbacks::ExitFile
:
382 WriteLineInfo(CurLine
, " 2", 2);
384 case PPCallbacks::SystemHeaderPragma
:
385 case PPCallbacks::RenameFile
:
386 WriteLineInfo(CurLine
);
391 void PrintPPOutputPPCallbacks::InclusionDirective(
392 SourceLocation HashLoc
, const Token
&IncludeTok
, StringRef FileName
,
393 bool IsAngled
, CharSourceRange FilenameRange
, OptionalFileEntryRef File
,
394 StringRef SearchPath
, StringRef RelativePath
, const Module
*Imported
,
395 SrcMgr::CharacteristicKind FileType
) {
396 // In -dI mode, dump #include directives prior to dumping their content or
398 if (DumpIncludeDirectives
) {
399 MoveToLine(HashLoc
, /*RequireStartOfLine=*/true);
400 const std::string TokenText
= PP
.getSpelling(IncludeTok
);
401 assert(!TokenText
.empty());
402 OS
<< "#" << TokenText
<< " "
403 << (IsAngled
? '<' : '"') << FileName
<< (IsAngled
? '>' : '"')
404 << " /* clang -E -dI */";
405 setEmittedDirectiveOnThisLine();
408 // When preprocessing, turn implicit imports into module import pragmas.
410 switch (IncludeTok
.getIdentifierInfo()->getPPKeywordID()) {
411 case tok::pp_include
:
413 case tok::pp_include_next
:
414 MoveToLine(HashLoc
, /*RequireStartOfLine=*/true);
415 OS
<< "#pragma clang module import " << Imported
->getFullModuleName(true)
416 << " /* clang -E: implicit import for "
417 << "#" << PP
.getSpelling(IncludeTok
) << " "
418 << (IsAngled
? '<' : '"') << FileName
<< (IsAngled
? '>' : '"')
420 setEmittedDirectiveOnThisLine();
423 case tok::pp___include_macros
:
424 // #__include_macros has no effect on a user of a preprocessed source
425 // file; the only effect is on preprocessing.
427 // FIXME: That's not *quite* true: it causes the module in question to
428 // be loaded, which can affect downstream diagnostics.
432 llvm_unreachable("unknown include directive kind");
438 /// Handle entering the scope of a module during a module compilation.
439 void PrintPPOutputPPCallbacks::BeginModule(const Module
*M
) {
440 startNewLineIfNeeded();
441 OS
<< "#pragma clang module begin " << M
->getFullModuleName(true);
442 setEmittedDirectiveOnThisLine();
445 /// Handle leaving the scope of a module during a module compilation.
446 void PrintPPOutputPPCallbacks::EndModule(const Module
*M
) {
447 startNewLineIfNeeded();
448 OS
<< "#pragma clang module end /*" << M
->getFullModuleName(true) << "*/";
449 setEmittedDirectiveOnThisLine();
452 /// Ident - Handle #ident directives when read by the preprocessor.
454 void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc
, StringRef S
) {
455 MoveToLine(Loc
, /*RequireStartOfLine=*/true);
457 OS
.write("#ident ", strlen("#ident "));
458 OS
.write(S
.begin(), S
.size());
459 setEmittedTokensOnThisLine();
462 /// MacroDefined - This hook is called whenever a macro definition is seen.
463 void PrintPPOutputPPCallbacks::MacroDefined(const Token
&MacroNameTok
,
464 const MacroDirective
*MD
) {
465 const MacroInfo
*MI
= MD
->getMacroInfo();
466 // Print out macro definitions in -dD mode and when we have -fdirectives-only
467 // for C++20 header units.
468 if ((!DumpDefines
&& !DirectivesOnly
) ||
469 // Ignore __FILE__ etc.
470 MI
->isBuiltinMacro())
473 SourceLocation DefLoc
= MI
->getDefinitionLoc();
474 if (DirectivesOnly
&& !MI
->isUsed()) {
475 SourceManager
&SM
= PP
.getSourceManager();
476 if (SM
.isWrittenInBuiltinFile(DefLoc
) ||
477 SM
.isWrittenInCommandLineFile(DefLoc
))
480 MoveToLine(DefLoc
, /*RequireStartOfLine=*/true);
481 PrintMacroDefinition(*MacroNameTok
.getIdentifierInfo(), *MI
, PP
, OS
);
482 setEmittedDirectiveOnThisLine();
485 void PrintPPOutputPPCallbacks::MacroUndefined(const Token
&MacroNameTok
,
486 const MacroDefinition
&MD
,
487 const MacroDirective
*Undef
) {
488 // Print out macro definitions in -dD mode and when we have -fdirectives-only
489 // for C++20 header units.
490 if (!DumpDefines
&& !DirectivesOnly
)
493 MoveToLine(MacroNameTok
.getLocation(), /*RequireStartOfLine=*/true);
494 OS
<< "#undef " << MacroNameTok
.getIdentifierInfo()->getName();
495 setEmittedDirectiveOnThisLine();
498 static void outputPrintable(raw_ostream
&OS
, StringRef Str
) {
499 for (unsigned char Char
: Str
) {
500 if (isPrintable(Char
) && Char
!= '\\' && Char
!= '"')
502 else // Output anything hard as an octal escape.
504 << (char)('0' + ((Char
>> 6) & 7))
505 << (char)('0' + ((Char
>> 3) & 7))
506 << (char)('0' + ((Char
>> 0) & 7));
510 void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc
,
512 PragmaMessageKind Kind
,
514 MoveToLine(Loc
, /*RequireStartOfLine=*/true);
516 if (!Namespace
.empty())
517 OS
<< Namespace
<< ' ';
530 outputPrintable(OS
, Str
);
532 if (Kind
== PMK_Message
)
534 setEmittedDirectiveOnThisLine();
537 void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc
,
538 StringRef DebugType
) {
539 MoveToLine(Loc
, /*RequireStartOfLine=*/true);
541 OS
<< "#pragma clang __debug ";
544 setEmittedDirectiveOnThisLine();
547 void PrintPPOutputPPCallbacks::
548 PragmaDiagnosticPush(SourceLocation Loc
, StringRef Namespace
) {
549 MoveToLine(Loc
, /*RequireStartOfLine=*/true);
550 OS
<< "#pragma " << Namespace
<< " diagnostic push";
551 setEmittedDirectiveOnThisLine();
554 void PrintPPOutputPPCallbacks::
555 PragmaDiagnosticPop(SourceLocation Loc
, StringRef Namespace
) {
556 MoveToLine(Loc
, /*RequireStartOfLine=*/true);
557 OS
<< "#pragma " << Namespace
<< " diagnostic pop";
558 setEmittedDirectiveOnThisLine();
561 void PrintPPOutputPPCallbacks::PragmaDiagnostic(SourceLocation Loc
,
565 MoveToLine(Loc
, /*RequireStartOfLine=*/true);
566 OS
<< "#pragma " << Namespace
<< " diagnostic ";
568 case diag::Severity::Remark
:
571 case diag::Severity::Warning
:
574 case diag::Severity::Error
:
577 case diag::Severity::Ignored
:
580 case diag::Severity::Fatal
:
584 OS
<< " \"" << Str
<< '"';
585 setEmittedDirectiveOnThisLine();
588 void PrintPPOutputPPCallbacks::PragmaWarning(SourceLocation Loc
,
589 PragmaWarningSpecifier WarningSpec
,
591 MoveToLine(Loc
, /*RequireStartOfLine=*/true);
593 OS
<< "#pragma warning(";
594 switch(WarningSpec
) {
595 case PWS_Default
: OS
<< "default"; break;
596 case PWS_Disable
: OS
<< "disable"; break;
597 case PWS_Error
: OS
<< "error"; break;
598 case PWS_Once
: OS
<< "once"; break;
599 case PWS_Suppress
: OS
<< "suppress"; break;
600 case PWS_Level1
: OS
<< '1'; break;
601 case PWS_Level2
: OS
<< '2'; break;
602 case PWS_Level3
: OS
<< '3'; break;
603 case PWS_Level4
: OS
<< '4'; break;
607 for (ArrayRef
<int>::iterator I
= Ids
.begin(), E
= Ids
.end(); I
!= E
; ++I
)
610 setEmittedDirectiveOnThisLine();
613 void PrintPPOutputPPCallbacks::PragmaWarningPush(SourceLocation Loc
,
615 MoveToLine(Loc
, /*RequireStartOfLine=*/true);
616 OS
<< "#pragma warning(push";
620 setEmittedDirectiveOnThisLine();
623 void PrintPPOutputPPCallbacks::PragmaWarningPop(SourceLocation Loc
) {
624 MoveToLine(Loc
, /*RequireStartOfLine=*/true);
625 OS
<< "#pragma warning(pop)";
626 setEmittedDirectiveOnThisLine();
629 void PrintPPOutputPPCallbacks::PragmaExecCharsetPush(SourceLocation Loc
,
631 MoveToLine(Loc
, /*RequireStartOfLine=*/true);
632 OS
<< "#pragma character_execution_set(push";
636 setEmittedDirectiveOnThisLine();
639 void PrintPPOutputPPCallbacks::PragmaExecCharsetPop(SourceLocation Loc
) {
640 MoveToLine(Loc
, /*RequireStartOfLine=*/true);
641 OS
<< "#pragma character_execution_set(pop)";
642 setEmittedDirectiveOnThisLine();
645 void PrintPPOutputPPCallbacks::
646 PragmaAssumeNonNullBegin(SourceLocation Loc
) {
647 MoveToLine(Loc
, /*RequireStartOfLine=*/true);
648 OS
<< "#pragma clang assume_nonnull begin";
649 setEmittedDirectiveOnThisLine();
652 void PrintPPOutputPPCallbacks::
653 PragmaAssumeNonNullEnd(SourceLocation Loc
) {
654 MoveToLine(Loc
, /*RequireStartOfLine=*/true);
655 OS
<< "#pragma clang assume_nonnull end";
656 setEmittedDirectiveOnThisLine();
659 void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token
&Tok
,
661 bool RequireSameLine
) {
662 // These tokens are not expanded to anything and don't need whitespace before
664 if (Tok
.is(tok::eof
) ||
665 (Tok
.isAnnotation() && !Tok
.is(tok::annot_header_unit
) &&
666 !Tok
.is(tok::annot_module_begin
) && !Tok
.is(tok::annot_module_end
)))
669 // EmittedDirectiveOnThisLine takes priority over RequireSameLine.
670 if ((!RequireSameLine
|| EmittedDirectiveOnThisLine
) &&
671 MoveToLine(Tok
, /*RequireStartOfLine=*/EmittedDirectiveOnThisLine
)) {
672 if (MinimizeWhitespace
) {
673 // Avoid interpreting hash as a directive under -fpreprocessed.
674 if (Tok
.is(tok::hash
))
677 // Print out space characters so that the first token on a line is
678 // indented for easy reading.
679 unsigned ColNo
= SM
.getExpansionColumnNumber(Tok
.getLocation());
681 // The first token on a line can have a column number of 1, yet still
682 // expect leading white space, if a macro expansion in column 1 starts
683 // with an empty macro argument, or an empty nested macro expansion. In
684 // this case, move the token to column 2.
685 if (ColNo
== 1 && Tok
.hasLeadingSpace())
688 // This hack prevents stuff like:
690 // HASH define foo bar
691 // From having the # character end up at column 1, which makes it so it
692 // is not handled as a #define next time through the preprocessor if in
693 // -fpreprocessed mode.
694 if (ColNo
<= 1 && Tok
.is(tok::hash
))
697 // Otherwise, indent the appropriate number of spaces.
698 for (; ColNo
> 1; --ColNo
)
702 // Insert whitespace between the previous and next token if either
703 // - The caller requires it
704 // - The input had whitespace between them and we are not in
705 // whitespace-minimization mode
706 // - The whitespace is necessary to keep the tokens apart and there is not
707 // already a newline between them
708 if (RequireSpace
|| (!MinimizeWhitespace
&& Tok
.hasLeadingSpace()) ||
709 ((EmittedTokensOnThisLine
|| EmittedDirectiveOnThisLine
) &&
710 AvoidConcat(PrevPrevTok
, PrevTok
, Tok
)))
714 PrevPrevTok
= PrevTok
;
718 void PrintPPOutputPPCallbacks::HandleNewlinesInToken(const char *TokStr
,
720 unsigned NumNewlines
= 0;
721 for (; Len
; --Len
, ++TokStr
) {
722 if (*TokStr
!= '\n' &&
728 // If we have \n\r or \r\n, skip both and count as one line.
730 (TokStr
[1] == '\n' || TokStr
[1] == '\r') &&
731 TokStr
[0] != TokStr
[1]) {
737 if (NumNewlines
== 0) return;
739 CurLine
+= NumNewlines
;
744 struct UnknownPragmaHandler
: public PragmaHandler
{
746 PrintPPOutputPPCallbacks
*Callbacks
;
748 // Set to true if tokens should be expanded
749 bool ShouldExpandTokens
;
751 UnknownPragmaHandler(const char *prefix
, PrintPPOutputPPCallbacks
*callbacks
,
752 bool RequireTokenExpansion
)
753 : Prefix(prefix
), Callbacks(callbacks
),
754 ShouldExpandTokens(RequireTokenExpansion
) {}
755 void HandlePragma(Preprocessor
&PP
, PragmaIntroducer Introducer
,
756 Token
&PragmaTok
) override
{
757 // Figure out what line we went to and insert the appropriate number of
758 // newline characters.
759 Callbacks
->MoveToLine(PragmaTok
.getLocation(), /*RequireStartOfLine=*/true);
760 Callbacks
->OS
.write(Prefix
, strlen(Prefix
));
761 Callbacks
->setEmittedTokensOnThisLine();
763 if (ShouldExpandTokens
) {
764 // The first token does not have expanded macros. Expand them, if
766 auto Toks
= std::make_unique
<Token
[]>(1);
768 PP
.EnterTokenStream(std::move(Toks
), /*NumToks=*/1,
769 /*DisableMacroExpansion=*/false,
770 /*IsReinject=*/false);
774 // Read and print all of the pragma tokens.
776 while (PragmaTok
.isNot(tok::eod
)) {
777 Callbacks
->HandleWhitespaceBeforeTok(PragmaTok
, /*RequireSpace=*/IsFirst
,
778 /*RequireSameLine=*/true);
780 std::string TokSpell
= PP
.getSpelling(PragmaTok
);
781 Callbacks
->OS
.write(&TokSpell
[0], TokSpell
.size());
782 Callbacks
->setEmittedTokensOnThisLine();
784 if (ShouldExpandTokens
)
787 PP
.LexUnexpandedToken(PragmaTok
);
789 Callbacks
->setEmittedDirectiveOnThisLine();
792 } // end anonymous namespace
795 static void PrintPreprocessedTokens(Preprocessor
&PP
, Token
&Tok
,
796 PrintPPOutputPPCallbacks
*Callbacks
,
798 bool DropComments
= PP
.getLangOpts().TraditionalCPP
&&
799 !PP
.getCommentRetentionState();
801 bool IsStartOfLine
= false;
804 // Two lines joined with line continuation ('\' as last character on the
805 // line) must be emitted as one line even though Tok.getLine() returns two
806 // different values. In this situation Tok.isAtStartOfLine() is false even
807 // though it may be the first token on the lexical line. When
808 // dropping/skipping a token that is at the start of a line, propagate the
809 // start-of-line-ness to the next token to not append it to the previous
811 IsStartOfLine
= IsStartOfLine
|| Tok
.isAtStartOfLine();
813 Callbacks
->HandleWhitespaceBeforeTok(Tok
, /*RequireSpace=*/false,
814 /*RequireSameLine=*/!IsStartOfLine
);
816 if (DropComments
&& Tok
.is(tok::comment
)) {
817 // Skip comments. Normally the preprocessor does not generate
818 // tok::comment nodes at all when not keeping comments, but under
819 // -traditional-cpp the lexer keeps /all/ whitespace, including comments.
822 } else if (Tok
.is(tok::eod
)) {
823 // Don't print end of directive tokens, since they are typically newlines
824 // that mess up our line tracking. These come from unknown pre-processor
825 // directives or hash-prefixed comments in standalone assembly files.
827 // FIXME: The token on the next line after #include should have
828 // Tok.isAtStartOfLine() set.
829 IsStartOfLine
= true;
831 } else if (Tok
.is(tok::annot_module_include
)) {
832 // PrintPPOutputPPCallbacks::InclusionDirective handles producing
833 // appropriate output here. Ignore this token entirely.
835 IsStartOfLine
= true;
837 } else if (Tok
.is(tok::annot_module_begin
)) {
838 // FIXME: We retrieve this token after the FileChanged callback, and
839 // retrieve the module_end token before the FileChanged callback, so
840 // we render this within the file and render the module end outside the
841 // file, but this is backwards from the token locations: the module_begin
842 // token is at the include location (outside the file) and the module_end
843 // token is at the EOF location (within the file).
844 Callbacks
->BeginModule(
845 reinterpret_cast<Module
*>(Tok
.getAnnotationValue()));
847 IsStartOfLine
= true;
849 } else if (Tok
.is(tok::annot_module_end
)) {
850 Callbacks
->EndModule(
851 reinterpret_cast<Module
*>(Tok
.getAnnotationValue()));
853 IsStartOfLine
= true;
855 } else if (Tok
.is(tok::annot_header_unit
)) {
856 // This is a header-name that has been (effectively) converted into a
858 // FIXME: The module name could contain non-identifier module name
859 // components. We don't have a good way to round-trip those.
860 Module
*M
= reinterpret_cast<Module
*>(Tok
.getAnnotationValue());
861 std::string Name
= M
->getFullModuleName();
862 OS
.write(Name
.data(), Name
.size());
863 Callbacks
->HandleNewlinesInToken(Name
.data(), Name
.size());
864 } else if (Tok
.isAnnotation()) {
865 // Ignore annotation tokens created by pragmas - the pragmas themselves
866 // will be reproduced in the preprocessed output.
869 } else if (IdentifierInfo
*II
= Tok
.getIdentifierInfo()) {
871 } else if (Tok
.isLiteral() && !Tok
.needsCleaning() &&
872 Tok
.getLiteralData()) {
873 OS
.write(Tok
.getLiteralData(), Tok
.getLength());
874 } else if (Tok
.getLength() < std::size(Buffer
)) {
875 const char *TokPtr
= Buffer
;
876 unsigned Len
= PP
.getSpelling(Tok
, TokPtr
);
877 OS
.write(TokPtr
, Len
);
879 // Tokens that can contain embedded newlines need to adjust our current
881 // FIXME: The token may end with a newline in which case
882 // setEmittedDirectiveOnThisLine/setEmittedTokensOnThisLine afterwards is
884 if (Tok
.getKind() == tok::comment
|| Tok
.getKind() == tok::unknown
)
885 Callbacks
->HandleNewlinesInToken(TokPtr
, Len
);
886 if (Tok
.is(tok::comment
) && Len
>= 2 && TokPtr
[0] == '/' &&
888 // It's a line comment;
889 // Ensure that we don't concatenate anything behind it.
890 Callbacks
->setEmittedDirectiveOnThisLine();
893 std::string S
= PP
.getSpelling(Tok
);
894 OS
.write(S
.data(), S
.size());
896 // Tokens that can contain embedded newlines need to adjust our current
898 if (Tok
.getKind() == tok::comment
|| Tok
.getKind() == tok::unknown
)
899 Callbacks
->HandleNewlinesInToken(S
.data(), S
.size());
900 if (Tok
.is(tok::comment
) && S
.size() >= 2 && S
[0] == '/' && S
[1] == '/') {
901 // It's a line comment;
902 // Ensure that we don't concatenate anything behind it.
903 Callbacks
->setEmittedDirectiveOnThisLine();
906 Callbacks
->setEmittedTokensOnThisLine();
907 IsStartOfLine
= false;
909 if (Tok
.is(tok::eof
)) break;
915 typedef std::pair
<const IdentifierInfo
*, MacroInfo
*> id_macro_pair
;
916 static int MacroIDCompare(const id_macro_pair
*LHS
, const id_macro_pair
*RHS
) {
917 return LHS
->first
->getName().compare(RHS
->first
->getName());
920 static void DoPrintMacros(Preprocessor
&PP
, raw_ostream
*OS
) {
921 // Ignore unknown pragmas.
924 // -dM mode just scans and ignores all tokens in the files, then dumps out
925 // the macro table at the end.
926 PP
.EnterMainSourceFile();
930 while (Tok
.isNot(tok::eof
));
932 SmallVector
<id_macro_pair
, 128> MacrosByID
;
933 for (Preprocessor::macro_iterator I
= PP
.macro_begin(), E
= PP
.macro_end();
935 auto *MD
= I
->second
.getLatest();
936 if (MD
&& MD
->isDefined())
937 MacrosByID
.push_back(id_macro_pair(I
->first
, MD
->getMacroInfo()));
939 llvm::array_pod_sort(MacrosByID
.begin(), MacrosByID
.end(), MacroIDCompare
);
941 for (unsigned i
= 0, e
= MacrosByID
.size(); i
!= e
; ++i
) {
942 MacroInfo
&MI
= *MacrosByID
[i
].second
;
943 // Ignore computed macros like __LINE__ and friends.
944 if (MI
.isBuiltinMacro()) continue;
946 PrintMacroDefinition(*MacrosByID
[i
].first
, MI
, PP
, *OS
);
951 /// DoPrintPreprocessedInput - This implements -E mode.
953 void clang::DoPrintPreprocessedInput(Preprocessor
&PP
, raw_ostream
*OS
,
954 const PreprocessorOutputOptions
&Opts
) {
955 // Show macros with no output is handled specially.
957 assert(Opts
.ShowMacros
&& "Not yet implemented!");
958 DoPrintMacros(PP
, OS
);
962 // Inform the preprocessor whether we want it to retain comments or not, due
964 PP
.SetCommentRetentionState(Opts
.ShowComments
, Opts
.ShowMacroComments
);
966 PrintPPOutputPPCallbacks
*Callbacks
= new PrintPPOutputPPCallbacks(
967 PP
, *OS
, !Opts
.ShowLineMarkers
, Opts
.ShowMacros
,
968 Opts
.ShowIncludeDirectives
, Opts
.UseLineDirectives
,
969 Opts
.MinimizeWhitespace
, Opts
.DirectivesOnly
);
971 // Expand macros in pragmas with -fms-extensions. The assumption is that
972 // the majority of pragmas in such a file will be Microsoft pragmas.
973 // Remember the handlers we will add so that we can remove them later.
974 std::unique_ptr
<UnknownPragmaHandler
> MicrosoftExtHandler(
975 new UnknownPragmaHandler(
976 "#pragma", Callbacks
,
977 /*RequireTokenExpansion=*/PP
.getLangOpts().MicrosoftExt
));
979 std::unique_ptr
<UnknownPragmaHandler
> GCCHandler(new UnknownPragmaHandler(
980 "#pragma GCC", Callbacks
,
981 /*RequireTokenExpansion=*/PP
.getLangOpts().MicrosoftExt
));
983 std::unique_ptr
<UnknownPragmaHandler
> ClangHandler(new UnknownPragmaHandler(
984 "#pragma clang", Callbacks
,
985 /*RequireTokenExpansion=*/PP
.getLangOpts().MicrosoftExt
));
987 PP
.AddPragmaHandler(MicrosoftExtHandler
.get());
988 PP
.AddPragmaHandler("GCC", GCCHandler
.get());
989 PP
.AddPragmaHandler("clang", ClangHandler
.get());
991 // The tokens after pragma omp need to be expanded.
993 // OpenMP [2.1, Directive format]
994 // Preprocessing tokens following the #pragma omp are subject to macro
996 std::unique_ptr
<UnknownPragmaHandler
> OpenMPHandler(
997 new UnknownPragmaHandler("#pragma omp", Callbacks
,
998 /*RequireTokenExpansion=*/true));
999 PP
.AddPragmaHandler("omp", OpenMPHandler
.get());
1001 PP
.addPPCallbacks(std::unique_ptr
<PPCallbacks
>(Callbacks
));
1003 // After we have configured the preprocessor, enter the main file.
1004 PP
.EnterMainSourceFile();
1005 if (Opts
.DirectivesOnly
)
1006 PP
.SetMacroExpansionOnlyInDirectives();
1008 // Consume all of the tokens that come from the predefines buffer. Those
1009 // should not be emitted into the output and are guaranteed to be at the
1011 const SourceManager
&SourceMgr
= PP
.getSourceManager();
1015 if (Tok
.is(tok::eof
) || !Tok
.getLocation().isFileID())
1018 PresumedLoc PLoc
= SourceMgr
.getPresumedLoc(Tok
.getLocation());
1019 if (PLoc
.isInvalid())
1022 if (strcmp(PLoc
.getFilename(), "<built-in>"))
1026 // Read all the preprocessed tokens, printing them out to the stream.
1027 PrintPreprocessedTokens(PP
, Tok
, Callbacks
, *OS
);
1030 // Remove the handlers we just added to leave the preprocessor in a sane state
1031 // so that it can be reused (for example by a clang::Parser instance).
1032 PP
.RemovePragmaHandler(MicrosoftExtHandler
.get());
1033 PP
.RemovePragmaHandler("GCC", GCCHandler
.get());
1034 PP
.RemovePragmaHandler("clang", ClangHandler
.get());
1035 PP
.RemovePragmaHandler("omp", OpenMPHandler
.get());