1 //===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This code simply runs the preprocessor on the input file and prints out the
10 // result. This is the traditional behavior of the -E option.
12 //===----------------------------------------------------------------------===//
14 #include "clang/Frontend/Utils.h"
15 #include "clang/Basic/CharInfo.h"
16 #include "clang/Basic/Diagnostic.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "clang/Frontend/PreprocessorOutputOptions.h"
19 #include "clang/Lex/MacroInfo.h"
20 #include "clang/Lex/PPCallbacks.h"
21 #include "clang/Lex/Pragma.h"
22 #include "clang/Lex/Preprocessor.h"
23 #include "clang/Lex/TokenConcatenation.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallString.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/Support/ErrorHandling.h"
28 #include "llvm/Support/raw_ostream.h"
30 using namespace clang
;
32 /// PrintMacroDefinition - Print a macro definition in a form that will be
33 /// properly accepted back as a definition.
///
/// Streams "#define " followed by the name of II to *OS, then the parameter
/// names of a function-like macro, then the spelling of each body token as
/// returned by PP.getSpelling().
/// NOTE(review): several lines of this function (separators, parentheses,
/// branch bodies and closing braces) are absent from this listing; only the
/// visible statements are described here.
34 static void PrintMacroDefinition(const IdentifierInfo
&II
, const MacroInfo
&MI
,
35 Preprocessor
&PP
, raw_ostream
*OS
) {
36 *OS
<< "#define " << II
.getName();
// Only function-like macros reach the parameter handling below.
38 if (MI
.isFunctionLike()) {
40 if (!MI
.param_empty()) {
41 MacroInfo::param_iterator AI
= MI
.param_begin(), E
= MI
.param_end();
// Visit every parameter except the last one: the loop stops once AI+1 == E.
42 for (; AI
+1 != E
; ++AI
) {
43 *OS
<< (*AI
)->getName();
// After the loop AI designates the final parameter; its name is compared
// with "__VA_ARGS__" (the statement guarded by this test is not visible).
48 if ((*AI
)->getName() == "__VA_ARGS__")
51 *OS
<< (*AI
)->getName();
54 if (MI
.isGNUVarargs())
55 *OS
<< "..."; // #define foo(x...)
60 // GCC always emits a space, even if the macro body is empty. However, we do
61 // not want to emit two spaces if the first token has a leading space.
62 if (MI
.tokens_empty() || !MI
.tokens_begin()->hasLeadingSpace())
// Scratch storage handed to getSpelling() for each body token.
65 SmallString
<128> SpellingBuffer
;
66 for (const auto &T
: MI
.tokens()) {
67 if (T
.hasLeadingSpace())
70 *OS
<< PP
.getSpelling(T
, SpellingBuffer
);
74 //===----------------------------------------------------------------------===//
75 // Preprocessed token printer
76 //===----------------------------------------------------------------------===//
// PPCallbacks subclass holding the state used while printing preprocessed
// output: what has been written on the current output line, which file is
// current, and the output options handed to the constructor.
// NOTE(review): the declarations of several members used by the constructor
// (PP, SM, OS, OrigOS, CurLine, DumpDefines, DirectivesOnly, PrevTok,
// PrevPrevTok), the access specifiers and the closing of the class are not
// present in this listing.
79 class PrintPPOutputPPCallbacks
: public PPCallbacks
{
// Helper consulted by AvoidConcat() below.
82 TokenConcatenation ConcatInfo
;
// Set by setEmittedTokensOnThisLine(); initialized to false.
88 bool EmittedTokensOnThisLine
;
// Set by setEmittedDirectiveOnThisLine(); initialized to false.
89 bool EmittedDirectiveOnThisLine
;
// Characteristic kind of the current file; initialized to SrcMgr::C_User.
90 SrcMgr::CharacteristicKind FileType
;
// Starts out holding "<uninit>".
91 SmallString
<512> CurFilename
;
// Initialized from the constructor's `lineMarkers` argument.
93 bool DisableLineMarkers
;
95 bool DumpIncludeDirectives
;
96 bool UseLineDirectives
;
// False until the constructor's initial value is changed elsewhere.
97 bool IsFirstFileEntered
;
98 bool MinimizeWhitespace
;
100 bool KeepSystemIncludes
;
// Only allocated when KeepSystemIncludes is set (see the constructor body).
102 std::unique_ptr
<llvm::raw_null_ostream
> NullOS
;
// Stores the given options and resets the per-line state. Note that the
// `lineMarkers` argument initializes DisableLineMarkers, and that `os` is
// stored both as OS and as OrigOS.
108 PrintPPOutputPPCallbacks(Preprocessor
&pp
, raw_ostream
*os
, bool lineMarkers
,
109 bool defines
, bool DumpIncludeDirectives
,
110 bool UseLineDirectives
, bool MinimizeWhitespace
,
111 bool DirectivesOnly
, bool KeepSystemIncludes
)
112 : PP(pp
), SM(PP
.getSourceManager()), ConcatInfo(PP
), OS(os
),
113 DisableLineMarkers(lineMarkers
), DumpDefines(defines
),
114 DumpIncludeDirectives(DumpIncludeDirectives
),
115 UseLineDirectives(UseLineDirectives
),
116 MinimizeWhitespace(MinimizeWhitespace
), DirectivesOnly(DirectivesOnly
),
117 KeepSystemIncludes(KeepSystemIncludes
), OrigOS(os
) {
119 CurFilename
+= "<uninit>";
120 EmittedTokensOnThisLine
= false;
121 EmittedDirectiveOnThisLine
= false;
122 FileType
= SrcMgr::C_User
;
124 IsFirstFileEntered
= false;
// The null stream is only needed when system include contents are dropped.
125 if (KeepSystemIncludes
)
126 NullOS
= std::make_unique
<llvm::raw_null_ostream
>();
128 PrevTok
.startToken();
129 PrevPrevTok
.startToken();
// Trivial accessors and setters for the flags above.
132 bool isMinimizeWhitespace() const { return MinimizeWhitespace
; }
134 void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine
= true; }
135 bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine
; }
137 void setEmittedDirectiveOnThisLine() { EmittedDirectiveOnThisLine
= true; }
138 bool hasEmittedDirectiveOnThisLine() const {
139 return EmittedDirectiveOnThisLine
;
142 /// Ensure that the output stream position is at the beginning of a new line
143 /// and inserts one if it does not. It is intended to ensure that directives
144 /// inserted by the directives not from the input source (such as #line) are
145 /// in the first column. To insert newlines that represent the input, use
146 /// MoveToLine(/*...*/, /*RequireStartOfLine=*/true).
147 void startNewLineIfNeeded();
// PPCallbacks overrides; each is defined out of line.
149 void FileChanged(SourceLocation Loc
, FileChangeReason Reason
,
150 SrcMgr::CharacteristicKind FileType
,
151 FileID PrevFID
) override
;
152 void InclusionDirective(SourceLocation HashLoc
, const Token
&IncludeTok
,
153 StringRef FileName
, bool IsAngled
,
154 CharSourceRange FilenameRange
,
155 OptionalFileEntryRef File
, StringRef SearchPath
,
156 StringRef RelativePath
, const Module
*Imported
,
157 SrcMgr::CharacteristicKind FileType
) override
;
158 void Ident(SourceLocation Loc
, StringRef str
) override
;
159 void PragmaMessage(SourceLocation Loc
, StringRef Namespace
,
160 PragmaMessageKind Kind
, StringRef Str
) override
;
161 void PragmaDebug(SourceLocation Loc
, StringRef DebugType
) override
;
162 void PragmaDiagnosticPush(SourceLocation Loc
, StringRef Namespace
) override
;
163 void PragmaDiagnosticPop(SourceLocation Loc
, StringRef Namespace
) override
;
164 void PragmaDiagnostic(SourceLocation Loc
, StringRef Namespace
,
165 diag::Severity Map
, StringRef Str
) override
;
166 void PragmaWarning(SourceLocation Loc
, PragmaWarningSpecifier WarningSpec
,
167 ArrayRef
<int> Ids
) override
;
168 void PragmaWarningPush(SourceLocation Loc
, int Level
) override
;
169 void PragmaWarningPop(SourceLocation Loc
) override
;
170 void PragmaExecCharsetPush(SourceLocation Loc
, StringRef Str
) override
;
171 void PragmaExecCharsetPop(SourceLocation Loc
) override
;
172 void PragmaAssumeNonNullBegin(SourceLocation Loc
) override
;
173 void PragmaAssumeNonNullEnd(SourceLocation Loc
) override
;
175 /// Insert whitespace before emitting the next token.
177 /// @param Tok Next token to be emitted.
178 /// @param RequireSpace Ensure at least one whitespace is emitted. Useful
179 /// if non-tokens have been emitted to the stream.
180 /// @param RequireSameLine Never emit newlines. Useful when semantics depend
181 /// on being on the same line, such as directives.
182 void HandleWhitespaceBeforeTok(const Token
&Tok
, bool RequireSpace
,
183 bool RequireSameLine
);
185 /// Move to the line of the provided source location. This will
186 /// return true if a newline was inserted or if
187 /// the requested location is the first token on the first line.
188 /// In these cases the next output will be the first column on the line and
189 /// make it possible to insert indention. The newline was inserted
190 /// implicitly when at the beginning of the file.
192 /// @param Tok Token where to move to.
193 /// @param RequireStartOfLine Whether the next line depends on being in the
194 /// first column, such as a directive.
196 /// @return Whether column adjustments are necessary.
197 bool MoveToLine(const Token
&Tok
, bool RequireStartOfLine
) {
198 PresumedLoc PLoc
= SM
.getPresumedLoc(Tok
.getLocation());
// An invalid presumed location keeps the current line as the target.
199 unsigned TargetLine
= PLoc
.isValid() ? PLoc
.getLine() : CurLine
;
// This expression is the value returned below as IsFirstInFile (the line
// declaring that variable is not present in this listing): the token starts
// a line and its presumed line number is 1.
201 Tok
.isAtStartOfLine() && PLoc
.isValid() && PLoc
.getLine() == 1;
202 return MoveToLine(TargetLine
, RequireStartOfLine
) || IsFirstInFile
;
205 /// Move to the line of the provided source location. Returns true if a new
206 /// line was inserted.
207 bool MoveToLine(SourceLocation Loc
, bool RequireStartOfLine
) {
208 PresumedLoc PLoc
= SM
.getPresumedLoc(Loc
);
// As above: fall back to the current line when the location is invalid.
209 unsigned TargetLine
= PLoc
.isValid() ? PLoc
.getLine() : CurLine
;
210 return MoveToLine(TargetLine
, RequireStartOfLine
);
212 bool MoveToLine(unsigned LineNo
, bool RequireStartOfLine
);
// Forwards to ConcatInfo.AvoidConcat() with the two previous tokens and Tok.
214 bool AvoidConcat(const Token
&PrevPrevTok
, const Token
&PrevTok
,
216 return ConcatInfo
.AvoidConcat(PrevPrevTok
, PrevTok
, Tok
);
218 void WriteLineInfo(unsigned LineNo
, const char *Extra
=nullptr,
219 unsigned ExtraLen
=0);
220 bool LineMarkersAreDisabled() const { return DisableLineMarkers
; }
221 void HandleNewlinesInToken(const char *TokStr
, unsigned Len
);
223 /// MacroDefined - This hook is called whenever a macro definition is seen.
224 void MacroDefined(const Token
&MacroNameTok
,
225 const MacroDirective
*MD
) override
;
227 /// MacroUndefined - This hook is called whenever a macro #undef is seen.
228 void MacroUndefined(const Token
&MacroNameTok
,
229 const MacroDefinition
&MD
,
230 const MacroDirective
*Undef
) override
;
// Not marked override; called directly from the token printing loop.
232 void BeginModule(const Module
*M
);
233 void EndModule(const Module
*M
);
235 } // end anonymous namespace
// Writes a line marker for LineNo and CurFilename to OS after making sure the
// output is at the start of a line. Extra/ExtraLen are passed to OS->write().
// NOTE(review): the rest of the signature, the header of the second branch,
// the statement for C_System files and the closing lines are absent from
// this listing.
237 void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo
,
240 startNewLineIfNeeded();
242 // Emit #line directives or GNU line markers depending on what mode we're in.
243 if (UseLineDirectives
) {
// #line form: number, then the opening quote of the escaped filename.
244 *OS
<< "#line" << ' ' << LineNo
<< ' ' << '"';
245 OS
->write_escaped(CurFilename
);
// Marker form starting with a bare '#'.
248 *OS
<< '#' << ' ' << LineNo
<< ' ' << '"';
249 OS
->write_escaped(CurFilename
);
253 OS
->write(Extra
, ExtraLen
);
255 if (FileType
== SrcMgr::C_System
)
// Extern-C system files get the four characters " 3 4" appended.
257 else if (FileType
== SrcMgr::C_ExternCSystem
)
258 OS
->write(" 3 4", 4);
263 /// MoveToLine - Move the output to the source line specified by the location
264 /// object. We can do this by emitting some number of \n's, or be emitting a
265 /// #line directive. This returns false if already at the specified line, true
266 /// if some newlines were emitted.
/// NOTE(review): the statements that write newlines and advance CurLine in
/// several branches are absent from this listing; the comments below only
/// describe the visible flag updates and conditions.
267 bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo
,
268 bool RequireStartOfLine
) {
269 // If it is required to start a new line or finish the current, insert
270 // vertical whitespace now and take it into account when moving to the
// target line.
272 bool StartedNewLine
= false;
// A pending directive always forces a fresh line; pending tokens only do so
// when the caller requires the start of a line.
273 if ((RequireStartOfLine
&& EmittedTokensOnThisLine
) ||
274 EmittedDirectiveOnThisLine
) {
276 StartedNewLine
= true;
278 EmittedTokensOnThisLine
= false;
279 EmittedDirectiveOnThisLine
= false;
282 // If this line is "close enough" to the original line, just print newlines,
283 // otherwise print a #line directive.
284 if (CurLine
== LineNo
) {
285 // Nothing to do if we are already on the correct line.
286 } else if (MinimizeWhitespace
&& DisableLineMarkers
) {
287 // With -E -P -fminimize-whitespace, don't emit anything if not necessary.
288 } else if (!StartedNewLine
&& LineNo
- CurLine
== 1) {
289 // Printing a single line has priority over printing a #line directive, even
290 // when minimizing whitespace which otherwise would print #line directives
291 // for every single line.
293 StartedNewLine
= true;
294 } else if (!DisableLineMarkers
) {
// NOTE(review): LineNo is unsigned; if CurLine is unsigned as well (its
// declaration is not visible here), a target before the current line makes
// this difference wrap to a large value, so the test below fails.
295 if (LineNo
- CurLine
<= 8) {
// A gap of at most eight lines is bridged by writing that many '\n' bytes.
296 const char *NewLines
= "\n\n\n\n\n\n\n\n";
297 OS
->write(NewLines
, LineNo
- CurLine
);
299 // Emit a #line or line marker.
300 WriteLineInfo(LineNo
, nullptr, 0);
302 StartedNewLine
= true;
303 } else if (EmittedTokensOnThisLine
) {
304 // If we are not on the correct line and don't need to be line-correct,
305 // at least ensure we start on a new line.
307 StartedNewLine
= true;
// Whenever a new line was started, nothing has been emitted on it yet.
310 if (StartedNewLine
) {
311 EmittedTokensOnThisLine
= false;
312 EmittedDirectiveOnThisLine
= false;
316 return StartedNewLine
;
// If tokens or a directive were emitted on the current output line, reset
// both per-line flags.
// NOTE(review): the statements between the condition and the flag resets
// (original lines 321 and 324 onward) are absent from this listing.
319 void PrintPPOutputPPCallbacks::startNewLineIfNeeded() {
320 if (EmittedTokensOnThisLine
|| EmittedDirectiveOnThisLine
) {
322 EmittedTokensOnThisLine
= false;
323 EmittedDirectiveOnThisLine
= false;
327 /// FileChanged - Whenever the preprocessor enters or exits a #include file
328 /// it invokes this handler. Update our conception of the current source
/// position.
/// NOTE(review): the last parameter, several early exits, the switch header
/// and the statements that update CurLine are absent from this listing.
330 void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc
,
331 FileChangeReason Reason
,
332 SrcMgr::CharacteristicKind NewFileType
,
334 // Unless we are exiting a #include, make sure to skip ahead to the line the
335 // #include directive was at.
336 SourceManager
&SourceMgr
= SM
;
338 PresumedLoc UserLoc
= SourceMgr
.getPresumedLoc(Loc
);
// The statement guarded by this validity check is not visible here.
339 if (UserLoc
.isInvalid())
342 unsigned NewLine
= UserLoc
.getLine();
344 if (Reason
== PPCallbacks::EnterFile
) {
// When entering a file, first move the output to the including line.
345 SourceLocation IncludeLoc
= UserLoc
.getIncludeLoc();
346 if (IncludeLoc
.isValid())
347 MoveToLine(IncludeLoc
, /*RequireStartOfLine=*/false);
348 } else if (Reason
== PPCallbacks::SystemHeaderPragma
) {
349 // GCC emits the # directive for this directive on the line AFTER the
350 // directive and emits a bunch of spaces that aren't needed. This is because
351 // otherwise we will emit a line marker for THIS line, which requires an
352 // extra blank line after the directive to avoid making all following lines
353 // off by one. We can do better by simply incrementing NewLine here.
359 // In KeepSystemIncludes mode, redirect OS as needed.
// When the system-ness changes: leaving a system file restores OrigOS,
// entering one switches to the null stream.
360 if (KeepSystemIncludes
&& (isSystem(FileType
) != isSystem(NewFileType
)))
361 OS
= isSystem(FileType
) ? OrigOS
: NullOS
.get();
// NOTE(review): only an append is visible here; presumably CurFilename is
// emptied on a line missing from this listing -- confirm.
364 CurFilename
+= UserLoc
.getFilename();
365 FileType
= NewFileType
;
367 if (DisableLineMarkers
) {
368 if (!MinimizeWhitespace
)
369 startNewLineIfNeeded();
374 WriteLineInfo(CurLine
);
378 // Do not emit an enter marker for the main file (which we expect is the first
379 // entered file). This matches gcc, and improves compatibility with some tools
380 // which track the # line markers as a way to determine when the preprocessed
381 // output is in the context of the main file.
382 if (Reason
== PPCallbacks::EnterFile
&& !IsFirstFileEntered
) {
383 IsFirstFileEntered
= true;
// Per-reason markers: " 1" on entering a file, " 2" on leaving one, and no
// extra text for the remaining two reasons.
388 case PPCallbacks::EnterFile
:
389 WriteLineInfo(CurLine
, " 1", 2);
391 case PPCallbacks::ExitFile
:
392 WriteLineInfo(CurLine
, " 2", 2);
394 case PPCallbacks::SystemHeaderPragma
:
395 case PPCallbacks::RenameFile
:
396 WriteLineInfo(CurLine
);
// Callback for an inclusion directive. Depending on the options it echoes the
// directive itself and/or writes a "#pragma clang module import" line.
// NOTE(review): the tail of both output statements, the condition guarding
// the switch, and the break/default lines are absent from this listing.
401 void PrintPPOutputPPCallbacks::InclusionDirective(
402 SourceLocation HashLoc
, const Token
&IncludeTok
, StringRef FileName
,
403 bool IsAngled
, CharSourceRange FilenameRange
, OptionalFileEntryRef File
,
404 StringRef SearchPath
, StringRef RelativePath
, const Module
*Imported
,
405 SrcMgr::CharacteristicKind FileType
) {
406 // In -dI mode, dump #include directives prior to dumping their content or
407 // interpretation. Similar for -fkeep-system-includes.
408 if (DumpIncludeDirectives
|| (KeepSystemIncludes
&& isSystem(FileType
))) {
409 MoveToLine(HashLoc
, /*RequireStartOfLine=*/true);
410 const std::string TokenText
= PP
.getSpelling(IncludeTok
);
411 assert(!TokenText
.empty());
// Re-create the directive: '#', the directive keyword, then the file name in
// the same kind of delimiters (<> or "") the source used.
412 *OS
<< "#" << TokenText
<< " "
413 << (IsAngled
? '<' : '"') << FileName
<< (IsAngled
? '>' : '"')
// Names the option responsible for the echoed directive.
415 << (DumpIncludeDirectives
? "-dI" : "-fkeep-system-includes")
417 setEmittedDirectiveOnThisLine();
420 // When preprocessing, turn implicit imports into module import pragmas.
422 switch (IncludeTok
.getIdentifierInfo()->getPPKeywordID()) {
423 case tok::pp_include
:
425 case tok::pp_include_next
:
426 MoveToLine(HashLoc
, /*RequireStartOfLine=*/true);
// Imported is dereferenced here without a visible null check.
// NOTE(review): presumably guarded by a condition on a missing line.
427 *OS
<< "#pragma clang module import "
428 << Imported
->getFullModuleName(true)
429 << " /* clang -E: implicit import for "
430 << "#" << PP
.getSpelling(IncludeTok
) << " "
431 << (IsAngled
? '<' : '"') << FileName
<< (IsAngled
? '>' : '"')
433 setEmittedDirectiveOnThisLine();
436 case tok::pp___include_macros
:
437 // #__include_macros has no effect on a user of a preprocessed source
438 // file; the only effect is on preprocessing.
440 // FIXME: That's not *quite* true: it causes the module in question to
441 // be loaded, which can affect downstream diagnostics.
445 llvm_unreachable("unknown include directive kind");
451 /// Handle entering the scope of a module during a module compilation.
///
/// Starts a fresh output line if needed, writes
/// "#pragma clang module begin <full module name>" and records that a
/// directive was emitted on this line.
452 void PrintPPOutputPPCallbacks::BeginModule(const Module
*M
) {
453 startNewLineIfNeeded();
454 *OS
<< "#pragma clang module begin " << M
->getFullModuleName(true);
455 setEmittedDirectiveOnThisLine();
458 /// Handle leaving the scope of a module during a module compilation.
///
/// Starts a fresh output line if needed, writes "#pragma clang module end"
/// with the full module name inside a trailing block comment, and records
/// that a directive was emitted on this line.
459 void PrintPPOutputPPCallbacks::EndModule(const Module
*M
) {
460 startNewLineIfNeeded();
461 *OS
<< "#pragma clang module end /*" << M
->getFullModuleName(true) << "*/";
462 setEmittedDirectiveOnThisLine();
465 /// Ident - Handle #ident directives when read by the preprocessor.
///
/// Moves to the directive's line, then writes "#ident " followed by the bytes
/// of S unchanged.
467 void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc
, StringRef S
) {
468 MoveToLine(Loc
, /*RequireStartOfLine=*/true);
470 OS
->write("#ident ", strlen("#ident "));
471 OS
->write(S
.begin(), S
.size());
// Unlike the pragma callbacks in this file, this marks tokens (not a
// directive) as emitted on the line.
472 setEmittedTokensOnThisLine();
475 /// MacroDefined - This hook is called whenever a macro definition is seen.
///
/// Prints the definition through PrintMacroDefinition() at the line of the
/// macro's definition location.
/// NOTE(review): the statements guarded by the two filters below (original
/// lines 484 and 491) are absent from this listing.
476 void PrintPPOutputPPCallbacks::MacroDefined(const Token
&MacroNameTok
,
477 const MacroDirective
*MD
) {
478 const MacroInfo
*MI
= MD
->getMacroInfo();
479 // Print out macro definitions in -dD mode and when we have -fdirectives-only
480 // for C++20 header units.
481 if ((!DumpDefines
&& !DirectivesOnly
) ||
482 // Ignore __FILE__ etc.
483 MI
->isBuiltinMacro())
486 SourceLocation DefLoc
= MI
->getDefinitionLoc();
// In directives-only mode, unused macros written in the built-in or
// command-line buffers are tested for separately.
487 if (DirectivesOnly
&& !MI
->isUsed()) {
// This local reference is named SM, like a member the class initializes.
488 SourceManager
&SM
= PP
.getSourceManager();
489 if (SM
.isWrittenInBuiltinFile(DefLoc
) ||
490 SM
.isWrittenInCommandLineFile(DefLoc
))
493 MoveToLine(DefLoc
, /*RequireStartOfLine=*/true);
494 PrintMacroDefinition(*MacroNameTok
.getIdentifierInfo(), *MI
, PP
, OS
);
495 setEmittedDirectiveOnThisLine();
// Callback for #undef: writes "#undef <name>" at the line of the macro name
// token. MD and Undef are not used by the visible statements.
// NOTE(review): the statement guarded by the option check (original line
// 504) is absent from this listing.
498 void PrintPPOutputPPCallbacks::MacroUndefined(const Token
&MacroNameTok
,
499 const MacroDefinition
&MD
,
500 const MacroDirective
*Undef
) {
501 // Print out macro definitions in -dD mode and when we have -fdirectives-only
502 // for C++20 header units.
503 if (!DumpDefines
&& !DirectivesOnly
)
506 MoveToLine(MacroNameTok
.getLocation(), /*RequireStartOfLine=*/true);
507 *OS
<< "#undef " << MacroNameTok
.getIdentifierInfo()->getName();
508 setEmittedDirectiveOnThisLine();
// Walks Str byte by byte (as unsigned char). Bytes that are printable and are
// neither a backslash nor a double quote take the first branch; every other
// byte takes the octal-escape branch.
// NOTE(review): the statement of the first branch and the start of the
// escape output (original lines 514 and 516) are absent from this listing.
511 static void outputPrintable(raw_ostream
*OS
, StringRef Str
) {
512 for (unsigned char Char
: Str
) {
513 if (isPrintable(Char
) && Char
!= '\\' && Char
!= '"')
515 else // Output anything hard as an octal escape.
// Three octal digits, most significant first: bits 7-6, 5-3 and 2-0.
517 << (char)('0' + ((Char
>> 6) & 7))
518 << (char)('0' + ((Char
>> 3) & 7))
519 << (char)('0' + ((Char
>> 0) & 7));
// Callback for a message-style pragma: moves to the pragma's line, writes the
// optional namespace followed by a space, and emits Str via
// outputPrintable().
// NOTE(review): two parameter lines, the "#pragma" prefix output, the
// handling of Kind (original lines 531-542) and the statement guarded by the
// PMK_Message test are absent from this listing.
523 void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc
,
525 PragmaMessageKind Kind
,
527 MoveToLine(Loc
, /*RequireStartOfLine=*/true);
529 if (!Namespace
.empty())
530 *OS
<< Namespace
<< ' ';
543 outputPrintable(OS
, Str
);
545 if (Kind
== PMK_Message
)
547 setEmittedDirectiveOnThisLine();
// Callback for "#pragma clang __debug": moves to the pragma's line and writes
// the fixed prefix below.
// NOTE(review): the statement that would print DebugType (original line
// 555) is absent from this listing.
550 void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc
,
551 StringRef DebugType
) {
552 MoveToLine(Loc
, /*RequireStartOfLine=*/true);
554 *OS
<< "#pragma clang __debug ";
557 setEmittedDirectiveOnThisLine();
// Re-emits "#pragma <Namespace> diagnostic push" at the pragma's line and
// records that a directive was emitted on this line.
560 void PrintPPOutputPPCallbacks::
561 PragmaDiagnosticPush(SourceLocation Loc
, StringRef Namespace
) {
562 MoveToLine(Loc
, /*RequireStartOfLine=*/true);
563 *OS
<< "#pragma " << Namespace
<< " diagnostic push";
564 setEmittedDirectiveOnThisLine();
// Re-emits "#pragma <Namespace> diagnostic pop" at the pragma's line and
// records that a directive was emitted on this line.
567 void PrintPPOutputPPCallbacks::
568 PragmaDiagnosticPop(SourceLocation Loc
, StringRef Namespace
) {
569 MoveToLine(Loc
, /*RequireStartOfLine=*/true);
570 *OS
<< "#pragma " << Namespace
<< " diagnostic pop";
571 setEmittedDirectiveOnThisLine();
// Re-emits a "#pragma <Namespace> diagnostic ..." directive at the pragma's
// line, ending with Str in double quotes.
// NOTE(review): the remaining parameters, the switch header and the text
// written for each severity case are absent from this listing.
574 void PrintPPOutputPPCallbacks::PragmaDiagnostic(SourceLocation Loc
,
578 MoveToLine(Loc
, /*RequireStartOfLine=*/true);
579 *OS
<< "#pragma " << Namespace
<< " diagnostic ";
// One case label per severity value; their bodies are not visible here.
581 case diag::Severity::Remark
:
584 case diag::Severity::Warning
:
587 case diag::Severity::Error
:
590 case diag::Severity::Ignored
:
593 case diag::Severity::Fatal
:
// Str is streamed as-is between the quotes (no escaping is applied here).
597 *OS
<< " \"" << Str
<< '"';
598 setEmittedDirectiveOnThisLine();
// Re-emits a "#pragma warning(<specifier> ..." directive at the pragma's
// line.
// NOTE(review): the Ids parameter line, the output between the switch and
// the loop, the loop body and the closing text are absent from this listing.
601 void PrintPPOutputPPCallbacks::PragmaWarning(SourceLocation Loc
,
602 PragmaWarningSpecifier WarningSpec
,
604 MoveToLine(Loc
, /*RequireStartOfLine=*/true);
606 *OS
<< "#pragma warning(";
// Spell the specifier: keywords for the named kinds, a digit for levels 1-4.
607 switch(WarningSpec
) {
608 case PWS_Default
: *OS
<< "default"; break;
609 case PWS_Disable
: *OS
<< "disable"; break;
610 case PWS_Error
: *OS
<< "error"; break;
611 case PWS_Once
: *OS
<< "once"; break;
612 case PWS_Suppress
: *OS
<< "suppress"; break;
613 case PWS_Level1
: *OS
<< '1'; break;
614 case PWS_Level2
: *OS
<< '2'; break;
615 case PWS_Level3
: *OS
<< '3'; break;
616 case PWS_Level4
: *OS
<< '4'; break;
// Iterates over every warning id; the per-id statement is not visible here.
620 for (ArrayRef
<int>::iterator I
= Ids
.begin(), E
= Ids
.end(); I
!= E
; ++I
)
623 setEmittedDirectiveOnThisLine();
// Re-emits "#pragma warning(push" at the pragma's line, with ", <Level>"
// appended by the statement below.
// NOTE(review): the Level parameter line, the condition guarding the level
// output (original line 630) and the closing parenthesis output are absent
// from this listing.
626 void PrintPPOutputPPCallbacks::PragmaWarningPush(SourceLocation Loc
,
628 MoveToLine(Loc
, /*RequireStartOfLine=*/true);
629 *OS
<< "#pragma warning(push";
631 *OS
<< ", " << Level
;
633 setEmittedDirectiveOnThisLine();
// Re-emits "#pragma warning(pop)" at the pragma's line and records that a
// directive was emitted on this line.
636 void PrintPPOutputPPCallbacks::PragmaWarningPop(SourceLocation Loc
) {
637 MoveToLine(Loc
, /*RequireStartOfLine=*/true);
638 *OS
<< "#pragma warning(pop)";
639 setEmittedDirectiveOnThisLine();
// Re-emits the start of "#pragma character_execution_set(push" at the
// pragma's line.
// NOTE(review): the Str parameter line and the statements that finish the
// directive (original lines 646-648) are absent from this listing.
642 void PrintPPOutputPPCallbacks::PragmaExecCharsetPush(SourceLocation Loc
,
644 MoveToLine(Loc
, /*RequireStartOfLine=*/true);
645 *OS
<< "#pragma character_execution_set(push";
649 setEmittedDirectiveOnThisLine();
// Re-emits "#pragma character_execution_set(pop)" at the pragma's line and
// records that a directive was emitted on this line.
652 void PrintPPOutputPPCallbacks::PragmaExecCharsetPop(SourceLocation Loc
) {
653 MoveToLine(Loc
, /*RequireStartOfLine=*/true);
654 *OS
<< "#pragma character_execution_set(pop)";
655 setEmittedDirectiveOnThisLine();
// Re-emits "#pragma clang assume_nonnull begin" at the pragma's line and
// records that a directive was emitted on this line.
658 void PrintPPOutputPPCallbacks::
659 PragmaAssumeNonNullBegin(SourceLocation Loc
) {
660 MoveToLine(Loc
, /*RequireStartOfLine=*/true);
661 *OS
<< "#pragma clang assume_nonnull begin";
662 setEmittedDirectiveOnThisLine();
// Re-emits "#pragma clang assume_nonnull end" at the pragma's line and
// records that a directive was emitted on this line.
665 void PrintPPOutputPPCallbacks::
666 PragmaAssumeNonNullEnd(SourceLocation Loc
) {
667 MoveToLine(Loc
, /*RequireStartOfLine=*/true);
668 *OS
<< "#pragma clang assume_nonnull end";
669 setEmittedDirectiveOnThisLine();
// Decides which whitespace precedes Tok: either a move to the token's line
// (followed by indentation handling) or, failing that, a possible separator
// between the previous token and Tok. Finally shifts the token history.
// NOTE(review): the RequireSpace parameter line, the early return, every
// statement that actually writes a character, the else headers and the
// update of PrevTok are absent from this listing.
672 void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token
&Tok
,
674 bool RequireSameLine
) {
675 // These tokens are not expanded to anything and don't need whitespace before
// End-of-file and annotation tokens are filtered here, except the four
// annotation kinds listed below.
677 if (Tok
.is(tok::eof
) ||
678 (Tok
.isAnnotation() && !Tok
.is(tok::annot_header_unit
) &&
679 !Tok
.is(tok::annot_module_begin
) && !Tok
.is(tok::annot_module_end
) &&
680 !Tok
.is(tok::annot_repl_input_end
)))
683 // EmittedDirectiveOnThisLine takes priority over RequireSameLine.
// MoveToLine() is only attempted when a line change is permitted; its true
// result means the output is now at the start of a line.
684 if ((!RequireSameLine
|| EmittedDirectiveOnThisLine
) &&
685 MoveToLine(Tok
, /*RequireStartOfLine=*/EmittedDirectiveOnThisLine
)) {
686 if (MinimizeWhitespace
) {
687 // Avoid interpreting hash as a directive under -fpreprocessed.
688 if (Tok
.is(tok::hash
))
691 // Print out space characters so that the first token on a line is
692 // indented for easy reading.
693 unsigned ColNo
= SM
.getExpansionColumnNumber(Tok
.getLocation());
695 // The first token on a line can have a column number of 1, yet still
696 // expect leading white space, if a macro expansion in column 1 starts
697 // with an empty macro argument, or an empty nested macro expansion. In
698 // this case, move the token to column 2.
699 if (ColNo
== 1 && Tok
.hasLeadingSpace())
702 // This hack prevents stuff like:
704 // HASH define foo bar
705 // From having the # character end up at column 1, which makes it so it
706 // is not handled as a #define next time through the preprocessor if in
707 // -fpreprocessed mode.
708 if (ColNo
<= 1 && Tok
.is(tok::hash
))
711 // Otherwise, indent the appropriate number of spaces.
// Runs ColNo-1 times, counting the column down to 1.
712 for (; ColNo
> 1; --ColNo
)
716 // Insert whitespace between the previous and next token if either
717 // - The caller requires it
718 // - The input had whitespace between them and we are not in
719 // whitespace-minimization mode
720 // - The whitespace is necessary to keep the tokens apart and there is not
721 // already a newline between them
722 if (RequireSpace
|| (!MinimizeWhitespace
&& Tok
.hasLeadingSpace()) ||
723 ((EmittedTokensOnThisLine
|| EmittedDirectiveOnThisLine
) &&
724 AvoidConcat(PrevPrevTok
, PrevTok
, Tok
)))
// Shift the history used by AvoidConcat().
728 PrevPrevTok
= PrevTok
;
// Scans the Len bytes at TokStr for line breaks and adds the number found to
// CurLine; returns early when there are none.
// NOTE(review): the Len parameter line, the rest of the first condition, the
// counter increment, the first operand of the pair test and the trailing
// flag updates are absent from this listing.
732 void PrintPPOutputPPCallbacks::HandleNewlinesInToken(const char *TokStr
,
734 unsigned NumNewlines
= 0;
735 for (; Len
; --Len
, ++TokStr
) {
736 if (*TokStr
!= '\n' &&
742 // If we have \n\r or \r\n, skip both and count as one line.
// TokStr[1] is read here.
// NOTE(review): presumably a check on Len on a missing line keeps this
// within the buffer -- confirm.
744 (TokStr
[1] == '\n' || TokStr
[1] == '\r') &&
745 TokStr
[0] != TokStr
[1]) {
751 if (NumNewlines
== 0) return;
753 CurLine
+= NumNewlines
;
// Pragma handler that echoes a pragma to the output: it writes Prefix and
// then the spelling of every token up to the end-of-directive token, using
// the shared PrintPPOutputPPCallbacks object for line and whitespace
// handling.
// NOTE(review): the declaration of Prefix, several statements inside
// HandlePragma (token setup, the IsFirst flag, the expanded-token lexing
// branch) and the closing lines are absent from this listing.
758 struct UnknownPragmaHandler
: public PragmaHandler
{
// Not owned here: the pointer is supplied by the constructor's caller.
760 PrintPPOutputPPCallbacks
*Callbacks
;
762 // Set to true if tokens should be expanded
763 bool ShouldExpandTokens
;
765 UnknownPragmaHandler(const char *prefix
, PrintPPOutputPPCallbacks
*callbacks
,
766 bool RequireTokenExpansion
)
767 : Prefix(prefix
), Callbacks(callbacks
),
768 ShouldExpandTokens(RequireTokenExpansion
) {}
769 void HandlePragma(Preprocessor
&PP
, PragmaIntroducer Introducer
,
770 Token
&PragmaTok
) override
{
771 // Figure out what line we went to and insert the appropriate number of
772 // newline characters.
773 Callbacks
->MoveToLine(PragmaTok
.getLocation(), /*RequireStartOfLine=*/true);
774 Callbacks
->OS
->write(Prefix
, strlen(Prefix
));
775 Callbacks
->setEmittedTokensOnThisLine();
777 if (ShouldExpandTokens
) {
778 // The first token does not have expanded macros. Expand them, if
// A one-element token array is handed back to the preprocessor with macro
// expansion enabled.
780 auto Toks
= std::make_unique
<Token
[]>(1);
782 PP
.EnterTokenStream(std::move(Toks
), /*NumToks=*/1,
783 /*DisableMacroExpansion=*/false,
784 /*IsReinject=*/false);
788 // Read and print all of the pragma tokens.
790 while (PragmaTok
.isNot(tok::eod
)) {
// Tokens stay on the pragma's line (RequireSameLine is true).
791 Callbacks
->HandleWhitespaceBeforeTok(PragmaTok
, /*RequireSpace=*/IsFirst
,
792 /*RequireSameLine=*/true);
794 std::string TokSpell
= PP
.getSpelling(PragmaTok
);
795 Callbacks
->OS
->write(&TokSpell
[0], TokSpell
.size());
796 Callbacks
->setEmittedTokensOnThisLine();
// The statement guarded by this test is not visible; the call below fetches
// the next token without macro expansion.
798 if (ShouldExpandTokens
)
801 PP
.LexUnexpandedToken(PragmaTok
);
// After the loop the whole pragma counts as an emitted directive.
803 Callbacks
->setEmittedDirectiveOnThisLine();
806 } // end anonymous namespace
// Main token printing loop: for each token it lets Callbacks place the
// preceding whitespace, then writes the token according to its kind, and
// stops after the end-of-file token.
// NOTE(review): the loop header, the Buffer declaration, the statements that
// fetch the next token and many branch bodies are absent from this listing.
809 static void PrintPreprocessedTokens(Preprocessor
&PP
, Token
&Tok
,
810 PrintPPOutputPPCallbacks
*Callbacks
) {
// Comments are dropped only in traditional-cpp mode when comment retention
// is off.
811 bool DropComments
= PP
.getLangOpts().TraditionalCPP
&&
812 !PP
.getCommentRetentionState();
814 bool IsStartOfLine
= false;
817 // Two lines joined with line continuation ('\' as last character on the
818 // line) must be emitted as one line even though Tok.getLine() returns two
819 // different values. In this situation Tok.isAtStartOfLine() is false even
820 // though it may be the first token on the lexical line. When
821 // dropping/skipping a token that is at the start of a line, propagate the
822 // start-of-line-ness to the next token to not append it to the previous
824 IsStartOfLine
= IsStartOfLine
|| Tok
.isAtStartOfLine();
// A token may only move to a new line when it (or a skipped predecessor)
// started a line.
826 Callbacks
->HandleWhitespaceBeforeTok(Tok
, /*RequireSpace=*/false,
827 /*RequireSameLine=*/!IsStartOfLine
);
829 if (DropComments
&& Tok
.is(tok::comment
)) {
830 // Skip comments. Normally the preprocessor does not generate
831 // tok::comment nodes at all when not keeping comments, but under
832 // -traditional-cpp the lexer keeps /all/ whitespace, including comments.
835 } else if (Tok
.is(tok::annot_repl_input_end
)) {
838 } else if (Tok
.is(tok::eod
)) {
839 // Don't print end of directive tokens, since they are typically newlines
840 // that mess up our line tracking. These come from unknown pre-processor
841 // directives or hash-prefixed comments in standalone assembly files.
843 // FIXME: The token on the next line after #include should have
844 // Tok.isAtStartOfLine() set.
845 IsStartOfLine
= true;
847 } else if (Tok
.is(tok::annot_module_include
)) {
848 // PrintPPOutputPPCallbacks::InclusionDirective handles producing
849 // appropriate output here. Ignore this token entirely.
851 IsStartOfLine
= true;
853 } else if (Tok
.is(tok::annot_module_begin
)) {
854 // FIXME: We retrieve this token after the FileChanged callback, and
855 // retrieve the module_end token before the FileChanged callback, so
856 // we render this within the file and render the module end outside the
857 // file, but this is backwards from the token locations: the module_begin
858 // token is at the include location (outside the file) and the module_end
859 // token is at the EOF location (within the file).
// The annotation value is reinterpreted as the Module pointer.
860 Callbacks
->BeginModule(
861 reinterpret_cast<Module
*>(Tok
.getAnnotationValue()));
863 IsStartOfLine
= true;
865 } else if (Tok
.is(tok::annot_module_end
)) {
866 Callbacks
->EndModule(
867 reinterpret_cast<Module
*>(Tok
.getAnnotationValue()));
869 IsStartOfLine
= true;
871 } else if (Tok
.is(tok::annot_header_unit
)) {
872 // This is a header-name that has been (effectively) converted into a
874 // FIXME: The module name could contain non-identifier module name
875 // components. We don't have a good way to round-trip those.
876 Module
*M
= reinterpret_cast<Module
*>(Tok
.getAnnotationValue());
877 std::string Name
= M
->getFullModuleName();
878 Callbacks
->OS
->write(Name
.data(), Name
.size());
879 Callbacks
->HandleNewlinesInToken(Name
.data(), Name
.size());
880 } else if (Tok
.isAnnotation()) {
881 // Ignore annotation tokens created by pragmas - the pragmas themselves
882 // will be reproduced in the preprocessed output.
// Identifiers are written from their IdentifierInfo name.
885 } else if (IdentifierInfo
*II
= Tok
.getIdentifierInfo()) {
886 *Callbacks
->OS
<< II
->getName();
// Literals that need no cleaning are written straight from their data.
887 } else if (Tok
.isLiteral() && !Tok
.needsCleaning() &&
888 Tok
.getLiteralData()) {
889 Callbacks
->OS
->write(Tok
.getLiteralData(), Tok
.getLength());
// Tokens shorter than Buffer are spelled through it (Buffer's declaration is
// not visible here); longer ones use the std::string path further down.
890 } else if (Tok
.getLength() < std::size(Buffer
)) {
891 const char *TokPtr
= Buffer
;
892 unsigned Len
= PP
.getSpelling(Tok
, TokPtr
);
893 Callbacks
->OS
->write(TokPtr
, Len
);
895 // Tokens that can contain embedded newlines need to adjust our current
897 // FIXME: The token may end with a newline in which case
898 // setEmittedDirectiveOnThisLine/setEmittedTokensOnThisLine afterwards is
900 if (Tok
.getKind() == tok::comment
|| Tok
.getKind() == tok::unknown
)
901 Callbacks
->HandleNewlinesInToken(TokPtr
, Len
);
902 if (Tok
.is(tok::comment
) && Len
>= 2 && TokPtr
[0] == '/' &&
904 // It's a line comment;
905 // Ensure that we don't concatenate anything behind it.
906 Callbacks
->setEmittedDirectiveOnThisLine();
// Same handling as above, using an owning std::string for the spelling.
909 std::string S
= PP
.getSpelling(Tok
);
910 Callbacks
->OS
->write(S
.data(), S
.size());
912 // Tokens that can contain embedded newlines need to adjust our current
914 if (Tok
.getKind() == tok::comment
|| Tok
.getKind() == tok::unknown
)
915 Callbacks
->HandleNewlinesInToken(S
.data(), S
.size());
916 if (Tok
.is(tok::comment
) && S
.size() >= 2 && S
[0] == '/' && S
[1] == '/') {
917 // It's a line comment;
918 // Ensure that we don't concatenate anything behind it.
919 Callbacks
->setEmittedDirectiveOnThisLine();
922 Callbacks
->setEmittedTokensOnThisLine();
923 IsStartOfLine
= false;
925 if (Tok
.is(tok::eof
)) break;
// Pairs a macro's identifier with its MacroInfo.
931 typedef std::pair
<const IdentifierInfo
*, MacroInfo
*> id_macro_pair
;
// Three-way comparison of two pairs by identifier name, returning the result
// of compare() on the names; passed to llvm::array_pod_sort in DoPrintMacros.
932 static int MacroIDCompare(const id_macro_pair
*LHS
, const id_macro_pair
*RHS
) {
933 return LHS
->first
->getName().compare(RHS
->first
->getName());
// Runs the preprocessor over the main file without printing tokens, then
// collects every macro whose latest directive is a definition, sorts the
// collection by name and prints each non-builtin one with
// PrintMacroDefinition().
// NOTE(review): the call that ignores pragmas, the token declaration and the
// body of the lexing loop, part of the collection loop header and the
// output after each definition are absent from this listing.
936 static void DoPrintMacros(Preprocessor
&PP
, raw_ostream
*OS
) {
937 // Ignore unknown pragmas.
940 // -dM mode just scans and ignores all tokens in the files, then dumps out
941 // the macro table at the end.
942 PP
.EnterMainSourceFile();
946 while (Tok
.isNot(tok::eof
));
948 SmallVector
<id_macro_pair
, 128> MacrosByID
;
949 for (Preprocessor::macro_iterator I
= PP
.macro_begin(), E
= PP
.macro_end();
// Keep only macros whose most recent directive leaves them defined.
951 auto *MD
= I
->second
.getLatest();
952 if (MD
&& MD
->isDefined())
953 MacrosByID
.push_back(id_macro_pair(I
->first
, MD
->getMacroInfo()));
// Sort by identifier name, using MacroIDCompare as the comparator.
955 llvm::array_pod_sort(MacrosByID
.begin(), MacrosByID
.end(), MacroIDCompare
);
957 for (unsigned i
= 0, e
= MacrosByID
.size(); i
!= e
; ++i
) {
958 MacroInfo
&MI
= *MacrosByID
[i
].second
;
959 // Ignore computed macros like __LINE__ and friends.
960 if (MI
.isBuiltinMacro()) continue;
962 PrintMacroDefinition(*MacrosByID
[i
].first
, MI
, PP
, OS
);
967 /// DoPrintPreprocessedInput - This implements -E mode.
///
/// Creates the PrintPPOutputPPCallbacks object from Opts, registers pragma
/// handlers that echo "#pragma", "#pragma GCC", "#pragma clang" and
/// "#pragma omp" lines, prints all tokens of the main file and finally
/// unregisters the handlers again.
/// NOTE(review): the condition selecting the macro-only path, the loop that
/// skips the predefines buffer (only some of its tests are visible) and the
/// final output statements are absent from this listing.
969 void clang::DoPrintPreprocessedInput(Preprocessor
&PP
, raw_ostream
*OS
,
970 const PreprocessorOutputOptions
&Opts
) {
971 // Show macros with no output is handled specially.
973 assert(Opts
.ShowMacros
&& "Not yet implemented!");
974 DoPrintMacros(PP
, OS
);
978 // Inform the preprocessor whether we want it to retain comments or not, due
980 PP
.SetCommentRetentionState(Opts
.ShowComments
, Opts
.ShowMacroComments
);
// Allocated with a raw `new`; ownership is handed to PP further down via
// addPPCallbacks(). The third argument is the negated ShowLineMarkers flag.
982 PrintPPOutputPPCallbacks
*Callbacks
= new PrintPPOutputPPCallbacks(
983 PP
, OS
, !Opts
.ShowLineMarkers
, Opts
.ShowMacros
,
984 Opts
.ShowIncludeDirectives
, Opts
.UseLineDirectives
,
985 Opts
.MinimizeWhitespace
, Opts
.DirectivesOnly
, Opts
.KeepSystemIncludes
);
987 // Expand macros in pragmas with -fms-extensions. The assumption is that
988 // the majority of pragmas in such a file will be Microsoft pragmas.
989 // Remember the handlers we will add so that we can remove them later.
990 std::unique_ptr
<UnknownPragmaHandler
> MicrosoftExtHandler(
991 new UnknownPragmaHandler(
992 "#pragma", Callbacks
,
993 /*RequireTokenExpansion=*/PP
.getLangOpts().MicrosoftExt
));
995 std::unique_ptr
<UnknownPragmaHandler
> GCCHandler(new UnknownPragmaHandler(
996 "#pragma GCC", Callbacks
,
997 /*RequireTokenExpansion=*/PP
.getLangOpts().MicrosoftExt
));
999 std::unique_ptr
<UnknownPragmaHandler
> ClangHandler(new UnknownPragmaHandler(
1000 "#pragma clang", Callbacks
,
1001 /*RequireTokenExpansion=*/PP
.getLangOpts().MicrosoftExt
));
// The handlers stay owned by the unique_ptrs above; PP only receives raw
// pointers, which is why they are removed again at the end.
1003 PP
.AddPragmaHandler(MicrosoftExtHandler
.get());
1004 PP
.AddPragmaHandler("GCC", GCCHandler
.get());
1005 PP
.AddPragmaHandler("clang", ClangHandler
.get());
1007 // The tokens after pragma omp need to be expanded.
1009 // OpenMP [2.1, Directive format]
1010 // Preprocessing tokens following the #pragma omp are subject to macro
1012 std::unique_ptr
<UnknownPragmaHandler
> OpenMPHandler(
1013 new UnknownPragmaHandler("#pragma omp", Callbacks
,
1014 /*RequireTokenExpansion=*/true));
1015 PP
.AddPragmaHandler("omp", OpenMPHandler
.get());
// From here on PP owns the callbacks object; the raw Callbacks pointer keeps
// being used below and by the handlers.
1017 PP
.addPPCallbacks(std::unique_ptr
<PPCallbacks
>(Callbacks
));
1019 // After we have configured the preprocessor, enter the main file.
1020 PP
.EnterMainSourceFile();
1021 if (Opts
.DirectivesOnly
)
1022 PP
.SetMacroExpansionOnlyInDirectives();
1024 // Consume all of the tokens that come from the predefines buffer. Those
1025 // should not be emitted into the output and are guaranteed to be at the
1027 const SourceManager
&SourceMgr
= PP
.getSourceManager();
// Conditions tested while skipping: end of file, a non-file location, an
// invalid presumed location, or a filename other than "<built-in>".
1031 if (Tok
.is(tok::eof
) || !Tok
.getLocation().isFileID())
1034 PresumedLoc PLoc
= SourceMgr
.getPresumedLoc(Tok
.getLocation());
1035 if (PLoc
.isInvalid())
// strcmp() is non-zero when the presumed filename differs from "<built-in>".
1038 if (strcmp(PLoc
.getFilename(), "<built-in>"))
1042 // Read all the preprocessed tokens, printing them out to the stream.
1043 PrintPreprocessedTokens(PP
, Tok
, Callbacks
);
1046 // Remove the handlers we just added to leave the preprocessor in a sane state
1047 // so that it can be reused (for example by a clang::Parser instance).
1048 PP
.RemovePragmaHandler(MicrosoftExtHandler
.get());
1049 PP
.RemovePragmaHandler("GCC", GCCHandler
.get());
1050 PP
.RemovePragmaHandler("clang", ClangHandler
.get());
1051 PP
.RemovePragmaHandler("omp", OpenMPHandler
.get());