1 //===--- TokenLexer.cpp - Lex from a token stream -------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the TokenLexer interface.
12 //===----------------------------------------------------------------------===//
14 #include "clang/Lex/TokenLexer.h"
15 #include "clang/Basic/SourceManager.h"
16 #include "clang/Lex/LexDiagnostic.h"
17 #include "clang/Lex/MacroArgs.h"
18 #include "clang/Lex/MacroInfo.h"
19 #include "clang/Lex/Preprocessor.h"
20 #include "llvm/ADT/SmallString.h"
21 using namespace clang
;
24 /// Create a TokenLexer for the specified macro with the specified actual
25 /// arguments. Note that Init takes ownership of the ActualArgs pointer.
26 void TokenLexer::Init(Token
&Tok
, SourceLocation ELEnd
, MacroInfo
*MI
,
28 // If the client is reusing a TokenLexer, make sure to free any memory
29 // associated with it.
36 ExpandLocStart
= Tok
.getLocation();
38 AtStartOfLine
= Tok
.isAtStartOfLine();
39 HasLeadingSpace
= Tok
.hasLeadingSpace();
40 NextTokGetsSpace
= false;
41 Tokens
= &*Macro
->tokens_begin();
43 DisableMacroExpansion
= false;
44 NumTokens
= Macro
->tokens_end()-Macro
->tokens_begin();
45 MacroExpansionStart
= SourceLocation();
47 SourceManager
&SM
= PP
.getSourceManager();
48 MacroStartSLocOffset
= SM
.getNextLocalOffset();
51 assert(Tokens
[0].getLocation().isValid());
52 assert((Tokens
[0].getLocation().isFileID() || Tokens
[0].is(tok::comment
)) &&
53 "Macro defined in macro?");
54 assert(ExpandLocStart
.isValid());
56 // Reserve a source location entry chunk for the length of the macro
57 // definition. Tokens that get lexed directly from the definition will
58 // have their locations pointing inside this chunk. This is to avoid
59 // creating separate source location entries for each token.
60 MacroDefStart
= SM
.getExpansionLoc(Tokens
[0].getLocation());
61 MacroDefLength
= Macro
->getDefinitionLength(SM
);
62 MacroExpansionStart
= SM
.createExpansionLoc(MacroDefStart
,
68 // If this is a function-like macro, expand the arguments and change
69 // Tokens to point to the expanded tokens.
70 if (Macro
->isFunctionLike() && Macro
->getNumArgs())
71 ExpandFunctionArguments();
73 // Mark the macro as currently disabled, so that it is not recursively
74 // expanded. The macro must be disabled only after argument pre-expansion of
75 // function-like macro arguments occurs.
76 Macro
->DisableMacro();
81 /// Create a TokenLexer for the specified token stream. This does not
82 /// take ownership of the specified token vector.
83 void TokenLexer::Init(const Token
*TokArray
, unsigned NumToks
,
84 bool disableMacroExpansion
, bool ownsTokens
) {
85 // If the client is reusing a TokenLexer, make sure to free any memory
86 // associated with it.
92 OwnsTokens
= ownsTokens
;
93 DisableMacroExpansion
= disableMacroExpansion
;
96 ExpandLocStart
= ExpandLocEnd
= SourceLocation();
97 AtStartOfLine
= false;
98 HasLeadingSpace
= false;
99 NextTokGetsSpace
= false;
100 MacroExpansionStart
= SourceLocation();
102 // Set HasLeadingSpace/AtStartOfLine so that the first token will be
103 // returned unmodified.
105 AtStartOfLine
= TokArray
[0].isAtStartOfLine();
106 HasLeadingSpace
= TokArray
[0].hasLeadingSpace();
111 void TokenLexer::destroy() {
112 // If this was a function-like macro that actually uses its arguments, delete
113 // the expanded tokens.
120 // TokenLexer owns its actual arguments.
121 if (ActualArgs
) ActualArgs
->destroy(PP
);
124 bool TokenLexer::MaybeRemoveCommaBeforeVaArgs(
125 SmallVectorImpl
<Token
> &ResultToks
, bool HasPasteOperator
, MacroInfo
*Macro
,
126 unsigned MacroArgNo
, Preprocessor
&PP
) {
127 // Is the macro argument __VA_ARGS__?
128 if (!Macro
->isVariadic() || MacroArgNo
!= Macro
->getNumArgs()-1)
131 // In Microsoft-compatibility mode, a comma is removed in the expansion
132 // of " ... , __VA_ARGS__ " if __VA_ARGS__ is empty. This extension is
133 // not supported by gcc.
134 if (!HasPasteOperator
&& !PP
.getLangOpts().MSVCCompat
)
137 // GCC removes the comma in the expansion of " ... , ## __VA_ARGS__ " if
138 // __VA_ARGS__ is empty, but not in strict C99 mode where there are no
139 // named arguments, where it remains. In all other modes, including C99
140 // with GNU extensions, it is removed regardless of named arguments.
141 // Microsoft also appears to support this extension, unofficially.
142 if (PP
.getLangOpts().C99
&& !PP
.getLangOpts().GNUMode
143 && Macro
->getNumArgs() < 2)
146 // Is a comma available to be removed?
147 if (ResultToks
.empty() || !ResultToks
.back().is(tok::comma
))
150 // Issue an extension diagnostic for the paste operator.
151 if (HasPasteOperator
)
152 PP
.Diag(ResultToks
.back().getLocation(), diag::ext_paste_comma
);
155 ResultToks
.pop_back();
157 // If the comma was right after another paste (e.g. "X##,##__VA_ARGS__"),
158 // then removal of the comma should produce a placemarker token (in C99
159 // terms) which we model by popping off the previous ##, giving us a plain
160 // "X" when __VA_ARGS__ is empty.
161 if (!ResultToks
.empty() && ResultToks
.back().is(tok::hashhash
))
162 ResultToks
.pop_back();
164 // Never add a space, even if the comma, ##, or arg had a space.
165 NextTokGetsSpace
= false;
169 /// Expand the arguments of a function-like macro so that we can quickly
170 /// return preexpanded tokens from Tokens.
171 void TokenLexer::ExpandFunctionArguments() {
173 SmallVector
<Token
, 128> ResultToks
;
175 // Loop through 'Tokens', expanding them into ResultToks. Keep
176 // track of whether we change anything. If not, no need to keep them. If so,
177 // we install the newly expanded sequence as the new 'Tokens' list.
178 bool MadeChange
= false;
180 for (unsigned i
= 0, e
= NumTokens
; i
!= e
; ++i
) {
181 // If we found the stringify operator, get the argument stringified. The
182 // preprocessor already verified that the following token is a macro name
183 // when the #define was parsed.
184 const Token
&CurTok
= Tokens
[i
];
185 if (i
!= 0 && !Tokens
[i
-1].is(tok::hashhash
) && CurTok
.hasLeadingSpace())
186 NextTokGetsSpace
= true;
188 if (CurTok
.is(tok::hash
) || CurTok
.is(tok::hashat
)) {
189 int ArgNo
= Macro
->getArgumentNum(Tokens
[i
+1].getIdentifierInfo());
190 assert(ArgNo
!= -1 && "Token following # is not an argument?");
192 SourceLocation ExpansionLocStart
=
193 getExpansionLocForMacroDefLoc(CurTok
.getLocation());
194 SourceLocation ExpansionLocEnd
=
195 getExpansionLocForMacroDefLoc(Tokens
[i
+1].getLocation());
198 if (CurTok
.is(tok::hash
)) // Stringify
199 Res
= ActualArgs
->getStringifiedArgument(ArgNo
, PP
,
203 // 'charify': don't bother caching these.
204 Res
= MacroArgs::StringifyArgument(ActualArgs
->getUnexpArgument(ArgNo
),
209 Res
.setFlag(Token::StringifiedInMacro
);
211 // The stringified/charified string leading space flag gets set to match
212 // the #/#@ operator.
213 if (NextTokGetsSpace
)
214 Res
.setFlag(Token::LeadingSpace
);
216 ResultToks
.push_back(Res
);
218 ++i
; // Skip arg name.
219 NextTokGetsSpace
= false;
223 // Find out if there is a paste (##) operator before or after the token.
224 bool NonEmptyPasteBefore
=
225 !ResultToks
.empty() && ResultToks
.back().is(tok::hashhash
);
226 bool PasteBefore
= i
!= 0 && Tokens
[i
-1].is(tok::hashhash
);
227 bool PasteAfter
= i
+1 != e
&& Tokens
[i
+1].is(tok::hashhash
);
228 assert(!NonEmptyPasteBefore
|| PasteBefore
);
230 // Otherwise, if this is not an argument token, just add the token to the
232 IdentifierInfo
*II
= CurTok
.getIdentifierInfo();
233 int ArgNo
= II
? Macro
->getArgumentNum(II
) : -1;
235 // This isn't an argument, just add it.
236 ResultToks
.push_back(CurTok
);
238 if (NextTokGetsSpace
) {
239 ResultToks
.back().setFlag(Token::LeadingSpace
);
240 NextTokGetsSpace
= false;
241 } else if (PasteBefore
&& !NonEmptyPasteBefore
)
242 ResultToks
.back().clearFlag(Token::LeadingSpace
);
247 // An argument is expanded somehow, the result is different than the
251 // Otherwise, this is a use of the argument.
253 // In Microsoft mode, remove the comma before __VA_ARGS__ to ensure there
254 // are no trailing commas if __VA_ARGS__ is empty.
255 if (!PasteBefore
&& ActualArgs
->isVarargsElidedUse() &&
256 MaybeRemoveCommaBeforeVaArgs(ResultToks
,
257 /*HasPasteOperator=*/false,
261 // If it is not the LHS/RHS of a ## operator, we must pre-expand the
262 // argument and substitute the expanded tokens into the result. This is
264 if (!PasteBefore
&& !PasteAfter
) {
265 const Token
*ResultArgToks
;
267 // Only preexpand the argument if it could possibly need it. This
268 // avoids some work in common cases.
269 const Token
*ArgTok
= ActualArgs
->getUnexpArgument(ArgNo
);
270 if (ActualArgs
->ArgNeedsPreexpansion(ArgTok
, PP
))
271 ResultArgToks
= &ActualArgs
->getPreExpArgument(ArgNo
, Macro
, PP
)[0];
273 ResultArgToks
= ArgTok
; // Use non-preexpanded tokens.
275 // If the arg token expanded into anything, append it.
276 if (ResultArgToks
->isNot(tok::eof
)) {
277 unsigned FirstResult
= ResultToks
.size();
278 unsigned NumToks
= MacroArgs::getArgLength(ResultArgToks
);
279 ResultToks
.append(ResultArgToks
, ResultArgToks
+NumToks
);
281 // In Microsoft-compatibility mode, we follow MSVC's preprocessing
282 // behavior by not considering single commas from nested macro
283 // expansions as argument separators. Set a flag on the token so we can
284 // test for this later when the macro expansion is processed.
285 if (PP
.getLangOpts().MSVCCompat
&& NumToks
== 1 &&
286 ResultToks
.back().is(tok::comma
))
287 ResultToks
.back().setFlag(Token::IgnoredComma
);
289 // If the '##' came from expanding an argument, turn it into 'unknown'
291 for (unsigned i
= FirstResult
, e
= ResultToks
.size(); i
!= e
; ++i
) {
292 Token
&Tok
= ResultToks
[i
];
293 if (Tok
.is(tok::hashhash
))
294 Tok
.setKind(tok::unknown
);
297 if(ExpandLocStart
.isValid()) {
298 updateLocForMacroArgTokens(CurTok
.getLocation(),
299 ResultToks
.begin()+FirstResult
,
303 // If any tokens were substituted from the argument, the whitespace
304 // before the first token should match the whitespace of the arg
306 ResultToks
[FirstResult
].setFlagValue(Token::LeadingSpace
,
308 NextTokGetsSpace
= false;
313 // Okay, we have a token that is either the LHS or RHS of a paste (##)
314 // argument. It gets substituted as its non-pre-expanded tokens.
315 const Token
*ArgToks
= ActualArgs
->getUnexpArgument(ArgNo
);
316 unsigned NumToks
= MacroArgs::getArgLength(ArgToks
);
317 if (NumToks
) { // Not an empty argument?
318 // If this is the GNU ", ## __VA_ARGS__" extension, and we just learned
319 // that __VA_ARGS__ expands to multiple tokens, avoid a pasting error when
320 // the expander tries to paste ',' with the first token of the __VA_ARGS__
322 if (NonEmptyPasteBefore
&& ResultToks
.size() >= 2 &&
323 ResultToks
[ResultToks
.size()-2].is(tok::comma
) &&
324 (unsigned)ArgNo
== Macro
->getNumArgs()-1 &&
325 Macro
->isVariadic()) {
326 // Remove the paste operator, report use of the extension.
327 PP
.Diag(ResultToks
.pop_back_val().getLocation(), diag::ext_paste_comma
);
330 ResultToks
.append(ArgToks
, ArgToks
+NumToks
);
332 // If the '##' came from expanding an argument, turn it into 'unknown'
334 for (unsigned i
= ResultToks
.size() - NumToks
, e
= ResultToks
.size();
336 Token
&Tok
= ResultToks
[i
];
337 if (Tok
.is(tok::hashhash
))
338 Tok
.setKind(tok::unknown
);
341 if (ExpandLocStart
.isValid()) {
342 updateLocForMacroArgTokens(CurTok
.getLocation(),
343 ResultToks
.end()-NumToks
, ResultToks
.end());
346 // If this token (the macro argument) was supposed to get leading
347 // whitespace, transfer this information onto the first token of the
350 // Do not do this if the paste operator occurs before the macro argument,
351 // as in "A ## MACROARG". In valid code, the first token will get
352 // smooshed onto the preceding one anyway (forming AMACROARG). In
353 // assembler-with-cpp mode, invalid pastes are allowed through: in this
354 // case, we do not want the extra whitespace to be added. For example,
355 // we want ". ## foo" -> ".foo" not ". foo".
356 if (NextTokGetsSpace
)
357 ResultToks
[ResultToks
.size()-NumToks
].setFlag(Token::LeadingSpace
);
359 NextTokGetsSpace
= false;
363 // If an empty argument is on the LHS or RHS of a paste, the standard (C99
364 // 6.10.3.3p2,3) calls for a bunch of placemarker stuff to occur. We
365 // implement this by eating ## operators when a LHS or RHS expands to
368 // Discard the argument token and skip (don't copy to the expansion
369 // buffer) the paste operator after it.
374 // If this is on the RHS of a paste operator, we've already copied the
375 // paste operator to the ResultToks list, unless the LHS was empty too.
378 if (NonEmptyPasteBefore
) {
379 assert(ResultToks
.back().is(tok::hashhash
));
380 ResultToks
.pop_back();
383 // If this is the __VA_ARGS__ token, and if the argument wasn't provided,
384 // and if the macro had at least one real argument, and if the token before
385 // the ## was a comma, remove the comma. This is a GCC extension which is
386 // disabled when using -std=c99.
387 if (ActualArgs
->isVarargsElidedUse())
388 MaybeRemoveCommaBeforeVaArgs(ResultToks
,
389 /*HasPasteOperator=*/true,
395 // If anything changed, install this as the new Tokens list.
397 assert(!OwnsTokens
&& "This would leak if we already own the token list");
398 // This is deleted in the dtor.
399 NumTokens
= ResultToks
.size();
400 // The tokens will be added to Preprocessor's cache and will be removed
401 // when this TokenLexer finishes lexing them.
402 Tokens
= PP
.cacheMacroExpandedTokens(this, ResultToks
);
404 // The preprocessor cache of macro expanded tokens owns these tokens, not us.
409 /// \brief Checks if two tokens form a wide string literal.
///
/// Returns true only when \p FirstTok is an identifier token spelled "L" and
/// \p SecondTok is a literal token for which stringifiedInMacro() is true,
/// i.e. the 'L#macro_arg' shape that Lex() tests for under MSVCCompat.
410 static bool isWideStringLiteralFromMacro(const Token
&FirstTok
,
411 const Token
&SecondTok
) {
412 return FirstTok
.is(tok::identifier
) &&
413 FirstTok
.getIdentifierInfo()->isStr("L") && SecondTok
.isLiteral() &&
414 SecondTok
.stringifiedInMacro();
417 /// Lex - Lex and return a token from this macro stream.
419 bool TokenLexer::Lex(Token
&Tok
) {
420 // Lexing off the end of the macro, pop this macro off the expansion stack.
422 // If this is a macro (not a token stream), mark the macro enabled now
423 // that it is no longer being expanded.
424 if (Macro
) Macro
->EnableMacro();
427 Tok
.setFlagValue(Token::StartOfLine
, AtStartOfLine
);
428 Tok
.setFlagValue(Token::LeadingSpace
, HasLeadingSpace
|| NextTokGetsSpace
);
430 Tok
.setFlag(Token::LeadingEmptyMacro
);
431 return PP
.HandleEndOfTokenLexer(Tok
);
434 SourceManager
&SM
= PP
.getSourceManager();
436 // If this is the first token of the expanded result, we inherit spacing
438 bool isFirstToken
= CurToken
== 0;
440 // Get the next token to return.
441 Tok
= Tokens
[CurToken
++];
443 bool TokenIsFromPaste
= false;
445 // If this token is followed by a token paste (##) operator, paste the tokens!
446 // Note that ## is a normal token when not expanding a macro.
447 if (!isAtEnd() && Macro
&&
448 (Tokens
[CurToken
].is(tok::hashhash
) ||
449 // Special processing of L#x macros in -fms-compatibility mode.
450 // Microsoft compiler is able to form a wide string literal from
451 // 'L#macro_arg' construct in a function-like macro.
452 (PP
.getLangOpts().MSVCCompat
&&
453 isWideStringLiteralFromMacro(Tok
, Tokens
[CurToken
])))) {
454 // When handling the microsoft /##/ extension, the final token is
455 // returned by PasteTokens, not the pasted token.
456 if (PasteTokens(Tok
))
459 TokenIsFromPaste
= true;
462 // The token's current location indicates where the token was lexed from. We
463 // need this information to compute the spelling of the token, but any
464 // diagnostics for the expanded token should appear as if they came from
465 // ExpansionLoc. Pull this information together into a new SourceLocation
466 // that captures all of this.
467 if (ExpandLocStart
.isValid() && // Don't do this for token streams.
468 // Check that the token's location was not already set properly.
469 SM
.isBeforeInSLocAddrSpace(Tok
.getLocation(), MacroStartSLocOffset
)) {
470 SourceLocation instLoc
;
471 if (Tok
.is(tok::comment
)) {
472 instLoc
= SM
.createExpansionLoc(Tok
.getLocation(),
477 instLoc
= getExpansionLocForMacroDefLoc(Tok
.getLocation());
480 Tok
.setLocation(instLoc
);
483 // If this is the first token, set the lexical properties of the token to
484 // match the lexical properties of the macro identifier.
486 Tok
.setFlagValue(Token::StartOfLine
, AtStartOfLine
);
487 Tok
.setFlagValue(Token::LeadingSpace
, HasLeadingSpace
);
489 // If this is not the first token, we may still need to pass through
490 // leading whitespace if we've expanded a macro.
491 if (AtStartOfLine
) Tok
.setFlag(Token::StartOfLine
);
492 if (HasLeadingSpace
) Tok
.setFlag(Token::LeadingSpace
);
494 AtStartOfLine
= false;
495 HasLeadingSpace
= false;
497 // Handle recursive expansion!
498 if (!Tok
.isAnnotation() && Tok
.getIdentifierInfo() != nullptr) {
499 // Change the kind of this identifier to the appropriate token kind, e.g.
500 // turning "for" into a keyword.
501 IdentifierInfo
*II
= Tok
.getIdentifierInfo();
502 Tok
.setKind(II
->getTokenID());
504 // If this identifier was poisoned and from a paste, emit an error. This
505 // won't be handled by Preprocessor::HandleIdentifier because this is coming
506 // from a macro expansion.
507 if (II
->isPoisoned() && TokenIsFromPaste
) {
508 PP
.HandlePoisonedIdentifier(Tok
);
511 if (!DisableMacroExpansion
&& II
->isHandleIdentifierCase())
512 return PP
.HandleIdentifier(Tok
);
515 // Otherwise, return a normal token.
519 /// PasteTokens - Tok is the LHS of a ## operator, and CurToken is the ##
520 /// operator. Read the ## and RHS, and paste the LHS/RHS together. If there
521 /// are more ## after it, chomp them iteratively. Return the result as Tok.
522 /// If this returns true, the caller should immediately return the token.
523 bool TokenLexer::PasteTokens(Token
&Tok
) {
524 SmallString
<128> Buffer
;
525 const char *ResultTokStrPtr
= nullptr;
526 SourceLocation StartLoc
= Tok
.getLocation();
527 SourceLocation PasteOpLoc
;
529 // Consume the ## operator if any.
530 PasteOpLoc
= Tokens
[CurToken
].getLocation();
531 if (Tokens
[CurToken
].is(tok::hashhash
))
533 assert(!isAtEnd() && "No token on the RHS of a paste operator!");
535 // Get the RHS token.
536 const Token
&RHS
= Tokens
[CurToken
];
538 // Allocate space for the result token. This is guaranteed to be enough for
540 Buffer
.resize(Tok
.getLength() + RHS
.getLength());
542 // Get the spelling of the LHS token in Buffer.
543 const char *BufPtr
= &Buffer
[0];
544 bool Invalid
= false;
545 unsigned LHSLen
= PP
.getSpelling(Tok
, BufPtr
, &Invalid
);
546 if (BufPtr
!= &Buffer
[0]) // Really, we want the chars in Buffer!
547 memcpy(&Buffer
[0], BufPtr
, LHSLen
);
551 BufPtr
= Buffer
.data() + LHSLen
;
552 unsigned RHSLen
= PP
.getSpelling(RHS
, BufPtr
, &Invalid
);
555 if (RHSLen
&& BufPtr
!= &Buffer
[LHSLen
])
556 // Really, we want the chars in Buffer!
557 memcpy(&Buffer
[LHSLen
], BufPtr
, RHSLen
);
559 // Trim excess space.
560 Buffer
.resize(LHSLen
+RHSLen
);
562 // Plop the pasted result (including the trailing newline and null) into a
563 // scratch buffer where we can lex it.
565 ResultTokTmp
.startToken();
567 // Claim that the tmp token is a string_literal so that we can get the
568 // character pointer back from CreateString in getLiteralData().
569 ResultTokTmp
.setKind(tok::string_literal
);
570 PP
.CreateString(Buffer
, ResultTokTmp
);
571 SourceLocation ResultTokLoc
= ResultTokTmp
.getLocation();
572 ResultTokStrPtr
= ResultTokTmp
.getLiteralData();
574 // Lex the resultant pasted token into Result.
577 if (Tok
.isAnyIdentifier() && RHS
.isAnyIdentifier()) {
578 // Common paste case: identifier+identifier = identifier. Avoid creating
579 // a lexer and other overhead.
580 PP
.IncrementPasteCounter(true);
582 Result
.setKind(tok::raw_identifier
);
583 Result
.setRawIdentifierData(ResultTokStrPtr
);
584 Result
.setLocation(ResultTokLoc
);
585 Result
.setLength(LHSLen
+RHSLen
);
587 PP
.IncrementPasteCounter(false);
589 assert(ResultTokLoc
.isFileID() &&
590 "Should be a raw location into scratch buffer");
591 SourceManager
&SourceMgr
= PP
.getSourceManager();
592 FileID LocFileID
= SourceMgr
.getFileID(ResultTokLoc
);
594 bool Invalid
= false;
595 const char *ScratchBufStart
596 = SourceMgr
.getBufferData(LocFileID
, &Invalid
).data();
600 // Make a lexer to lex this string from. Lex just this one token.
601 // Make a lexer object so that we lex and expand the paste result.
602 Lexer
TL(SourceMgr
.getLocForStartOfFile(LocFileID
),
603 PP
.getLangOpts(), ScratchBufStart
,
604 ResultTokStrPtr
, ResultTokStrPtr
+LHSLen
+RHSLen
);
606 // Lex a token in raw mode. This way it won't look up identifiers
607 // automatically, lexing off the end will return an eof token, and
608 // warnings are disabled. This returns true if the result token is the
610 bool isInvalid
= !TL
.LexFromRawLexer(Result
);
612 // If we got an EOF token, we didn't form even ONE token. For example, we
613 // did "/ ## /" to get "//".
614 isInvalid
|= Result
.is(tok::eof
);
616 // If pasting the two tokens didn't form a full new token, this is an
617 // error. This occurs with "x ## +" and other stuff. Return with Tok
618 // unmodified and with RHS as the next token to lex.
620 // Test for the Microsoft extension of /##/ turning into // here on the
622 if (PP
.getLangOpts().MicrosoftExt
&& Tok
.is(tok::slash
) &&
623 RHS
.is(tok::slash
)) {
624 HandleMicrosoftCommentPaste(Tok
);
628 // Do not emit the error when preprocessing assembler code.
629 if (!PP
.getLangOpts().AsmPreprocessor
) {
630 // Explicitly convert the token location to have proper expansion
631 // information so that the user knows where it came from.
632 SourceManager
&SM
= PP
.getSourceManager();
634 SM
.createExpansionLoc(PasteOpLoc
, ExpandLocStart
, ExpandLocEnd
, 2);
635 // If we're in microsoft extensions mode, downgrade this from a hard
636 // error to an extension that defaults to an error. This allows
638 PP
.Diag(Loc
, PP
.getLangOpts().MicrosoftExt
? diag::ext_pp_bad_paste_ms
639 : diag::err_pp_bad_paste
)
643 // An error has occurred so exit loop.
647 // Turn ## into 'unknown' to avoid # ## # from looking like a paste
649 if (Result
.is(tok::hashhash
))
650 Result
.setKind(tok::unknown
);
653 // Transfer properties of the LHS over the Result.
654 Result
.setFlagValue(Token::StartOfLine
, Tok
.isAtStartOfLine());
655 Result
.setFlagValue(Token::LeadingSpace
, Tok
.hasLeadingSpace());
657 // Finally, replace LHS with the result, consume the RHS, and iterate.
660 } while (!isAtEnd() && Tokens
[CurToken
].is(tok::hashhash
));
662 SourceLocation EndLoc
= Tokens
[CurToken
- 1].getLocation();
664 // The token's current location indicates where the token was lexed from. We
665 // need this information to compute the spelling of the token, but any
666 // diagnostics for the expanded token should appear as if the token was
667 // expanded from the full ## expression. Pull this information together into
668 // a new SourceLocation that captures all of this.
669 SourceManager
&SM
= PP
.getSourceManager();
670 if (StartLoc
.isFileID())
671 StartLoc
= getExpansionLocForMacroDefLoc(StartLoc
);
672 if (EndLoc
.isFileID())
673 EndLoc
= getExpansionLocForMacroDefLoc(EndLoc
);
674 FileID MacroFID
= SM
.getFileID(MacroExpansionStart
);
675 while (SM
.getFileID(StartLoc
) != MacroFID
)
676 StartLoc
= SM
.getImmediateExpansionRange(StartLoc
).first
;
677 while (SM
.getFileID(EndLoc
) != MacroFID
)
678 EndLoc
= SM
.getImmediateExpansionRange(EndLoc
).second
;
680 Tok
.setLocation(SM
.createExpansionLoc(Tok
.getLocation(), StartLoc
, EndLoc
,
683 // Now that we got the result token, it will be subject to expansion. Since
684 // token pasting re-lexes the result token in raw mode, identifier information
685 // isn't looked up. As such, if the result is an identifier, look up id info.
686 if (Tok
.is(tok::raw_identifier
)) {
687 // Look up the identifier info for the token. We disabled identifier lookup
688 // by saying we're skipping contents, so we need to do this manually.
689 PP
.LookUpIdentifierInfo(Tok
);
694 /// isNextTokenLParen - If the next token lexed will pop this macro off the
695 /// expansion stack, return 2. If the next unexpanded token is a '(', return
696 /// 1, otherwise return 0.
697 unsigned TokenLexer::isNextTokenLParen() const {
701 return Tokens
[CurToken
].is(tok::l_paren
);
704 /// isParsingPreprocessorDirective - Return true if we are in the middle of a
705 /// preprocessor directive.
///
/// This is the case when the last token of the stream is tok::eod and the
/// lexer has not yet reached the end of the stream.
706 bool TokenLexer::isParsingPreprocessorDirective() const {
// NOTE(review): Tokens[NumTokens-1] is read before isAtEnd() is consulted, so
// this presumably relies on the token stream being non-empty - confirm.
707 return Tokens
[NumTokens
-1].is(tok::eod
) && !isAtEnd();
710 /// HandleMicrosoftCommentPaste - In microsoft compatibility mode, /##/ pastes
711 /// together to form a comment that comments out everything in the current
712 /// macro, other active macros, and anything left on the current physical
713 /// source line of the expanded buffer. Handle this by returning the
714 /// first token on the next line.
715 void TokenLexer::HandleMicrosoftCommentPaste(Token
&Tok
) {
716 // We 'comment out' the rest of this macro by just ignoring the rest of the
717 // tokens that have not been lexed yet, if any.
719 // Since this must be a macro, mark the macro enabled now that it is no longer
721 assert(Macro
&& "Token streams can't paste comments");
722 Macro
->EnableMacro();
724 PP
.HandleMicrosoftCommentPaste(Tok
);
727 /// \brief If \arg loc is a file ID and points inside the current macro
728 /// definition, returns the appropriate source location pointing at the
729 /// macro expansion source location entry, otherwise it returns an invalid
732 TokenLexer::getExpansionLocForMacroDefLoc(SourceLocation loc
) const {
733 assert(ExpandLocStart
.isValid() && MacroExpansionStart
.isValid() &&
734 "Not appropriate for token streams");
735 assert(loc
.isValid() && loc
.isFileID());
737 SourceManager
&SM
= PP
.getSourceManager();
738 assert(SM
.isInSLocAddrSpace(loc
, MacroDefStart
, MacroDefLength
) &&
739 "Expected loc to come from the macro definition");
741 unsigned relativeOffset
= 0;
742 SM
.isInSLocAddrSpace(loc
, MacroDefStart
, MacroDefLength
, &relativeOffset
);
743 return MacroExpansionStart
.getLocWithOffset(relativeOffset
);
746 /// \brief Finds the tokens that are consecutive (from the same FileID),
747 /// creates a single SLocEntry, and assigns SourceLocations to each token that
748 /// point to that SLocEntry, e.g. for
749 /// assert(foo == bar);
750 /// There will be a single SLocEntry for the "foo == bar" chunk and locations
751 /// for the 'foo', '==', 'bar' tokens will point inside that chunk.
753 /// \arg begin_tokens will be updated to a position past all the found
754 /// consecutive tokens.
755 static void updateConsecutiveMacroArgTokens(SourceManager
&SM
,
756 SourceLocation InstLoc
,
757 Token
*&begin_tokens
,
758 Token
* end_tokens
) {
759 assert(begin_tokens
< end_tokens
);
761 SourceLocation FirstLoc
= begin_tokens
->getLocation();
762 SourceLocation CurLoc
= FirstLoc
;
764 // Compare the source location offset of tokens and group together tokens that
765 // are close, even if their locations point to different FileIDs. e.g.
767 // |bar | foo | cake | (3 tokens from 3 consecutive FileIDs)
769 // |bar foo cake| (one SLocEntry chunk for all tokens)
771 // we can perform this "merge" since the token's spelling location depends
772 // on the relative offset.
774 Token
*NextTok
= begin_tokens
+ 1;
775 for (; NextTok
< end_tokens
; ++NextTok
) {
776 SourceLocation NextLoc
= NextTok
->getLocation();
777 if (CurLoc
.isFileID() != NextLoc
.isFileID())
778 break; // Token from different kind of FileID.
781 if (!SM
.isInSameSLocAddrSpace(CurLoc
, NextLoc
, &RelOffs
))
782 break; // Token from different local/loaded location.
783 // Check that token is not before the previous token or more than 50
784 // "characters" away.
785 if (RelOffs
< 0 || RelOffs
> 50)
790 // For the consecutive tokens, find the length of the SLocEntry to contain
792 Token
&LastConsecutiveTok
= *(NextTok
-1);
794 SM
.isInSameSLocAddrSpace(FirstLoc
, LastConsecutiveTok
.getLocation(),
796 unsigned FullLength
= LastRelOffs
+ LastConsecutiveTok
.getLength();
798 // Create a macro expansion SLocEntry that will "contain" all of the tokens.
799 SourceLocation Expansion
=
800 SM
.createMacroArgExpansionLoc(FirstLoc
, InstLoc
,FullLength
);
802 // Change the location of the tokens from the spelling location to the new
803 // expanded location.
804 for (; begin_tokens
< NextTok
; ++begin_tokens
) {
805 Token
&Tok
= *begin_tokens
;
807 SM
.isInSameSLocAddrSpace(FirstLoc
, Tok
.getLocation(), &RelOffs
);
808 Tok
.setLocation(Expansion
.getLocWithOffset(RelOffs
));
812 /// \brief Creates SLocEntries and updates the locations of macro argument
813 /// tokens to their new expanded locations.
815 /// \param ArgIdSpellLoc the location of the macro argument id inside the macro
817 /// \param Tokens the macro argument tokens to update.
818 void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc
,
821 SourceManager
&SM
= PP
.getSourceManager();
823 SourceLocation InstLoc
=
824 getExpansionLocForMacroDefLoc(ArgIdSpellLoc
);
826 while (begin_tokens
< end_tokens
) {
827 // If there's only one token just create a SLocEntry for it.
828 if (end_tokens
- begin_tokens
== 1) {
829 Token
&Tok
= *begin_tokens
;
830 Tok
.setLocation(SM
.createMacroArgExpansionLoc(Tok
.getLocation(),
836 updateConsecutiveMacroArgTokens(SM
, InstLoc
, begin_tokens
, end_tokens
);
/// Copy the start-of-line and leading-space flags of \p Result into this
/// lexer's AtStartOfLine and HasLeadingSpace members. Lex() applies those
/// members to a token it returns and then resets them to false.
///
/// \param Result the token whose lexical flags are recorded; it is only read.
840 void TokenLexer::PropagateLineStartLeadingSpaceInfo(Token
&Result
) {
841 AtStartOfLine
= Result
.isAtStartOfLine();
842 HasLeadingSpace
= Result
.hasLeadingSpace();