[AArch64] Default to SEH exception handling on MinGW
[llvm-complete.git] / lib / Support / FileCheck.cpp
blobc5946df4fec9d0b5779f54e986b560a2fc22ff1d
1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // FileCheck does a line-by-line check of a file that validates whether it
10 // contains the expected content. This is useful for regression tests etc.
12 // This file implements most of the API that will be used by the FileCheck utility
13 // as well as various unittests.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/Support/FileCheck.h"
17 #include "llvm/ADT/StringSet.h"
18 #include "llvm/Support/FormatVariadic.h"
19 #include <cstdint>
20 #include <list>
21 #include <map>
22 #include <tuple>
23 #include <utility>
25 using namespace llvm;
27 llvm::Optional<std::string> FileCheckPatternSubstitution::getResult() const {
28 if (IsNumExpr) {
29 return utostr(NumExpr->getValue());
30 } else {
31 // Look up the value and escape it so that we can put it into the
32 // regex.
33 llvm::Optional<StringRef> VarVal = Context->getPatternVarValue(FromStr);
34 if (!VarVal)
35 return llvm::None;
36 return Regex::escape(*VarVal);
40 StringRef FileCheckPatternSubstitution::getUndefVarName() const {
41 // Parsing guarantees only @LINE is ever referenced and it is not undefined
42 // by ClearLocalVars.
43 if (IsNumExpr)
44 return StringRef();
46 if (!Context->getPatternVarValue(FromStr))
47 return FromStr;
49 return StringRef();
52 bool FileCheckPattern::isValidVarNameStart(char C) {
53 return C == '_' || isalpha(C);
56 bool FileCheckPattern::parseVariable(StringRef Str, bool &IsPseudo,
57 unsigned &TrailIdx) {
58 if (Str.empty())
59 return true;
61 bool ParsedOneChar = false;
62 unsigned I = 0;
63 IsPseudo = Str[0] == '@';
65 // Global vars start with '$'.
66 if (Str[0] == '$' || IsPseudo)
67 ++I;
69 for (unsigned E = Str.size(); I != E; ++I) {
70 if (!ParsedOneChar && !isValidVarNameStart(Str[I]))
71 return true;
73 // Variable names are composed of alphanumeric characters and underscores.
74 if (Str[I] != '_' && !isalnum(Str[I]))
75 break;
76 ParsedOneChar = true;
79 TrailIdx = I;
80 return false;
83 // StringRef holding all characters considered as horizontal whitespaces by
84 // FileCheck input canonicalization.
85 StringRef SpaceChars = " \t";
87 // Parsing helper function that strips the first character in S and returns it.
88 static char popFront(StringRef &S) {
89 char C = S.front();
90 S = S.drop_front();
91 return C;
94 FileCheckNumExpr *
95 FileCheckPattern::parseNumericExpression(StringRef Name, StringRef Trailer,
96 const SourceMgr &SM) const {
97 if (!Name.equals("@LINE")) {
98 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
99 "invalid pseudo numeric variable '" + Name + "'");
100 return nullptr;
103 // Check if this is a supported operation and select function to perform it.
104 Trailer = Trailer.ltrim(SpaceChars);
105 if (Trailer.empty())
106 return Context->makeNumExpr(LineNumber);
107 SMLoc OpLoc = SMLoc::getFromPointer(Trailer.data());
108 char Operator = popFront(Trailer);
110 // Parse right operand.
111 Trailer = Trailer.ltrim(SpaceChars);
112 if (Trailer.empty()) {
113 SM.PrintMessage(SMLoc::getFromPointer(Trailer.data()), SourceMgr::DK_Error,
114 "missing operand in numeric expression '" + Trailer + "'");
115 return nullptr;
117 uint64_t Offset;
118 if (Trailer.consumeInteger(10, Offset)) {
119 SM.PrintMessage(SMLoc::getFromPointer(Trailer.data()), SourceMgr::DK_Error,
120 "invalid offset in numeric expression '" + Trailer + "'");
121 return nullptr;
123 Trailer = Trailer.ltrim(SpaceChars);
124 if (!Trailer.empty()) {
125 SM.PrintMessage(SMLoc::getFromPointer(Trailer.data()), SourceMgr::DK_Error,
126 "unexpected characters at end of numeric expression '" +
127 Trailer + "'");
128 return nullptr;
131 uint64_t Value;
132 switch (Operator) {
133 case '+':
134 Value = LineNumber + Offset;
135 break;
136 case '-':
137 Value = LineNumber - Offset;
138 break;
139 default:
140 SM.PrintMessage(OpLoc, SourceMgr::DK_Error,
141 Twine("unsupported numeric operation '") + Twine(Operator) +
142 "'");
143 return nullptr;
145 return Context->makeNumExpr(Value);
148 /// Parses the given string into the Pattern.
150 /// \p Prefix provides which prefix is being matched, \p SM provides the
151 /// SourceMgr used for error reports, and \p LineNumber is the line number in
152 /// the input file from which the pattern string was read. Returns true in
153 /// case of an error, false otherwise.
154 bool FileCheckPattern::ParsePattern(StringRef PatternStr, StringRef Prefix,
155 SourceMgr &SM, unsigned LineNumber,
156 const FileCheckRequest &Req) {
157 bool MatchFullLinesHere = Req.MatchFullLines && CheckTy != Check::CheckNot;
159 this->LineNumber = LineNumber;
160 PatternLoc = SMLoc::getFromPointer(PatternStr.data());
162 if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines))
163 // Ignore trailing whitespace.
164 while (!PatternStr.empty() &&
165 (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
166 PatternStr = PatternStr.substr(0, PatternStr.size() - 1);
168 // Check that there is something on the line.
169 if (PatternStr.empty() && CheckTy != Check::CheckEmpty) {
170 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
171 "found empty check string with prefix '" + Prefix + ":'");
172 return true;
175 if (!PatternStr.empty() && CheckTy == Check::CheckEmpty) {
176 SM.PrintMessage(
177 PatternLoc, SourceMgr::DK_Error,
178 "found non-empty check string for empty check with prefix '" + Prefix +
179 ":'");
180 return true;
183 if (CheckTy == Check::CheckEmpty) {
184 RegExStr = "(\n$)";
185 return false;
188 // Check to see if this is a fixed string, or if it has regex pieces.
189 if (!MatchFullLinesHere &&
190 (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
191 PatternStr.find("[[") == StringRef::npos))) {
192 FixedStr = PatternStr;
193 return false;
196 if (MatchFullLinesHere) {
197 RegExStr += '^';
198 if (!Req.NoCanonicalizeWhiteSpace)
199 RegExStr += " *";
202 // Paren value #0 is for the fully matched string. Any new parenthesized
203 // values add from there.
204 unsigned CurParen = 1;
206 // Otherwise, there is at least one regex piece. Build up the regex pattern
207 // by escaping scary characters in fixed strings, building up one big regex.
208 while (!PatternStr.empty()) {
209 // RegEx matches.
210 if (PatternStr.startswith("{{")) {
211 // This is the start of a regex match. Scan for the }}.
212 size_t End = PatternStr.find("}}");
213 if (End == StringRef::npos) {
214 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
215 SourceMgr::DK_Error,
216 "found start of regex string with no end '}}'");
217 return true;
220 // Enclose {{}} patterns in parens just like [[]] even though we're not
221 // capturing the result for any purpose. This is required in case the
222 // expression contains an alternation like: CHECK: abc{{x|z}}def. We
223 // want this to turn into: "abc(x|z)def" not "abcx|zdef".
224 RegExStr += '(';
225 ++CurParen;
227 if (AddRegExToRegEx(PatternStr.substr(2, End - 2), CurParen, SM))
228 return true;
229 RegExStr += ')';
231 PatternStr = PatternStr.substr(End + 2);
232 continue;
235 // Pattern and numeric expression matches. Pattern expressions come in two
236 // forms: [[foo:.*]] and [[foo]]. The former matches .* (or some other
237 // regex) and assigns it to the FileCheck variable 'foo'. The latter
238 // substitutes foo's value. Numeric expressions start with a '#' sign after
239 // the double brackets and only have the substitution form. Pattern
240 // variables must satisfy the regular expression "[a-zA-Z_][0-9a-zA-Z_]*"
241 // to be valid, as this helps catch some common errors. Numeric expressions
242 // only support the @LINE pseudo numeric variable.
243 if (PatternStr.startswith("[[")) {
244 StringRef UnparsedPatternStr = PatternStr.substr(2);
245 // Find the closing bracket pair ending the match. End is going to be an
246 // offset relative to the beginning of the match string.
247 size_t End = FindRegexVarEnd(UnparsedPatternStr, SM);
248 StringRef MatchStr = UnparsedPatternStr.substr(0, End);
249 bool IsNumExpr = MatchStr.consume_front("#");
250 const char *RefTypeStr =
251 IsNumExpr ? "numeric expression" : "pattern variable";
253 if (End == StringRef::npos) {
254 SM.PrintMessage(
255 SMLoc::getFromPointer(PatternStr.data()), SourceMgr::DK_Error,
256 Twine("Invalid ") + RefTypeStr + " reference, no ]] found");
257 return true;
259 // Strip the subtitution we are parsing. End points to the start of the
260 // "]]" closing the expression so account for it in computing the index
261 // of the first unparsed character.
262 PatternStr = UnparsedPatternStr.substr(End + 2);
264 size_t VarEndIdx = MatchStr.find(":");
265 if (IsNumExpr)
266 MatchStr = MatchStr.ltrim(SpaceChars);
267 else {
268 size_t SpacePos = MatchStr.substr(0, VarEndIdx).find_first_of(" \t");
269 if (SpacePos != StringRef::npos) {
270 SM.PrintMessage(SMLoc::getFromPointer(MatchStr.data() + SpacePos),
271 SourceMgr::DK_Error, "unexpected whitespace");
272 return true;
276 // Get the regex name (e.g. "foo") and verify it is well formed.
277 bool IsPseudo;
278 unsigned TrailIdx;
279 if (parseVariable(MatchStr, IsPseudo, TrailIdx)) {
280 SM.PrintMessage(SMLoc::getFromPointer(MatchStr.data()),
281 SourceMgr::DK_Error, "invalid variable name");
282 return true;
285 size_t SubstInsertIdx = RegExStr.size();
286 FileCheckNumExpr *NumExpr;
288 StringRef Name = MatchStr.substr(0, TrailIdx);
289 StringRef Trailer = MatchStr.substr(TrailIdx);
290 bool IsVarDef = (VarEndIdx != StringRef::npos);
292 if (IsVarDef && (IsPseudo || !Trailer.consume_front(":"))) {
293 SM.PrintMessage(SMLoc::getFromPointer(MatchStr.data()),
294 SourceMgr::DK_Error,
295 "invalid name in pattern variable definition");
296 return true;
299 if (!IsVarDef && IsPseudo) {
300 NumExpr = parseNumericExpression(Name, Trailer, SM);
301 if (NumExpr == nullptr)
302 return true;
303 IsNumExpr = true;
306 // Handle [[foo]].
307 if (!IsVarDef) {
308 // Handle use of pattern variables that were defined earlier on the
309 // same line by emitting a backreference.
310 if (!IsNumExpr && VariableDefs.find(Name) != VariableDefs.end()) {
311 unsigned CaptureParen = VariableDefs[Name];
312 if (CaptureParen < 1 || CaptureParen > 9) {
313 SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
314 SourceMgr::DK_Error,
315 "Can't back-reference more than 9 variables");
316 return true;
318 AddBackrefToRegEx(CaptureParen);
319 } else {
320 // Handle use of pattern variables ([[<var>]]) defined in previous
321 // CHECK pattern or use of a numeric expression.
322 FileCheckPatternSubstitution Substitution =
323 IsNumExpr ? FileCheckPatternSubstitution(Context, MatchStr,
324 NumExpr, SubstInsertIdx)
325 : FileCheckPatternSubstitution(Context, MatchStr,
326 SubstInsertIdx);
327 Substitutions.push_back(Substitution);
329 continue;
332 // Handle [[foo:.*]].
333 VariableDefs[Name] = CurParen;
334 RegExStr += '(';
335 ++CurParen;
337 if (AddRegExToRegEx(Trailer, CurParen, SM))
338 return true;
340 RegExStr += ')';
343 // Handle fixed string matches.
344 // Find the end, which is the start of the next regex.
345 size_t FixedMatchEnd = PatternStr.find("{{");
346 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
347 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
348 PatternStr = PatternStr.substr(FixedMatchEnd);
351 if (MatchFullLinesHere) {
352 if (!Req.NoCanonicalizeWhiteSpace)
353 RegExStr += " *";
354 RegExStr += '$';
357 return false;
360 bool FileCheckPattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) {
361 Regex R(RS);
362 std::string Error;
363 if (!R.isValid(Error)) {
364 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
365 "invalid regex: " + Error);
366 return true;
369 RegExStr += RS.str();
370 CurParen += R.getNumMatches();
371 return false;
374 void FileCheckPattern::AddBackrefToRegEx(unsigned BackrefNum) {
375 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
376 std::string Backref = std::string("\\") + std::string(1, '0' + BackrefNum);
377 RegExStr += Backref;
380 /// Matches the pattern string against the input buffer \p Buffer
382 /// This returns the position that is matched or npos if there is no match. If
383 /// there is a match, the size of the matched string is returned in \p
384 /// MatchLen.
386 /// The GlobalVariableTable StringMap in the FileCheckPatternContext class
387 /// instance provides the current values of FileCheck pattern variables and is
388 /// updated if this match defines new values.
389 size_t FileCheckPattern::match(StringRef Buffer, size_t &MatchLen) const {
390 // If this is the EOF pattern, match it immediately.
391 if (CheckTy == Check::CheckEOF) {
392 MatchLen = 0;
393 return Buffer.size();
396 // If this is a fixed string pattern, just match it now.
397 if (!FixedStr.empty()) {
398 MatchLen = FixedStr.size();
399 return Buffer.find(FixedStr);
402 // Regex match.
404 // If there are variable uses, we need to create a temporary string with the
405 // actual value.
406 StringRef RegExToMatch = RegExStr;
407 std::string TmpStr;
408 if (!Substitutions.empty()) {
409 TmpStr = RegExStr;
411 size_t InsertOffset = 0;
412 // Substitute all pattern variables and numeric expressions whose value is
413 // known just now. Use of pattern variables defined on the same line are
414 // handled by back-references.
415 for (const auto &Substitution : Substitutions) {
416 // Substitute and check for failure (e.g. use of undefined variable).
417 llvm::Optional<std::string> Value = Substitution.getResult();
418 if (!Value)
419 return StringRef::npos;
421 // Plop it into the regex at the adjusted offset.
422 TmpStr.insert(TmpStr.begin() + Substitution.getIndex() + InsertOffset,
423 Value->begin(), Value->end());
424 InsertOffset += Value->size();
427 // Match the newly constructed regex.
428 RegExToMatch = TmpStr;
431 SmallVector<StringRef, 4> MatchInfo;
432 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
433 return StringRef::npos;
435 // Successful regex match.
436 assert(!MatchInfo.empty() && "Didn't get any match");
437 StringRef FullMatch = MatchInfo[0];
439 // If this defines any pattern variables, remember their values.
440 for (const auto &VariableDef : VariableDefs) {
441 assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
442 Context->GlobalVariableTable[VariableDef.first] =
443 MatchInfo[VariableDef.second];
446 // Like CHECK-NEXT, CHECK-EMPTY's match range is considered to start after
447 // the required preceding newline, which is consumed by the pattern in the
448 // case of CHECK-EMPTY but not CHECK-NEXT.
449 size_t MatchStartSkip = CheckTy == Check::CheckEmpty;
450 MatchLen = FullMatch.size() - MatchStartSkip;
451 return FullMatch.data() - Buffer.data() + MatchStartSkip;
454 /// Computes an arbitrary estimate for the quality of matching this pattern at
455 /// the start of \p Buffer; a distance of zero should correspond to a perfect
456 /// match.
457 unsigned FileCheckPattern::computeMatchDistance(StringRef Buffer) const {
458 // Just compute the number of matching characters. For regular expressions, we
459 // just compare against the regex itself and hope for the best.
461 // FIXME: One easy improvement here is have the regex lib generate a single
462 // example regular expression which matches, and use that as the example
463 // string.
464 StringRef ExampleString(FixedStr);
465 if (ExampleString.empty())
466 ExampleString = RegExStr;
468 // Only compare up to the first line in the buffer, or the string size.
469 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
470 BufferPrefix = BufferPrefix.split('\n').first;
471 return BufferPrefix.edit_distance(ExampleString);
474 void FileCheckPattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer,
475 SMRange MatchRange) const {
476 // Print what we know about substitutions. This covers both uses of pattern
477 // variables and numeric subsitutions.
478 if (!Substitutions.empty()) {
479 for (const auto &Substitution : Substitutions) {
480 SmallString<256> Msg;
481 raw_svector_ostream OS(Msg);
482 bool IsNumExpr = Substitution.isNumExpr();
483 llvm::Optional<std::string> MatchedValue = Substitution.getResult();
485 // Substitution failed or is not known at match time, print the undefined
486 // variable it uses.
487 if (!MatchedValue) {
488 StringRef UndefVarName = Substitution.getUndefVarName();
489 if (UndefVarName.empty())
490 continue;
491 OS << "uses undefined variable \"";
492 OS.write_escaped(UndefVarName) << "\"";
493 } else {
494 // Substitution succeeded. Print substituted value.
495 if (IsNumExpr)
496 OS << "with numeric expression \"";
497 else
498 OS << "with variable \"";
499 OS.write_escaped(Substitution.getFromString()) << "\" equal to \"";
500 OS.write_escaped(*MatchedValue) << "\"";
503 if (MatchRange.isValid())
504 SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, OS.str(),
505 {MatchRange});
506 else
507 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
508 SourceMgr::DK_Note, OS.str());
513 static SMRange ProcessMatchResult(FileCheckDiag::MatchType MatchTy,
514 const SourceMgr &SM, SMLoc Loc,
515 Check::FileCheckType CheckTy,
516 StringRef Buffer, size_t Pos, size_t Len,
517 std::vector<FileCheckDiag> *Diags,
518 bool AdjustPrevDiag = false) {
519 SMLoc Start = SMLoc::getFromPointer(Buffer.data() + Pos);
520 SMLoc End = SMLoc::getFromPointer(Buffer.data() + Pos + Len);
521 SMRange Range(Start, End);
522 if (Diags) {
523 if (AdjustPrevDiag)
524 Diags->rbegin()->MatchTy = MatchTy;
525 else
526 Diags->emplace_back(SM, CheckTy, Loc, MatchTy, Range);
528 return Range;
531 void FileCheckPattern::printFuzzyMatch(
532 const SourceMgr &SM, StringRef Buffer,
533 std::vector<FileCheckDiag> *Diags) const {
534 // Attempt to find the closest/best fuzzy match. Usually an error happens
535 // because some string in the output didn't exactly match. In these cases, we
536 // would like to show the user a best guess at what "should have" matched, to
537 // save them having to actually check the input manually.
538 size_t NumLinesForward = 0;
539 size_t Best = StringRef::npos;
540 double BestQuality = 0;
542 // Use an arbitrary 4k limit on how far we will search.
543 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
544 if (Buffer[i] == '\n')
545 ++NumLinesForward;
547 // Patterns have leading whitespace stripped, so skip whitespace when
548 // looking for something which looks like a pattern.
549 if (Buffer[i] == ' ' || Buffer[i] == '\t')
550 continue;
552 // Compute the "quality" of this match as an arbitrary combination of the
553 // match distance and the number of lines skipped to get to this match.
554 unsigned Distance = computeMatchDistance(Buffer.substr(i));
555 double Quality = Distance + (NumLinesForward / 100.);
557 if (Quality < BestQuality || Best == StringRef::npos) {
558 Best = i;
559 BestQuality = Quality;
563 // Print the "possible intended match here" line if we found something
564 // reasonable and not equal to what we showed in the "scanning from here"
565 // line.
566 if (Best && Best != StringRef::npos && BestQuality < 50) {
567 SMRange MatchRange =
568 ProcessMatchResult(FileCheckDiag::MatchFuzzy, SM, getLoc(),
569 getCheckTy(), Buffer, Best, 0, Diags);
570 SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note,
571 "possible intended match here");
573 // FIXME: If we wanted to be really friendly we would show why the match
574 // failed, as it can be hard to spot simple one character differences.
578 llvm::Optional<StringRef>
579 FileCheckPatternContext::getPatternVarValue(StringRef VarName) {
580 auto VarIter = GlobalVariableTable.find(VarName);
581 if (VarIter == GlobalVariableTable.end())
582 return llvm::None;
584 return VarIter->second;
587 template <class... Types>
588 FileCheckNumExpr *FileCheckPatternContext::makeNumExpr(Types... Args) {
589 NumExprs.emplace_back(new FileCheckNumExpr(Args...));
590 return NumExprs.back().get();
593 /// Finds the closing sequence of a regex variable usage or definition.
595 /// \p Str has to point in the beginning of the definition (right after the
596 /// opening sequence). Returns the offset of the closing sequence within Str,
597 /// or npos if it was not found.
598 size_t FileCheckPattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
599 // Offset keeps track of the current offset within the input Str
600 size_t Offset = 0;
601 // [...] Nesting depth
602 size_t BracketDepth = 0;
604 while (!Str.empty()) {
605 if (Str.startswith("]]") && BracketDepth == 0)
606 return Offset;
607 if (Str[0] == '\\') {
608 // Backslash escapes the next char within regexes, so skip them both.
609 Str = Str.substr(2);
610 Offset += 2;
611 } else {
612 switch (Str[0]) {
613 default:
614 break;
615 case '[':
616 BracketDepth++;
617 break;
618 case ']':
619 if (BracketDepth == 0) {
620 SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
621 SourceMgr::DK_Error,
622 "missing closing \"]\" for regex variable");
623 exit(1);
625 BracketDepth--;
626 break;
628 Str = Str.substr(1);
629 Offset++;
633 return StringRef::npos;
636 /// Canonicalize whitespaces in the file. Line endings are replaced with
637 /// UNIX-style '\n'.
638 StringRef
639 llvm::FileCheck::CanonicalizeFile(MemoryBuffer &MB,
640 SmallVectorImpl<char> &OutputBuffer) {
641 OutputBuffer.reserve(MB.getBufferSize());
643 for (const char *Ptr = MB.getBufferStart(), *End = MB.getBufferEnd();
644 Ptr != End; ++Ptr) {
645 // Eliminate trailing dosish \r.
646 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
647 continue;
650 // If current char is not a horizontal whitespace or if horizontal
651 // whitespace canonicalization is disabled, dump it to output as is.
652 if (Req.NoCanonicalizeWhiteSpace || (*Ptr != ' ' && *Ptr != '\t')) {
653 OutputBuffer.push_back(*Ptr);
654 continue;
657 // Otherwise, add one space and advance over neighboring space.
658 OutputBuffer.push_back(' ');
659 while (Ptr + 1 != End && (Ptr[1] == ' ' || Ptr[1] == '\t'))
660 ++Ptr;
663 // Add a null byte and then return all but that byte.
664 OutputBuffer.push_back('\0');
665 return StringRef(OutputBuffer.data(), OutputBuffer.size() - 1);
668 FileCheckDiag::FileCheckDiag(const SourceMgr &SM,
669 const Check::FileCheckType &CheckTy,
670 SMLoc CheckLoc, MatchType MatchTy,
671 SMRange InputRange)
672 : CheckTy(CheckTy), MatchTy(MatchTy) {
673 auto Start = SM.getLineAndColumn(InputRange.Start);
674 auto End = SM.getLineAndColumn(InputRange.End);
675 InputStartLine = Start.first;
676 InputStartCol = Start.second;
677 InputEndLine = End.first;
678 InputEndCol = End.second;
679 Start = SM.getLineAndColumn(CheckLoc);
680 CheckLine = Start.first;
681 CheckCol = Start.second;
/// Returns true if \p c can be part of a check-prefix-like word, i.e. it is an
/// alphanumeric character, '-' or '_'.
static bool IsPartOfWord(char c) {
  // The <cctype> classifiers have undefined behavior for negative arguments
  // other than EOF, so go through unsigned char before calling isalnum.
  return isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_';
}
688 Check::FileCheckType &Check::FileCheckType::setCount(int C) {
689 assert(Count > 0 && "zero and negative counts are not supported");
690 assert((C == 1 || Kind == CheckPlain) &&
691 "count supported only for plain CHECK directives");
692 Count = C;
693 return *this;
696 // Get a description of the type.
697 std::string Check::FileCheckType::getDescription(StringRef Prefix) const {
698 switch (Kind) {
699 case Check::CheckNone:
700 return "invalid";
701 case Check::CheckPlain:
702 if (Count > 1)
703 return Prefix.str() + "-COUNT";
704 return Prefix;
705 case Check::CheckNext:
706 return Prefix.str() + "-NEXT";
707 case Check::CheckSame:
708 return Prefix.str() + "-SAME";
709 case Check::CheckNot:
710 return Prefix.str() + "-NOT";
711 case Check::CheckDAG:
712 return Prefix.str() + "-DAG";
713 case Check::CheckLabel:
714 return Prefix.str() + "-LABEL";
715 case Check::CheckEmpty:
716 return Prefix.str() + "-EMPTY";
717 case Check::CheckEOF:
718 return "implicit EOF";
719 case Check::CheckBadNot:
720 return "bad NOT";
721 case Check::CheckBadCount:
722 return "bad COUNT";
724 llvm_unreachable("unknown FileCheckType");
727 static std::pair<Check::FileCheckType, StringRef>
728 FindCheckType(StringRef Buffer, StringRef Prefix) {
729 if (Buffer.size() <= Prefix.size())
730 return {Check::CheckNone, StringRef()};
732 char NextChar = Buffer[Prefix.size()];
734 StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
735 // Verify that the : is present after the prefix.
736 if (NextChar == ':')
737 return {Check::CheckPlain, Rest};
739 if (NextChar != '-')
740 return {Check::CheckNone, StringRef()};
742 if (Rest.consume_front("COUNT-")) {
743 int64_t Count;
744 if (Rest.consumeInteger(10, Count))
745 // Error happened in parsing integer.
746 return {Check::CheckBadCount, Rest};
747 if (Count <= 0 || Count > INT32_MAX)
748 return {Check::CheckBadCount, Rest};
749 if (!Rest.consume_front(":"))
750 return {Check::CheckBadCount, Rest};
751 return {Check::FileCheckType(Check::CheckPlain).setCount(Count), Rest};
754 if (Rest.consume_front("NEXT:"))
755 return {Check::CheckNext, Rest};
757 if (Rest.consume_front("SAME:"))
758 return {Check::CheckSame, Rest};
760 if (Rest.consume_front("NOT:"))
761 return {Check::CheckNot, Rest};
763 if (Rest.consume_front("DAG:"))
764 return {Check::CheckDAG, Rest};
766 if (Rest.consume_front("LABEL:"))
767 return {Check::CheckLabel, Rest};
769 if (Rest.consume_front("EMPTY:"))
770 return {Check::CheckEmpty, Rest};
772 // You can't combine -NOT with another suffix.
773 if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
774 Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
775 Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:") ||
776 Rest.startswith("EMPTY-NOT:") || Rest.startswith("NOT-EMPTY:"))
777 return {Check::CheckBadNot, Rest};
779 return {Check::CheckNone, Rest};
782 // From the given position, find the next character after the word.
783 static size_t SkipWord(StringRef Str, size_t Loc) {
784 while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
785 ++Loc;
786 return Loc;
789 /// Search the buffer for the first prefix in the prefix regular expression.
791 /// This searches the buffer using the provided regular expression, however it
792 /// enforces constraints beyond that:
793 /// 1) The found prefix must not be a suffix of something that looks like
794 /// a valid prefix.
795 /// 2) The found prefix must be followed by a valid check type suffix using \c
796 /// FindCheckType above.
798 /// Returns a pair of StringRefs into the Buffer, which combines:
799 /// - the first match of the regular expression to satisfy these two is
800 /// returned,
801 /// otherwise an empty StringRef is returned to indicate failure.
802 /// - buffer rewound to the location right after parsed suffix, for parsing
803 /// to continue from
805 /// If this routine returns a valid prefix, it will also shrink \p Buffer to
806 /// start at the beginning of the returned prefix, increment \p LineNumber for
807 /// each new line consumed from \p Buffer, and set \p CheckTy to the type of
808 /// check found by examining the suffix.
810 /// If no valid prefix is found, the state of Buffer, LineNumber, and CheckTy
811 /// is unspecified.
812 static std::pair<StringRef, StringRef>
813 FindFirstMatchingPrefix(Regex &PrefixRE, StringRef &Buffer,
814 unsigned &LineNumber, Check::FileCheckType &CheckTy) {
815 SmallVector<StringRef, 2> Matches;
817 while (!Buffer.empty()) {
818 // Find the first (longest) match using the RE.
819 if (!PrefixRE.match(Buffer, &Matches))
820 // No match at all, bail.
821 return {StringRef(), StringRef()};
823 StringRef Prefix = Matches[0];
824 Matches.clear();
826 assert(Prefix.data() >= Buffer.data() &&
827 Prefix.data() < Buffer.data() + Buffer.size() &&
828 "Prefix doesn't start inside of buffer!");
829 size_t Loc = Prefix.data() - Buffer.data();
830 StringRef Skipped = Buffer.substr(0, Loc);
831 Buffer = Buffer.drop_front(Loc);
832 LineNumber += Skipped.count('\n');
834 // Check that the matched prefix isn't a suffix of some other check-like
835 // word.
836 // FIXME: This is a very ad-hoc check. it would be better handled in some
837 // other way. Among other things it seems hard to distinguish between
838 // intentional and unintentional uses of this feature.
839 if (Skipped.empty() || !IsPartOfWord(Skipped.back())) {
840 // Now extract the type.
841 StringRef AfterSuffix;
842 std::tie(CheckTy, AfterSuffix) = FindCheckType(Buffer, Prefix);
844 // If we've found a valid check type for this prefix, we're done.
845 if (CheckTy != Check::CheckNone)
846 return {Prefix, AfterSuffix};
849 // If we didn't successfully find a prefix, we need to skip this invalid
850 // prefix and continue scanning. We directly skip the prefix that was
851 // matched and any additional parts of that check-like word.
852 Buffer = Buffer.drop_front(SkipWord(Buffer, Prefix.size()));
855 // We ran out of buffer while skipping partial matches so give up.
856 return {StringRef(), StringRef()};
859 /// Read the check file, which specifies the sequence of expected strings.
861 /// The strings are added to the CheckStrings vector. Returns true in case of
862 /// an error, false otherwise.
863 bool llvm::FileCheck::ReadCheckFile(
864 SourceMgr &SM, StringRef Buffer, Regex &PrefixRE,
865 std::vector<FileCheckString> &CheckStrings) {
866 if (PatternContext.defineCmdlineVariables(Req.GlobalDefines, SM))
867 return true;
869 std::vector<FileCheckPattern> ImplicitNegativeChecks;
870 for (const auto &PatternString : Req.ImplicitCheckNot) {
871 // Create a buffer with fake command line content in order to display the
872 // command line option responsible for the specific implicit CHECK-NOT.
873 std::string Prefix = "-implicit-check-not='";
874 std::string Suffix = "'";
875 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
876 Prefix + PatternString + Suffix, "command line");
878 StringRef PatternInBuffer =
879 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
880 SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
882 ImplicitNegativeChecks.push_back(
883 FileCheckPattern(Check::CheckNot, &PatternContext));
884 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
885 "IMPLICIT-CHECK", SM, 0, Req);
888 std::vector<FileCheckPattern> DagNotMatches = ImplicitNegativeChecks;
890 // LineNumber keeps track of the line on which CheckPrefix instances are
891 // found.
892 unsigned LineNumber = 1;
894 while (1) {
895 Check::FileCheckType CheckTy;
897 // See if a prefix occurs in the memory buffer.
898 StringRef UsedPrefix;
899 StringRef AfterSuffix;
900 std::tie(UsedPrefix, AfterSuffix) =
901 FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber, CheckTy);
902 if (UsedPrefix.empty())
903 break;
904 assert(UsedPrefix.data() == Buffer.data() &&
905 "Failed to move Buffer's start forward, or pointed prefix outside "
906 "of the buffer!");
907 assert(AfterSuffix.data() >= Buffer.data() &&
908 AfterSuffix.data() < Buffer.data() + Buffer.size() &&
909 "Parsing after suffix doesn't start inside of buffer!");
911 // Location to use for error messages.
912 const char *UsedPrefixStart = UsedPrefix.data();
914 // Skip the buffer to the end of parsed suffix (or just prefix, if no good
915 // suffix was processed).
916 Buffer = AfterSuffix.empty() ? Buffer.drop_front(UsedPrefix.size())
917 : AfterSuffix;
919 // Complain about useful-looking but unsupported suffixes.
920 if (CheckTy == Check::CheckBadNot) {
921 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
922 "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
923 return true;
926 // Complain about invalid count specification.
927 if (CheckTy == Check::CheckBadCount) {
928 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Error,
929 "invalid count in -COUNT specification on prefix '" +
930 UsedPrefix + "'");
931 return true;
934 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
935 // leading whitespace.
936 if (!(Req.NoCanonicalizeWhiteSpace && Req.MatchFullLines))
937 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
939 // Scan ahead to the end of line.
940 size_t EOL = Buffer.find_first_of("\n\r");
942 // Remember the location of the start of the pattern, for diagnostics.
943 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
945 // Parse the pattern.
946 FileCheckPattern P(CheckTy, &PatternContext);
947 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber, Req))
948 return true;
950 // Verify that CHECK-LABEL lines do not define or use variables
951 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
952 SM.PrintMessage(
953 SMLoc::getFromPointer(UsedPrefixStart), SourceMgr::DK_Error,
954 "found '" + UsedPrefix + "-LABEL:'"
955 " with variable definition or use");
956 return true;
959 Buffer = Buffer.substr(EOL);
961 // Verify that CHECK-NEXT/SAME/EMPTY lines have at least one CHECK line before them.
962 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame ||
963 CheckTy == Check::CheckEmpty) &&
964 CheckStrings.empty()) {
965 StringRef Type = CheckTy == Check::CheckNext
966 ? "NEXT"
967 : CheckTy == Check::CheckEmpty ? "EMPTY" : "SAME";
968 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
969 SourceMgr::DK_Error,
970 "found '" + UsedPrefix + "-" + Type +
971 "' without previous '" + UsedPrefix + ": line");
972 return true;
975 // Handle CHECK-DAG/-NOT.
976 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
977 DagNotMatches.push_back(P);
978 continue;
981 // Okay, add the string we captured to the output vector and move on.
982 CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
983 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
984 DagNotMatches = ImplicitNegativeChecks;
987 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
988 // prefix as a filler for the error message.
989 if (!DagNotMatches.empty()) {
990 CheckStrings.emplace_back(
991 FileCheckPattern(Check::CheckEOF, &PatternContext),
992 *Req.CheckPrefixes.begin(), SMLoc::getFromPointer(Buffer.data()));
993 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
996 if (CheckStrings.empty()) {
997 errs() << "error: no check strings found with prefix"
998 << (Req.CheckPrefixes.size() > 1 ? "es " : " ");
999 auto I = Req.CheckPrefixes.begin();
1000 auto E = Req.CheckPrefixes.end();
1001 if (I != E) {
1002 errs() << "\'" << *I << ":'";
1003 ++I;
1005 for (; I != E; ++I)
1006 errs() << ", \'" << *I << ":'";
1008 errs() << '\n';
1009 return true;
1012 return false;
1015 static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
1016 StringRef Prefix, SMLoc Loc, const FileCheckPattern &Pat,
1017 int MatchedCount, StringRef Buffer, size_t MatchPos,
1018 size_t MatchLen, const FileCheckRequest &Req,
1019 std::vector<FileCheckDiag> *Diags) {
1020 bool PrintDiag = true;
1021 if (ExpectedMatch) {
1022 if (!Req.Verbose)
1023 return;
1024 if (!Req.VerboseVerbose && Pat.getCheckTy() == Check::CheckEOF)
1025 return;
1026 // Due to their verbosity, we don't print verbose diagnostics here if we're
1027 // gathering them for a different rendering, but we always print other
1028 // diagnostics.
1029 PrintDiag = !Diags;
1031 SMRange MatchRange = ProcessMatchResult(
1032 ExpectedMatch ? FileCheckDiag::MatchFoundAndExpected
1033 : FileCheckDiag::MatchFoundButExcluded,
1034 SM, Loc, Pat.getCheckTy(), Buffer, MatchPos, MatchLen, Diags);
1035 if (!PrintDiag)
1036 return;
1038 std::string Message = formatv("{0}: {1} string found in input",
1039 Pat.getCheckTy().getDescription(Prefix),
1040 (ExpectedMatch ? "expected" : "excluded"))
1041 .str();
1042 if (Pat.getCount() > 1)
1043 Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str();
1045 SM.PrintMessage(
1046 Loc, ExpectedMatch ? SourceMgr::DK_Remark : SourceMgr::DK_Error, Message);
1047 SM.PrintMessage(MatchRange.Start, SourceMgr::DK_Note, "found here",
1048 {MatchRange});
1049 Pat.printSubstitutions(SM, Buffer, MatchRange);
1052 static void PrintMatch(bool ExpectedMatch, const SourceMgr &SM,
1053 const FileCheckString &CheckStr, int MatchedCount,
1054 StringRef Buffer, size_t MatchPos, size_t MatchLen,
1055 FileCheckRequest &Req,
1056 std::vector<FileCheckDiag> *Diags) {
1057 PrintMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
1058 MatchedCount, Buffer, MatchPos, MatchLen, Req, Diags);
1061 static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
1062 StringRef Prefix, SMLoc Loc,
1063 const FileCheckPattern &Pat, int MatchedCount,
1064 StringRef Buffer, bool VerboseVerbose,
1065 std::vector<FileCheckDiag> *Diags) {
1066 bool PrintDiag = true;
1067 if (!ExpectedMatch) {
1068 if (!VerboseVerbose)
1069 return;
1070 // Due to their verbosity, we don't print verbose diagnostics here if we're
1071 // gathering them for a different rendering, but we always print other
1072 // diagnostics.
1073 PrintDiag = !Diags;
1076 // If the current position is at the end of a line, advance to the start of
1077 // the next line.
1078 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
1079 SMRange SearchRange = ProcessMatchResult(
1080 ExpectedMatch ? FileCheckDiag::MatchNoneButExpected
1081 : FileCheckDiag::MatchNoneAndExcluded,
1082 SM, Loc, Pat.getCheckTy(), Buffer, 0, Buffer.size(), Diags);
1083 if (!PrintDiag)
1084 return;
1086 // Print "not found" diagnostic.
1087 std::string Message = formatv("{0}: {1} string not found in input",
1088 Pat.getCheckTy().getDescription(Prefix),
1089 (ExpectedMatch ? "expected" : "excluded"))
1090 .str();
1091 if (Pat.getCount() > 1)
1092 Message += formatv(" ({0} out of {1})", MatchedCount, Pat.getCount()).str();
1093 SM.PrintMessage(
1094 Loc, ExpectedMatch ? SourceMgr::DK_Error : SourceMgr::DK_Remark, Message);
1096 // Print the "scanning from here" line.
1097 SM.PrintMessage(SearchRange.Start, SourceMgr::DK_Note, "scanning from here");
1099 // Allow the pattern to print additional information if desired.
1100 Pat.printSubstitutions(SM, Buffer);
1102 if (ExpectedMatch)
1103 Pat.printFuzzyMatch(SM, Buffer, Diags);
1106 static void PrintNoMatch(bool ExpectedMatch, const SourceMgr &SM,
1107 const FileCheckString &CheckStr, int MatchedCount,
1108 StringRef Buffer, bool VerboseVerbose,
1109 std::vector<FileCheckDiag> *Diags) {
1110 PrintNoMatch(ExpectedMatch, SM, CheckStr.Prefix, CheckStr.Loc, CheckStr.Pat,
1111 MatchedCount, Buffer, VerboseVerbose, Diags);
1114 /// Count the number of newlines in the specified range.
1115 static unsigned CountNumNewlinesBetween(StringRef Range,
1116 const char *&FirstNewLine) {
1117 unsigned NumNewLines = 0;
1118 while (1) {
1119 // Scan for newline.
1120 Range = Range.substr(Range.find_first_of("\n\r"));
1121 if (Range.empty())
1122 return NumNewLines;
1124 ++NumNewLines;
1126 // Handle \n\r and \r\n as a single newline.
1127 if (Range.size() > 1 && (Range[1] == '\n' || Range[1] == '\r') &&
1128 (Range[0] != Range[1]))
1129 Range = Range.substr(1);
1130 Range = Range.substr(1);
1132 if (NumNewLines == 1)
1133 FirstNewLine = Range.begin();
1137 /// Match check string and its "not strings" and/or "dag strings".
1138 size_t FileCheckString::Check(const SourceMgr &SM, StringRef Buffer,
1139 bool IsLabelScanMode, size_t &MatchLen,
1140 FileCheckRequest &Req,
1141 std::vector<FileCheckDiag> *Diags) const {
1142 size_t LastPos = 0;
1143 std::vector<const FileCheckPattern *> NotStrings;
1145 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1146 // bounds; we have not processed variable definitions within the bounded block
1147 // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1148 // over the block again (including the last CHECK-LABEL) in normal mode.
1149 if (!IsLabelScanMode) {
1150 // Match "dag strings" (with mixed "not strings" if any).
1151 LastPos = CheckDag(SM, Buffer, NotStrings, Req, Diags);
1152 if (LastPos == StringRef::npos)
1153 return StringRef::npos;
1156 // Match itself from the last position after matching CHECK-DAG.
1157 size_t LastMatchEnd = LastPos;
1158 size_t FirstMatchPos = 0;
1159 // Go match the pattern Count times. Majority of patterns only match with
1160 // count 1 though.
1161 assert(Pat.getCount() != 0 && "pattern count can not be zero");
1162 for (int i = 1; i <= Pat.getCount(); i++) {
1163 StringRef MatchBuffer = Buffer.substr(LastMatchEnd);
1164 size_t CurrentMatchLen;
1165 // get a match at current start point
1166 size_t MatchPos = Pat.match(MatchBuffer, CurrentMatchLen);
1167 if (i == 1)
1168 FirstMatchPos = LastPos + MatchPos;
1170 // report
1171 if (MatchPos == StringRef::npos) {
1172 PrintNoMatch(true, SM, *this, i, MatchBuffer, Req.VerboseVerbose, Diags);
1173 return StringRef::npos;
1175 PrintMatch(true, SM, *this, i, MatchBuffer, MatchPos, CurrentMatchLen, Req,
1176 Diags);
1178 // move start point after the match
1179 LastMatchEnd += MatchPos + CurrentMatchLen;
1181 // Full match len counts from first match pos.
1182 MatchLen = LastMatchEnd - FirstMatchPos;
1184 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1185 // or CHECK-NOT
1186 if (!IsLabelScanMode) {
1187 size_t MatchPos = FirstMatchPos - LastPos;
1188 StringRef MatchBuffer = Buffer.substr(LastPos);
1189 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1191 // If this check is a "CHECK-NEXT", verify that the previous match was on
1192 // the previous line (i.e. that there is one newline between them).
1193 if (CheckNext(SM, SkippedRegion)) {
1194 ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc,
1195 Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen,
1196 Diags, Req.Verbose);
1197 return StringRef::npos;
1200 // If this check is a "CHECK-SAME", verify that the previous match was on
1201 // the same line (i.e. that there is no newline between them).
1202 if (CheckSame(SM, SkippedRegion)) {
1203 ProcessMatchResult(FileCheckDiag::MatchFoundButWrongLine, SM, Loc,
1204 Pat.getCheckTy(), MatchBuffer, MatchPos, MatchLen,
1205 Diags, Req.Verbose);
1206 return StringRef::npos;
1209 // If this match had "not strings", verify that they don't exist in the
1210 // skipped region.
1211 if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags))
1212 return StringRef::npos;
1215 return FirstMatchPos;
1218 /// Verify there is a single line in the given buffer.
1219 bool FileCheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1220 if (Pat.getCheckTy() != Check::CheckNext &&
1221 Pat.getCheckTy() != Check::CheckEmpty)
1222 return false;
1224 Twine CheckName =
1225 Prefix +
1226 Twine(Pat.getCheckTy() == Check::CheckEmpty ? "-EMPTY" : "-NEXT");
1228 // Count the number of newlines between the previous match and this one.
1229 const char *FirstNewLine = nullptr;
1230 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1232 if (NumNewLines == 0) {
1233 SM.PrintMessage(Loc, SourceMgr::DK_Error,
1234 CheckName + ": is on the same line as previous match");
1235 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1236 "'next' match was here");
1237 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1238 "previous match ended here");
1239 return true;
1242 if (NumNewLines != 1) {
1243 SM.PrintMessage(Loc, SourceMgr::DK_Error,
1244 CheckName +
1245 ": is not on the line after the previous match");
1246 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1247 "'next' match was here");
1248 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1249 "previous match ended here");
1250 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1251 "non-matching line after previous match is here");
1252 return true;
1255 return false;
1258 /// Verify there is no newline in the given buffer.
1259 bool FileCheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1260 if (Pat.getCheckTy() != Check::CheckSame)
1261 return false;
1263 // Count the number of newlines between the previous match and this one.
1264 const char *FirstNewLine = nullptr;
1265 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1267 if (NumNewLines != 0) {
1268 SM.PrintMessage(Loc, SourceMgr::DK_Error,
1269 Prefix +
1270 "-SAME: is not on the same line as the previous match");
1271 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1272 "'next' match was here");
1273 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1274 "previous match ended here");
1275 return true;
1278 return false;
1281 /// Verify there's no "not strings" in the given buffer.
1282 bool FileCheckString::CheckNot(
1283 const SourceMgr &SM, StringRef Buffer,
1284 const std::vector<const FileCheckPattern *> &NotStrings,
1285 const FileCheckRequest &Req, std::vector<FileCheckDiag> *Diags) const {
1286 for (const FileCheckPattern *Pat : NotStrings) {
1287 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1289 size_t MatchLen = 0;
1290 size_t Pos = Pat->match(Buffer, MatchLen);
1292 if (Pos == StringRef::npos) {
1293 PrintNoMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer,
1294 Req.VerboseVerbose, Diags);
1295 continue;
1298 PrintMatch(false, SM, Prefix, Pat->getLoc(), *Pat, 1, Buffer, Pos, MatchLen,
1299 Req, Diags);
1301 return true;
1304 return false;
1307 /// Match "dag strings" and their mixed "not strings".
1308 size_t
1309 FileCheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1310 std::vector<const FileCheckPattern *> &NotStrings,
1311 const FileCheckRequest &Req,
1312 std::vector<FileCheckDiag> *Diags) const {
1313 if (DagNotStrings.empty())
1314 return 0;
1316 // The start of the search range.
1317 size_t StartPos = 0;
1319 struct MatchRange {
1320 size_t Pos;
1321 size_t End;
1323 // A sorted list of ranges for non-overlapping CHECK-DAG matches. Match
1324 // ranges are erased from this list once they are no longer in the search
1325 // range.
1326 std::list<MatchRange> MatchRanges;
1328 // We need PatItr and PatEnd later for detecting the end of a CHECK-DAG
1329 // group, so we don't use a range-based for loop here.
1330 for (auto PatItr = DagNotStrings.begin(), PatEnd = DagNotStrings.end();
1331 PatItr != PatEnd; ++PatItr) {
1332 const FileCheckPattern &Pat = *PatItr;
1333 assert((Pat.getCheckTy() == Check::CheckDAG ||
1334 Pat.getCheckTy() == Check::CheckNot) &&
1335 "Invalid CHECK-DAG or CHECK-NOT!");
1337 if (Pat.getCheckTy() == Check::CheckNot) {
1338 NotStrings.push_back(&Pat);
1339 continue;
1342 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1344 // CHECK-DAG always matches from the start.
1345 size_t MatchLen = 0, MatchPos = StartPos;
1347 // Search for a match that doesn't overlap a previous match in this
1348 // CHECK-DAG group.
1349 for (auto MI = MatchRanges.begin(), ME = MatchRanges.end(); true; ++MI) {
1350 StringRef MatchBuffer = Buffer.substr(MatchPos);
1351 size_t MatchPosBuf = Pat.match(MatchBuffer, MatchLen);
1352 // With a group of CHECK-DAGs, a single mismatching means the match on
1353 // that group of CHECK-DAGs fails immediately.
1354 if (MatchPosBuf == StringRef::npos) {
1355 PrintNoMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, MatchBuffer,
1356 Req.VerboseVerbose, Diags);
1357 return StringRef::npos;
1359 // Re-calc it as the offset relative to the start of the original string.
1360 MatchPos += MatchPosBuf;
1361 if (Req.VerboseVerbose)
1362 PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, MatchPos,
1363 MatchLen, Req, Diags);
1364 MatchRange M{MatchPos, MatchPos + MatchLen};
1365 if (Req.AllowDeprecatedDagOverlap) {
1366 // We don't need to track all matches in this mode, so we just maintain
1367 // one match range that encompasses the current CHECK-DAG group's
1368 // matches.
1369 if (MatchRanges.empty())
1370 MatchRanges.insert(MatchRanges.end(), M);
1371 else {
1372 auto Block = MatchRanges.begin();
1373 Block->Pos = std::min(Block->Pos, M.Pos);
1374 Block->End = std::max(Block->End, M.End);
1376 break;
1378 // Iterate previous matches until overlapping match or insertion point.
1379 bool Overlap = false;
1380 for (; MI != ME; ++MI) {
1381 if (M.Pos < MI->End) {
1382 // !Overlap => New match has no overlap and is before this old match.
1383 // Overlap => New match overlaps this old match.
1384 Overlap = MI->Pos < M.End;
1385 break;
1388 if (!Overlap) {
1389 // Insert non-overlapping match into list.
1390 MatchRanges.insert(MI, M);
1391 break;
1393 if (Req.VerboseVerbose) {
1394 // Due to their verbosity, we don't print verbose diagnostics here if
1395 // we're gathering them for a different rendering, but we always print
1396 // other diagnostics.
1397 if (!Diags) {
1398 SMLoc OldStart = SMLoc::getFromPointer(Buffer.data() + MI->Pos);
1399 SMLoc OldEnd = SMLoc::getFromPointer(Buffer.data() + MI->End);
1400 SMRange OldRange(OldStart, OldEnd);
1401 SM.PrintMessage(OldStart, SourceMgr::DK_Note,
1402 "match discarded, overlaps earlier DAG match here",
1403 {OldRange});
1404 } else
1405 Diags->rbegin()->MatchTy = FileCheckDiag::MatchFoundButDiscarded;
1407 MatchPos = MI->End;
1409 if (!Req.VerboseVerbose)
1410 PrintMatch(true, SM, Prefix, Pat.getLoc(), Pat, 1, Buffer, MatchPos,
1411 MatchLen, Req, Diags);
1413 // Handle the end of a CHECK-DAG group.
1414 if (std::next(PatItr) == PatEnd ||
1415 std::next(PatItr)->getCheckTy() == Check::CheckNot) {
1416 if (!NotStrings.empty()) {
1417 // If there are CHECK-NOTs between two CHECK-DAGs or from CHECK to
1418 // CHECK-DAG, verify that there are no 'not' strings occurred in that
1419 // region.
1420 StringRef SkippedRegion =
1421 Buffer.slice(StartPos, MatchRanges.begin()->Pos);
1422 if (CheckNot(SM, SkippedRegion, NotStrings, Req, Diags))
1423 return StringRef::npos;
1424 // Clear "not strings".
1425 NotStrings.clear();
1427 // All subsequent CHECK-DAGs and CHECK-NOTs should be matched from the
1428 // end of this CHECK-DAG group's match range.
1429 StartPos = MatchRanges.rbegin()->End;
1430 // Don't waste time checking for (impossible) overlaps before that.
1431 MatchRanges.clear();
1435 return StartPos;
1438 // A check prefix must contain only alphanumeric, hyphens and underscores.
1439 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1440 Regex Validator("^[a-zA-Z0-9_-]*$");
1441 return Validator.match(CheckPrefix);
1444 bool llvm::FileCheck::ValidateCheckPrefixes() {
1445 StringSet<> PrefixSet;
1447 for (StringRef Prefix : Req.CheckPrefixes) {
1448 // Reject empty prefixes.
1449 if (Prefix == "")
1450 return false;
1452 if (!PrefixSet.insert(Prefix).second)
1453 return false;
1455 if (!ValidateCheckPrefix(Prefix))
1456 return false;
1459 return true;
1462 // Combines the check prefixes into a single regex so that we can efficiently
1463 // scan for any of the set.
1465 // The semantics are that the longest-match wins which matches our regex
1466 // library.
1467 Regex llvm::FileCheck::buildCheckPrefixRegex() {
1468 // I don't think there's a way to specify an initial value for cl::list,
1469 // so if nothing was specified, add the default
1470 if (Req.CheckPrefixes.empty())
1471 Req.CheckPrefixes.push_back("CHECK");
1473 // We already validated the contents of CheckPrefixes so just concatenate
1474 // them as alternatives.
1475 SmallString<32> PrefixRegexStr;
1476 for (StringRef Prefix : Req.CheckPrefixes) {
1477 if (Prefix != Req.CheckPrefixes.front())
1478 PrefixRegexStr.push_back('|');
1480 PrefixRegexStr.append(Prefix);
1483 return Regex(PrefixRegexStr);
1486 bool FileCheckPatternContext::defineCmdlineVariables(
1487 std::vector<std::string> &CmdlineDefines, SourceMgr &SM) {
1488 assert(GlobalVariableTable.empty() &&
1489 "Overriding defined variable with command-line variable definitions");
1491 if (CmdlineDefines.empty())
1492 return false;
1494 // Create a string representing the vector of command-line definitions. Each
1495 // definition is on its own line and prefixed with a definition number to
1496 // clarify which definition a given diagnostic corresponds to.
1497 unsigned I = 0;
1498 bool ErrorFound = false;
1499 std::string CmdlineDefsDiag;
1500 StringRef Prefix1 = "Global define #";
1501 StringRef Prefix2 = ": ";
1502 for (StringRef CmdlineDef : CmdlineDefines)
1503 CmdlineDefsDiag +=
1504 (Prefix1 + Twine(++I) + Prefix2 + CmdlineDef + "\n").str();
1506 std::unique_ptr<MemoryBuffer> CmdLineDefsDiagBuffer =
1507 MemoryBuffer::getMemBufferCopy(CmdlineDefsDiag, "Global defines");
1508 StringRef CmdlineDefsDiagRef = CmdLineDefsDiagBuffer->getBuffer();
1509 SM.AddNewSourceBuffer(std::move(CmdLineDefsDiagBuffer), SMLoc());
1511 SmallVector<StringRef, 4> CmdlineDefsDiagVec;
1512 CmdlineDefsDiagRef.split(CmdlineDefsDiagVec, '\n', -1 /*MaxSplit*/,
1513 false /*KeepEmpty*/);
1514 for (StringRef CmdlineDefDiag : CmdlineDefsDiagVec) {
1515 unsigned NameStart = CmdlineDefDiag.find(Prefix2) + Prefix2.size();
1516 if (CmdlineDefDiag.substr(NameStart).find('=') == StringRef::npos) {
1517 SM.PrintMessage(SMLoc::getFromPointer(CmdlineDefDiag.data()),
1518 SourceMgr::DK_Error,
1519 "Missing equal sign in global definition");
1520 ErrorFound = true;
1521 continue;
1523 std::pair<StringRef, StringRef> CmdlineNameVal =
1524 CmdlineDefDiag.substr(NameStart).split('=');
1525 StringRef Name = CmdlineNameVal.first;
1526 bool IsPseudo;
1527 unsigned TrailIdx;
1528 if (FileCheckPattern::parseVariable(Name, IsPseudo, TrailIdx) || IsPseudo ||
1529 TrailIdx != Name.size() || Name.empty()) {
1530 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
1531 "invalid name for variable definition '" + Name + "'");
1532 ErrorFound = true;
1533 continue;
1535 GlobalVariableTable.insert(CmdlineNameVal);
1538 return ErrorFound;
1541 void FileCheckPatternContext::clearLocalVars() {
1542 SmallVector<StringRef, 16> LocalPatternVars, LocalNumericVars;
1543 for (const StringMapEntry<StringRef> &Var : GlobalVariableTable)
1544 if (Var.first()[0] != '$')
1545 LocalPatternVars.push_back(Var.first());
1547 for (const auto &Var : LocalPatternVars)
1548 GlobalVariableTable.erase(Var);
1551 /// Check the input to FileCheck provided in the \p Buffer against the \p
1552 /// CheckStrings read from the check file.
1554 /// Returns false if the input fails to satisfy the checks.
1555 bool llvm::FileCheck::CheckInput(SourceMgr &SM, StringRef Buffer,
1556 ArrayRef<FileCheckString> CheckStrings,
1557 std::vector<FileCheckDiag> *Diags) {
1558 bool ChecksFailed = false;
1560 unsigned i = 0, j = 0, e = CheckStrings.size();
1561 while (true) {
1562 StringRef CheckRegion;
1563 if (j == e) {
1564 CheckRegion = Buffer;
1565 } else {
1566 const FileCheckString &CheckLabelStr = CheckStrings[j];
1567 if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
1568 ++j;
1569 continue;
1572 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1573 size_t MatchLabelLen = 0;
1574 size_t MatchLabelPos =
1575 CheckLabelStr.Check(SM, Buffer, true, MatchLabelLen, Req, Diags);
1576 if (MatchLabelPos == StringRef::npos)
1577 // Immediately bail if CHECK-LABEL fails, nothing else we can do.
1578 return false;
1580 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1581 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1582 ++j;
1585 if (Req.EnableVarScope)
1586 PatternContext.clearLocalVars();
1588 for (; i != j; ++i) {
1589 const FileCheckString &CheckStr = CheckStrings[i];
1591 // Check each string within the scanned region, including a second check
1592 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1593 size_t MatchLen = 0;
1594 size_t MatchPos =
1595 CheckStr.Check(SM, CheckRegion, false, MatchLen, Req, Diags);
1597 if (MatchPos == StringRef::npos) {
1598 ChecksFailed = true;
1599 i = j;
1600 break;
1603 CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1606 if (j == e)
1607 break;
1610 // Success if no checks failed.
1611 return !ChecksFailed;