[clang-format] Fix a bug in aligning comments above PPDirective (#72791)
[llvm-project.git] / clang / unittests / AST / CommentLexer.cpp
blob1e7bad89898f4c23d18d3d89f0492d475136c983
1 //===- unittests/AST/CommentLexer.cpp ------ Comment lexer tests ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "clang/AST/CommentLexer.h"
10 #include "clang/AST/CommentCommandTraits.h"
11 #include "clang/Basic/CommentOptions.h"
12 #include "clang/Basic/Diagnostic.h"
13 #include "clang/Basic/DiagnosticOptions.h"
14 #include "clang/Basic/FileManager.h"
15 #include "clang/Basic/SourceManager.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "gtest/gtest.h"
18 #include <vector>
20 using namespace llvm;
21 using namespace clang;
23 namespace clang {
24 namespace comments {
26 namespace {
27 class CommentLexerTest : public ::testing::Test {
28 protected:
29 CommentLexerTest()
30 : FileMgr(FileMgrOpts),
31 DiagID(new DiagnosticIDs()),
32 Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
33 SourceMgr(Diags, FileMgr),
34 Traits(Allocator, CommentOptions()) {
37 FileSystemOptions FileMgrOpts;
38 FileManager FileMgr;
39 IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
40 DiagnosticsEngine Diags;
41 SourceManager SourceMgr;
42 llvm::BumpPtrAllocator Allocator;
43 CommandTraits Traits;
45 void lexString(const char *Source, std::vector<Token> &Toks);
47 StringRef getCommandName(const Token &Tok) {
48 return Traits.getCommandInfo(Tok.getCommandID())->Name;
51 StringRef getVerbatimBlockName(const Token &Tok) {
52 return Traits.getCommandInfo(Tok.getVerbatimBlockID())->Name;
55 StringRef getVerbatimLineName(const Token &Tok) {
56 return Traits.getCommandInfo(Tok.getVerbatimLineID())->Name;
60 void CommentLexerTest::lexString(const char *Source,
61 std::vector<Token> &Toks) {
62 std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Source);
63 FileID File = SourceMgr.createFileID(std::move(Buf));
64 SourceLocation Begin = SourceMgr.getLocForStartOfFile(File);
66 Lexer L(Allocator, Diags, Traits, Begin, Source, Source + strlen(Source));
68 while (1) {
69 Token Tok;
70 L.lex(Tok);
71 if (Tok.is(tok::eof))
72 break;
73 Toks.push_back(Tok);
77 } // unnamed namespace
79 // Empty source range should be handled.
80 TEST_F(CommentLexerTest, Basic1) {
81 const char *Source = "";
82 std::vector<Token> Toks;
84 lexString(Source, Toks);
86 ASSERT_EQ(0U, Toks.size());
89 // Empty comments should be handled.
90 TEST_F(CommentLexerTest, Basic2) {
91 const char *Sources[] = {
92 "//", "///", "//!", "///<", "//!<"
94 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
95 std::vector<Token> Toks;
97 lexString(Sources[i], Toks);
99 ASSERT_EQ(1U, Toks.size());
101 ASSERT_EQ(tok::newline, Toks[0].getKind());
105 // Empty comments should be handled.
106 TEST_F(CommentLexerTest, Basic3) {
107 const char *Sources[] = {
108 "/**/", "/***/", "/*!*/", "/**<*/", "/*!<*/"
110 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
111 std::vector<Token> Toks;
113 lexString(Sources[i], Toks);
115 ASSERT_EQ(2U, Toks.size());
117 ASSERT_EQ(tok::newline, Toks[0].getKind());
118 ASSERT_EQ(tok::newline, Toks[1].getKind());
122 // Single comment with plain text.
123 TEST_F(CommentLexerTest, Basic4) {
124 const char *Sources[] = {
125 "// Meow", "/// Meow", "//! Meow",
126 "// Meow\n", "// Meow\r\n", "//! Meow\r",
129 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
130 std::vector<Token> Toks;
132 lexString(Sources[i], Toks);
134 ASSERT_EQ(2U, Toks.size());
136 ASSERT_EQ(tok::text, Toks[0].getKind());
137 ASSERT_EQ(StringRef(" Meow"), Toks[0].getText());
139 ASSERT_EQ(tok::newline, Toks[1].getKind());
143 // Single comment with plain text.
144 TEST_F(CommentLexerTest, Basic5) {
145 const char *Sources[] = {
146 "/* Meow*/", "/** Meow*/", "/*! Meow*/"
149 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
150 std::vector<Token> Toks;
152 lexString(Sources[i], Toks);
154 ASSERT_EQ(3U, Toks.size());
156 ASSERT_EQ(tok::text, Toks[0].getKind());
157 ASSERT_EQ(StringRef(" Meow"), Toks[0].getText());
159 ASSERT_EQ(tok::newline, Toks[1].getKind());
160 ASSERT_EQ(tok::newline, Toks[2].getKind());
164 // Test newline escaping.
165 TEST_F(CommentLexerTest, Basic6) {
166 const char *Sources[] = {
167 "// Aaa\\\n" " Bbb\\ \n" " Ccc?" "?/\n",
168 "// Aaa\\\r\n" " Bbb\\ \r\n" " Ccc?" "?/\r\n",
169 "// Aaa\\\r" " Bbb\\ \r" " Ccc?" "?/\r"
172 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
173 std::vector<Token> Toks;
175 lexString(Sources[i], Toks);
177 ASSERT_EQ(10U, Toks.size());
179 ASSERT_EQ(tok::text, Toks[0].getKind());
180 ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText());
181 ASSERT_EQ(tok::text, Toks[1].getKind());
182 ASSERT_EQ(StringRef("\\"), Toks[1].getText());
183 ASSERT_EQ(tok::newline, Toks[2].getKind());
185 ASSERT_EQ(tok::text, Toks[3].getKind());
186 ASSERT_EQ(StringRef(" Bbb"), Toks[3].getText());
187 ASSERT_EQ(tok::text, Toks[4].getKind());
188 ASSERT_EQ(StringRef("\\"), Toks[4].getText());
189 ASSERT_EQ(tok::text, Toks[5].getKind());
190 ASSERT_EQ(StringRef(" "), Toks[5].getText());
191 ASSERT_EQ(tok::newline, Toks[6].getKind());
193 ASSERT_EQ(tok::text, Toks[7].getKind());
194 ASSERT_EQ(StringRef(" Ccc?" "?/"), Toks[7].getText());
195 ASSERT_EQ(tok::newline, Toks[8].getKind());
197 ASSERT_EQ(tok::newline, Toks[9].getKind());
201 // Check that we skip C-style aligned stars correctly.
202 TEST_F(CommentLexerTest, Basic7) {
203 const char *Source =
204 "/* Aaa\n"
205 " * Bbb\r\n"
206 "\t* Ccc\n"
207 " ! Ddd\n"
208 " * Eee\n"
209 " ** Fff\n"
210 " */";
211 std::vector<Token> Toks;
213 lexString(Source, Toks);
215 ASSERT_EQ(15U, Toks.size());
217 ASSERT_EQ(tok::text, Toks[0].getKind());
218 ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText());
219 ASSERT_EQ(tok::newline, Toks[1].getKind());
221 ASSERT_EQ(tok::text, Toks[2].getKind());
222 ASSERT_EQ(StringRef(" Bbb"), Toks[2].getText());
223 ASSERT_EQ(tok::newline, Toks[3].getKind());
225 ASSERT_EQ(tok::text, Toks[4].getKind());
226 ASSERT_EQ(StringRef(" Ccc"), Toks[4].getText());
227 ASSERT_EQ(tok::newline, Toks[5].getKind());
229 ASSERT_EQ(tok::text, Toks[6].getKind());
230 ASSERT_EQ(StringRef(" ! Ddd"), Toks[6].getText());
231 ASSERT_EQ(tok::newline, Toks[7].getKind());
233 ASSERT_EQ(tok::text, Toks[8].getKind());
234 ASSERT_EQ(StringRef(" Eee"), Toks[8].getText());
235 ASSERT_EQ(tok::newline, Toks[9].getKind());
237 ASSERT_EQ(tok::text, Toks[10].getKind());
238 ASSERT_EQ(StringRef("* Fff"), Toks[10].getText());
239 ASSERT_EQ(tok::newline, Toks[11].getKind());
241 ASSERT_EQ(tok::text, Toks[12].getKind());
242 ASSERT_EQ(StringRef(" "), Toks[12].getText());
244 ASSERT_EQ(tok::newline, Toks[13].getKind());
245 ASSERT_EQ(tok::newline, Toks[14].getKind());
248 // A command marker followed by comment end.
249 TEST_F(CommentLexerTest, DoxygenCommand1) {
250 const char *Sources[] = { "//@", "///@", "//!@" };
251 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
252 std::vector<Token> Toks;
254 lexString(Sources[i], Toks);
256 ASSERT_EQ(2U, Toks.size());
258 ASSERT_EQ(tok::text, Toks[0].getKind());
259 ASSERT_EQ(StringRef("@"), Toks[0].getText());
261 ASSERT_EQ(tok::newline, Toks[1].getKind());
265 // A command marker followed by comment end.
266 TEST_F(CommentLexerTest, DoxygenCommand2) {
267 const char *Sources[] = { "/*@*/", "/**@*/", "/*!@*/"};
268 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
269 std::vector<Token> Toks;
271 lexString(Sources[i], Toks);
273 ASSERT_EQ(3U, Toks.size());
275 ASSERT_EQ(tok::text, Toks[0].getKind());
276 ASSERT_EQ(StringRef("@"), Toks[0].getText());
278 ASSERT_EQ(tok::newline, Toks[1].getKind());
279 ASSERT_EQ(tok::newline, Toks[2].getKind());
283 // A command marker followed by comment end.
284 TEST_F(CommentLexerTest, DoxygenCommand3) {
285 const char *Sources[] = { "/*\\*/", "/**\\*/" };
286 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
287 std::vector<Token> Toks;
289 lexString(Sources[i], Toks);
291 ASSERT_EQ(3U, Toks.size());
293 ASSERT_EQ(tok::text, Toks[0].getKind());
294 ASSERT_EQ(StringRef("\\"), Toks[0].getText());
296 ASSERT_EQ(tok::newline, Toks[1].getKind());
297 ASSERT_EQ(tok::newline, Toks[2].getKind());
301 // Doxygen escape sequences.
302 TEST_F(CommentLexerTest, DoxygenCommand4) {
303 const char *Sources[] = {
304 "/// \\\\ \\@ \\& \\$ \\# \\< \\> \\% \\\" \\. \\::",
305 "/// @\\ @@ @& @$ @# @< @> @% @\" @. @::"
307 const char *Text[] = {
308 " ",
309 "\\", " ", "@", " ", "&", " ", "$", " ", "#", " ",
310 "<", " ", ">", " ", "%", " ", "\"", " ", ".", " ",
311 "::", ""
314 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
315 std::vector<Token> Toks;
317 lexString(Sources[i], Toks);
319 ASSERT_EQ(std::size(Text), Toks.size());
321 for (size_t j = 0, e = Toks.size(); j != e; j++) {
322 if(Toks[j].is(tok::text)) {
323 ASSERT_EQ(StringRef(Text[j]), Toks[j].getText())
324 << "index " << i;
330 // A command marker followed by a non-letter that is not a part of an escape
331 // sequence.
332 TEST_F(CommentLexerTest, DoxygenCommand5) {
333 const char *Source = "/// \\^ \\0";
334 std::vector<Token> Toks;
336 lexString(Source, Toks);
338 ASSERT_EQ(6U, Toks.size());
340 ASSERT_EQ(tok::text, Toks[0].getKind());
341 ASSERT_EQ(StringRef(" "), Toks[0].getText());
343 ASSERT_EQ(tok::text, Toks[1].getKind());
344 ASSERT_EQ(StringRef("\\"), Toks[1].getText());
346 ASSERT_EQ(tok::text, Toks[2].getKind());
347 ASSERT_EQ(StringRef("^ "), Toks[2].getText());
349 ASSERT_EQ(tok::text, Toks[3].getKind());
350 ASSERT_EQ(StringRef("\\"), Toks[3].getText());
352 ASSERT_EQ(tok::text, Toks[4].getKind());
353 ASSERT_EQ(StringRef("0"), Toks[4].getText());
355 ASSERT_EQ(tok::newline, Toks[5].getKind());
358 TEST_F(CommentLexerTest, DoxygenCommand6) {
359 const char *Source = "/// \\brief Aaa.";
360 std::vector<Token> Toks;
362 lexString(Source, Toks);
364 ASSERT_EQ(4U, Toks.size());
366 ASSERT_EQ(tok::text, Toks[0].getKind());
367 ASSERT_EQ(StringRef(" "), Toks[0].getText());
369 ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
370 ASSERT_EQ(StringRef("brief"), getCommandName(Toks[1]));
372 ASSERT_EQ(tok::text, Toks[2].getKind());
373 ASSERT_EQ(StringRef(" Aaa."), Toks[2].getText());
375 ASSERT_EQ(tok::newline, Toks[3].getKind());
378 TEST_F(CommentLexerTest, DoxygenCommand7) {
379 const char *Source = "/// \\em\\em \\em\t\\em\n";
380 std::vector<Token> Toks;
382 lexString(Source, Toks);
384 ASSERT_EQ(8U, Toks.size());
386 ASSERT_EQ(tok::text, Toks[0].getKind());
387 ASSERT_EQ(StringRef(" "), Toks[0].getText());
389 ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
390 ASSERT_EQ(StringRef("em"), getCommandName(Toks[1]));
392 ASSERT_EQ(tok::backslash_command, Toks[2].getKind());
393 ASSERT_EQ(StringRef("em"), getCommandName(Toks[2]));
395 ASSERT_EQ(tok::text, Toks[3].getKind());
396 ASSERT_EQ(StringRef(" "), Toks[3].getText());
398 ASSERT_EQ(tok::backslash_command, Toks[4].getKind());
399 ASSERT_EQ(StringRef("em"), getCommandName(Toks[4]));
401 ASSERT_EQ(tok::text, Toks[5].getKind());
402 ASSERT_EQ(StringRef("\t"), Toks[5].getText());
404 ASSERT_EQ(tok::backslash_command, Toks[6].getKind());
405 ASSERT_EQ(StringRef("em"), getCommandName(Toks[6]));
407 ASSERT_EQ(tok::newline, Toks[7].getKind());
410 TEST_F(CommentLexerTest, DoxygenCommand8) {
411 const char *Source = "/// @em@em @em\t@em\n";
412 std::vector<Token> Toks;
414 lexString(Source, Toks);
416 ASSERT_EQ(8U, Toks.size());
418 ASSERT_EQ(tok::text, Toks[0].getKind());
419 ASSERT_EQ(StringRef(" "), Toks[0].getText());
421 ASSERT_EQ(tok::at_command, Toks[1].getKind());
422 ASSERT_EQ(StringRef("em"), getCommandName(Toks[1]));
424 ASSERT_EQ(tok::at_command, Toks[2].getKind());
425 ASSERT_EQ(StringRef("em"), getCommandName(Toks[2]));
427 ASSERT_EQ(tok::text, Toks[3].getKind());
428 ASSERT_EQ(StringRef(" "), Toks[3].getText());
430 ASSERT_EQ(tok::at_command, Toks[4].getKind());
431 ASSERT_EQ(StringRef("em"), getCommandName(Toks[4]));
433 ASSERT_EQ(tok::text, Toks[5].getKind());
434 ASSERT_EQ(StringRef("\t"), Toks[5].getText());
436 ASSERT_EQ(tok::at_command, Toks[6].getKind());
437 ASSERT_EQ(StringRef("em"), getCommandName(Toks[6]));
439 ASSERT_EQ(tok::newline, Toks[7].getKind());
442 TEST_F(CommentLexerTest, DoxygenCommand9) {
443 const char *Source = "/// \\aaa\\bbb \\ccc\t\\ddd\n";
444 std::vector<Token> Toks;
446 lexString(Source, Toks);
448 ASSERT_EQ(8U, Toks.size());
450 ASSERT_EQ(tok::text, Toks[0].getKind());
451 ASSERT_EQ(StringRef(" "), Toks[0].getText());
453 ASSERT_EQ(tok::unknown_command, Toks[1].getKind());
454 ASSERT_EQ(StringRef("aaa"), Toks[1].getUnknownCommandName());
456 ASSERT_EQ(tok::unknown_command, Toks[2].getKind());
457 ASSERT_EQ(StringRef("bbb"), Toks[2].getUnknownCommandName());
459 ASSERT_EQ(tok::text, Toks[3].getKind());
460 ASSERT_EQ(StringRef(" "), Toks[3].getText());
462 ASSERT_EQ(tok::unknown_command, Toks[4].getKind());
463 ASSERT_EQ(StringRef("ccc"), Toks[4].getUnknownCommandName());
465 ASSERT_EQ(tok::text, Toks[5].getKind());
466 ASSERT_EQ(StringRef("\t"), Toks[5].getText());
468 ASSERT_EQ(tok::unknown_command, Toks[6].getKind());
469 ASSERT_EQ(StringRef("ddd"), Toks[6].getUnknownCommandName());
471 ASSERT_EQ(tok::newline, Toks[7].getKind());
474 TEST_F(CommentLexerTest, DoxygenCommand10) {
475 const char *Source = "// \\c\n";
476 std::vector<Token> Toks;
478 lexString(Source, Toks);
480 ASSERT_EQ(3U, Toks.size());
482 ASSERT_EQ(tok::text, Toks[0].getKind());
483 ASSERT_EQ(StringRef(" "), Toks[0].getText());
485 ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
486 ASSERT_EQ(StringRef("c"), getCommandName(Toks[1]));
488 ASSERT_EQ(tok::newline, Toks[2].getKind());
491 TEST_F(CommentLexerTest, RegisterCustomBlockCommand) {
492 const char *Source =
493 "/// \\NewBlockCommand Aaa.\n"
494 "/// @NewBlockCommand Aaa.\n";
496 Traits.registerBlockCommand(StringRef("NewBlockCommand"));
498 std::vector<Token> Toks;
500 lexString(Source, Toks);
502 ASSERT_EQ(8U, Toks.size());
504 ASSERT_EQ(tok::text, Toks[0].getKind());
505 ASSERT_EQ(StringRef(" "), Toks[0].getText());
507 ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
508 ASSERT_EQ(StringRef("NewBlockCommand"), getCommandName(Toks[1]));
510 ASSERT_EQ(tok::text, Toks[2].getKind());
511 ASSERT_EQ(StringRef(" Aaa."), Toks[2].getText());
513 ASSERT_EQ(tok::newline, Toks[3].getKind());
515 ASSERT_EQ(tok::text, Toks[4].getKind());
516 ASSERT_EQ(StringRef(" "), Toks[4].getText());
518 ASSERT_EQ(tok::at_command, Toks[5].getKind());
519 ASSERT_EQ(StringRef("NewBlockCommand"), getCommandName(Toks[5]));
521 ASSERT_EQ(tok::text, Toks[6].getKind());
522 ASSERT_EQ(StringRef(" Aaa."), Toks[6].getText());
524 ASSERT_EQ(tok::newline, Toks[7].getKind());
527 TEST_F(CommentLexerTest, RegisterMultipleBlockCommands) {
528 const char *Source =
529 "/// \\Foo\n"
530 "/// \\Bar Baz\n"
531 "/// \\Blech quux=corge\n";
533 Traits.registerBlockCommand(StringRef("Foo"));
534 Traits.registerBlockCommand(StringRef("Bar"));
535 Traits.registerBlockCommand(StringRef("Blech"));
537 std::vector<Token> Toks;
539 lexString(Source, Toks);
541 ASSERT_EQ(11U, Toks.size());
543 ASSERT_EQ(tok::text, Toks[0].getKind());
544 ASSERT_EQ(StringRef(" "), Toks[0].getText());
546 ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
547 ASSERT_EQ(StringRef("Foo"), getCommandName(Toks[1]));
549 ASSERT_EQ(tok::newline, Toks[2].getKind());
551 ASSERT_EQ(tok::text, Toks[3].getKind());
552 ASSERT_EQ(StringRef(" "), Toks[3].getText());
554 ASSERT_EQ(tok::backslash_command, Toks[4].getKind());
555 ASSERT_EQ(StringRef("Bar"), getCommandName(Toks[4]));
557 ASSERT_EQ(tok::text, Toks[5].getKind());
558 ASSERT_EQ(StringRef(" Baz"), Toks[5].getText());
560 ASSERT_EQ(tok::newline, Toks[6].getKind());
562 ASSERT_EQ(tok::text, Toks[7].getKind());
563 ASSERT_EQ(StringRef(" "), Toks[7].getText());
565 ASSERT_EQ(tok::backslash_command, Toks[8].getKind());
566 ASSERT_EQ(StringRef("Blech"), getCommandName(Toks[8]));
568 ASSERT_EQ(tok::text, Toks[9].getKind());
569 ASSERT_EQ(StringRef(" quux=corge"), Toks[9].getText());
571 ASSERT_EQ(tok::newline, Toks[10].getKind());
574 // Empty verbatim block.
575 TEST_F(CommentLexerTest, VerbatimBlock1) {
576 const char *Sources[] = {
577 "/// \\verbatim\\endverbatim\n//",
578 "/** \\verbatim\\endverbatim*/"
581 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
582 std::vector<Token> Toks;
584 lexString(Sources[i], Toks);
586 ASSERT_EQ(5U, Toks.size());
588 ASSERT_EQ(tok::text, Toks[0].getKind());
589 ASSERT_EQ(StringRef(" "), Toks[0].getText());
591 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
592 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1]));
594 ASSERT_EQ(tok::verbatim_block_end, Toks[2].getKind());
595 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[2]));
597 ASSERT_EQ(tok::newline, Toks[3].getKind());
598 ASSERT_EQ(tok::newline, Toks[4].getKind());
602 // Empty verbatim block without an end command.
603 TEST_F(CommentLexerTest, VerbatimBlock2) {
604 const char *Source = "/// \\verbatim";
606 std::vector<Token> Toks;
608 lexString(Source, Toks);
610 ASSERT_EQ(3U, Toks.size());
612 ASSERT_EQ(tok::text, Toks[0].getKind());
613 ASSERT_EQ(StringRef(" "), Toks[0].getText());
615 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
616 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1]));
618 ASSERT_EQ(tok::newline, Toks[2].getKind());
621 // Empty verbatim block without an end command.
622 TEST_F(CommentLexerTest, VerbatimBlock3) {
623 const char *Source = "/** \\verbatim*/";
625 std::vector<Token> Toks;
627 lexString(Source, Toks);
629 ASSERT_EQ(4U, Toks.size());
631 ASSERT_EQ(tok::text, Toks[0].getKind());
632 ASSERT_EQ(StringRef(" "), Toks[0].getText());
634 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
635 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1]));
637 ASSERT_EQ(tok::newline, Toks[2].getKind());
638 ASSERT_EQ(tok::newline, Toks[3].getKind());
641 // Single-line verbatim block.
642 TEST_F(CommentLexerTest, VerbatimBlock4) {
643 const char *Sources[] = {
644 "/// Meow \\verbatim aaa \\endverbatim\n//",
645 "/** Meow \\verbatim aaa \\endverbatim*/"
648 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
649 std::vector<Token> Toks;
651 lexString(Sources[i], Toks);
653 ASSERT_EQ(6U, Toks.size());
655 ASSERT_EQ(tok::text, Toks[0].getKind());
656 ASSERT_EQ(StringRef(" Meow "), Toks[0].getText());
658 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
659 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1]));
661 ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind());
662 ASSERT_EQ(StringRef(" aaa "), Toks[2].getVerbatimBlockText());
664 ASSERT_EQ(tok::verbatim_block_end, Toks[3].getKind());
665 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[3]));
667 ASSERT_EQ(tok::newline, Toks[4].getKind());
668 ASSERT_EQ(tok::newline, Toks[5].getKind());
672 // Single-line verbatim block without an end command.
673 TEST_F(CommentLexerTest, VerbatimBlock5) {
674 const char *Sources[] = {
675 "/// Meow \\verbatim aaa \n//",
676 "/** Meow \\verbatim aaa */"
679 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
680 std::vector<Token> Toks;
682 lexString(Sources[i], Toks);
684 ASSERT_EQ(5U, Toks.size());
686 ASSERT_EQ(tok::text, Toks[0].getKind());
687 ASSERT_EQ(StringRef(" Meow "), Toks[0].getText());
689 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
690 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1]));
692 ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind());
693 ASSERT_EQ(StringRef(" aaa "), Toks[2].getVerbatimBlockText());
695 ASSERT_EQ(tok::newline, Toks[3].getKind());
696 ASSERT_EQ(tok::newline, Toks[4].getKind());
700 TEST_F(CommentLexerTest, VerbatimBlock6) {
701 const char *Source =
702 "// \\verbatim\n"
703 "// Aaa\n"
704 "//\n"
705 "// Bbb\n"
706 "// \\endverbatim\n";
708 std::vector<Token> Toks;
710 lexString(Source, Toks);
712 ASSERT_EQ(10U, Toks.size());
714 ASSERT_EQ(tok::text, Toks[0].getKind());
715 ASSERT_EQ(StringRef(" "), Toks[0].getText());
717 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
718 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1]));
720 ASSERT_EQ(tok::newline, Toks[2].getKind());
722 ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind());
723 ASSERT_EQ(StringRef(" Aaa"), Toks[3].getVerbatimBlockText());
725 ASSERT_EQ(tok::newline, Toks[4].getKind());
727 ASSERT_EQ(tok::newline, Toks[5].getKind());
729 ASSERT_EQ(tok::verbatim_block_line, Toks[6].getKind());
730 ASSERT_EQ(StringRef(" Bbb"), Toks[6].getVerbatimBlockText());
732 ASSERT_EQ(tok::newline, Toks[7].getKind());
734 ASSERT_EQ(tok::verbatim_block_end, Toks[8].getKind());
735 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[8]));
737 ASSERT_EQ(tok::newline, Toks[9].getKind());
740 TEST_F(CommentLexerTest, VerbatimBlock7) {
741 const char *Source =
742 "/* \\verbatim\n"
743 " * Aaa\n"
744 " *\n"
745 " * Bbb\n"
746 " * \\endverbatim\n"
747 " */";
749 std::vector<Token> Toks;
751 lexString(Source, Toks);
753 ASSERT_EQ(10U, Toks.size());
755 ASSERT_EQ(tok::text, Toks[0].getKind());
756 ASSERT_EQ(StringRef(" "), Toks[0].getText());
758 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
759 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1]));
761 ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind());
762 ASSERT_EQ(StringRef(" Aaa"), Toks[2].getVerbatimBlockText());
764 ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind());
765 ASSERT_EQ(StringRef(""), Toks[3].getVerbatimBlockText());
767 ASSERT_EQ(tok::verbatim_block_line, Toks[4].getKind());
768 ASSERT_EQ(StringRef(" Bbb"), Toks[4].getVerbatimBlockText());
770 ASSERT_EQ(tok::verbatim_block_end, Toks[5].getKind());
771 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[5]));
773 ASSERT_EQ(tok::newline, Toks[6].getKind());
775 ASSERT_EQ(tok::text, Toks[7].getKind());
776 ASSERT_EQ(StringRef(" "), Toks[7].getText());
778 ASSERT_EQ(tok::newline, Toks[8].getKind());
779 ASSERT_EQ(tok::newline, Toks[9].getKind());
782 // Complex test for verbatim blocks.
783 TEST_F(CommentLexerTest, VerbatimBlock8) {
784 const char *Source =
785 "/* Meow \\verbatim aaa\\$\\@\n"
786 "bbb \\endverbati\r"
787 "ccc\r\n"
788 "ddd \\endverbatim Blah \\verbatim eee\n"
789 "\\endverbatim BlahBlah*/";
790 std::vector<Token> Toks;
792 lexString(Source, Toks);
794 ASSERT_EQ(14U, Toks.size());
796 ASSERT_EQ(tok::text, Toks[0].getKind());
797 ASSERT_EQ(StringRef(" Meow "), Toks[0].getText());
799 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
800 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1]));
802 ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind());
803 ASSERT_EQ(StringRef(" aaa\\$\\@"), Toks[2].getVerbatimBlockText());
805 ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind());
806 ASSERT_EQ(StringRef("bbb \\endverbati"), Toks[3].getVerbatimBlockText());
808 ASSERT_EQ(tok::verbatim_block_line, Toks[4].getKind());
809 ASSERT_EQ(StringRef("ccc"), Toks[4].getVerbatimBlockText());
811 ASSERT_EQ(tok::verbatim_block_line, Toks[5].getKind());
812 ASSERT_EQ(StringRef("ddd "), Toks[5].getVerbatimBlockText());
814 ASSERT_EQ(tok::verbatim_block_end, Toks[6].getKind());
815 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[6]));
817 ASSERT_EQ(tok::text, Toks[7].getKind());
818 ASSERT_EQ(StringRef(" Blah "), Toks[7].getText());
820 ASSERT_EQ(tok::verbatim_block_begin, Toks[8].getKind());
821 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[8]));
823 ASSERT_EQ(tok::verbatim_block_line, Toks[9].getKind());
824 ASSERT_EQ(StringRef(" eee"), Toks[9].getVerbatimBlockText());
826 ASSERT_EQ(tok::verbatim_block_end, Toks[10].getKind());
827 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[10]));
829 ASSERT_EQ(tok::text, Toks[11].getKind());
830 ASSERT_EQ(StringRef(" BlahBlah"), Toks[11].getText());
832 ASSERT_EQ(tok::newline, Toks[12].getKind());
833 ASSERT_EQ(tok::newline, Toks[13].getKind());
836 // LaTeX verbatim blocks.
837 TEST_F(CommentLexerTest, VerbatimBlock9) {
838 const char *Source =
839 "/// \\f$ Aaa \\f$ \\f[ Bbb \\f] \\f{ Ccc \\f} \\f( Ddd \\f)";
840 std::vector<Token> Toks;
842 lexString(Source, Toks);
844 ASSERT_EQ(17U, Toks.size());
846 ASSERT_EQ(tok::text, Toks[0].getKind());
847 ASSERT_EQ(StringRef(" "), Toks[0].getText());
849 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
850 ASSERT_EQ(StringRef("f$"), getVerbatimBlockName(Toks[1]));
852 ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind());
853 ASSERT_EQ(StringRef(" Aaa "), Toks[2].getVerbatimBlockText());
855 ASSERT_EQ(tok::verbatim_block_end, Toks[3].getKind());
856 ASSERT_EQ(StringRef("f$"), getVerbatimBlockName(Toks[3]));
858 ASSERT_EQ(tok::text, Toks[4].getKind());
859 ASSERT_EQ(StringRef(" "), Toks[4].getText());
861 ASSERT_EQ(tok::verbatim_block_begin, Toks[5].getKind());
862 ASSERT_EQ(StringRef("f["), getVerbatimBlockName(Toks[5]));
864 ASSERT_EQ(tok::verbatim_block_line, Toks[6].getKind());
865 ASSERT_EQ(StringRef(" Bbb "), Toks[6].getVerbatimBlockText());
867 ASSERT_EQ(tok::verbatim_block_end, Toks[7].getKind());
868 ASSERT_EQ(StringRef("f]"), getVerbatimBlockName(Toks[7]));
870 ASSERT_EQ(tok::text, Toks[8].getKind());
871 ASSERT_EQ(StringRef(" "), Toks[8].getText());
873 ASSERT_EQ(tok::verbatim_block_begin, Toks[9].getKind());
874 ASSERT_EQ(StringRef("f{"), getVerbatimBlockName(Toks[9]));
876 ASSERT_EQ(tok::verbatim_block_line, Toks[10].getKind());
877 ASSERT_EQ(StringRef(" Ccc "), Toks[10].getVerbatimBlockText());
879 ASSERT_EQ(tok::verbatim_block_end, Toks[11].getKind());
880 ASSERT_EQ(StringRef("f}"), getVerbatimBlockName(Toks[11]));
882 ASSERT_EQ(tok::text, Toks[12].getKind());
883 ASSERT_EQ(StringRef(" "), Toks[12].getText());
885 ASSERT_EQ(tok::verbatim_block_begin, Toks[13].getKind());
886 ASSERT_EQ(StringRef("f("), getVerbatimBlockName(Toks[13]));
888 ASSERT_EQ(tok::verbatim_block_line, Toks[14].getKind());
889 ASSERT_EQ(StringRef(" Ddd "), Toks[14].getVerbatimBlockText());
891 ASSERT_EQ(tok::verbatim_block_end, Toks[15].getKind());
892 ASSERT_EQ(StringRef("f)"), getVerbatimBlockName(Toks[15]));
894 ASSERT_EQ(tok::newline, Toks[16].getKind());
897 // Empty verbatim line.
898 TEST_F(CommentLexerTest, VerbatimLine1) {
899 const char *Sources[] = {
900 "/// \\fn\n//",
901 "/** \\fn*/"
904 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
905 std::vector<Token> Toks;
907 lexString(Sources[i], Toks);
909 ASSERT_EQ(4U, Toks.size());
911 ASSERT_EQ(tok::text, Toks[0].getKind());
912 ASSERT_EQ(StringRef(" "), Toks[0].getText());
914 ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind());
915 ASSERT_EQ(StringRef("fn"), getVerbatimLineName(Toks[1]));
917 ASSERT_EQ(tok::newline, Toks[2].getKind());
918 ASSERT_EQ(tok::newline, Toks[3].getKind());
922 // Verbatim line with Doxygen escape sequences, which should not be expanded.
923 TEST_F(CommentLexerTest, VerbatimLine2) {
924 const char *Sources[] = {
925 "/// \\fn void *foo(const char *zzz = \"\\$\");\n//",
926 "/** \\fn void *foo(const char *zzz = \"\\$\");*/"
929 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
930 std::vector<Token> Toks;
932 lexString(Sources[i], Toks);
934 ASSERT_EQ(5U, Toks.size());
936 ASSERT_EQ(tok::text, Toks[0].getKind());
937 ASSERT_EQ(StringRef(" "), Toks[0].getText());
939 ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind());
940 ASSERT_EQ(StringRef("fn"), getVerbatimLineName(Toks[1]));
942 ASSERT_EQ(tok::verbatim_line_text, Toks[2].getKind());
943 ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"),
944 Toks[2].getVerbatimLineText());
946 ASSERT_EQ(tok::newline, Toks[3].getKind());
947 ASSERT_EQ(tok::newline, Toks[4].getKind());
951 // Verbatim line should not eat anything from next source line.
952 TEST_F(CommentLexerTest, VerbatimLine3) {
953 const char *Source =
954 "/** \\fn void *foo(const char *zzz = \"\\$\");\n"
955 " * Meow\n"
956 " */";
958 std::vector<Token> Toks;
960 lexString(Source, Toks);
962 ASSERT_EQ(9U, Toks.size());
964 ASSERT_EQ(tok::text, Toks[0].getKind());
965 ASSERT_EQ(StringRef(" "), Toks[0].getText());
967 ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind());
968 ASSERT_EQ(StringRef("fn"), getVerbatimLineName(Toks[1]));
970 ASSERT_EQ(tok::verbatim_line_text, Toks[2].getKind());
971 ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"),
972 Toks[2].getVerbatimLineText());
973 ASSERT_EQ(tok::newline, Toks[3].getKind());
975 ASSERT_EQ(tok::text, Toks[4].getKind());
976 ASSERT_EQ(StringRef(" Meow"), Toks[4].getText());
977 ASSERT_EQ(tok::newline, Toks[5].getKind());
979 ASSERT_EQ(tok::text, Toks[6].getKind());
980 ASSERT_EQ(StringRef(" "), Toks[6].getText());
982 ASSERT_EQ(tok::newline, Toks[7].getKind());
983 ASSERT_EQ(tok::newline, Toks[8].getKind());
986 TEST_F(CommentLexerTest, HTML1) {
987 const char *Source =
988 "// <";
990 std::vector<Token> Toks;
992 lexString(Source, Toks);
994 ASSERT_EQ(3U, Toks.size());
996 ASSERT_EQ(tok::text, Toks[0].getKind());
997 ASSERT_EQ(StringRef(" "), Toks[0].getText());
999 ASSERT_EQ(tok::text, Toks[1].getKind());
1000 ASSERT_EQ(StringRef("<"), Toks[1].getText());
1002 ASSERT_EQ(tok::newline, Toks[2].getKind());
1005 TEST_F(CommentLexerTest, HTML2) {
1006 const char *Source =
1007 "// a<2";
1009 std::vector<Token> Toks;
1011 lexString(Source, Toks);
1013 ASSERT_EQ(4U, Toks.size());
1015 ASSERT_EQ(tok::text, Toks[0].getKind());
1016 ASSERT_EQ(StringRef(" a"), Toks[0].getText());
1018 ASSERT_EQ(tok::text, Toks[1].getKind());
1019 ASSERT_EQ(StringRef("<"), Toks[1].getText());
1021 ASSERT_EQ(tok::text, Toks[2].getKind());
1022 ASSERT_EQ(StringRef("2"), Toks[2].getText());
1024 ASSERT_EQ(tok::newline, Toks[3].getKind());
1027 TEST_F(CommentLexerTest, HTML3) {
1028 const char *Source =
1029 "// < img";
1031 std::vector<Token> Toks;
1033 lexString(Source, Toks);
1035 ASSERT_EQ(4U, Toks.size());
1037 ASSERT_EQ(tok::text, Toks[0].getKind());
1038 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1040 ASSERT_EQ(tok::text, Toks[1].getKind());
1041 ASSERT_EQ(StringRef("<"), Toks[1].getText());
1043 ASSERT_EQ(tok::text, Toks[2].getKind());
1044 ASSERT_EQ(StringRef(" img"), Toks[2].getText());
1046 ASSERT_EQ(tok::newline, Toks[3].getKind());
1049 TEST_F(CommentLexerTest, HTML4) {
1050 const char *Sources[] = {
1051 "// <img",
1052 "// <img "
1055 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
1056 std::vector<Token> Toks;
1058 lexString(Sources[i], Toks);
1060 ASSERT_EQ(3U, Toks.size());
1062 ASSERT_EQ(tok::text, Toks[0].getKind());
1063 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1065 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1066 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
1068 ASSERT_EQ(tok::newline, Toks[2].getKind());
1072 TEST_F(CommentLexerTest, HTML5) {
1073 const char *Source =
1074 "// <img 42";
1076 std::vector<Token> Toks;
1078 lexString(Source, Toks);
1080 ASSERT_EQ(4U, Toks.size());
1082 ASSERT_EQ(tok::text, Toks[0].getKind());
1083 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1085 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1086 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
1088 ASSERT_EQ(tok::text, Toks[2].getKind());
1089 ASSERT_EQ(StringRef("42"), Toks[2].getText());
1091 ASSERT_EQ(tok::newline, Toks[3].getKind());
1094 TEST_F(CommentLexerTest, HTML6) {
1095 const char *Source = "// <img> Meow";
1097 std::vector<Token> Toks;
1099 lexString(Source, Toks);
1101 ASSERT_EQ(5U, Toks.size());
1103 ASSERT_EQ(tok::text, Toks[0].getKind());
1104 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1106 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1107 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
1109 ASSERT_EQ(tok::html_greater, Toks[2].getKind());
1111 ASSERT_EQ(tok::text, Toks[3].getKind());
1112 ASSERT_EQ(StringRef(" Meow"), Toks[3].getText());
1114 ASSERT_EQ(tok::newline, Toks[4].getKind());
1117 TEST_F(CommentLexerTest, HTML7) {
1118 const char *Source = "// <img=";
1120 std::vector<Token> Toks;
1122 lexString(Source, Toks);
1124 ASSERT_EQ(4U, Toks.size());
1126 ASSERT_EQ(tok::text, Toks[0].getKind());
1127 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1129 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1130 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
1132 ASSERT_EQ(tok::text, Toks[2].getKind());
1133 ASSERT_EQ(StringRef("="), Toks[2].getText());
1135 ASSERT_EQ(tok::newline, Toks[3].getKind());
1138 TEST_F(CommentLexerTest, HTML8) {
1139 const char *Source = "// <img src=> Meow";
1141 std::vector<Token> Toks;
1143 lexString(Source, Toks);
1145 ASSERT_EQ(7U, Toks.size());
1147 ASSERT_EQ(tok::text, Toks[0].getKind());
1148 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1150 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1151 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
1153 ASSERT_EQ(tok::html_ident, Toks[2].getKind());
1154 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent());
1156 ASSERT_EQ(tok::html_equals, Toks[3].getKind());
1158 ASSERT_EQ(tok::html_greater, Toks[4].getKind());
1160 ASSERT_EQ(tok::text, Toks[5].getKind());
1161 ASSERT_EQ(StringRef(" Meow"), Toks[5].getText());
1163 ASSERT_EQ(tok::newline, Toks[6].getKind());
1166 TEST_F(CommentLexerTest, HTML9) {
1167 const char *Sources[] = {
1168 "// <img src",
1169 "// <img src "
1172 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
1173 std::vector<Token> Toks;
1175 lexString(Sources[i], Toks);
1177 ASSERT_EQ(4U, Toks.size());
1179 ASSERT_EQ(tok::text, Toks[0].getKind());
1180 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1182 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1183 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
1185 ASSERT_EQ(tok::html_ident, Toks[2].getKind());
1186 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent());
1188 ASSERT_EQ(tok::newline, Toks[3].getKind());
1192 TEST_F(CommentLexerTest, HTML10) {
1193 const char *Sources[] = {
1194 "// <img src=",
1195 "// <img src ="
1198 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
1199 std::vector<Token> Toks;
1201 lexString(Sources[i], Toks);
1203 ASSERT_EQ(5U, Toks.size());
1205 ASSERT_EQ(tok::text, Toks[0].getKind());
1206 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1208 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1209 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
1211 ASSERT_EQ(tok::html_ident, Toks[2].getKind());
1212 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent());
1214 ASSERT_EQ(tok::html_equals, Toks[3].getKind());
1216 ASSERT_EQ(tok::newline, Toks[4].getKind());
1220 TEST_F(CommentLexerTest, HTML11) {
1221 const char *Sources[] = {
1222 "// <img src=\"",
1223 "// <img src = \"",
1224 "// <img src=\'",
1225 "// <img src = \'"
1228 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
1229 std::vector<Token> Toks;
1231 lexString(Sources[i], Toks);
1233 ASSERT_EQ(6U, Toks.size());
1235 ASSERT_EQ(tok::text, Toks[0].getKind());
1236 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1238 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1239 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
1241 ASSERT_EQ(tok::html_ident, Toks[2].getKind());
1242 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent());
1244 ASSERT_EQ(tok::html_equals, Toks[3].getKind());
1246 ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind());
1247 ASSERT_EQ(StringRef(""), Toks[4].getHTMLQuotedString());
1249 ASSERT_EQ(tok::newline, Toks[5].getKind());
1253 TEST_F(CommentLexerTest, HTML12) {
1254 const char *Source = "// <img src=@";
1256 std::vector<Token> Toks;
1258 lexString(Source, Toks);
1260 ASSERT_EQ(6U, Toks.size());
1262 ASSERT_EQ(tok::text, Toks[0].getKind());
1263 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1265 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1266 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
1268 ASSERT_EQ(tok::html_ident, Toks[2].getKind());
1269 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent());
1271 ASSERT_EQ(tok::html_equals, Toks[3].getKind());
1273 ASSERT_EQ(tok::text, Toks[4].getKind());
1274 ASSERT_EQ(StringRef("@"), Toks[4].getText());
1276 ASSERT_EQ(tok::newline, Toks[5].getKind());
1279 TEST_F(CommentLexerTest, HTML13) {
1280 const char *Sources[] = {
1281 "// <img src=\"val\\\"\\'val",
1282 "// <img src=\"val\\\"\\'val\"",
1283 "// <img src=\'val\\\"\\'val",
1284 "// <img src=\'val\\\"\\'val\'"
1287 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
1288 std::vector<Token> Toks;
1290 lexString(Sources[i], Toks);
1292 ASSERT_EQ(6U, Toks.size());
1294 ASSERT_EQ(tok::text, Toks[0].getKind());
1295 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1297 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1298 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
1300 ASSERT_EQ(tok::html_ident, Toks[2].getKind());
1301 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent());
1303 ASSERT_EQ(tok::html_equals, Toks[3].getKind());
1305 ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind());
1306 ASSERT_EQ(StringRef("val\\\"\\'val"), Toks[4].getHTMLQuotedString());
1308 ASSERT_EQ(tok::newline, Toks[5].getKind());
1312 TEST_F(CommentLexerTest, HTML14) {
1313 const char *Sources[] = {
1314 "// <img src=\"val\\\"\\'val\">",
1315 "// <img src=\'val\\\"\\'val\'>"
1318 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
1319 std::vector<Token> Toks;
1321 lexString(Sources[i], Toks);
1323 ASSERT_EQ(7U, Toks.size());
1325 ASSERT_EQ(tok::text, Toks[0].getKind());
1326 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1328 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1329 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
1331 ASSERT_EQ(tok::html_ident, Toks[2].getKind());
1332 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent());
1334 ASSERT_EQ(tok::html_equals, Toks[3].getKind());
1336 ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind());
1337 ASSERT_EQ(StringRef("val\\\"\\'val"), Toks[4].getHTMLQuotedString());
1339 ASSERT_EQ(tok::html_greater, Toks[5].getKind());
1341 ASSERT_EQ(tok::newline, Toks[6].getKind());
1345 TEST_F(CommentLexerTest, HTML15) {
1346 const char *Sources[] = {
1347 "// <img/>",
1348 "// <img />"
1351 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
1352 std::vector<Token> Toks;
1354 lexString(Sources[i], Toks);
1356 ASSERT_EQ(4U, Toks.size());
1358 ASSERT_EQ(tok::text, Toks[0].getKind());
1359 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1361 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1362 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
1364 ASSERT_EQ(tok::html_slash_greater, Toks[2].getKind());
1366 ASSERT_EQ(tok::newline, Toks[3].getKind());
1370 TEST_F(CommentLexerTest, HTML16) {
1371 const char *Sources[] = {
1372 "// <img/ Aaa",
1373 "// <img / Aaa"
1376 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
1377 std::vector<Token> Toks;
1379 lexString(Sources[i], Toks);
1381 ASSERT_EQ(5U, Toks.size());
1383 ASSERT_EQ(tok::text, Toks[0].getKind());
1384 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1386 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1387 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
1389 ASSERT_EQ(tok::text, Toks[2].getKind());
1390 ASSERT_EQ(StringRef("/"), Toks[2].getText());
1392 ASSERT_EQ(tok::text, Toks[3].getKind());
1393 ASSERT_EQ(StringRef(" Aaa"), Toks[3].getText());
1395 ASSERT_EQ(tok::newline, Toks[4].getKind());
1399 TEST_F(CommentLexerTest, HTML17) {
1400 const char *Source = "// </";
1402 std::vector<Token> Toks;
1404 lexString(Source, Toks);
1406 ASSERT_EQ(3U, Toks.size());
1408 ASSERT_EQ(tok::text, Toks[0].getKind());
1409 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1411 ASSERT_EQ(tok::text, Toks[1].getKind());
1412 ASSERT_EQ(StringRef("</"), Toks[1].getText());
1414 ASSERT_EQ(tok::newline, Toks[2].getKind());
1417 TEST_F(CommentLexerTest, HTML18) {
1418 const char *Source = "// </@";
1420 std::vector<Token> Toks;
1422 lexString(Source, Toks);
1424 ASSERT_EQ(4U, Toks.size());
1426 ASSERT_EQ(tok::text, Toks[0].getKind());
1427 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1429 ASSERT_EQ(tok::text, Toks[1].getKind());
1430 ASSERT_EQ(StringRef("</"), Toks[1].getText());
1432 ASSERT_EQ(tok::text, Toks[2].getKind());
1433 ASSERT_EQ(StringRef("@"), Toks[2].getText());
1435 ASSERT_EQ(tok::newline, Toks[3].getKind());
1438 TEST_F(CommentLexerTest, HTML19) {
1439 const char *Source = "// </img";
1441 std::vector<Token> Toks;
1443 lexString(Source, Toks);
1445 ASSERT_EQ(3U, Toks.size());
1447 ASSERT_EQ(tok::text, Toks[0].getKind());
1448 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1450 ASSERT_EQ(tok::html_end_tag, Toks[1].getKind());
1451 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagEndName());
1453 ASSERT_EQ(tok::newline, Toks[2].getKind());
1456 TEST_F(CommentLexerTest, NotAKnownHTMLTag1) {
1457 const char *Source = "// <tag>";
1459 std::vector<Token> Toks;
1461 lexString(Source, Toks);
1463 ASSERT_EQ(4U, Toks.size());
1465 ASSERT_EQ(tok::text, Toks[0].getKind());
1466 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1468 ASSERT_EQ(tok::text, Toks[1].getKind());
1469 ASSERT_EQ(StringRef("<tag"), Toks[1].getText());
1471 ASSERT_EQ(tok::text, Toks[2].getKind());
1472 ASSERT_EQ(StringRef(">"), Toks[2].getText());
1474 ASSERT_EQ(tok::newline, Toks[3].getKind());
1477 TEST_F(CommentLexerTest, NotAKnownHTMLTag2) {
1478 const char *Source = "// </tag>";
1480 std::vector<Token> Toks;
1482 lexString(Source, Toks);
1484 ASSERT_EQ(4U, Toks.size());
1486 ASSERT_EQ(tok::text, Toks[0].getKind());
1487 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1489 ASSERT_EQ(tok::text, Toks[1].getKind());
1490 ASSERT_EQ(StringRef("</tag"), Toks[1].getText());
1492 ASSERT_EQ(tok::text, Toks[2].getKind());
1493 ASSERT_EQ(StringRef(">"), Toks[2].getText());
1495 ASSERT_EQ(tok::newline, Toks[3].getKind());
1498 TEST_F(CommentLexerTest, HTMLCharacterReferences1) {
1499 const char *Source = "// &";
1501 std::vector<Token> Toks;
1503 lexString(Source, Toks);
1505 ASSERT_EQ(3U, Toks.size());
1507 ASSERT_EQ(tok::text, Toks[0].getKind());
1508 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1510 ASSERT_EQ(tok::text, Toks[1].getKind());
1511 ASSERT_EQ(StringRef("&"), Toks[1].getText());
1513 ASSERT_EQ(tok::newline, Toks[2].getKind());
1516 TEST_F(CommentLexerTest, HTMLCharacterReferences2) {
1517 const char *Source = "// &!";
1519 std::vector<Token> Toks;
1521 lexString(Source, Toks);
1523 ASSERT_EQ(4U, Toks.size());
1525 ASSERT_EQ(tok::text, Toks[0].getKind());
1526 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1528 ASSERT_EQ(tok::text, Toks[1].getKind());
1529 ASSERT_EQ(StringRef("&"), Toks[1].getText());
1531 ASSERT_EQ(tok::text, Toks[2].getKind());
1532 ASSERT_EQ(StringRef("!"), Toks[2].getText());
1534 ASSERT_EQ(tok::newline, Toks[3].getKind());
1537 TEST_F(CommentLexerTest, HTMLCharacterReferences3) {
1538 const char *Source = "// &amp";
1540 std::vector<Token> Toks;
1542 lexString(Source, Toks);
1544 ASSERT_EQ(3U, Toks.size());
1546 ASSERT_EQ(tok::text, Toks[0].getKind());
1547 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1549 ASSERT_EQ(tok::text, Toks[1].getKind());
1550 ASSERT_EQ(StringRef("&amp"), Toks[1].getText());
1552 ASSERT_EQ(tok::newline, Toks[2].getKind());
1555 TEST_F(CommentLexerTest, HTMLCharacterReferences4) {
1556 const char *Source = "// &amp!";
1558 std::vector<Token> Toks;
1560 lexString(Source, Toks);
1562 ASSERT_EQ(4U, Toks.size());
1564 ASSERT_EQ(tok::text, Toks[0].getKind());
1565 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1567 ASSERT_EQ(tok::text, Toks[1].getKind());
1568 ASSERT_EQ(StringRef("&amp"), Toks[1].getText());
1570 ASSERT_EQ(tok::text, Toks[2].getKind());
1571 ASSERT_EQ(StringRef("!"), Toks[2].getText());
1573 ASSERT_EQ(tok::newline, Toks[3].getKind());
1576 TEST_F(CommentLexerTest, HTMLCharacterReferences5) {
1577 const char *Source = "// &#";
1579 std::vector<Token> Toks;
1581 lexString(Source, Toks);
1583 ASSERT_EQ(3U, Toks.size());
1585 ASSERT_EQ(tok::text, Toks[0].getKind());
1586 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1588 ASSERT_EQ(tok::text, Toks[1].getKind());
1589 ASSERT_EQ(StringRef("&#"), Toks[1].getText());
1591 ASSERT_EQ(tok::newline, Toks[2].getKind());
1594 TEST_F(CommentLexerTest, HTMLCharacterReferences6) {
1595 const char *Source = "// &#a";
1597 std::vector<Token> Toks;
1599 lexString(Source, Toks);
1601 ASSERT_EQ(4U, Toks.size());
1603 ASSERT_EQ(tok::text, Toks[0].getKind());
1604 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1606 ASSERT_EQ(tok::text, Toks[1].getKind());
1607 ASSERT_EQ(StringRef("&#"), Toks[1].getText());
1609 ASSERT_EQ(tok::text, Toks[2].getKind());
1610 ASSERT_EQ(StringRef("a"), Toks[2].getText());
1612 ASSERT_EQ(tok::newline, Toks[3].getKind());
1615 TEST_F(CommentLexerTest, HTMLCharacterReferences7) {
1616 const char *Source = "// &#42";
1618 std::vector<Token> Toks;
1620 lexString(Source, Toks);
1622 ASSERT_EQ(3U, Toks.size());
1624 ASSERT_EQ(tok::text, Toks[0].getKind());
1625 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1627 ASSERT_EQ(tok::text, Toks[1].getKind());
1628 ASSERT_EQ(StringRef("&#42"), Toks[1].getText());
1630 ASSERT_EQ(tok::newline, Toks[2].getKind());
1633 TEST_F(CommentLexerTest, HTMLCharacterReferences8) {
1634 const char *Source = "// &#42a";
1636 std::vector<Token> Toks;
1638 lexString(Source, Toks);
1640 ASSERT_EQ(4U, Toks.size());
1642 ASSERT_EQ(tok::text, Toks[0].getKind());
1643 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1645 ASSERT_EQ(tok::text, Toks[1].getKind());
1646 ASSERT_EQ(StringRef("&#42"), Toks[1].getText());
1648 ASSERT_EQ(tok::text, Toks[2].getKind());
1649 ASSERT_EQ(StringRef("a"), Toks[2].getText());
1651 ASSERT_EQ(tok::newline, Toks[3].getKind());
1654 TEST_F(CommentLexerTest, HTMLCharacterReferences9) {
1655 const char *Source = "// &#x";
1657 std::vector<Token> Toks;
1659 lexString(Source, Toks);
1661 ASSERT_EQ(3U, Toks.size());
1663 ASSERT_EQ(tok::text, Toks[0].getKind());
1664 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1666 ASSERT_EQ(tok::text, Toks[1].getKind());
1667 ASSERT_EQ(StringRef("&#x"), Toks[1].getText());
1669 ASSERT_EQ(tok::newline, Toks[2].getKind());
1672 TEST_F(CommentLexerTest, HTMLCharacterReferences10) {
1673 const char *Source = "// &#xz";
1675 std::vector<Token> Toks;
1677 lexString(Source, Toks);
1679 ASSERT_EQ(4U, Toks.size());
1681 ASSERT_EQ(tok::text, Toks[0].getKind());
1682 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1684 ASSERT_EQ(tok::text, Toks[1].getKind());
1685 ASSERT_EQ(StringRef("&#x"), Toks[1].getText());
1687 ASSERT_EQ(tok::text, Toks[2].getKind());
1688 ASSERT_EQ(StringRef("z"), Toks[2].getText());
1690 ASSERT_EQ(tok::newline, Toks[3].getKind());
1693 TEST_F(CommentLexerTest, HTMLCharacterReferences11) {
1694 const char *Source = "// &#xab";
1696 std::vector<Token> Toks;
1698 lexString(Source, Toks);
1700 ASSERT_EQ(3U, Toks.size());
1702 ASSERT_EQ(tok::text, Toks[0].getKind());
1703 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1705 ASSERT_EQ(tok::text, Toks[1].getKind());
1706 ASSERT_EQ(StringRef("&#xab"), Toks[1].getText());
1708 ASSERT_EQ(tok::newline, Toks[2].getKind());
1711 TEST_F(CommentLexerTest, HTMLCharacterReferences12) {
1712 const char *Source = "// &#xaBz";
1714 std::vector<Token> Toks;
1716 lexString(Source, Toks);
1718 ASSERT_EQ(4U, Toks.size());
1720 ASSERT_EQ(tok::text, Toks[0].getKind());
1721 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1723 ASSERT_EQ(tok::text, Toks[1].getKind());
1724 ASSERT_EQ(StringRef("&#xaB"), Toks[1].getText());
1726 ASSERT_EQ(tok::text, Toks[2].getKind());
1727 ASSERT_EQ(StringRef("z"), Toks[2].getText());
1729 ASSERT_EQ(tok::newline, Toks[3].getKind());
1732 TEST_F(CommentLexerTest, HTMLCharacterReferences13) {
1733 const char *Source = "// &amp;";
1735 std::vector<Token> Toks;
1737 lexString(Source, Toks);
1739 ASSERT_EQ(3U, Toks.size());
1741 ASSERT_EQ(tok::text, Toks[0].getKind());
1742 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1744 ASSERT_EQ(tok::text, Toks[1].getKind());
1745 ASSERT_EQ(StringRef("&"), Toks[1].getText());
1747 ASSERT_EQ(tok::newline, Toks[2].getKind());
1750 TEST_F(CommentLexerTest, HTMLCharacterReferences14) {
1751 const char *Source = "// &amp;&lt;";
1753 std::vector<Token> Toks;
1755 lexString(Source, Toks);
1757 ASSERT_EQ(4U, Toks.size());
1759 ASSERT_EQ(tok::text, Toks[0].getKind());
1760 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1762 ASSERT_EQ(tok::text, Toks[1].getKind());
1763 ASSERT_EQ(StringRef("&"), Toks[1].getText());
1765 ASSERT_EQ(tok::text, Toks[2].getKind());
1766 ASSERT_EQ(StringRef("<"), Toks[2].getText());
1768 ASSERT_EQ(tok::newline, Toks[3].getKind());
1771 TEST_F(CommentLexerTest, HTMLCharacterReferences15) {
1772 const char *Source = "// &amp; meow";
1774 std::vector<Token> Toks;
1776 lexString(Source, Toks);
1778 ASSERT_EQ(4U, Toks.size());
1780 ASSERT_EQ(tok::text, Toks[0].getKind());
1781 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1783 ASSERT_EQ(tok::text, Toks[1].getKind());
1784 ASSERT_EQ(StringRef("&"), Toks[1].getText());
1786 ASSERT_EQ(tok::text, Toks[2].getKind());
1787 ASSERT_EQ(StringRef(" meow"), Toks[2].getText());
1789 ASSERT_EQ(tok::newline, Toks[3].getKind());
1792 TEST_F(CommentLexerTest, HTMLCharacterReferences16) {
1793 const char *Sources[] = {
1794 "// &#61;",
1795 "// &#x3d;",
1796 "// &#X3d;",
1797 "// &#X3D;"
1800 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
1801 std::vector<Token> Toks;
1803 lexString(Sources[i], Toks);
1805 ASSERT_EQ(3U, Toks.size());
1807 ASSERT_EQ(tok::text, Toks[0].getKind());
1808 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1810 ASSERT_EQ(tok::text, Toks[1].getKind());
1811 ASSERT_EQ(StringRef("="), Toks[1].getText());
1813 ASSERT_EQ(tok::newline, Toks[2].getKind());
1817 TEST_F(CommentLexerTest, MultipleComments) {
1818 const char *Source =
1819 "// Aaa\n"
1820 "/// Bbb\n"
1821 "/* Ccc\n"
1822 " * Ddd*/\n"
1823 "/** Eee*/";
1825 std::vector<Token> Toks;
1827 lexString(Source, Toks);
1829 ASSERT_EQ(12U, Toks.size());
1831 ASSERT_EQ(tok::text, Toks[0].getKind());
1832 ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText());
1833 ASSERT_EQ(tok::newline, Toks[1].getKind());
1835 ASSERT_EQ(tok::text, Toks[2].getKind());
1836 ASSERT_EQ(StringRef(" Bbb"), Toks[2].getText());
1837 ASSERT_EQ(tok::newline, Toks[3].getKind());
1839 ASSERT_EQ(tok::text, Toks[4].getKind());
1840 ASSERT_EQ(StringRef(" Ccc"), Toks[4].getText());
1841 ASSERT_EQ(tok::newline, Toks[5].getKind());
1843 ASSERT_EQ(tok::text, Toks[6].getKind());
1844 ASSERT_EQ(StringRef(" Ddd"), Toks[6].getText());
1845 ASSERT_EQ(tok::newline, Toks[7].getKind());
1846 ASSERT_EQ(tok::newline, Toks[8].getKind());
1848 ASSERT_EQ(tok::text, Toks[9].getKind());
1849 ASSERT_EQ(StringRef(" Eee"), Toks[9].getText());
1851 ASSERT_EQ(tok::newline, Toks[10].getKind());
1852 ASSERT_EQ(tok::newline, Toks[11].getKind());
1855 } // end namespace comments
1856 } // end namespace clang