1 //===- unittests/AST/CommentLexer.cpp ------ Comment lexer tests ----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "clang/AST/CommentLexer.h"
10 #include "clang/AST/CommentCommandTraits.h"
11 #include "clang/Basic/CommentOptions.h"
12 #include "clang/Basic/Diagnostic.h"
13 #include "clang/Basic/DiagnosticOptions.h"
14 #include "clang/Basic/FileManager.h"
15 #include "clang/Basic/SourceManager.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "gtest/gtest.h"
21 using namespace clang
;
27 class CommentLexerTest
: public ::testing::Test
{
30 : FileMgr(FileMgrOpts
),
31 DiagID(new DiagnosticIDs()),
32 Diags(DiagID
, new DiagnosticOptions
, new IgnoringDiagConsumer()),
33 SourceMgr(Diags
, FileMgr
),
34 Traits(Allocator
, CommentOptions()) {
37 FileSystemOptions FileMgrOpts
;
39 IntrusiveRefCntPtr
<DiagnosticIDs
> DiagID
;
40 DiagnosticsEngine Diags
;
41 SourceManager SourceMgr
;
42 llvm::BumpPtrAllocator Allocator
;
45 void lexString(const char *Source
, std::vector
<Token
> &Toks
);
47 StringRef
getCommandName(const Token
&Tok
) {
48 return Traits
.getCommandInfo(Tok
.getCommandID())->Name
;
51 StringRef
getVerbatimBlockName(const Token
&Tok
) {
52 return Traits
.getCommandInfo(Tok
.getVerbatimBlockID())->Name
;
55 StringRef
getVerbatimLineName(const Token
&Tok
) {
56 return Traits
.getCommandInfo(Tok
.getVerbatimLineID())->Name
;
60 void CommentLexerTest::lexString(const char *Source
,
61 std::vector
<Token
> &Toks
) {
62 std::unique_ptr
<MemoryBuffer
> Buf
= MemoryBuffer::getMemBuffer(Source
);
63 FileID File
= SourceMgr
.createFileID(std::move(Buf
));
64 SourceLocation Begin
= SourceMgr
.getLocForStartOfFile(File
);
66 Lexer
L(Allocator
, Diags
, Traits
, Begin
, Source
, Source
+ strlen(Source
));
77 } // unnamed namespace
79 // Empty source range should be handled.
80 TEST_F(CommentLexerTest
, Basic1
) {
81 const char *Source
= "";
82 std::vector
<Token
> Toks
;
84 lexString(Source
, Toks
);
86 ASSERT_EQ(0U, Toks
.size());
89 // Empty comments should be handled.
90 TEST_F(CommentLexerTest
, Basic2
) {
91 const char *Sources
[] = {
92 "//", "///", "//!", "///<", "//!<"
94 for (size_t i
= 0, e
= std::size(Sources
); i
!= e
; i
++) {
95 std::vector
<Token
> Toks
;
97 lexString(Sources
[i
], Toks
);
99 ASSERT_EQ(1U, Toks
.size());
101 ASSERT_EQ(tok::newline
, Toks
[0].getKind());
105 // Empty comments should be handled.
106 TEST_F(CommentLexerTest
, Basic3
) {
107 const char *Sources
[] = {
108 "/**/", "/***/", "/*!*/", "/**<*/", "/*!<*/"
110 for (size_t i
= 0, e
= std::size(Sources
); i
!= e
; i
++) {
111 std::vector
<Token
> Toks
;
113 lexString(Sources
[i
], Toks
);
115 ASSERT_EQ(2U, Toks
.size());
117 ASSERT_EQ(tok::newline
, Toks
[0].getKind());
118 ASSERT_EQ(tok::newline
, Toks
[1].getKind());
122 // Single comment with plain text.
123 TEST_F(CommentLexerTest
, Basic4
) {
124 const char *Sources
[] = {
125 "// Meow", "/// Meow", "//! Meow",
126 "// Meow\n", "// Meow\r\n", "//! Meow\r",
129 for (size_t i
= 0, e
= std::size(Sources
); i
!= e
; i
++) {
130 std::vector
<Token
> Toks
;
132 lexString(Sources
[i
], Toks
);
134 ASSERT_EQ(2U, Toks
.size());
136 ASSERT_EQ(tok::text
, Toks
[0].getKind());
137 ASSERT_EQ(StringRef(" Meow"), Toks
[0].getText());
139 ASSERT_EQ(tok::newline
, Toks
[1].getKind());
143 // Single comment with plain text.
144 TEST_F(CommentLexerTest
, Basic5
) {
145 const char *Sources
[] = {
146 "/* Meow*/", "/** Meow*/", "/*! Meow*/"
149 for (size_t i
= 0, e
= std::size(Sources
); i
!= e
; i
++) {
150 std::vector
<Token
> Toks
;
152 lexString(Sources
[i
], Toks
);
154 ASSERT_EQ(3U, Toks
.size());
156 ASSERT_EQ(tok::text
, Toks
[0].getKind());
157 ASSERT_EQ(StringRef(" Meow"), Toks
[0].getText());
159 ASSERT_EQ(tok::newline
, Toks
[1].getKind());
160 ASSERT_EQ(tok::newline
, Toks
[2].getKind());
164 // Test newline escaping.
165 TEST_F(CommentLexerTest
, Basic6
) {
166 const char *Sources
[] = {
167 "// Aaa\\\n" " Bbb\\ \n" " Ccc?" "?/\n",
168 "// Aaa\\\r\n" " Bbb\\ \r\n" " Ccc?" "?/\r\n",
169 "// Aaa\\\r" " Bbb\\ \r" " Ccc?" "?/\r"
172 for (size_t i
= 0, e
= std::size(Sources
); i
!= e
; i
++) {
173 std::vector
<Token
> Toks
;
175 lexString(Sources
[i
], Toks
);
177 ASSERT_EQ(10U, Toks
.size());
179 ASSERT_EQ(tok::text
, Toks
[0].getKind());
180 ASSERT_EQ(StringRef(" Aaa"), Toks
[0].getText());
181 ASSERT_EQ(tok::text
, Toks
[1].getKind());
182 ASSERT_EQ(StringRef("\\"), Toks
[1].getText());
183 ASSERT_EQ(tok::newline
, Toks
[2].getKind());
185 ASSERT_EQ(tok::text
, Toks
[3].getKind());
186 ASSERT_EQ(StringRef(" Bbb"), Toks
[3].getText());
187 ASSERT_EQ(tok::text
, Toks
[4].getKind());
188 ASSERT_EQ(StringRef("\\"), Toks
[4].getText());
189 ASSERT_EQ(tok::text
, Toks
[5].getKind());
190 ASSERT_EQ(StringRef(" "), Toks
[5].getText());
191 ASSERT_EQ(tok::newline
, Toks
[6].getKind());
193 ASSERT_EQ(tok::text
, Toks
[7].getKind());
194 ASSERT_EQ(StringRef(" Ccc?" "?/"), Toks
[7].getText());
195 ASSERT_EQ(tok::newline
, Toks
[8].getKind());
197 ASSERT_EQ(tok::newline
, Toks
[9].getKind());
201 // Check that we skip C-style aligned stars correctly.
202 TEST_F(CommentLexerTest
, Basic7
) {
211 std::vector
<Token
> Toks
;
213 lexString(Source
, Toks
);
215 ASSERT_EQ(15U, Toks
.size());
217 ASSERT_EQ(tok::text
, Toks
[0].getKind());
218 ASSERT_EQ(StringRef(" Aaa"), Toks
[0].getText());
219 ASSERT_EQ(tok::newline
, Toks
[1].getKind());
221 ASSERT_EQ(tok::text
, Toks
[2].getKind());
222 ASSERT_EQ(StringRef(" Bbb"), Toks
[2].getText());
223 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
225 ASSERT_EQ(tok::text
, Toks
[4].getKind());
226 ASSERT_EQ(StringRef(" Ccc"), Toks
[4].getText());
227 ASSERT_EQ(tok::newline
, Toks
[5].getKind());
229 ASSERT_EQ(tok::text
, Toks
[6].getKind());
230 ASSERT_EQ(StringRef(" ! Ddd"), Toks
[6].getText());
231 ASSERT_EQ(tok::newline
, Toks
[7].getKind());
233 ASSERT_EQ(tok::text
, Toks
[8].getKind());
234 ASSERT_EQ(StringRef(" Eee"), Toks
[8].getText());
235 ASSERT_EQ(tok::newline
, Toks
[9].getKind());
237 ASSERT_EQ(tok::text
, Toks
[10].getKind());
238 ASSERT_EQ(StringRef("* Fff"), Toks
[10].getText());
239 ASSERT_EQ(tok::newline
, Toks
[11].getKind());
241 ASSERT_EQ(tok::text
, Toks
[12].getKind());
242 ASSERT_EQ(StringRef(" "), Toks
[12].getText());
244 ASSERT_EQ(tok::newline
, Toks
[13].getKind());
245 ASSERT_EQ(tok::newline
, Toks
[14].getKind());
248 // A command marker followed by comment end.
249 TEST_F(CommentLexerTest
, DoxygenCommand1
) {
250 const char *Sources
[] = { "//@", "///@", "//!@" };
251 for (size_t i
= 0, e
= std::size(Sources
); i
!= e
; i
++) {
252 std::vector
<Token
> Toks
;
254 lexString(Sources
[i
], Toks
);
256 ASSERT_EQ(2U, Toks
.size());
258 ASSERT_EQ(tok::text
, Toks
[0].getKind());
259 ASSERT_EQ(StringRef("@"), Toks
[0].getText());
261 ASSERT_EQ(tok::newline
, Toks
[1].getKind());
265 // A command marker followed by comment end.
266 TEST_F(CommentLexerTest
, DoxygenCommand2
) {
267 const char *Sources
[] = { "/*@*/", "/**@*/", "/*!@*/"};
268 for (size_t i
= 0, e
= std::size(Sources
); i
!= e
; i
++) {
269 std::vector
<Token
> Toks
;
271 lexString(Sources
[i
], Toks
);
273 ASSERT_EQ(3U, Toks
.size());
275 ASSERT_EQ(tok::text
, Toks
[0].getKind());
276 ASSERT_EQ(StringRef("@"), Toks
[0].getText());
278 ASSERT_EQ(tok::newline
, Toks
[1].getKind());
279 ASSERT_EQ(tok::newline
, Toks
[2].getKind());
283 // A command marker followed by comment end.
284 TEST_F(CommentLexerTest
, DoxygenCommand3
) {
285 const char *Sources
[] = { "/*\\*/", "/**\\*/" };
286 for (size_t i
= 0, e
= std::size(Sources
); i
!= e
; i
++) {
287 std::vector
<Token
> Toks
;
289 lexString(Sources
[i
], Toks
);
291 ASSERT_EQ(3U, Toks
.size());
293 ASSERT_EQ(tok::text
, Toks
[0].getKind());
294 ASSERT_EQ(StringRef("\\"), Toks
[0].getText());
296 ASSERT_EQ(tok::newline
, Toks
[1].getKind());
297 ASSERT_EQ(tok::newline
, Toks
[2].getKind());
301 // Doxygen escape sequences.
302 TEST_F(CommentLexerTest
, DoxygenCommand4
) {
303 const char *Sources
[] = {
304 "/// \\\\ \\@ \\& \\$ \\# \\< \\> \\% \\\" \\. \\::",
305 "/// @\\ @@ @& @$ @# @< @> @% @\" @. @::"
307 const char *Text
[] = {
309 "\\", " ", "@", " ", "&", " ", "$", " ", "#", " ",
310 "<", " ", ">", " ", "%", " ", "\"", " ", ".", " ",
314 for (size_t i
= 0, e
= std::size(Sources
); i
!= e
; i
++) {
315 std::vector
<Token
> Toks
;
317 lexString(Sources
[i
], Toks
);
319 ASSERT_EQ(std::size(Text
), Toks
.size());
321 for (size_t j
= 0, e
= Toks
.size(); j
!= e
; j
++) {
322 if(Toks
[j
].is(tok::text
)) {
323 ASSERT_EQ(StringRef(Text
[j
]), Toks
[j
].getText())
330 // A command marker followed by a non-letter that is not a part of an escape
332 TEST_F(CommentLexerTest
, DoxygenCommand5
) {
333 const char *Source
= "/// \\^ \\0";
334 std::vector
<Token
> Toks
;
336 lexString(Source
, Toks
);
338 ASSERT_EQ(6U, Toks
.size());
340 ASSERT_EQ(tok::text
, Toks
[0].getKind());
341 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
343 ASSERT_EQ(tok::text
, Toks
[1].getKind());
344 ASSERT_EQ(StringRef("\\"), Toks
[1].getText());
346 ASSERT_EQ(tok::text
, Toks
[2].getKind());
347 ASSERT_EQ(StringRef("^ "), Toks
[2].getText());
349 ASSERT_EQ(tok::text
, Toks
[3].getKind());
350 ASSERT_EQ(StringRef("\\"), Toks
[3].getText());
352 ASSERT_EQ(tok::text
, Toks
[4].getKind());
353 ASSERT_EQ(StringRef("0"), Toks
[4].getText());
355 ASSERT_EQ(tok::newline
, Toks
[5].getKind());
358 TEST_F(CommentLexerTest
, DoxygenCommand6
) {
359 const char *Source
= "/// \\brief Aaa.";
360 std::vector
<Token
> Toks
;
362 lexString(Source
, Toks
);
364 ASSERT_EQ(4U, Toks
.size());
366 ASSERT_EQ(tok::text
, Toks
[0].getKind());
367 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
369 ASSERT_EQ(tok::backslash_command
, Toks
[1].getKind());
370 ASSERT_EQ(StringRef("brief"), getCommandName(Toks
[1]));
372 ASSERT_EQ(tok::text
, Toks
[2].getKind());
373 ASSERT_EQ(StringRef(" Aaa."), Toks
[2].getText());
375 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
378 TEST_F(CommentLexerTest
, DoxygenCommand7
) {
379 const char *Source
= "/// \\em\\em \\em\t\\em\n";
380 std::vector
<Token
> Toks
;
382 lexString(Source
, Toks
);
384 ASSERT_EQ(8U, Toks
.size());
386 ASSERT_EQ(tok::text
, Toks
[0].getKind());
387 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
389 ASSERT_EQ(tok::backslash_command
, Toks
[1].getKind());
390 ASSERT_EQ(StringRef("em"), getCommandName(Toks
[1]));
392 ASSERT_EQ(tok::backslash_command
, Toks
[2].getKind());
393 ASSERT_EQ(StringRef("em"), getCommandName(Toks
[2]));
395 ASSERT_EQ(tok::text
, Toks
[3].getKind());
396 ASSERT_EQ(StringRef(" "), Toks
[3].getText());
398 ASSERT_EQ(tok::backslash_command
, Toks
[4].getKind());
399 ASSERT_EQ(StringRef("em"), getCommandName(Toks
[4]));
401 ASSERT_EQ(tok::text
, Toks
[5].getKind());
402 ASSERT_EQ(StringRef("\t"), Toks
[5].getText());
404 ASSERT_EQ(tok::backslash_command
, Toks
[6].getKind());
405 ASSERT_EQ(StringRef("em"), getCommandName(Toks
[6]));
407 ASSERT_EQ(tok::newline
, Toks
[7].getKind());
410 TEST_F(CommentLexerTest
, DoxygenCommand8
) {
411 const char *Source
= "/// @em@em @em\t@em\n";
412 std::vector
<Token
> Toks
;
414 lexString(Source
, Toks
);
416 ASSERT_EQ(8U, Toks
.size());
418 ASSERT_EQ(tok::text
, Toks
[0].getKind());
419 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
421 ASSERT_EQ(tok::at_command
, Toks
[1].getKind());
422 ASSERT_EQ(StringRef("em"), getCommandName(Toks
[1]));
424 ASSERT_EQ(tok::at_command
, Toks
[2].getKind());
425 ASSERT_EQ(StringRef("em"), getCommandName(Toks
[2]));
427 ASSERT_EQ(tok::text
, Toks
[3].getKind());
428 ASSERT_EQ(StringRef(" "), Toks
[3].getText());
430 ASSERT_EQ(tok::at_command
, Toks
[4].getKind());
431 ASSERT_EQ(StringRef("em"), getCommandName(Toks
[4]));
433 ASSERT_EQ(tok::text
, Toks
[5].getKind());
434 ASSERT_EQ(StringRef("\t"), Toks
[5].getText());
436 ASSERT_EQ(tok::at_command
, Toks
[6].getKind());
437 ASSERT_EQ(StringRef("em"), getCommandName(Toks
[6]));
439 ASSERT_EQ(tok::newline
, Toks
[7].getKind());
442 TEST_F(CommentLexerTest
, DoxygenCommand9
) {
443 const char *Source
= "/// \\aaa\\bbb \\ccc\t\\ddd\n";
444 std::vector
<Token
> Toks
;
446 lexString(Source
, Toks
);
448 ASSERT_EQ(8U, Toks
.size());
450 ASSERT_EQ(tok::text
, Toks
[0].getKind());
451 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
453 ASSERT_EQ(tok::unknown_command
, Toks
[1].getKind());
454 ASSERT_EQ(StringRef("aaa"), Toks
[1].getUnknownCommandName());
456 ASSERT_EQ(tok::unknown_command
, Toks
[2].getKind());
457 ASSERT_EQ(StringRef("bbb"), Toks
[2].getUnknownCommandName());
459 ASSERT_EQ(tok::text
, Toks
[3].getKind());
460 ASSERT_EQ(StringRef(" "), Toks
[3].getText());
462 ASSERT_EQ(tok::unknown_command
, Toks
[4].getKind());
463 ASSERT_EQ(StringRef("ccc"), Toks
[4].getUnknownCommandName());
465 ASSERT_EQ(tok::text
, Toks
[5].getKind());
466 ASSERT_EQ(StringRef("\t"), Toks
[5].getText());
468 ASSERT_EQ(tok::unknown_command
, Toks
[6].getKind());
469 ASSERT_EQ(StringRef("ddd"), Toks
[6].getUnknownCommandName());
471 ASSERT_EQ(tok::newline
, Toks
[7].getKind());
474 TEST_F(CommentLexerTest
, DoxygenCommand10
) {
475 const char *Source
= "// \\c\n";
476 std::vector
<Token
> Toks
;
478 lexString(Source
, Toks
);
480 ASSERT_EQ(3U, Toks
.size());
482 ASSERT_EQ(tok::text
, Toks
[0].getKind());
483 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
485 ASSERT_EQ(tok::backslash_command
, Toks
[1].getKind());
486 ASSERT_EQ(StringRef("c"), getCommandName(Toks
[1]));
488 ASSERT_EQ(tok::newline
, Toks
[2].getKind());
491 TEST_F(CommentLexerTest
, RegisterCustomBlockCommand
) {
493 "/// \\NewBlockCommand Aaa.\n"
494 "/// @NewBlockCommand Aaa.\n";
496 Traits
.registerBlockCommand(StringRef("NewBlockCommand"));
498 std::vector
<Token
> Toks
;
500 lexString(Source
, Toks
);
502 ASSERT_EQ(8U, Toks
.size());
504 ASSERT_EQ(tok::text
, Toks
[0].getKind());
505 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
507 ASSERT_EQ(tok::backslash_command
, Toks
[1].getKind());
508 ASSERT_EQ(StringRef("NewBlockCommand"), getCommandName(Toks
[1]));
510 ASSERT_EQ(tok::text
, Toks
[2].getKind());
511 ASSERT_EQ(StringRef(" Aaa."), Toks
[2].getText());
513 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
515 ASSERT_EQ(tok::text
, Toks
[4].getKind());
516 ASSERT_EQ(StringRef(" "), Toks
[4].getText());
518 ASSERT_EQ(tok::at_command
, Toks
[5].getKind());
519 ASSERT_EQ(StringRef("NewBlockCommand"), getCommandName(Toks
[5]));
521 ASSERT_EQ(tok::text
, Toks
[6].getKind());
522 ASSERT_EQ(StringRef(" Aaa."), Toks
[6].getText());
524 ASSERT_EQ(tok::newline
, Toks
[7].getKind());
527 TEST_F(CommentLexerTest
, RegisterMultipleBlockCommands
) {
531 "/// \\Blech quux=corge\n";
533 Traits
.registerBlockCommand(StringRef("Foo"));
534 Traits
.registerBlockCommand(StringRef("Bar"));
535 Traits
.registerBlockCommand(StringRef("Blech"));
537 std::vector
<Token
> Toks
;
539 lexString(Source
, Toks
);
541 ASSERT_EQ(11U, Toks
.size());
543 ASSERT_EQ(tok::text
, Toks
[0].getKind());
544 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
546 ASSERT_EQ(tok::backslash_command
, Toks
[1].getKind());
547 ASSERT_EQ(StringRef("Foo"), getCommandName(Toks
[1]));
549 ASSERT_EQ(tok::newline
, Toks
[2].getKind());
551 ASSERT_EQ(tok::text
, Toks
[3].getKind());
552 ASSERT_EQ(StringRef(" "), Toks
[3].getText());
554 ASSERT_EQ(tok::backslash_command
, Toks
[4].getKind());
555 ASSERT_EQ(StringRef("Bar"), getCommandName(Toks
[4]));
557 ASSERT_EQ(tok::text
, Toks
[5].getKind());
558 ASSERT_EQ(StringRef(" Baz"), Toks
[5].getText());
560 ASSERT_EQ(tok::newline
, Toks
[6].getKind());
562 ASSERT_EQ(tok::text
, Toks
[7].getKind());
563 ASSERT_EQ(StringRef(" "), Toks
[7].getText());
565 ASSERT_EQ(tok::backslash_command
, Toks
[8].getKind());
566 ASSERT_EQ(StringRef("Blech"), getCommandName(Toks
[8]));
568 ASSERT_EQ(tok::text
, Toks
[9].getKind());
569 ASSERT_EQ(StringRef(" quux=corge"), Toks
[9].getText());
571 ASSERT_EQ(tok::newline
, Toks
[10].getKind());
574 // Empty verbatim block.
575 TEST_F(CommentLexerTest
, VerbatimBlock1
) {
576 const char *Sources
[] = {
577 "/// \\verbatim\\endverbatim\n//",
578 "/** \\verbatim\\endverbatim*/"
581 for (size_t i
= 0, e
= std::size(Sources
); i
!= e
; i
++) {
582 std::vector
<Token
> Toks
;
584 lexString(Sources
[i
], Toks
);
586 ASSERT_EQ(5U, Toks
.size());
588 ASSERT_EQ(tok::text
, Toks
[0].getKind());
589 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
591 ASSERT_EQ(tok::verbatim_block_begin
, Toks
[1].getKind());
592 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks
[1]));
594 ASSERT_EQ(tok::verbatim_block_end
, Toks
[2].getKind());
595 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks
[2]));
597 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
598 ASSERT_EQ(tok::newline
, Toks
[4].getKind());
602 // Empty verbatim block without an end command.
603 TEST_F(CommentLexerTest
, VerbatimBlock2
) {
604 const char *Source
= "/// \\verbatim";
606 std::vector
<Token
> Toks
;
608 lexString(Source
, Toks
);
610 ASSERT_EQ(3U, Toks
.size());
612 ASSERT_EQ(tok::text
, Toks
[0].getKind());
613 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
615 ASSERT_EQ(tok::verbatim_block_begin
, Toks
[1].getKind());
616 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks
[1]));
618 ASSERT_EQ(tok::newline
, Toks
[2].getKind());
621 // Empty verbatim block without an end command.
622 TEST_F(CommentLexerTest
, VerbatimBlock3
) {
623 const char *Source
= "/** \\verbatim*/";
625 std::vector
<Token
> Toks
;
627 lexString(Source
, Toks
);
629 ASSERT_EQ(4U, Toks
.size());
631 ASSERT_EQ(tok::text
, Toks
[0].getKind());
632 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
634 ASSERT_EQ(tok::verbatim_block_begin
, Toks
[1].getKind());
635 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks
[1]));
637 ASSERT_EQ(tok::newline
, Toks
[2].getKind());
638 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
641 // Single-line verbatim block.
642 TEST_F(CommentLexerTest
, VerbatimBlock4
) {
643 const char *Sources
[] = {
644 "/// Meow \\verbatim aaa \\endverbatim\n//",
645 "/** Meow \\verbatim aaa \\endverbatim*/"
648 for (size_t i
= 0, e
= std::size(Sources
); i
!= e
; i
++) {
649 std::vector
<Token
> Toks
;
651 lexString(Sources
[i
], Toks
);
653 ASSERT_EQ(6U, Toks
.size());
655 ASSERT_EQ(tok::text
, Toks
[0].getKind());
656 ASSERT_EQ(StringRef(" Meow "), Toks
[0].getText());
658 ASSERT_EQ(tok::verbatim_block_begin
, Toks
[1].getKind());
659 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks
[1]));
661 ASSERT_EQ(tok::verbatim_block_line
, Toks
[2].getKind());
662 ASSERT_EQ(StringRef(" aaa "), Toks
[2].getVerbatimBlockText());
664 ASSERT_EQ(tok::verbatim_block_end
, Toks
[3].getKind());
665 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks
[3]));
667 ASSERT_EQ(tok::newline
, Toks
[4].getKind());
668 ASSERT_EQ(tok::newline
, Toks
[5].getKind());
672 // Single-line verbatim block without an end command.
673 TEST_F(CommentLexerTest
, VerbatimBlock5
) {
674 const char *Sources
[] = {
675 "/// Meow \\verbatim aaa \n//",
676 "/** Meow \\verbatim aaa */"
679 for (size_t i
= 0, e
= std::size(Sources
); i
!= e
; i
++) {
680 std::vector
<Token
> Toks
;
682 lexString(Sources
[i
], Toks
);
684 ASSERT_EQ(5U, Toks
.size());
686 ASSERT_EQ(tok::text
, Toks
[0].getKind());
687 ASSERT_EQ(StringRef(" Meow "), Toks
[0].getText());
689 ASSERT_EQ(tok::verbatim_block_begin
, Toks
[1].getKind());
690 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks
[1]));
692 ASSERT_EQ(tok::verbatim_block_line
, Toks
[2].getKind());
693 ASSERT_EQ(StringRef(" aaa "), Toks
[2].getVerbatimBlockText());
695 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
696 ASSERT_EQ(tok::newline
, Toks
[4].getKind());
700 TEST_F(CommentLexerTest
, VerbatimBlock6
) {
706 "// \\endverbatim\n";
708 std::vector
<Token
> Toks
;
710 lexString(Source
, Toks
);
712 ASSERT_EQ(10U, Toks
.size());
714 ASSERT_EQ(tok::text
, Toks
[0].getKind());
715 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
717 ASSERT_EQ(tok::verbatim_block_begin
, Toks
[1].getKind());
718 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks
[1]));
720 ASSERT_EQ(tok::newline
, Toks
[2].getKind());
722 ASSERT_EQ(tok::verbatim_block_line
, Toks
[3].getKind());
723 ASSERT_EQ(StringRef(" Aaa"), Toks
[3].getVerbatimBlockText());
725 ASSERT_EQ(tok::newline
, Toks
[4].getKind());
727 ASSERT_EQ(tok::newline
, Toks
[5].getKind());
729 ASSERT_EQ(tok::verbatim_block_line
, Toks
[6].getKind());
730 ASSERT_EQ(StringRef(" Bbb"), Toks
[6].getVerbatimBlockText());
732 ASSERT_EQ(tok::newline
, Toks
[7].getKind());
734 ASSERT_EQ(tok::verbatim_block_end
, Toks
[8].getKind());
735 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks
[8]));
737 ASSERT_EQ(tok::newline
, Toks
[9].getKind());
740 TEST_F(CommentLexerTest
, VerbatimBlock7
) {
749 std::vector
<Token
> Toks
;
751 lexString(Source
, Toks
);
753 ASSERT_EQ(10U, Toks
.size());
755 ASSERT_EQ(tok::text
, Toks
[0].getKind());
756 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
758 ASSERT_EQ(tok::verbatim_block_begin
, Toks
[1].getKind());
759 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks
[1]));
761 ASSERT_EQ(tok::verbatim_block_line
, Toks
[2].getKind());
762 ASSERT_EQ(StringRef(" Aaa"), Toks
[2].getVerbatimBlockText());
764 ASSERT_EQ(tok::verbatim_block_line
, Toks
[3].getKind());
765 ASSERT_EQ(StringRef(""), Toks
[3].getVerbatimBlockText());
767 ASSERT_EQ(tok::verbatim_block_line
, Toks
[4].getKind());
768 ASSERT_EQ(StringRef(" Bbb"), Toks
[4].getVerbatimBlockText());
770 ASSERT_EQ(tok::verbatim_block_end
, Toks
[5].getKind());
771 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks
[5]));
773 ASSERT_EQ(tok::newline
, Toks
[6].getKind());
775 ASSERT_EQ(tok::text
, Toks
[7].getKind());
776 ASSERT_EQ(StringRef(" "), Toks
[7].getText());
778 ASSERT_EQ(tok::newline
, Toks
[8].getKind());
779 ASSERT_EQ(tok::newline
, Toks
[9].getKind());
782 // Complex test for verbatim blocks.
783 TEST_F(CommentLexerTest
, VerbatimBlock8
) {
785 "/* Meow \\verbatim aaa\\$\\@\n"
788 "ddd \\endverbatim Blah \\verbatim eee\n"
789 "\\endverbatim BlahBlah*/";
790 std::vector
<Token
> Toks
;
792 lexString(Source
, Toks
);
794 ASSERT_EQ(14U, Toks
.size());
796 ASSERT_EQ(tok::text
, Toks
[0].getKind());
797 ASSERT_EQ(StringRef(" Meow "), Toks
[0].getText());
799 ASSERT_EQ(tok::verbatim_block_begin
, Toks
[1].getKind());
800 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks
[1]));
802 ASSERT_EQ(tok::verbatim_block_line
, Toks
[2].getKind());
803 ASSERT_EQ(StringRef(" aaa\\$\\@"), Toks
[2].getVerbatimBlockText());
805 ASSERT_EQ(tok::verbatim_block_line
, Toks
[3].getKind());
806 ASSERT_EQ(StringRef("bbb \\endverbati"), Toks
[3].getVerbatimBlockText());
808 ASSERT_EQ(tok::verbatim_block_line
, Toks
[4].getKind());
809 ASSERT_EQ(StringRef("ccc"), Toks
[4].getVerbatimBlockText());
811 ASSERT_EQ(tok::verbatim_block_line
, Toks
[5].getKind());
812 ASSERT_EQ(StringRef("ddd "), Toks
[5].getVerbatimBlockText());
814 ASSERT_EQ(tok::verbatim_block_end
, Toks
[6].getKind());
815 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks
[6]));
817 ASSERT_EQ(tok::text
, Toks
[7].getKind());
818 ASSERT_EQ(StringRef(" Blah "), Toks
[7].getText());
820 ASSERT_EQ(tok::verbatim_block_begin
, Toks
[8].getKind());
821 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks
[8]));
823 ASSERT_EQ(tok::verbatim_block_line
, Toks
[9].getKind());
824 ASSERT_EQ(StringRef(" eee"), Toks
[9].getVerbatimBlockText());
826 ASSERT_EQ(tok::verbatim_block_end
, Toks
[10].getKind());
827 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks
[10]));
829 ASSERT_EQ(tok::text
, Toks
[11].getKind());
830 ASSERT_EQ(StringRef(" BlahBlah"), Toks
[11].getText());
832 ASSERT_EQ(tok::newline
, Toks
[12].getKind());
833 ASSERT_EQ(tok::newline
, Toks
[13].getKind());
836 // LaTeX verbatim blocks.
837 TEST_F(CommentLexerTest
, VerbatimBlock9
) {
839 "/// \\f$ Aaa \\f$ \\f[ Bbb \\f] \\f{ Ccc \\f} \\f( Ddd \\f)";
840 std::vector
<Token
> Toks
;
842 lexString(Source
, Toks
);
844 ASSERT_EQ(17U, Toks
.size());
846 ASSERT_EQ(tok::text
, Toks
[0].getKind());
847 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
849 ASSERT_EQ(tok::verbatim_block_begin
, Toks
[1].getKind());
850 ASSERT_EQ(StringRef("f$"), getVerbatimBlockName(Toks
[1]));
852 ASSERT_EQ(tok::verbatim_block_line
, Toks
[2].getKind());
853 ASSERT_EQ(StringRef(" Aaa "), Toks
[2].getVerbatimBlockText());
855 ASSERT_EQ(tok::verbatim_block_end
, Toks
[3].getKind());
856 ASSERT_EQ(StringRef("f$"), getVerbatimBlockName(Toks
[3]));
858 ASSERT_EQ(tok::text
, Toks
[4].getKind());
859 ASSERT_EQ(StringRef(" "), Toks
[4].getText());
861 ASSERT_EQ(tok::verbatim_block_begin
, Toks
[5].getKind());
862 ASSERT_EQ(StringRef("f["), getVerbatimBlockName(Toks
[5]));
864 ASSERT_EQ(tok::verbatim_block_line
, Toks
[6].getKind());
865 ASSERT_EQ(StringRef(" Bbb "), Toks
[6].getVerbatimBlockText());
867 ASSERT_EQ(tok::verbatim_block_end
, Toks
[7].getKind());
868 ASSERT_EQ(StringRef("f]"), getVerbatimBlockName(Toks
[7]));
870 ASSERT_EQ(tok::text
, Toks
[8].getKind());
871 ASSERT_EQ(StringRef(" "), Toks
[8].getText());
873 ASSERT_EQ(tok::verbatim_block_begin
, Toks
[9].getKind());
874 ASSERT_EQ(StringRef("f{"), getVerbatimBlockName(Toks
[9]));
876 ASSERT_EQ(tok::verbatim_block_line
, Toks
[10].getKind());
877 ASSERT_EQ(StringRef(" Ccc "), Toks
[10].getVerbatimBlockText());
879 ASSERT_EQ(tok::verbatim_block_end
, Toks
[11].getKind());
880 ASSERT_EQ(StringRef("f}"), getVerbatimBlockName(Toks
[11]));
882 ASSERT_EQ(tok::text
, Toks
[12].getKind());
883 ASSERT_EQ(StringRef(" "), Toks
[12].getText());
885 ASSERT_EQ(tok::verbatim_block_begin
, Toks
[13].getKind());
886 ASSERT_EQ(StringRef("f("), getVerbatimBlockName(Toks
[13]));
888 ASSERT_EQ(tok::verbatim_block_line
, Toks
[14].getKind());
889 ASSERT_EQ(StringRef(" Ddd "), Toks
[14].getVerbatimBlockText());
891 ASSERT_EQ(tok::verbatim_block_end
, Toks
[15].getKind());
892 ASSERT_EQ(StringRef("f)"), getVerbatimBlockName(Toks
[15]));
894 ASSERT_EQ(tok::newline
, Toks
[16].getKind());
897 // Empty verbatim line.
898 TEST_F(CommentLexerTest
, VerbatimLine1
) {
899 const char *Sources
[] = {
904 for (size_t i
= 0, e
= std::size(Sources
); i
!= e
; i
++) {
905 std::vector
<Token
> Toks
;
907 lexString(Sources
[i
], Toks
);
909 ASSERT_EQ(4U, Toks
.size());
911 ASSERT_EQ(tok::text
, Toks
[0].getKind());
912 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
914 ASSERT_EQ(tok::verbatim_line_name
, Toks
[1].getKind());
915 ASSERT_EQ(StringRef("fn"), getVerbatimLineName(Toks
[1]));
917 ASSERT_EQ(tok::newline
, Toks
[2].getKind());
918 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
922 // Verbatim line with Doxygen escape sequences, which should not be expanded.
923 TEST_F(CommentLexerTest
, VerbatimLine2
) {
924 const char *Sources
[] = {
925 "/// \\fn void *foo(const char *zzz = \"\\$\");\n//",
926 "/** \\fn void *foo(const char *zzz = \"\\$\");*/"
929 for (size_t i
= 0, e
= std::size(Sources
); i
!= e
; i
++) {
930 std::vector
<Token
> Toks
;
932 lexString(Sources
[i
], Toks
);
934 ASSERT_EQ(5U, Toks
.size());
936 ASSERT_EQ(tok::text
, Toks
[0].getKind());
937 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
939 ASSERT_EQ(tok::verbatim_line_name
, Toks
[1].getKind());
940 ASSERT_EQ(StringRef("fn"), getVerbatimLineName(Toks
[1]));
942 ASSERT_EQ(tok::verbatim_line_text
, Toks
[2].getKind());
943 ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"),
944 Toks
[2].getVerbatimLineText());
946 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
947 ASSERT_EQ(tok::newline
, Toks
[4].getKind());
951 // Verbatim line should not eat anything from next source line.
952 TEST_F(CommentLexerTest
, VerbatimLine3
) {
954 "/** \\fn void *foo(const char *zzz = \"\\$\");\n"
958 std::vector
<Token
> Toks
;
960 lexString(Source
, Toks
);
962 ASSERT_EQ(9U, Toks
.size());
964 ASSERT_EQ(tok::text
, Toks
[0].getKind());
965 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
967 ASSERT_EQ(tok::verbatim_line_name
, Toks
[1].getKind());
968 ASSERT_EQ(StringRef("fn"), getVerbatimLineName(Toks
[1]));
970 ASSERT_EQ(tok::verbatim_line_text
, Toks
[2].getKind());
971 ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"),
972 Toks
[2].getVerbatimLineText());
973 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
975 ASSERT_EQ(tok::text
, Toks
[4].getKind());
976 ASSERT_EQ(StringRef(" Meow"), Toks
[4].getText());
977 ASSERT_EQ(tok::newline
, Toks
[5].getKind());
979 ASSERT_EQ(tok::text
, Toks
[6].getKind());
980 ASSERT_EQ(StringRef(" "), Toks
[6].getText());
982 ASSERT_EQ(tok::newline
, Toks
[7].getKind());
983 ASSERT_EQ(tok::newline
, Toks
[8].getKind());
986 TEST_F(CommentLexerTest
, HTML1
) {
990 std::vector
<Token
> Toks
;
992 lexString(Source
, Toks
);
994 ASSERT_EQ(3U, Toks
.size());
996 ASSERT_EQ(tok::text
, Toks
[0].getKind());
997 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
999 ASSERT_EQ(tok::text
, Toks
[1].getKind());
1000 ASSERT_EQ(StringRef("<"), Toks
[1].getText());
1002 ASSERT_EQ(tok::newline
, Toks
[2].getKind());
1005 TEST_F(CommentLexerTest
, HTML2
) {
1006 const char *Source
=
1009 std::vector
<Token
> Toks
;
1011 lexString(Source
, Toks
);
1013 ASSERT_EQ(4U, Toks
.size());
1015 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1016 ASSERT_EQ(StringRef(" a"), Toks
[0].getText());
1018 ASSERT_EQ(tok::text
, Toks
[1].getKind());
1019 ASSERT_EQ(StringRef("<"), Toks
[1].getText());
1021 ASSERT_EQ(tok::text
, Toks
[2].getKind());
1022 ASSERT_EQ(StringRef("2"), Toks
[2].getText());
1024 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
1027 TEST_F(CommentLexerTest
, HTML3
) {
1028 const char *Source
=
1031 std::vector
<Token
> Toks
;
1033 lexString(Source
, Toks
);
1035 ASSERT_EQ(4U, Toks
.size());
1037 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1038 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1040 ASSERT_EQ(tok::text
, Toks
[1].getKind());
1041 ASSERT_EQ(StringRef("<"), Toks
[1].getText());
1043 ASSERT_EQ(tok::text
, Toks
[2].getKind());
1044 ASSERT_EQ(StringRef(" img"), Toks
[2].getText());
1046 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
1049 TEST_F(CommentLexerTest
, HTML4
) {
1050 const char *Sources
[] = {
1055 for (size_t i
= 0, e
= std::size(Sources
); i
!= e
; i
++) {
1056 std::vector
<Token
> Toks
;
1058 lexString(Sources
[i
], Toks
);
1060 ASSERT_EQ(3U, Toks
.size());
1062 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1063 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1065 ASSERT_EQ(tok::html_start_tag
, Toks
[1].getKind());
1066 ASSERT_EQ(StringRef("img"), Toks
[1].getHTMLTagStartName());
1068 ASSERT_EQ(tok::newline
, Toks
[2].getKind());
1072 TEST_F(CommentLexerTest
, HTML5
) {
1073 const char *Source
=
1076 std::vector
<Token
> Toks
;
1078 lexString(Source
, Toks
);
1080 ASSERT_EQ(4U, Toks
.size());
1082 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1083 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1085 ASSERT_EQ(tok::html_start_tag
, Toks
[1].getKind());
1086 ASSERT_EQ(StringRef("img"), Toks
[1].getHTMLTagStartName());
1088 ASSERT_EQ(tok::text
, Toks
[2].getKind());
1089 ASSERT_EQ(StringRef("42"), Toks
[2].getText());
1091 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
1094 TEST_F(CommentLexerTest
, HTML6
) {
1095 const char *Source
= "// <img> Meow";
1097 std::vector
<Token
> Toks
;
1099 lexString(Source
, Toks
);
1101 ASSERT_EQ(5U, Toks
.size());
1103 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1104 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1106 ASSERT_EQ(tok::html_start_tag
, Toks
[1].getKind());
1107 ASSERT_EQ(StringRef("img"), Toks
[1].getHTMLTagStartName());
1109 ASSERT_EQ(tok::html_greater
, Toks
[2].getKind());
1111 ASSERT_EQ(tok::text
, Toks
[3].getKind());
1112 ASSERT_EQ(StringRef(" Meow"), Toks
[3].getText());
1114 ASSERT_EQ(tok::newline
, Toks
[4].getKind());
1117 TEST_F(CommentLexerTest
, HTML7
) {
1118 const char *Source
= "// <img=";
1120 std::vector
<Token
> Toks
;
1122 lexString(Source
, Toks
);
1124 ASSERT_EQ(4U, Toks
.size());
1126 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1127 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1129 ASSERT_EQ(tok::html_start_tag
, Toks
[1].getKind());
1130 ASSERT_EQ(StringRef("img"), Toks
[1].getHTMLTagStartName());
1132 ASSERT_EQ(tok::text
, Toks
[2].getKind());
1133 ASSERT_EQ(StringRef("="), Toks
[2].getText());
1135 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
1138 TEST_F(CommentLexerTest
, HTML8
) {
1139 const char *Source
= "// <img src=> Meow";
1141 std::vector
<Token
> Toks
;
1143 lexString(Source
, Toks
);
1145 ASSERT_EQ(7U, Toks
.size());
1147 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1148 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1150 ASSERT_EQ(tok::html_start_tag
, Toks
[1].getKind());
1151 ASSERT_EQ(StringRef("img"), Toks
[1].getHTMLTagStartName());
1153 ASSERT_EQ(tok::html_ident
, Toks
[2].getKind());
1154 ASSERT_EQ(StringRef("src"), Toks
[2].getHTMLIdent());
1156 ASSERT_EQ(tok::html_equals
, Toks
[3].getKind());
1158 ASSERT_EQ(tok::html_greater
, Toks
[4].getKind());
1160 ASSERT_EQ(tok::text
, Toks
[5].getKind());
1161 ASSERT_EQ(StringRef(" Meow"), Toks
[5].getText());
1163 ASSERT_EQ(tok::newline
, Toks
[6].getKind());
// Lexes every entry of Sources and requires four tokens for each: a " " text
// token, an html_start_tag named "img", an html_ident "src", and a newline.
1166 TEST_F(CommentLexerTest
, HTML9
) {
1167 const char *Sources
[] = {
// Each source string gets a fresh token vector.
1172 for (size_t i
= 0, e
= std::size(Sources
); i
!= e
; i
++) {
1173 std::vector
<Token
> Toks
;
1175 lexString(Sources
[i
], Toks
);
1177 ASSERT_EQ(4U, Toks
.size());
// Token 0: the single space in front of the tag.
1179 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1180 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
// Token 1: the opening tag, identified by its name.
1182 ASSERT_EQ(tok::html_start_tag
, Toks
[1].getKind());
1183 ASSERT_EQ(StringRef("img"), Toks
[1].getHTMLTagStartName());
// Token 2: the attribute name.
1185 ASSERT_EQ(tok::html_ident
, Toks
[2].getKind());
1186 ASSERT_EQ(StringRef("src"), Toks
[2].getHTMLIdent());
// Token 3: end of line.
1188 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
// Lexes every entry of Sources and requires five tokens for each: a " " text
// token, an html_start_tag named "img", an html_ident "src", an html_equals,
// and a newline.
1192 TEST_F(CommentLexerTest
, HTML10
) {
1193 const char *Sources
[] = {
// Each source string gets a fresh token vector.
1198 for (size_t i
= 0, e
= std::size(Sources
); i
!= e
; i
++) {
1199 std::vector
<Token
> Toks
;
1201 lexString(Sources
[i
], Toks
);
1203 ASSERT_EQ(5U, Toks
.size());
// Token 0: the single space in front of the tag.
1205 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1206 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
// Token 1: the opening tag, identified by its name.
1208 ASSERT_EQ(tok::html_start_tag
, Toks
[1].getKind());
1209 ASSERT_EQ(StringRef("img"), Toks
[1].getHTMLTagStartName());
// Token 2: the attribute name.
1211 ASSERT_EQ(tok::html_ident
, Toks
[2].getKind());
1212 ASSERT_EQ(StringRef("src"), Toks
[2].getHTMLIdent());
// Token 3: the '=' after the attribute name; no value token follows it.
1214 ASSERT_EQ(tok::html_equals
, Toks
[3].getKind());
// Token 4: end of line.
1216 ASSERT_EQ(tok::newline
, Toks
[4].getKind());
// Lexes every entry of Sources and requires six tokens for each: a " " text
// token, an html_start_tag named "img", an html_ident "src", an html_equals,
// an html_quoted_string whose value is empty, and a newline.
1220 TEST_F(CommentLexerTest
, HTML11
) {
1221 const char *Sources
[] = {
// Each source string gets a fresh token vector.
1228 for (size_t i
= 0, e
= std::size(Sources
); i
!= e
; i
++) {
1229 std::vector
<Token
> Toks
;
1231 lexString(Sources
[i
], Toks
);
1233 ASSERT_EQ(6U, Toks
.size());
// Token 0: the single space in front of the tag.
1235 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1236 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
// Token 1: the opening tag, identified by its name.
1238 ASSERT_EQ(tok::html_start_tag
, Toks
[1].getKind());
1239 ASSERT_EQ(StringRef("img"), Toks
[1].getHTMLTagStartName());
// Token 2: the attribute name.
1241 ASSERT_EQ(tok::html_ident
, Toks
[2].getKind());
1242 ASSERT_EQ(StringRef("src"), Toks
[2].getHTMLIdent());
// Token 3: the '=' after the attribute name.
1244 ASSERT_EQ(tok::html_equals
, Toks
[3].getKind());
// Token 4: the attribute value; its contents are expected to be empty.
1246 ASSERT_EQ(tok::html_quoted_string
, Toks
[4].getKind());
1247 ASSERT_EQ(StringRef(""), Toks
[4].getHTMLQuotedString());
// Token 5: end of line.
1249 ASSERT_EQ(tok::newline
, Toks
[5].getKind());
1253 TEST_F(CommentLexerTest
, HTML12
) {
1254 const char *Source
= "// <img src=@";
1256 std::vector
<Token
> Toks
;
1258 lexString(Source
, Toks
);
1260 ASSERT_EQ(6U, Toks
.size());
1262 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1263 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1265 ASSERT_EQ(tok::html_start_tag
, Toks
[1].getKind());
1266 ASSERT_EQ(StringRef("img"), Toks
[1].getHTMLTagStartName());
1268 ASSERT_EQ(tok::html_ident
, Toks
[2].getKind());
1269 ASSERT_EQ(StringRef("src"), Toks
[2].getHTMLIdent());
1271 ASSERT_EQ(tok::html_equals
, Toks
[3].getKind());
1273 ASSERT_EQ(tok::text
, Toks
[4].getKind());
1274 ASSERT_EQ(StringRef("@"), Toks
[4].getText());
1276 ASSERT_EQ(tok::newline
, Toks
[5].getKind());
1279 TEST_F(CommentLexerTest
, HTML13
) {
1280 const char *Sources
[] = {
1281 "// <img src=\"val\\\"\\'val",
1282 "// <img src=\"val\\\"\\'val\"",
1283 "// <img src=\'val\\\"\\'val",
1284 "// <img src=\'val\\\"\\'val\'"
1287 for (size_t i
= 0, e
= std::size(Sources
); i
!= e
; i
++) {
1288 std::vector
<Token
> Toks
;
1290 lexString(Sources
[i
], Toks
);
1292 ASSERT_EQ(6U, Toks
.size());
1294 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1295 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1297 ASSERT_EQ(tok::html_start_tag
, Toks
[1].getKind());
1298 ASSERT_EQ(StringRef("img"), Toks
[1].getHTMLTagStartName());
1300 ASSERT_EQ(tok::html_ident
, Toks
[2].getKind());
1301 ASSERT_EQ(StringRef("src"), Toks
[2].getHTMLIdent());
1303 ASSERT_EQ(tok::html_equals
, Toks
[3].getKind());
1305 ASSERT_EQ(tok::html_quoted_string
, Toks
[4].getKind());
1306 ASSERT_EQ(StringRef("val\\\"\\'val"), Toks
[4].getHTMLQuotedString());
1308 ASSERT_EQ(tok::newline
, Toks
[5].getKind());
1312 TEST_F(CommentLexerTest
, HTML14
) {
1313 const char *Sources
[] = {
1314 "// <img src=\"val\\\"\\'val\">",
1315 "// <img src=\'val\\\"\\'val\'>"
1318 for (size_t i
= 0, e
= std::size(Sources
); i
!= e
; i
++) {
1319 std::vector
<Token
> Toks
;
1321 lexString(Sources
[i
], Toks
);
1323 ASSERT_EQ(7U, Toks
.size());
1325 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1326 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1328 ASSERT_EQ(tok::html_start_tag
, Toks
[1].getKind());
1329 ASSERT_EQ(StringRef("img"), Toks
[1].getHTMLTagStartName());
1331 ASSERT_EQ(tok::html_ident
, Toks
[2].getKind());
1332 ASSERT_EQ(StringRef("src"), Toks
[2].getHTMLIdent());
1334 ASSERT_EQ(tok::html_equals
, Toks
[3].getKind());
1336 ASSERT_EQ(tok::html_quoted_string
, Toks
[4].getKind());
1337 ASSERT_EQ(StringRef("val\\\"\\'val"), Toks
[4].getHTMLQuotedString());
1339 ASSERT_EQ(tok::html_greater
, Toks
[5].getKind());
1341 ASSERT_EQ(tok::newline
, Toks
[6].getKind());
// Lexes every entry of Sources and requires four tokens for each: a " " text
// token, an html_start_tag named "img", an html_slash_greater, and a newline.
1345 TEST_F(CommentLexerTest
, HTML15
) {
1346 const char *Sources
[] = {
// Each source string gets a fresh token vector.
1351 for (size_t i
= 0, e
= std::size(Sources
); i
!= e
; i
++) {
1352 std::vector
<Token
> Toks
;
1354 lexString(Sources
[i
], Toks
);
1356 ASSERT_EQ(4U, Toks
.size());
// Token 0: the single space in front of the tag.
1358 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1359 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
// Token 1: the opening tag, identified by its name.
1361 ASSERT_EQ(tok::html_start_tag
, Toks
[1].getKind());
1362 ASSERT_EQ(StringRef("img"), Toks
[1].getHTMLTagStartName());
// Token 2: the self-closing tag terminator, reported as one token.
1364 ASSERT_EQ(tok::html_slash_greater
, Toks
[2].getKind());
// Token 3: end of line.
1366 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
// Lexes every entry of Sources and requires five tokens for each: a " " text
// token, an html_start_tag named "img", a text token "/", a text token
// " Aaa", and a newline.
1370 TEST_F(CommentLexerTest
, HTML16
) {
1371 const char *Sources
[] = {
// Each source string gets a fresh token vector.
1376 for (size_t i
= 0, e
= std::size(Sources
); i
!= e
; i
++) {
1377 std::vector
<Token
> Toks
;
1379 lexString(Sources
[i
], Toks
);
1381 ASSERT_EQ(5U, Toks
.size());
// Token 0: the single space in front of the tag.
1383 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1384 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
// Token 1: the opening tag, identified by its name.
1386 ASSERT_EQ(tok::html_start_tag
, Toks
[1].getKind());
1387 ASSERT_EQ(StringRef("img"), Toks
[1].getHTMLTagStartName());
// Token 2: the '/' is reported as plain text here, not html_slash_greater.
1389 ASSERT_EQ(tok::text
, Toks
[2].getKind());
1390 ASSERT_EQ(StringRef("/"), Toks
[2].getText());
// Token 3: the trailing words, with their leading space.
1392 ASSERT_EQ(tok::text
, Toks
[3].getKind());
1393 ASSERT_EQ(StringRef(" Aaa"), Toks
[3].getText());
// Token 4: end of line.
1395 ASSERT_EQ(tok::newline
, Toks
[4].getKind());
1399 TEST_F(CommentLexerTest
, HTML17
) {
1400 const char *Source
= "// </";
1402 std::vector
<Token
> Toks
;
1404 lexString(Source
, Toks
);
1406 ASSERT_EQ(3U, Toks
.size());
1408 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1409 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1411 ASSERT_EQ(tok::text
, Toks
[1].getKind());
1412 ASSERT_EQ(StringRef("</"), Toks
[1].getText());
1414 ASSERT_EQ(tok::newline
, Toks
[2].getKind());
1417 TEST_F(CommentLexerTest
, HTML18
) {
1418 const char *Source
= "// </@";
1420 std::vector
<Token
> Toks
;
1422 lexString(Source
, Toks
);
1424 ASSERT_EQ(4U, Toks
.size());
1426 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1427 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1429 ASSERT_EQ(tok::text
, Toks
[1].getKind());
1430 ASSERT_EQ(StringRef("</"), Toks
[1].getText());
1432 ASSERT_EQ(tok::text
, Toks
[2].getKind());
1433 ASSERT_EQ(StringRef("@"), Toks
[2].getText());
1435 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
1438 TEST_F(CommentLexerTest
, HTML19
) {
1439 const char *Source
= "// </img";
1441 std::vector
<Token
> Toks
;
1443 lexString(Source
, Toks
);
1445 ASSERT_EQ(3U, Toks
.size());
1447 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1448 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1450 ASSERT_EQ(tok::html_end_tag
, Toks
[1].getKind());
1451 ASSERT_EQ(StringRef("img"), Toks
[1].getHTMLTagEndName());
1453 ASSERT_EQ(tok::newline
, Toks
[2].getKind());
1456 TEST_F(CommentLexerTest
, NotAKnownHTMLTag1
) {
1457 const char *Source
= "// <tag>";
1459 std::vector
<Token
> Toks
;
1461 lexString(Source
, Toks
);
1463 ASSERT_EQ(4U, Toks
.size());
1465 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1466 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1468 ASSERT_EQ(tok::text
, Toks
[1].getKind());
1469 ASSERT_EQ(StringRef("<tag"), Toks
[1].getText());
1471 ASSERT_EQ(tok::text
, Toks
[2].getKind());
1472 ASSERT_EQ(StringRef(">"), Toks
[2].getText());
1474 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
1477 TEST_F(CommentLexerTest
, NotAKnownHTMLTag2
) {
1478 const char *Source
= "// </tag>";
1480 std::vector
<Token
> Toks
;
1482 lexString(Source
, Toks
);
1484 ASSERT_EQ(4U, Toks
.size());
1486 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1487 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1489 ASSERT_EQ(tok::text
, Toks
[1].getKind());
1490 ASSERT_EQ(StringRef("</tag"), Toks
[1].getText());
1492 ASSERT_EQ(tok::text
, Toks
[2].getKind());
1493 ASSERT_EQ(StringRef(">"), Toks
[2].getText());
1495 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
1498 TEST_F(CommentLexerTest
, HTMLCharacterReferences1
) {
1499 const char *Source
= "// &";
1501 std::vector
<Token
> Toks
;
1503 lexString(Source
, Toks
);
1505 ASSERT_EQ(3U, Toks
.size());
1507 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1508 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1510 ASSERT_EQ(tok::text
, Toks
[1].getKind());
1511 ASSERT_EQ(StringRef("&"), Toks
[1].getText());
1513 ASSERT_EQ(tok::newline
, Toks
[2].getKind());
1516 TEST_F(CommentLexerTest
, HTMLCharacterReferences2
) {
1517 const char *Source
= "// &!";
1519 std::vector
<Token
> Toks
;
1521 lexString(Source
, Toks
);
1523 ASSERT_EQ(4U, Toks
.size());
1525 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1526 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1528 ASSERT_EQ(tok::text
, Toks
[1].getKind());
1529 ASSERT_EQ(StringRef("&"), Toks
[1].getText());
1531 ASSERT_EQ(tok::text
, Toks
[2].getKind());
1532 ASSERT_EQ(StringRef("!"), Toks
[2].getText());
1534 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
1537 TEST_F(CommentLexerTest
, HTMLCharacterReferences3
) {
1538 const char *Source
= "// &";
1540 std::vector
<Token
> Toks
;
1542 lexString(Source
, Toks
);
1544 ASSERT_EQ(3U, Toks
.size());
1546 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1547 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1549 ASSERT_EQ(tok::text
, Toks
[1].getKind());
1550 ASSERT_EQ(StringRef("&"), Toks
[1].getText());
1552 ASSERT_EQ(tok::newline
, Toks
[2].getKind());
1555 TEST_F(CommentLexerTest
, HTMLCharacterReferences4
) {
1556 const char *Source
= "// &!";
1558 std::vector
<Token
> Toks
;
1560 lexString(Source
, Toks
);
1562 ASSERT_EQ(4U, Toks
.size());
1564 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1565 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1567 ASSERT_EQ(tok::text
, Toks
[1].getKind());
1568 ASSERT_EQ(StringRef("&"), Toks
[1].getText());
1570 ASSERT_EQ(tok::text
, Toks
[2].getKind());
1571 ASSERT_EQ(StringRef("!"), Toks
[2].getText());
1573 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
1576 TEST_F(CommentLexerTest
, HTMLCharacterReferences5
) {
1577 const char *Source
= "// &#";
1579 std::vector
<Token
> Toks
;
1581 lexString(Source
, Toks
);
1583 ASSERT_EQ(3U, Toks
.size());
1585 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1586 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1588 ASSERT_EQ(tok::text
, Toks
[1].getKind());
1589 ASSERT_EQ(StringRef("&#"), Toks
[1].getText());
1591 ASSERT_EQ(tok::newline
, Toks
[2].getKind());
1594 TEST_F(CommentLexerTest
, HTMLCharacterReferences6
) {
1595 const char *Source
= "// &#a";
1597 std::vector
<Token
> Toks
;
1599 lexString(Source
, Toks
);
1601 ASSERT_EQ(4U, Toks
.size());
1603 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1604 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1606 ASSERT_EQ(tok::text
, Toks
[1].getKind());
1607 ASSERT_EQ(StringRef("&#"), Toks
[1].getText());
1609 ASSERT_EQ(tok::text
, Toks
[2].getKind());
1610 ASSERT_EQ(StringRef("a"), Toks
[2].getText());
1612 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
1615 TEST_F(CommentLexerTest
, HTMLCharacterReferences7
) {
1616 const char *Source
= "// *";
1618 std::vector
<Token
> Toks
;
1620 lexString(Source
, Toks
);
1622 ASSERT_EQ(3U, Toks
.size());
1624 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1625 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1627 ASSERT_EQ(tok::text
, Toks
[1].getKind());
1628 ASSERT_EQ(StringRef("*"), Toks
[1].getText());
1630 ASSERT_EQ(tok::newline
, Toks
[2].getKind());
1633 TEST_F(CommentLexerTest
, HTMLCharacterReferences8
) {
1634 const char *Source
= "// *a";
1636 std::vector
<Token
> Toks
;
1638 lexString(Source
, Toks
);
1640 ASSERT_EQ(4U, Toks
.size());
1642 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1643 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1645 ASSERT_EQ(tok::text
, Toks
[1].getKind());
1646 ASSERT_EQ(StringRef("*"), Toks
[1].getText());
1648 ASSERT_EQ(tok::text
, Toks
[2].getKind());
1649 ASSERT_EQ(StringRef("a"), Toks
[2].getText());
1651 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
1654 TEST_F(CommentLexerTest
, HTMLCharacterReferences9
) {
1655 const char *Source
= "// &#x";
1657 std::vector
<Token
> Toks
;
1659 lexString(Source
, Toks
);
1661 ASSERT_EQ(3U, Toks
.size());
1663 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1664 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1666 ASSERT_EQ(tok::text
, Toks
[1].getKind());
1667 ASSERT_EQ(StringRef("&#x"), Toks
[1].getText());
1669 ASSERT_EQ(tok::newline
, Toks
[2].getKind());
1672 TEST_F(CommentLexerTest
, HTMLCharacterReferences10
) {
1673 const char *Source
= "// &#xz";
1675 std::vector
<Token
> Toks
;
1677 lexString(Source
, Toks
);
1679 ASSERT_EQ(4U, Toks
.size());
1681 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1682 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1684 ASSERT_EQ(tok::text
, Toks
[1].getKind());
1685 ASSERT_EQ(StringRef("&#x"), Toks
[1].getText());
1687 ASSERT_EQ(tok::text
, Toks
[2].getKind());
1688 ASSERT_EQ(StringRef("z"), Toks
[2].getText());
1690 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
1693 TEST_F(CommentLexerTest
, HTMLCharacterReferences11
) {
1694 const char *Source
= "// «";
1696 std::vector
<Token
> Toks
;
1698 lexString(Source
, Toks
);
1700 ASSERT_EQ(3U, Toks
.size());
1702 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1703 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1705 ASSERT_EQ(tok::text
, Toks
[1].getKind());
1706 ASSERT_EQ(StringRef("«"), Toks
[1].getText());
1708 ASSERT_EQ(tok::newline
, Toks
[2].getKind());
1711 TEST_F(CommentLexerTest
, HTMLCharacterReferences12
) {
1712 const char *Source
= "// «z";
1714 std::vector
<Token
> Toks
;
1716 lexString(Source
, Toks
);
1718 ASSERT_EQ(4U, Toks
.size());
1720 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1721 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1723 ASSERT_EQ(tok::text
, Toks
[1].getKind());
1724 ASSERT_EQ(StringRef("«"), Toks
[1].getText());
1726 ASSERT_EQ(tok::text
, Toks
[2].getKind());
1727 ASSERT_EQ(StringRef("z"), Toks
[2].getText());
1729 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
1732 TEST_F(CommentLexerTest
, HTMLCharacterReferences13
) {
1733 const char *Source
= "// &";
1735 std::vector
<Token
> Toks
;
1737 lexString(Source
, Toks
);
1739 ASSERT_EQ(3U, Toks
.size());
1741 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1742 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1744 ASSERT_EQ(tok::text
, Toks
[1].getKind());
1745 ASSERT_EQ(StringRef("&"), Toks
[1].getText());
1747 ASSERT_EQ(tok::newline
, Toks
[2].getKind());
1750 TEST_F(CommentLexerTest
, HTMLCharacterReferences14
) {
1751 const char *Source
= "// &<";
1753 std::vector
<Token
> Toks
;
1755 lexString(Source
, Toks
);
1757 ASSERT_EQ(4U, Toks
.size());
1759 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1760 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1762 ASSERT_EQ(tok::text
, Toks
[1].getKind());
1763 ASSERT_EQ(StringRef("&"), Toks
[1].getText());
1765 ASSERT_EQ(tok::text
, Toks
[2].getKind());
1766 ASSERT_EQ(StringRef("<"), Toks
[2].getText());
1768 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
1771 TEST_F(CommentLexerTest
, HTMLCharacterReferences15
) {
1772 const char *Source
= "// & meow";
1774 std::vector
<Token
> Toks
;
1776 lexString(Source
, Toks
);
1778 ASSERT_EQ(4U, Toks
.size());
1780 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1781 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
1783 ASSERT_EQ(tok::text
, Toks
[1].getKind());
1784 ASSERT_EQ(StringRef("&"), Toks
[1].getText());
1786 ASSERT_EQ(tok::text
, Toks
[2].getKind());
1787 ASSERT_EQ(StringRef(" meow"), Toks
[2].getText());
1789 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
// Lexes every entry of Sources and requires the same three-token result for
// each one: a " " text token, a text token "=", and a newline.
1792 TEST_F(CommentLexerTest
, HTMLCharacterReferences16
) {
1793 const char *Sources
[] = {
// Each source string gets a fresh token vector.
1800 for (size_t i
= 0, e
= std::size(Sources
); i
!= e
; i
++) {
1801 std::vector
<Token
> Toks
;
1803 lexString(Sources
[i
], Toks
);
1805 ASSERT_EQ(3U, Toks
.size());
// Token 0: the single leading space.
1807 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1808 ASSERT_EQ(StringRef(" "), Toks
[0].getText());
// Token 1: every source is expected to yield the text "=".
1810 ASSERT_EQ(tok::text
, Toks
[1].getKind());
1811 ASSERT_EQ(StringRef("="), Toks
[1].getText());
// Token 2: end of line.
1813 ASSERT_EQ(tok::newline
, Toks
[2].getKind());
// Lexes a single Source string and requires twelve tokens in this order:
// text/newline pairs for " Aaa", " Bbb", " Ccc" and " Ddd", one further
// newline, then text " Eee" followed by two newlines.
1817 TEST_F(CommentLexerTest
, MultipleComments
) {
1818 const char *Source
=
1825 std::vector
<Token
> Toks
;
1827 lexString(Source
, Toks
);
1829 ASSERT_EQ(12U, Toks
.size());
// Tokens 0-1: " Aaa" and its newline.
1831 ASSERT_EQ(tok::text
, Toks
[0].getKind());
1832 ASSERT_EQ(StringRef(" Aaa"), Toks
[0].getText());
1833 ASSERT_EQ(tok::newline
, Toks
[1].getKind());
// Tokens 2-3: " Bbb" and its newline.
1835 ASSERT_EQ(tok::text
, Toks
[2].getKind());
1836 ASSERT_EQ(StringRef(" Bbb"), Toks
[2].getText());
1837 ASSERT_EQ(tok::newline
, Toks
[3].getKind());
// Tokens 4-5: " Ccc" and its newline.
1839 ASSERT_EQ(tok::text
, Toks
[4].getKind());
1840 ASSERT_EQ(StringRef(" Ccc"), Toks
[4].getText());
1841 ASSERT_EQ(tok::newline
, Toks
[5].getKind());
// Tokens 6-8: " Ddd" followed by two newline tokens.
1843 ASSERT_EQ(tok::text
, Toks
[6].getKind());
1844 ASSERT_EQ(StringRef(" Ddd"), Toks
[6].getText());
1845 ASSERT_EQ(tok::newline
, Toks
[7].getKind());
1846 ASSERT_EQ(tok::newline
, Toks
[8].getKind());
// Tokens 9-11: " Eee" followed by two newline tokens.
1848 ASSERT_EQ(tok::text
, Toks
[9].getKind());
1849 ASSERT_EQ(StringRef(" Eee"), Toks
[9].getText());
1851 ASSERT_EQ(tok::newline
, Toks
[10].getKind());
1852 ASSERT_EQ(tok::newline
, Toks
[11].getKind());
1855 } // end namespace comments
1856 } // end namespace clang