1 //===--- TokenTest.cpp ----------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "clang-pseudo/Token.h"
10 #include "clang/Basic/LangOptions.h"
11 #include "clang/Basic/TokenKinds.h"
12 #include "gmock/gmock.h"
13 #include "gtest/gtest.h"
20 using testing::ElementsAre
;
21 using testing::ElementsAreArray
;
// Matches an object whose Kind member equals Kind and whose text() equals Text.
24 MATCHER_P2(token
, Text
, Kind
, "") {
25 return arg
.Kind
== Kind
&& arg
.text() == Text
;
// Matches an object for which flag(Flag) returns a true value.
28 MATCHER_P(hasFlag
, Flag
, "") { return arg
.flag(Flag
); }
// Matches an object whose Line and Indent members equal Line and Indent.
// Both parameters are converted to unsigned before the comparison.
30 MATCHER_P2(lineIndent
, Line
, Indent
, "") {
31 return arg
.Line
== (unsigned)Line
&& arg
.Indent
== (unsigned)Indent
;
// Matches an object whose OriginalIndex member equals index, after index is
// converted to Token::Index.
34 MATCHER_P(originalIndex
, index
, "") {
35 return arg
.OriginalIndex
== (Token::Index
)index
;
// Lexes Code with lex() and checks the text and kind of every raw token, then
// runs cook() on the raw stream and checks the cooked kinds: raw identifiers
// become identifier or keyword kinds (kw_int, kw_return), other kinds are
// unchanged. Both streams are asserted to be finalized first. The last check
// verifies that the text of the first raw token starts at the position of the
// hash character inside Code.
38 TEST(TokenTest
, Lex
) {
40 std::string Code
= R
"cpp(
43 return 42; // the answer
46 TokenStream Raw
= lex(Code
, Opts
);
47 ASSERT_TRUE(Raw
.isFinalized());
48 EXPECT_THAT(Raw
.tokens(),
50 // Lexing of directives is weird, especially <angled> strings.
51 token("#", tok::hash
),
52 token("include", tok::raw_identifier
),
53 token("<", tok::less
),
54 token("stdio", tok::raw_identifier
),
55 token(".", tok::period
),
56 token("h", tok::raw_identifier
),
57 token(">", tok::greater
),
59 token("int", tok::raw_identifier
),
60 token("main", tok::raw_identifier
),
61 token("(", tok::l_paren
),
62 token(")", tok::r_paren
),
63 token("{", tok::l_brace
),
64 token("return", tok::raw_identifier
),
65 token("42", tok::numeric_constant
),
66 token(";", tok::semi
),
67 token("// the answer", tok::comment
),
68 token("}", tok::r_brace
),
71 TokenStream Cooked
= cook(Raw
, Opts
);
72 ASSERT_TRUE(Cooked
.isFinalized());
73 EXPECT_THAT(Cooked
.tokens(),
75 // Cooked identifier types in directives are not meaningful.
76 token("#", tok::hash
),
77 token("include", tok::identifier
),
78 token("<", tok::less
),
79 token("stdio", tok::identifier
),
80 token(".", tok::period
),
81 token("h", tok::identifier
),
82 token(">", tok::greater
),
84 token("int", tok::kw_int
),
85 token("main", tok::identifier
),
86 token("(", tok::l_paren
),
87 token(")", tok::r_paren
),
88 token("{", tok::l_brace
),
89 token("return", tok::kw_return
),
90 token("42", tok::numeric_constant
),
91 token(";", tok::semi
),
92 token("// the answer", tok::comment
),
93 token("}", tok::r_brace
),
95 // Check raw tokens point back into original source code.
96 EXPECT_EQ(Raw
.tokens().front().text().begin(), &Code
[Code
.find('#')]);
// Exercises tokens that contain a backslash followed by a newline. The raw
// expectations check that such tokens keep the continuation in their text and
// carry the NeedsCleaning flag, and also check the StartsPPLine flag, the
// line and indent, and the original index of the listed tokens. The cooked
// expectations check that the continuation is removed from the token text
// (one_token, tokens) and that the original indices are retained.
99 TEST(TokenTest
, LineContinuation
) {
101 std::string Code
= R
"cpp(
107 TokenStream Raw
= lex(Code
, Opts
);
110 ElementsAre(AllOf(token("one_\\\ntoken", tok::raw_identifier
),
111 hasFlag(LexFlags::StartsPPLine
),
112 hasFlag(LexFlags::NeedsCleaning
), lineIndent(1, 0),
114 AllOf(token("two", tok::raw_identifier
),
115 hasFlag(LexFlags::StartsPPLine
),
116 Not(hasFlag(LexFlags::NeedsCleaning
)),
118 AllOf(token("\\\ntokens", tok::raw_identifier
),
119 Not(hasFlag(LexFlags::StartsPPLine
)),
120 hasFlag(LexFlags::NeedsCleaning
), originalIndex(2))));
122 TokenStream Cooked
= cook(Raw
, Opts
);
125 ElementsAre(AllOf(token("one_token", tok::identifier
), lineIndent(1, 0),
127 AllOf(token("two", tok::identifier
), originalIndex(1)),
128 AllOf(token("tokens", tok::identifier
), originalIndex(2))));
// Enables the Trigraphs, Digraphs, C99 and CXXOperatorNames options, then
// lexes a string containing an alternative operator spelling, a digraph, two
// trigraphs (one inside a character constant) and a UCN. The raw expectations
// check which tokens carry the NeedsCleaning flag; the cooked expectations
// check the substituted text and the resulting token kinds.
131 TEST(TokenTest
, EncodedCharacters
) {
133 Opts
.Trigraphs
= true;
134 Opts
.Digraphs
= true;
135 Opts
.C99
= true; // UCNs
136 Opts
.CXXOperatorNames
= true;
137 std::string Code
= R
"(and <: ??! '??=' \u00E9)";
138 TokenStream Raw
= lex(Code
, Opts
);
141 ElementsAre( // and is not recognized as && until cook().
142 AllOf(token("and", tok::raw_identifier
),
143 Not(hasFlag(LexFlags::NeedsCleaning
))),
144 // Digraphs are just different spellings of tokens.
145 AllOf(token("<:", tok::l_square
),
146 Not(hasFlag(LexFlags::NeedsCleaning
))),
147 // Trigraphs are interpreted, still need text cleaning.
148 AllOf(token(R
"(??!)", tok::pipe
), hasFlag(LexFlags::NeedsCleaning
)),
149 // Trigraphs must be substituted inside constants too.
150 AllOf(token(R
"('??=')", tok::char_constant
),
151 hasFlag(LexFlags::NeedsCleaning
)),
152 // UCNs need substitution.
153 AllOf(token(R
"(\u00E9)", tok::raw_identifier
),
154 hasFlag(LexFlags::NeedsCleaning
))));
156 TokenStream Cooked
= cook(Raw
, Opts
);
159 ElementsAre(token("and", tok::ampamp
), // alternate spelling recognized
160 token("<:", tok::l_square
),
161 token("|", tok::pipe
), // trigraph substituted
162 token("'#'", tok::char_constant
), // trigraph substituted
163 token("é", tok::identifier
))); // UCN substituted
// Lexes Code and checks the line and indent values reported for each raw
// token. The trailing comment on each expectation names the token it refers
// to.
166 TEST(TokenTest
, Indentation
) {
168 std::string Code
= R
"cpp( hello world
172 TokenStream Raw
= lex(Code
, Opts
);
173 EXPECT_THAT(Raw
.tokens(), ElementsAreArray({
174 lineIndent(0, 3), // hello
175 lineIndent(0, 3), // world
176 lineIndent(1, 0), // no_indent
177 lineIndent(2, 2), // line_was_continued
// Cooks the lexed Code, passes the result through stripComments, and checks
// the resulting tokens: single greater tokens appear in pairs that share one
// original index (0 and 3), and a greatergreaterequal token is also expected.
181 TEST(TokenTest
, SplitGreaterGreater
) {
183 std::string Code
= R
"cpp(
185 // >> with an escaped newline in the middle, split
190 TokenStream Cook
= cook(lex(Code
, Opts
), Opts
);
191 TokenStream Split
= stripComments(Cook
);
192 EXPECT_THAT(Split
.tokens(),
193 ElementsAre(AllOf(token(">", tok::greater
), originalIndex(0)),
194 AllOf(token(">", tok::greater
), originalIndex(0)),
195 // Token 1 and 2 are comments.
196 AllOf(token(">", tok::greater
), originalIndex(3)),
197 AllOf(token(">", tok::greater
), originalIndex(3)),
198 AllOf(token(">>=", tok::greatergreaterequal
),
// Cooks the lexed Code and runs stripComments on the result. The first
// expectation lists four tokens, two of them comments, with original indices
// 0 through 3 (presumably matched against the unstripped stream; its opening
// line is not shown here). The second expectation checks that the stripped
// stream holds only the int and semicolon tokens, still reporting original
// indices 1 and 3.
202 TEST(TokenTest
, DropComments
) {
204 std::string Code
= R
"cpp(
208 TokenStream Raw
= cook(lex(Code
, Opts
), Opts
);
209 TokenStream Stripped
= stripComments(Raw
);
212 ElementsAre(AllOf(token("// comment", tok::comment
), originalIndex(0)),
213 AllOf(token("int", tok::kw_int
), originalIndex(1)),
214 AllOf(token("/*abc*/", tok::comment
), originalIndex(2)),
215 AllOf(token(";", tok::semi
), originalIndex(3))));
217 EXPECT_THAT(Stripped
.tokens(),
218 ElementsAre(AllOf(token("int", tok::kw_int
), originalIndex(1)),
219 AllOf(token(";", tok::semi
), originalIndex(3))));
223 } // namespace pseudo