//===- llvm/unittest/Support/RegexTest.cpp - Regex tests --===//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//===----------------------------------------------------------------------===//
9 #include "llvm/Support/Regex.h"
10 #include "llvm/ADT/SmallVector.h"
11 #include "gtest/gtest.h"
// Test fixture named by every TEST_F in this file; derives from
// ::testing::Test.
// NOTE(review): the class body and its closing brace are not visible in this
// excerpt - confirm against the full file.
17 class RegexTest
: public ::testing::Test
{
// Basics: exercises Regex::match() both with and without a capture list.
// NOTE(review): the declarations of r1, r2, r4 and r5 are not visible in this
// excerpt, so the patterns they were built from cannot be confirmed from here.
20 TEST_F(RegexTest
, Basics
) {
// r1 is expected to accept the all-digit inputs and to reject "9a".
22 EXPECT_TRUE(r1
.match("916"));
23 EXPECT_TRUE(r1
.match("9"));
24 EXPECT_FALSE(r1
.match("9a"));
// Capture list shared by the remaining match() calls in this test.
26 SmallVector
<StringRef
, 1> Matches
;
// With no capture groups expected, Matches should hold only the overall
// match ("216").
28 EXPECT_TRUE(r2
.match("aa216b", &Matches
));
29 EXPECT_EQ(1u, Matches
.size());
30 EXPECT_EQ("216", Matches
[0].str());
// Two capture groups: Matches[0] is the whole match, [1] the optional hex
// letter, [2] the digits after the colon.
32 Regex
r3("[0-9]+([a-f])?:([0-9]+)");
33 EXPECT_TRUE(r3
.match("9a:513b", &Matches
));
34 EXPECT_EQ(3u, Matches
.size());
35 EXPECT_EQ("9a:513", Matches
[0].str());
36 EXPECT_EQ("a", Matches
[1].str());
37 EXPECT_EQ("513", Matches
[2].str());
// Same pattern with no letter in the input: the size is still expected to be
// 3, with an empty string for the optional group.
39 EXPECT_TRUE(r3
.match("9:513b", &Matches
));
40 EXPECT_EQ(3u, Matches
.size());
41 EXPECT_EQ("9:513", Matches
[0].str());
42 EXPECT_EQ("", Matches
[1].str());
43 EXPECT_EQ("513", Matches
[2].str());
// Matching against a std::string subject; the whole of String is expected to
// come back as Matches[0].
// NOTE(review): any modification of String between this line and the checks
// below is not visible in this excerpt.
46 std::string String
="axxb";
48 EXPECT_FALSE(r4
.match("abb"));
49 EXPECT_TRUE(r4
.match(String
, &Matches
));
50 EXPECT_EQ(1u, Matches
.size());
51 EXPECT_EQ(String
, Matches
[0].str());
// NOTE(review): the name NulPattern suggests this part covers an embedded NUL
// in the pattern, but the lines that would edit NulPattern, reassign String
// and construct r5 are not visible here - confirm against the full file.
53 std::string NulPattern
="X[0-9]+X([a-f])?:([0-9]+)";
57 EXPECT_FALSE(r5
.match(String
));
58 EXPECT_FALSE(r5
.match("X9"));
// The same call is expected to succeed here after failing above; presumably
// String is changed by a line not shown - TODO confirm.
60 EXPECT_TRUE(r5
.match(String
));
// Backreferences: patterns that refer back to an earlier capture group must
// only match when the repeated text equals what the group captured.
63 TEST_F(RegexTest
, Backreferences
) {
// One group followed by "_" and a reference to that group.
64 Regex
r1("([a-z]+)_\\1");
65 SmallVector
<StringRef
, 4> Matches
;
66 EXPECT_TRUE(r1
.match("abc_abc", &Matches
));
67 EXPECT_EQ(2u, Matches
.size());
68 EXPECT_FALSE(r1
.match("abc_ab", &Matches
));
// The same group referenced twice; every occurrence must be the same digit.
70 Regex
r2("a([0-9])b\\1c\\1");
71 EXPECT_TRUE(r2
.match("a4b4c4", &Matches
));
72 EXPECT_EQ(2u, Matches
.size());
73 EXPECT_EQ("4", Matches
[1].str());
74 EXPECT_FALSE(r2
.match("a2b2c3"));
// Two groups, each referenced once; a mismatch in either one must fail.
76 Regex
r3("a([0-9])([a-z])b\\1\\2");
77 EXPECT_TRUE(r3
.match("a6zb6z", &Matches
));
78 EXPECT_EQ(3u, Matches
.size());
79 EXPECT_EQ("6", Matches
[1].str());
80 EXPECT_EQ("z", Matches
[2].str());
81 EXPECT_FALSE(r3
.match("a6zb6y"));
82 EXPECT_FALSE(r3
.match("a6zb7z"));
// r4, r5 and r6 use the same three alternatives in different orders, so the
// alternative that matches ("abc") is listed first, second and last
// respectively. The expectations are identical for all three.
84 Regex
r4("(abc|xyz|uvw)_\\1");
85 EXPECT_TRUE(r4
.match("abc_abc", &Matches
));
86 EXPECT_EQ(2u, Matches
.size());
87 EXPECT_FALSE(r4
.match("abc_ab", &Matches
));
88 EXPECT_FALSE(r4
.match("abc_xyz", &Matches
));
90 Regex
r5("(xyz|abc|uvw)_\\1");
91 EXPECT_TRUE(r5
.match("abc_abc", &Matches
));
92 EXPECT_EQ(2u, Matches
.size());
93 EXPECT_FALSE(r5
.match("abc_ab", &Matches
));
94 EXPECT_FALSE(r5
.match("abc_xyz", &Matches
));
96 Regex
r6("(xyz|uvw|abc)_\\1");
97 EXPECT_TRUE(r6
.match("abc_abc", &Matches
));
98 EXPECT_EQ(2u, Matches
.size());
99 EXPECT_FALSE(r6
.match("abc_ab", &Matches
));
100 EXPECT_FALSE(r6
.match("abc_xyz", &Matches
));
// Substitution: checks the string returned by Regex::sub() and the text it
// reports through its optional error argument.
// NOTE(review): the declaration of Error is not visible in this excerpt;
// presumably it is a std::string - confirm against the full file.
103 TEST_F(RegexTest
, Substitution
) {
// Plain replacement text takes the place of the matched digits.
106 EXPECT_EQ("aNUMber", Regex("[0-9]+").sub("NUM", "a1234ber"));
// Backslash escapes in the replacement: an escaped backslash, \n and \t are
// expected to produce a backslash, a newline and a tab, while an
// unrecognised escape such as \j is expected to yield the letter itself.
// Error is expected to stay empty in each case.
109 EXPECT_EQ("a\\ber", Regex("[0-9]+").sub("\\\\", "a1234ber", &Error
));
110 EXPECT_EQ("", Error
);
111 EXPECT_EQ("a\nber", Regex("[0-9]+").sub("\\n", "a1234ber", &Error
));
112 EXPECT_EQ("", Error
);
113 EXPECT_EQ("a\tber", Regex("[0-9]+").sub("\\t", "a1234ber", &Error
));
114 EXPECT_EQ("", Error
);
115 EXPECT_EQ("ajber", Regex("[0-9]+").sub("\\j", "a1234ber", &Error
));
116 EXPECT_EQ("", Error
);
// A lone trailing backslash is expected to be dropped from the output and
// reported through Error.
118 EXPECT_EQ("aber", Regex("[0-9]+").sub("\\", "a1234ber", &Error
));
119 EXPECT_EQ(Error
, "replacement string contained trailing backslash");
// Numbered references: \0 expands to the whole match and \1 to the first
// group.
122 EXPECT_EQ("aa1234bber", Regex("a[0-9]+b").sub("a\\0b", "a1234ber", &Error
));
123 EXPECT_EQ("", Error
);
125 EXPECT_EQ("a1234ber", Regex("a([0-9]+)b").sub("a\\1b", "a1234ber", &Error
));
126 EXPECT_EQ("", Error
);
// \100 names no existing group: it is expected to expand to nothing and to
// set Error.
128 EXPECT_EQ("aber", Regex("a[0-9]+b").sub("a\\100b", "a1234ber", &Error
));
129 EXPECT_EQ(Error
, "invalid backreference string '100'");
// The \g<N> form: group 1, the whole match (0), and a two-digit group (10).
131 EXPECT_EQ("012345", Regex("a([0-9]+).*").sub("0\\g<1>5", "a1234ber", &Error
));
132 EXPECT_EQ("", Error
);
134 EXPECT_EQ("0a1234ber5",
135 Regex("a([0-9]+).*").sub("0\\g<0>5", "a1234ber", &Error
));
136 EXPECT_EQ("", Error
);
138 EXPECT_EQ("0A5", Regex("a(.)(.)(.)(.)(.)(.)(.)(.)(.)(.).*")
139 .sub("0\\g<10>5", "a123456789Aber", &Error
));
140 EXPECT_EQ("", Error
);
// NOTE(review): the opening of this statement (presumably an EXPECT_EQ with
// the expected output for a negative group number) is not visible in this
// excerpt - confirm against the full file.
143 Regex("a([0-9]+).*").sub("0\\g<-1>5", "a1234ber", &Error
));
144 EXPECT_EQ("", Error
);
// An unterminated \g<15 and an empty \g<> are expected to be copied through
// without the backslash and without setting Error.
146 EXPECT_EQ("0g<15", Regex("a([0-9]+).*").sub("0\\g<15", "a1234ber", &Error
));
147 EXPECT_EQ("", Error
);
149 EXPECT_EQ("0g<>15", Regex("a([0-9]+).*").sub("0\\g<>15", "a1234ber", &Error
));
150 EXPECT_EQ("", Error
);
// NOTE(review): as above, the opening of this statement (the expected output
// for the non-numeric reference g<3e>) is not visible in this excerpt.
153 Regex("a([0-9]+).*").sub("0\\g<3e>1", "a1234ber", &Error
));
154 EXPECT_EQ("", Error
);
// \g<100> names no existing group: it is expected to expand to nothing and
// to set Error.
156 EXPECT_EQ("aber", Regex("a([0-9]+)b").sub("a\\g<100>b", "a1234ber", &Error
));
157 EXPECT_EQ(Error
, "invalid backreference string 'g<100>'");
// IsLiteralERE: Regex::isLiteralERE() is expected to return true only for a
// string with no regex constructs. Each negative case adds exactly one
// construct to "abc": a group, a leading or trailing anchor, alternation, the
// *, + and ? repetition operators, the "." wildcard, a bracket expression, a
// backreference, and a brace interval.
160 TEST_F(RegexTest
, IsLiteralERE
) {
161 EXPECT_TRUE(Regex::isLiteralERE("abc"));
162 EXPECT_FALSE(Regex::isLiteralERE("a(bc)"));
163 EXPECT_FALSE(Regex::isLiteralERE("^abc"));
164 EXPECT_FALSE(Regex::isLiteralERE("abc$"));
165 EXPECT_FALSE(Regex::isLiteralERE("a|bc"));
166 EXPECT_FALSE(Regex::isLiteralERE("abc*"));
167 EXPECT_FALSE(Regex::isLiteralERE("abc+"));
168 EXPECT_FALSE(Regex::isLiteralERE("abc?"));
169 EXPECT_FALSE(Regex::isLiteralERE("abc."));
170 EXPECT_FALSE(Regex::isLiteralERE("a[bc]"));
171 EXPECT_FALSE(Regex::isLiteralERE("abc\\1"));
172 EXPECT_FALSE(Regex::isLiteralERE("abc{1,2}"));
// Escape: Regex::escape() is expected to put a backslash in front of the
// square brackets, the curly braces and an existing backslash, while leaving
// letters, digits and the comma untouched.
175 TEST_F(RegexTest
, Escape
) {
176 EXPECT_EQ("a\\[bc\\]", Regex::escape("a[bc]"));
177 EXPECT_EQ("abc\\{1\\\\,2\\}", Regex::escape("abc{1\\,2}"));
// IsValid: malformed patterns must make Regex::isValid() return false and
// fill its argument with a specific diagnostic string.
// NOTE(review): the declaration of Error is not visible in this excerpt;
// presumably it is a std::string - confirm against the full file.
180 TEST_F(RegexTest
, IsValid
) {
// Unclosed group.
182 EXPECT_FALSE(Regex("(foo").isValid(Error
));
183 EXPECT_EQ("parentheses not balanced", Error
);
// Bracket expression that ends in the middle of a range.
184 EXPECT_FALSE(Regex("a[b-").isValid(Error
));
185 EXPECT_EQ("invalid character range", Error
);
// MoveConstruct: a Regex built from std::move(r1) is expected to be usable
// for matching.
188 TEST_F(RegexTest
, MoveConstruct
) {
189 Regex
r1("^[0-9]+$");
190 Regex
r2(std::move(r1
));
191 EXPECT_TRUE(r2
.match("916"));
// MoveAssign: r2 is expected to match after the move, and r1 is expected to
// report itself as invalid.
// NOTE(review): the declaration of r2, the assignment from r1 (presumably a
// move assignment, going by the test name) and the declaration of Error are
// not visible in this excerpt - confirm against the full file.
194 TEST_F(RegexTest
, MoveAssign
) {
195 Regex
r1("^[0-9]+$");
198 EXPECT_TRUE(r2
.match("916"));
200 EXPECT_FALSE(r1
.isValid(Error
));
// NoArgConstructor: r1 is first expected to be invalid with the diagnostic
// "invalid regular expression", and later expected to be valid.
// NOTE(review): the declaration of r1 (presumably default-constructed, going
// by the test name), the declaration of Error, and whatever makes r1 valid
// before the final check are not visible in this excerpt - confirm against
// the full file.
203 TEST_F(RegexTest
, NoArgConstructor
) {
206 EXPECT_FALSE(r1
.isValid(Error
));
207 EXPECT_EQ("invalid regular expression", Error
);
209 EXPECT_TRUE(r1
.isValid(Error
));
// MatchInvalid: calling match() on a Regex that reports itself as invalid is
// expected to return false.
// NOTE(review): the declarations of r1 and Error are not visible in this
// excerpt, so how r1 becomes invalid cannot be confirmed from here.
212 TEST_F(RegexTest
, MatchInvalid
) {
215 EXPECT_FALSE(r1
.isValid(Error
));
216 EXPECT_FALSE(r1
.match("X"));
// Regression test for the fuzzer-reported pattern linked below: the pattern
// is expected to be reported as invalid.
// NOTE(review): the declaration of Error is not visible in this excerpt.
219 // https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=3727
220 TEST_F(RegexTest
, OssFuzz3727Regression
) {
221 // Wrap in a StringRef so the NUL byte doesn't terminate the string
// The explicit length argument (10) is passed alongside the literal so the
// pattern is not cut short at the embedded NUL.
222 Regex
r(StringRef("[[[=GS\x00[=][", 10));
224 EXPECT_FALSE(r
.isValid(Error
));