[NFC][RemoveDIs] Prefer iterators over inst-pointers in InstCombine
[llvm-project.git] / libcxx / src / regex.cpp
blob e53d324186900135b60f455af155705193863da7
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include <algorithm>
10 #include <iterator>
11 #include <regex>
13 _LIBCPP_BEGIN_NAMESPACE_STD
15 static
16 const char*
17 make_error_type_string(regex_constants::error_type ecode)
19 switch (ecode)
21 case regex_constants::error_collate:
22 return "The expression contained an invalid collating element name.";
23 case regex_constants::error_ctype:
24 return "The expression contained an invalid character class name.";
25 case regex_constants::error_escape:
26 return "The expression contained an invalid escaped character, or a "
27 "trailing escape.";
28 case regex_constants::error_backref:
29 return "The expression contained an invalid back reference.";
30 case regex_constants::error_brack:
31 return "The expression contained mismatched [ and ].";
32 case regex_constants::error_paren:
33 return "The expression contained mismatched ( and ).";
34 case regex_constants::error_brace:
35 return "The expression contained mismatched { and }.";
36 case regex_constants::error_badbrace:
37 return "The expression contained an invalid range in a {} expression.";
38 case regex_constants::error_range:
39 return "The expression contained an invalid character range, "
40 "such as [b-a] in most encodings.";
41 case regex_constants::error_space:
42 return "There was insufficient memory to convert the expression into "
43 "a finite state machine.";
44 case regex_constants::error_badrepeat:
45 return "One of *?+{ was not preceded by a valid regular expression.";
46 case regex_constants::error_complexity:
47 return "The complexity of an attempted match against a regular "
48 "expression exceeded a pre-set level.";
49 case regex_constants::error_stack:
50 return "There was insufficient memory to determine whether the regular "
51 "expression could match the specified character sequence.";
52 case regex_constants::__re_err_grammar:
53 return "An invalid regex grammar has been requested.";
54 case regex_constants::__re_err_empty:
55 return "An empty regex is not allowed in the POSIX grammar.";
56 case regex_constants::__re_err_parse:
57 return "The parser did not consume the entire regular expression.";
58 default:
59 break;
61 return "Unknown error type";
64 regex_error::regex_error(regex_constants::error_type ecode)
65 : runtime_error(make_error_type_string(ecode)),
66 __code_(ecode)
69 regex_error::~regex_error() throw() {}
71 namespace {
// One entry of the collating-element name table: a symbolic name and the
// single character it stands for.
struct collationnames
{
    const char* elem_;  // NUL-terminated name, compared with strcmp during lookup
    char char_;         // character returned when the name matches
};
79 #if defined(__MVS__) && !defined(__NATIVE_ASCII_F)
80 // EBCDIC IBM-1047
81 // Sorted via the EBCDIC collating sequence
82 const collationnames collatenames[] =
84 {"a", 0x81},
85 {"alert", 0x2f},
86 {"ampersand", 0x50},
87 {"apostrophe", 0x7d},
88 {"asterisk", 0x5c},
89 {"b", 0x82},
90 {"backslash", 0xe0},
91 {"backspace", 0x16},
92 {"c", 0x83},
93 {"carriage-return", 0xd},
94 {"circumflex", 0x5f},
95 {"circumflex-accent", 0x5f},
96 {"colon", 0x7a},
97 {"comma", 0x6b},
98 {"commercial-at", 0x7c},
99 {"d", 0x84},
100 {"dollar-sign", 0x5b},
101 {"e", 0x85},
102 {"eight", 0xf8},
103 {"equals-sign", 0x7e},
104 {"exclamation-mark", 0x5a},
105 {"f", 0x86},
106 {"five", 0xf5},
107 {"form-feed", 0xc},
108 {"four", 0xf4},
109 {"full-stop", 0x4b},
110 {"g", 0x87},
111 {"grave-accent", 0x79},
112 {"greater-than-sign", 0x6e},
113 {"h", 0x88},
114 {"hyphen", 0x60},
115 {"hyphen-minus", 0x60},
116 {"i", 0x89},
117 {"j", 0x91},
118 {"k", 0x92},
119 {"l", 0x93},
120 {"left-brace", 0xc0},
121 {"left-curly-bracket", 0xc0},
122 {"left-parenthesis", 0x4d},
123 {"left-square-bracket", 0xad},
124 {"less-than-sign", 0x4c},
125 {"low-line", 0x6d},
126 {"m", 0x94},
127 {"n", 0x95},
128 {"newline", 0x15},
129 {"nine", 0xf9},
130 {"number-sign", 0x7b},
131 {"o", 0x96},
132 {"one", 0xf1},
133 {"p", 0x97},
134 {"percent-sign", 0x6c},
135 {"period", 0x4b},
136 {"plus-sign", 0x4e},
137 {"q", 0x98},
138 {"question-mark", 0x6f},
139 {"quotation-mark", 0x7f},
140 {"r", 0x99},
141 {"reverse-solidus", 0xe0},
142 {"right-brace", 0xd0},
143 {"right-curly-bracket", 0xd0},
144 {"right-parenthesis", 0x5d},
145 {"right-square-bracket", 0xbd},
146 {"s", 0xa2},
147 {"semicolon", 0x5e},
148 {"seven", 0xf7},
149 {"six", 0xf6},
150 {"slash", 0x61},
151 {"solidus", 0x61},
152 {"space", 0x40},
153 {"t", 0xa3},
154 {"tab", 0x5},
155 {"three", 0xf3},
156 {"tilde", 0xa1},
157 {"two", 0xf2},
158 {"u", 0xa4},
159 {"underscore", 0x6d},
160 {"v", 0xa5},
161 {"vertical-line", 0x4f},
162 {"vertical-tab", 0xb},
163 {"w", 0xa6},
164 {"x", 0xa7},
165 {"y", 0xa8},
166 {"z", 0xa9},
167 {"zero", 0xf0},
168 {"A", 0xc1},
169 {"B", 0xc2},
170 {"C", 0xc3},
171 {"D", 0xc4},
172 {"E", 0xc5},
173 {"F", 0xc6},
174 {"G", 0xc7},
175 {"H", 0xc8},
176 {"I", 0xc9},
177 {"J", 0xd1},
178 {"K", 0xd2},
179 {"L", 0xd3},
180 {"M", 0xd4},
181 {"N", 0xd5},
182 {"NUL", 0},
183 {"O", 0xd6},
184 {"P", 0xd7},
185 {"Q", 0xd8},
186 {"R", 0xd9},
187 {"S", 0xe2},
188 {"T", 0xe3},
189 {"U", 0xe4},
190 {"V", 0xe5},
191 {"W", 0xe6},
192 {"X", 0xe7},
193 {"Y", 0xe8},
194 {"Z", 0xe9}
196 #else
197 // ASCII
198 const collationnames collatenames[] =
200 {"A", 0x41},
201 {"B", 0x42},
202 {"C", 0x43},
203 {"D", 0x44},
204 {"E", 0x45},
205 {"F", 0x46},
206 {"G", 0x47},
207 {"H", 0x48},
208 {"I", 0x49},
209 {"J", 0x4a},
210 {"K", 0x4b},
211 {"L", 0x4c},
212 {"M", 0x4d},
213 {"N", 0x4e},
214 {"NUL", 0x00},
215 {"O", 0x4f},
216 {"P", 0x50},
217 {"Q", 0x51},
218 {"R", 0x52},
219 {"S", 0x53},
220 {"T", 0x54},
221 {"U", 0x55},
222 {"V", 0x56},
223 {"W", 0x57},
224 {"X", 0x58},
225 {"Y", 0x59},
226 {"Z", 0x5a},
227 {"a", 0x61},
228 {"alert", 0x07},
229 {"ampersand", 0x26},
230 {"apostrophe", 0x27},
231 {"asterisk", 0x2a},
232 {"b", 0x62},
233 {"backslash", 0x5c},
234 {"backspace", 0x08},
235 {"c", 0x63},
236 {"carriage-return", 0x0d},
237 {"circumflex", 0x5e},
238 {"circumflex-accent", 0x5e},
239 {"colon", 0x3a},
240 {"comma", 0x2c},
241 {"commercial-at", 0x40},
242 {"d", 0x64},
243 {"dollar-sign", 0x24},
244 {"e", 0x65},
245 {"eight", 0x38},
246 {"equals-sign", 0x3d},
247 {"exclamation-mark", 0x21},
248 {"f", 0x66},
249 {"five", 0x35},
250 {"form-feed", 0x0c},
251 {"four", 0x34},
252 {"full-stop", 0x2e},
253 {"g", 0x67},
254 {"grave-accent", 0x60},
255 {"greater-than-sign", 0x3e},
256 {"h", 0x68},
257 {"hyphen", 0x2d},
258 {"hyphen-minus", 0x2d},
259 {"i", 0x69},
260 {"j", 0x6a},
261 {"k", 0x6b},
262 {"l", 0x6c},
263 {"left-brace", 0x7b},
264 {"left-curly-bracket", 0x7b},
265 {"left-parenthesis", 0x28},
266 {"left-square-bracket", 0x5b},
267 {"less-than-sign", 0x3c},
268 {"low-line", 0x5f},
269 {"m", 0x6d},
270 {"n", 0x6e},
271 {"newline", 0x0a},
272 {"nine", 0x39},
273 {"number-sign", 0x23},
274 {"o", 0x6f},
275 {"one", 0x31},
276 {"p", 0x70},
277 {"percent-sign", 0x25},
278 {"period", 0x2e},
279 {"plus-sign", 0x2b},
280 {"q", 0x71},
281 {"question-mark", 0x3f},
282 {"quotation-mark", 0x22},
283 {"r", 0x72},
284 {"reverse-solidus", 0x5c},
285 {"right-brace", 0x7d},
286 {"right-curly-bracket", 0x7d},
287 {"right-parenthesis", 0x29},
288 {"right-square-bracket", 0x5d},
289 {"s", 0x73},
290 {"semicolon", 0x3b},
291 {"seven", 0x37},
292 {"six", 0x36},
293 {"slash", 0x2f},
294 {"solidus", 0x2f},
295 {"space", 0x20},
296 {"t", 0x74},
297 {"tab", 0x09},
298 {"three", 0x33},
299 {"tilde", 0x7e},
300 {"two", 0x32},
301 {"u", 0x75},
302 {"underscore", 0x5f},
303 {"v", 0x76},
304 {"vertical-line", 0x7c},
305 {"vertical-tab", 0x0b},
306 {"w", 0x77},
307 {"x", 0x78},
308 {"y", 0x79},
309 {"z", 0x7a},
310 {"zero", 0x30}
312 #endif
314 struct classnames
316 const char* elem_;
317 regex_traits<char>::char_class_type mask_;
320 const classnames ClassNames[] =
322 {"alnum", ctype_base::alnum},
323 {"alpha", ctype_base::alpha},
324 {"blank", ctype_base::blank},
325 {"cntrl", ctype_base::cntrl},
326 {"d", ctype_base::digit},
327 {"digit", ctype_base::digit},
328 {"graph", ctype_base::graph},
329 {"lower", ctype_base::lower},
330 {"print", ctype_base::print},
331 {"punct", ctype_base::punct},
332 {"s", ctype_base::space},
333 {"space", ctype_base::space},
334 {"upper", ctype_base::upper},
335 {"w", regex_traits<char>::__regex_word},
336 {"xdigit", ctype_base::xdigit}
339 struct use_strcmp
341 bool operator()(const collationnames& x, const char* y)
342 {return strcmp(x.elem_, y) < 0;}
343 bool operator()(const classnames& x, const char* y)
344 {return strcmp(x.elem_, y) < 0;}
349 string
350 __get_collation_name(const char* s)
352 const collationnames* i =
353 _VSTD::lower_bound(begin(collatenames), end(collatenames), s, use_strcmp());
354 string r;
355 if (i != end(collatenames) && strcmp(s, i->elem_) == 0)
356 r = char(i->char_);
357 return r;
360 regex_traits<char>::char_class_type
361 __get_classname(const char* s, bool __icase)
363 const classnames* i =
364 _VSTD::lower_bound(begin(ClassNames), end(ClassNames), s, use_strcmp());
365 regex_traits<char>::char_class_type r = 0;
366 if (i != end(ClassNames) && strcmp(s, i->elem_) == 0)
368 r = i->mask_;
369 if (r == regex_traits<char>::__regex_word)
370 r |= ctype_base::alnum | ctype_base::upper | ctype_base::lower;
371 else if (__icase)
373 if (r & (ctype_base::lower | ctype_base::upper))
374 r |= ctype_base::alpha;
377 return r;
380 template <>
381 void
382 __match_any_but_newline<char>::__exec(__state& __s) const
384 if (__s.__current_ != __s.__last_)
386 switch (*__s.__current_)
388 case '\r':
389 case '\n':
390 __s.__do_ = __state::__reject;
391 __s.__node_ = nullptr;
392 break;
393 default:
394 __s.__do_ = __state::__accept_and_consume;
395 ++__s.__current_;
396 __s.__node_ = this->first();
397 break;
400 else
402 __s.__do_ = __state::__reject;
403 __s.__node_ = nullptr;
407 template <>
408 void
409 __match_any_but_newline<wchar_t>::__exec(__state& __s) const
411 if (__s.__current_ != __s.__last_)
413 switch (*__s.__current_)
415 case '\r':
416 case '\n':
417 case 0x2028:
418 case 0x2029:
419 __s.__do_ = __state::__reject;
420 __s.__node_ = nullptr;
421 break;
422 default:
423 __s.__do_ = __state::__accept_and_consume;
424 ++__s.__current_;
425 __s.__node_ = this->first();
426 break;
429 else
431 __s.__do_ = __state::__reject;
432 __s.__node_ = nullptr;
436 _LIBCPP_END_NAMESPACE_STD