1 /**********************************************************************
2 regsyntax.c - Onigmo (Oniguruma-mod) (regular expression library)
3 **********************************************************************/
5 * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
/*
 * OnigSyntaxASIS syntax table.
 * Visible initializers: the ONIG_SYN_OP2_INEFFECTIVE_ESCAPE flag, followed by
 * the meta-character entries, where backslash is the escape character and the
 * five remaining entries are all ONIG_INEFFECTIVE_META_CHAR.
 * NOTE(review): the other initializer fields and the closing braces are not
 * visible in this excerpt -- confirm against the full source.
 */
33 const OnigSyntaxType OnigSyntaxASIS
= {
35 , ONIG_SYN_OP2_INEFFECTIVE_ESCAPE
/* meta-character entries: only the escape character is given a real value */
40 (OnigCodePoint
)'\\' /* esc */
41 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
42 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
43 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
44 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
45 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * OnigSyntaxPosixBasic syntax table.
 * Visible initializers:
 *   - operator flags: SYN_POSIX_COMMON_OP combined with
 *     ONIG_SYN_OP_ESC_LPAREN_SUBEXP and ONIG_SYN_OP_ESC_BRACE_INTERVAL;
 *   - option flags: ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE;
 *   - meta-character entries: backslash as escape, every other entry
 *     ONIG_INEFFECTIVE_META_CHAR.
 * NOTE(review): some initializer fields and the closing braces are not
 * visible in this excerpt -- confirm against the full source.
 */
49 const OnigSyntaxType OnigSyntaxPosixBasic
= {
50 ( SYN_POSIX_COMMON_OP
| ONIG_SYN_OP_ESC_LPAREN_SUBEXP
|
51 ONIG_SYN_OP_ESC_BRACE_INTERVAL
)
/* ONIG_OPTION_* flags */
54 , ( ONIG_OPTION_SINGLELINE
| ONIG_OPTION_MULTILINE
)
/* meta-character entries */
57 (OnigCodePoint
)'\\' /* esc */
58 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
59 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
60 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
61 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
62 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * OnigSyntaxPosixExtended syntax table.
 * Visible initializers:
 *   - operator flags: SYN_POSIX_COMMON_OP plus the unescaped group, interval,
 *     '+', '?' and '|' operator flags listed below;
 *   - ONIG_SYN_* behavior flags (context-independent anchors and repeat
 *     operators, invalid-repeat handling, unmatched close paren, double range
 *     operator in a character class);
 *   - option flags: ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE;
 *   - meta-character entries: backslash as escape, every other entry
 *     ONIG_INEFFECTIVE_META_CHAR.
 * NOTE(review): some initializer fields and the closing braces are not
 * visible in this excerpt -- confirm against the full source.
 */
66 const OnigSyntaxType OnigSyntaxPosixExtended
= {
67 ( SYN_POSIX_COMMON_OP
| ONIG_SYN_OP_LPAREN_SUBEXP
|
68 ONIG_SYN_OP_BRACE_INTERVAL
|
69 ONIG_SYN_OP_PLUS_ONE_INF
| ONIG_SYN_OP_QMARK_ZERO_ONE
| ONIG_SYN_OP_VBAR_ALT
)
/* ONIG_SYN_* behavior flags */
71 , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS
|
72 ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS
| ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS
|
73 ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP
|
74 ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC
)
/* ONIG_OPTION_* flags */
75 , ( ONIG_OPTION_SINGLELINE
| ONIG_OPTION_MULTILINE
)
/* meta-character entries */
78 (OnigCodePoint
)'\\' /* esc */
79 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
80 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
81 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
82 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
83 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * OnigSyntaxEmacs syntax table.
 * Visible initializers:
 *   - operator flags: the ONIG_SYN_OP_* set OR-ed together below (note that
 *     interval, group and alternation use the ESC_ variants, while '*', '+'
 *     and '?' use the plain variants);
 *   - ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR as the only OP2 flag;
 *   - ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC as the only behavior flag;
 *   - meta-character entries: backslash as escape, every other entry
 *     ONIG_INEFFECTIVE_META_CHAR.
 * NOTE(review): some initializer fields and the closing braces are not
 * visible in this excerpt -- confirm against the full source.
 */
87 const OnigSyntaxType OnigSyntaxEmacs
= {
88 ( ONIG_SYN_OP_DOT_ANYCHAR
| ONIG_SYN_OP_BRACKET_CC
|
89 ONIG_SYN_OP_ESC_BRACE_INTERVAL
|
90 ONIG_SYN_OP_ESC_LPAREN_SUBEXP
| ONIG_SYN_OP_ESC_VBAR_ALT
|
91 ONIG_SYN_OP_ASTERISK_ZERO_INF
| ONIG_SYN_OP_PLUS_ONE_INF
|
92 ONIG_SYN_OP_QMARK_ZERO_ONE
| ONIG_SYN_OP_DECIMAL_BACKREF
|
93 ONIG_SYN_OP_LINE_ANCHOR
| ONIG_SYN_OP_ESC_CONTROL_CHARS
)
94 , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
95 , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
/* meta-character entries */
99 (OnigCodePoint
)'\\' /* esc */
100 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
101 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
102 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
103 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
104 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * OnigSyntaxGrep syntax table.
 * Visible initializers:
 *   - operator flags: the ONIG_SYN_OP_* set OR-ed together below (interval,
 *     group, alternation, '+' and '?' all use the ESC_ variants; '*' uses the
 *     plain variant);
 *   - behavior flags: ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC |
 *     ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC;
 *   - meta-character entries: backslash as escape, every other entry
 *     ONIG_INEFFECTIVE_META_CHAR.
 * NOTE(review): some initializer fields and the closing braces are not
 * visible in this excerpt -- confirm against the full source.
 */
108 const OnigSyntaxType OnigSyntaxGrep
= {
109 ( ONIG_SYN_OP_DOT_ANYCHAR
| ONIG_SYN_OP_BRACKET_CC
| ONIG_SYN_OP_POSIX_BRACKET
|
110 ONIG_SYN_OP_ESC_BRACE_INTERVAL
| ONIG_SYN_OP_ESC_LPAREN_SUBEXP
|
111 ONIG_SYN_OP_ESC_VBAR_ALT
|
112 ONIG_SYN_OP_ASTERISK_ZERO_INF
| ONIG_SYN_OP_ESC_PLUS_ONE_INF
|
113 ONIG_SYN_OP_ESC_QMARK_ZERO_ONE
| ONIG_SYN_OP_LINE_ANCHOR
|
114 ONIG_SYN_OP_ESC_W_WORD
| ONIG_SYN_OP_ESC_B_WORD_BOUND
|
115 ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END
| ONIG_SYN_OP_DECIMAL_BACKREF
)
/* ONIG_SYN_* behavior flags */
117 , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
| ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC
)
/* meta-character entries */
121 (OnigCodePoint
)'\\' /* esc */
122 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
123 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
124 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
125 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
126 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * OnigSyntaxGnuRegex syntax table.
 * Only the meta-character entries are visible here: backslash is the escape
 * character and the five remaining entries are ONIG_INEFFECTIVE_META_CHAR.
 * NOTE(review): the operator/behavior/option initializers and the closing
 * braces are not visible in this excerpt -- confirm against the full source.
 */
130 const OnigSyntaxType OnigSyntaxGnuRegex
= {
137 (OnigCodePoint
)'\\' /* esc */
138 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
139 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
140 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
141 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
142 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * OnigSyntaxJava syntax table.
 * Visible initializers:
 *   - operator flags: SYN_GNU_REGEX_OP extended with non-greedy '?',
 *     control-character, \c, 3-digit octal and 2-digit hex escape flags, with
 *     ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END then masked out;
 *   - the ONIG_SYN_OP2_* flags OR-ed together below;
 *   - behavior flags: SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND;
 *   - option flags: ONIG_OPTION_SINGLELINE | ONIG_OPTION_ASCII_RANGE |
 *     ONIG_OPTION_WORD_BOUND_ALL_RANGE;
 *   - meta-character entries: backslash as escape, every other entry
 *     ONIG_INEFFECTIVE_META_CHAR.
 * NOTE(review): the closing braces (and possibly other lines) are not visible
 * in this excerpt -- confirm against the full source.
 */
146 const OnigSyntaxType OnigSyntaxJava
= {
147 (( SYN_GNU_REGEX_OP
| ONIG_SYN_OP_QMARK_NON_GREEDY
|
148 ONIG_SYN_OP_ESC_CONTROL_CHARS
| ONIG_SYN_OP_ESC_C_CONTROL
|
149 ONIG_SYN_OP_ESC_OCTAL3
| ONIG_SYN_OP_ESC_X_HEX2
)
/* clear ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END from the combined operator set */
150 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END
)
/* ONIG_SYN_OP2_* flags */
151 , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE
| ONIG_SYN_OP2_QMARK_GROUP_EFFECT
|
152 ONIG_SYN_OP2_OPTION_PERL
| ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT
|
153 ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL
| ONIG_SYN_OP2_CCLASS_SET_OP
|
154 ONIG_SYN_OP2_ESC_V_VTAB
| ONIG_SYN_OP2_ESC_U_HEX4
|
155 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY
)
/* behavior flags */
156 , ( SYN_GNU_REGEX_BV
| ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
)
/* ONIG_OPTION_* flags */
157 , ( ONIG_OPTION_SINGLELINE
| ONIG_OPTION_ASCII_RANGE
|
158 ONIG_OPTION_WORD_BOUND_ALL_RANGE
)
/* meta-character entries */
161 (OnigCodePoint
)'\\' /* esc */
162 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
163 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
164 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
165 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
166 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * OnigSyntaxPerl58 syntax table.
 * Visible initializers:
 *   - operator flags: SYN_GNU_REGEX_OP extended with non-greedy '?', 3-digit
 *     octal, 2-digit hex, braced hex, control-character and \c escape flags,
 *     with ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END then masked out;
 *   - the ONIG_SYN_OP2_* flags OR-ed together below;
 *   - option flag: ONIG_OPTION_SINGLELINE;
 *   - meta-character entries: backslash as escape, every other entry
 *     ONIG_INEFFECTIVE_META_CHAR.
 * NOTE(review): the behavior-flags initializer and the closing braces are not
 * visible in this excerpt -- confirm against the full source.
 */
171 const OnigSyntaxType OnigSyntaxPerl58
= {
172 (( SYN_GNU_REGEX_OP
| ONIG_SYN_OP_QMARK_NON_GREEDY
|
173 ONIG_SYN_OP_ESC_OCTAL3
| ONIG_SYN_OP_ESC_X_HEX2
|
174 ONIG_SYN_OP_ESC_X_BRACE_HEX8
| ONIG_SYN_OP_ESC_CONTROL_CHARS
|
175 ONIG_SYN_OP_ESC_C_CONTROL
)
/* clear ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END from the combined operator set */
176 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END
)
/* ONIG_SYN_OP2_* flags */
177 , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE
|
178 ONIG_SYN_OP2_QMARK_GROUP_EFFECT
| ONIG_SYN_OP2_OPTION_PERL
|
179 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY
|
180 ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT
|
181 ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER
|
182 ONIG_SYN_OP2_QMARK_LPAREN_CONDITION
)
184 , ONIG_OPTION_SINGLELINE
/* meta-character entries */
187 (OnigCodePoint
)'\\' /* esc */
188 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
189 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
190 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
191 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
192 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
196 /* Perl 5.8 + named group */
/*
 * OnigSyntaxPerl58_NG syntax table.
 * Visible initializers:
 *   - operator flags: SYN_GNU_REGEX_OP extended with non-greedy '?', 3-digit
 *     octal, 2-digit hex, braced hex, control-character and \c escape flags,
 *     with ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END then masked out;
 *   - the ONIG_SYN_OP2_* flags OR-ed together below, which include the
 *     named-group, named-backreference and subexpression-call flags;
 *   - behavior flags: SYN_GNU_REGEX_BV | ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
 *     ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME;
 *   - option flag: ONIG_OPTION_SINGLELINE;
 *   - meta-character entries: backslash as escape, every other entry
 *     ONIG_INEFFECTIVE_META_CHAR.
 * NOTE(review): the closing braces (and possibly other lines) are not visible
 * in this excerpt -- confirm against the full source.
 */
197 const OnigSyntaxType OnigSyntaxPerl58_NG
= {
198 (( SYN_GNU_REGEX_OP
| ONIG_SYN_OP_QMARK_NON_GREEDY
|
199 ONIG_SYN_OP_ESC_OCTAL3
| ONIG_SYN_OP_ESC_X_HEX2
|
200 ONIG_SYN_OP_ESC_X_BRACE_HEX8
| ONIG_SYN_OP_ESC_CONTROL_CHARS
|
201 ONIG_SYN_OP_ESC_C_CONTROL
)
/* clear ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END from the combined operator set */
202 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END
)
/* ONIG_SYN_OP2_* flags */
203 , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE
|
204 ONIG_SYN_OP2_QMARK_GROUP_EFFECT
| ONIG_SYN_OP2_OPTION_PERL
|
205 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY
|
206 ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT
|
207 ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER
|
208 ONIG_SYN_OP2_QMARK_LPAREN_CONDITION
|
209 ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP
|
210 ONIG_SYN_OP2_ESC_K_NAMED_BACKREF
|
211 ONIG_SYN_OP2_ESC_G_SUBEXP_CALL
)
/* behavior flags */
212 , ( SYN_GNU_REGEX_BV
|
213 ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
|
214 ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME
)
215 , ONIG_OPTION_SINGLELINE
/* meta-character entries */
218 (OnigCodePoint
)'\\' /* esc */
219 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
220 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
221 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
222 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
223 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * OnigSyntaxPerl syntax table.
 * Visible initializers:
 *   - operator flags: SYN_GNU_REGEX_OP extended with non-greedy '?', 3-digit
 *     octal, 2-digit hex, braced hex, control-character, braced octal and \c
 *     escape flags, with ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END then masked out;
 *   - the ONIG_SYN_OP2_* flags OR-ed together below;
 *   - behavior flags: SYN_GNU_REGEX_BV |
 *     ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
 *     ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL |
 *     ONIG_SYN_USE_LEFT_MOST_NAMED_GROUP;
 *   - option flags: ONIG_OPTION_SINGLELINE | ONIG_OPTION_CAPTURE_GROUP;
 *   - meta-character entries: backslash as escape, every other entry
 *     ONIG_INEFFECTIVE_META_CHAR.
 * NOTE(review): the closing braces (and possibly other lines) are not visible
 * in this excerpt -- confirm against the full source.
 */
228 const OnigSyntaxType OnigSyntaxPerl
= {
229 (( SYN_GNU_REGEX_OP
| ONIG_SYN_OP_QMARK_NON_GREEDY
|
230 ONIG_SYN_OP_ESC_OCTAL3
| ONIG_SYN_OP_ESC_X_HEX2
|
231 ONIG_SYN_OP_ESC_X_BRACE_HEX8
| ONIG_SYN_OP_ESC_CONTROL_CHARS
|
232 ONIG_SYN_OP_ESC_O_BRACE_OCTAL
| ONIG_SYN_OP_ESC_C_CONTROL
)
/* clear ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END from the combined operator set */
233 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END
)
/* ONIG_SYN_OP2_* flags */
234 , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE
|
235 ONIG_SYN_OP2_QMARK_GROUP_EFFECT
| ONIG_SYN_OP2_OPTION_PERL
|
236 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY
|
237 ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT
|
238 ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER
|
239 ONIG_SYN_OP2_QMARK_LPAREN_CONDITION
|
240 ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT
|
241 ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL
|
242 ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK
|
243 ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP
|
244 ONIG_SYN_OP2_QMARK_SUBEXP_CALL
|
245 ONIG_SYN_OP2_ESC_G_BRACE_BACKREF
|
246 ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP
|
247 ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP
|
248 ONIG_SYN_OP2_ESC_K_NAMED_BACKREF
)
/* behavior flags */
249 , ( SYN_GNU_REGEX_BV
|
250 ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME
|
251 ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL
|
252 ONIG_SYN_USE_LEFT_MOST_NAMED_GROUP
)
/* ONIG_OPTION_* flags */
253 , ( ONIG_OPTION_SINGLELINE
| ONIG_OPTION_CAPTURE_GROUP
)
/* meta-character entries */
256 (OnigCodePoint
)'\\' /* esc */
257 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
258 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
259 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
260 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
261 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * OnigSyntaxPython syntax table.
 * Visible initializers:
 *   - operator flags: SYN_GNU_REGEX_OP extended with non-greedy '?', 3-digit
 *     octal, 2-digit hex, braced hex, control-character and \c escape flags,
 *     with ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END then masked out;
 *   - the ONIG_SYN_OP2_* flags OR-ed together below;
 *   - behavior flags: SYN_GNU_REGEX_BV | ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV;
 *   - option flags: ONIG_OPTION_SINGLELINE | ONIG_OPTION_ASCII_RANGE;
 *   - meta-character entries: backslash as escape, every other entry
 *     ONIG_INEFFECTIVE_META_CHAR.
 * NOTE(review): the closing braces (and possibly other lines) are not visible
 * in this excerpt -- confirm against the full source.
 */
265 const OnigSyntaxType OnigSyntaxPython
= {
266 (( SYN_GNU_REGEX_OP
| ONIG_SYN_OP_QMARK_NON_GREEDY
|
267 ONIG_SYN_OP_ESC_OCTAL3
| ONIG_SYN_OP_ESC_X_HEX2
|
268 ONIG_SYN_OP_ESC_X_BRACE_HEX8
| ONIG_SYN_OP_ESC_CONTROL_CHARS
|
269 ONIG_SYN_OP_ESC_C_CONTROL
)
/* clear ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END from the combined operator set */
270 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END
)
/* ONIG_SYN_OP2_* flags */
271 , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT
| ONIG_SYN_OP2_OPTION_PERL
|
272 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY
|
273 ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT
|
274 ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT
|
275 ONIG_SYN_OP2_ESC_V_VTAB
|
276 ONIG_SYN_OP2_ESC_U_HEX4
|
277 ONIG_SYN_OP2_QMARK_LPAREN_CONDITION
|
278 ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP
)
/* behavior flags */
279 , ( SYN_GNU_REGEX_BV
|
280 ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV
)
/* ONIG_OPTION_* flags */
281 , ( ONIG_OPTION_SINGLELINE
| ONIG_OPTION_ASCII_RANGE
)
/* meta-character entries */
284 (OnigCodePoint
)'\\' /* esc */
285 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
286 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
287 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
288 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
289 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * onig_set_default_syntax: record `syntax` in the global OnigDefaultSyntax.
 * Before the store, ONIG_SYNTAX_RUBY is assigned to the local `syntax`
 * pointer; the caller's object is never written (const-qualified pointee).
 * NOTE(review): the condition guarding the ONIG_SYNTAX_RUBY substitution
 * (presumably a NULL check), the return type and the return statement are
 * not visible in this excerpt -- confirm against the full source.
 */
296 onig_set_default_syntax(const OnigSyntaxType
* syntax
)
/* fallback value for the local pointer */
299 syntax
= ONIG_SYNTAX_RUBY
;
/* publish the chosen syntax */
301 OnigDefaultSyntax
= syntax
;
/*
 * onig_copy_syntax: takes a writable destination syntax `to` and a read-only
 * source syntax `from`.
 * NOTE(review): the return type and body are not visible in this excerpt;
 * presumably copies *from into *to -- confirm against the full source.
 */
306 onig_copy_syntax(OnigSyntaxType
* to
, const OnigSyntaxType
* from
)
/*
 * onig_set_syntax_op: takes a writable syntax object and an unsigned int `op`.
 * NOTE(review): the return type and body are not visible in this excerpt;
 * presumably stores `op` in the syntax object -- confirm against the full
 * source.
 */
312 onig_set_syntax_op(OnigSyntaxType
* syntax
, unsigned int op
)
/*
 * onig_set_syntax_op2: takes a writable syntax object and an unsigned int
 * `op2`.
 * NOTE(review): the return type and body are not visible in this excerpt;
 * presumably stores `op2` in the syntax object -- confirm against the full
 * source.
 */
318 onig_set_syntax_op2(OnigSyntaxType
* syntax
, unsigned int op2
)
/*
 * onig_set_syntax_behavior: overwrite the `behavior` member of `syntax` with
 * the given value. The previous value is replaced, not OR-ed.
 * NOTE(review): the return type line is not visible in this excerpt.
 */
324 onig_set_syntax_behavior(OnigSyntaxType
* syntax
, unsigned int behavior
)
326 syntax
->behavior
= behavior
;
/*
 * onig_set_syntax_options: overwrite the `options` member of `syntax` with
 * the given OnigOptionType value. The previous value is replaced, not OR-ed.
 * NOTE(review): the return type line is not visible in this excerpt.
 */
330 onig_set_syntax_options(OnigSyntaxType
* syntax
, OnigOptionType options
)
332 syntax
->options
= options
;
/*
 * onig_get_syntax_op: takes a read-only syntax object (const-qualified
 * pointee).
 * NOTE(review): the return type and body are not visible in this excerpt;
 * presumably returns the syntax object's operator flags -- confirm against
 * the full source.
 */
336 onig_get_syntax_op(const OnigSyntaxType
* syntax
)
/*
 * onig_get_syntax_op2: takes a read-only syntax object (const-qualified
 * pointee).
 * NOTE(review): the return type and body are not visible in this excerpt;
 * presumably returns the syntax object's second operator flag set -- confirm
 * against the full source.
 */
342 onig_get_syntax_op2(const OnigSyntaxType
* syntax
)
/*
 * onig_get_syntax_behavior: return the `behavior` member of `syntax`.
 * The syntax object is only read (const-qualified pointee).
 * NOTE(review): the return type line is not visible in this excerpt.
 */
348 onig_get_syntax_behavior(const OnigSyntaxType
* syntax
)
350 return syntax
->behavior
;
/*
 * onig_get_syntax_options: return the `options` member of `syntax` as an
 * OnigOptionType. The syntax object is only read (const-qualified pointee).
 */
353 extern OnigOptionType
354 onig_get_syntax_options(const OnigSyntaxType
* syntax
)
356 return syntax
->options
;
359 #ifdef USE_VARIABLE_META_CHARS
/*
 * onig_set_meta_char: only compiled when USE_VARIABLE_META_CHARS is defined.
 * Stores `code` in one field of enc->meta_char_table; `what` names the field:
 *
 *   ONIG_META_CHAR_ESCAPE            -> esc
 *   ONIG_META_CHAR_ANYCHAR           -> anychar
 *   ONIG_META_CHAR_ANYTIME           -> anytime
 *   ONIG_META_CHAR_ZERO_OR_ONE_TIME  -> zero_or_one_time
 *   ONIG_META_CHAR_ONE_OR_MORE_TIME  -> one_or_more_time
 *   ONIG_META_CHAR_ANYCHAR_ANYTIME   -> anychar_anytime
 *
 * `enc` is the syntax object to modify (it has type OnigSyntaxType* despite
 * its name). Returns int; ONIGERR_INVALID_ARGUMENT is one of the return
 * values.
 * NOTE(review): the switch header, the statements that end each case, the
 * label leading to the ONIGERR_INVALID_ARGUMENT return (presumably the
 * default case) and the success return are not visible in this excerpt --
 * confirm against the full source.
 */
360 extern int onig_set_meta_char(OnigSyntaxType
* enc
,
361 unsigned int what
, OnigCodePoint code
)
364 case ONIG_META_CHAR_ESCAPE
:
365 enc
->meta_char_table
.esc
= code
;
367 case ONIG_META_CHAR_ANYCHAR
:
368 enc
->meta_char_table
.anychar
= code
;
370 case ONIG_META_CHAR_ANYTIME
:
371 enc
->meta_char_table
.anytime
= code
;
373 case ONIG_META_CHAR_ZERO_OR_ONE_TIME
:
374 enc
->meta_char_table
.zero_or_one_time
= code
;
376 case ONIG_META_CHAR_ONE_OR_MORE_TIME
:
377 enc
->meta_char_table
.one_or_more_time
= code
;
379 case ONIG_META_CHAR_ANYCHAR_ANYTIME
:
380 enc
->meta_char_table
.anychar_anytime
= code
;
/* rejection path for a selector that matched none of the cases above (label not visible here) */
383 return ONIGERR_INVALID_ARGUMENT
;
388 #endif /* USE_VARIABLE_META_CHARS */