1 /**********************************************************************
2 regsyntax.c - Oniguruma (regular expression library)
3 **********************************************************************/
5 * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
/*
 * OnigSyntaxASIS: syntax table definition.
 * Visible entries: the ONIG_SYN_OP2_INEFFECTIVE_ESCAPE flag, followed by
 * the meta-character list (backslash as the escape character, every other
 * meta character set to ONIG_INEFFECTIVE_META_CHAR).
 * NOTE(review): the initializer is not fully visible here (entries before
 * the first comma and the closing brace are missing) -- confirm against
 * the complete file.
 */
32 const OnigSyntaxType OnigSyntaxASIS
= {
34 , ONIG_SYN_OP2_INEFFECTIVE_ESCAPE
39 (OnigCodePoint
)'\\' /* esc */
40 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
41 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
42 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
43 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
44 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * OnigSyntaxPosixBasic: syntax table definition.
 * Visible entries:
 *   - operator flags: SYN_POSIX_COMMON_OP plus the backslash-escaped
 *     forms of sub-expression parentheses and brace intervals;
 *   - options: ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE;
 *   - meta-character list: backslash as escape, all others
 *     ONIG_INEFFECTIVE_META_CHAR.
 * NOTE(review): some initializer entries and the closing brace are not
 * visible here -- confirm against the complete file.
 */
48 const OnigSyntaxType OnigSyntaxPosixBasic
= {
49 ( SYN_POSIX_COMMON_OP
| ONIG_SYN_OP_ESC_LPAREN_SUBEXP
|
50 ONIG_SYN_OP_ESC_BRACE_INTERVAL
)
53 , ( ONIG_OPTION_SINGLELINE
| ONIG_OPTION_MULTILINE
)
56 (OnigCodePoint
)'\\' /* esc */
57 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
58 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
59 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
60 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
61 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * OnigSyntaxPosixExtended: syntax table definition.
 * Visible entries:
 *   - operator flags: SYN_POSIX_COMMON_OP plus unescaped sub-expression
 *     parentheses, brace intervals, '+', '?' and '|' alternation;
 *   - ONIG_SYN_* flags: context-independent anchors and repeat operators,
 *     invalid-repeat-operator context checking, unmatched close
 *     parenthesis allowed, double range operator allowed in a character
 *     class;
 *   - options: ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE;
 *   - meta-character list: backslash as escape, all others
 *     ONIG_INEFFECTIVE_META_CHAR.
 * NOTE(review): some initializer entries and the closing brace are not
 * visible here -- confirm against the complete file.
 */
65 const OnigSyntaxType OnigSyntaxPosixExtended
= {
66 ( SYN_POSIX_COMMON_OP
| ONIG_SYN_OP_LPAREN_SUBEXP
|
67 ONIG_SYN_OP_BRACE_INTERVAL
|
68 ONIG_SYN_OP_PLUS_ONE_INF
| ONIG_SYN_OP_QMARK_ZERO_ONE
| ONIG_SYN_OP_VBAR_ALT
)
70 , ( ONIG_SYN_CONTEXT_INDEP_ANCHORS
|
71 ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS
| ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS
|
72 ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP
|
73 ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC
)
74 , ( ONIG_OPTION_SINGLELINE
| ONIG_OPTION_MULTILINE
)
77 (OnigCodePoint
)'\\' /* esc */
78 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
79 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
80 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
81 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
82 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * OnigSyntaxEmacs: syntax table definition.
 * Visible entries:
 *   - operator flags: '.', bracket character classes, backslash-escaped
 *     brace intervals / parentheses / '|', unescaped '*', '+', '?',
 *     decimal back-references, line anchors and escaped control
 *     characters;
 *   - ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR;
 *   - ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC;
 *   - meta-character list: backslash as escape, all others
 *     ONIG_INEFFECTIVE_META_CHAR.
 * NOTE(review): some initializer entries and the closing brace are not
 * visible here -- confirm against the complete file.
 */
86 const OnigSyntaxType OnigSyntaxEmacs
= {
87 ( ONIG_SYN_OP_DOT_ANYCHAR
| ONIG_SYN_OP_BRACKET_CC
|
88 ONIG_SYN_OP_ESC_BRACE_INTERVAL
|
89 ONIG_SYN_OP_ESC_LPAREN_SUBEXP
| ONIG_SYN_OP_ESC_VBAR_ALT
|
90 ONIG_SYN_OP_ASTERISK_ZERO_INF
| ONIG_SYN_OP_PLUS_ONE_INF
|
91 ONIG_SYN_OP_QMARK_ZERO_ONE
| ONIG_SYN_OP_DECIMAL_BACKREF
|
92 ONIG_SYN_OP_LINE_ANCHOR
| ONIG_SYN_OP_ESC_CONTROL_CHARS
)
93 , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
94 , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
98 (OnigCodePoint
)'\\' /* esc */
99 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
100 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
101 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
102 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
103 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * OnigSyntaxGrep: syntax table definition.
 * Visible entries:
 *   - operator flags: '.', bracket character classes, POSIX bracket
 *     expressions, backslash-escaped brace intervals / parentheses / '|' /
 *     '+' / '?', unescaped '*', line anchors, \w, \b, \< \> word
 *     begin/end, and decimal back-references;
 *   - ONIG_SYN_* flags: empty range allowed in a character class, and
 *     ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC;
 *   - meta-character list: backslash as escape, all others
 *     ONIG_INEFFECTIVE_META_CHAR.
 * NOTE(review): some initializer entries and the closing brace are not
 * visible here -- confirm against the complete file.
 */
107 const OnigSyntaxType OnigSyntaxGrep
= {
108 ( ONIG_SYN_OP_DOT_ANYCHAR
| ONIG_SYN_OP_BRACKET_CC
| ONIG_SYN_OP_POSIX_BRACKET
|
109 ONIG_SYN_OP_ESC_BRACE_INTERVAL
| ONIG_SYN_OP_ESC_LPAREN_SUBEXP
|
110 ONIG_SYN_OP_ESC_VBAR_ALT
|
111 ONIG_SYN_OP_ASTERISK_ZERO_INF
| ONIG_SYN_OP_ESC_PLUS_ONE_INF
|
112 ONIG_SYN_OP_ESC_QMARK_ZERO_ONE
| ONIG_SYN_OP_LINE_ANCHOR
|
113 ONIG_SYN_OP_ESC_W_WORD
| ONIG_SYN_OP_ESC_B_WORD_BOUND
|
114 ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END
| ONIG_SYN_OP_DECIMAL_BACKREF
)
116 , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
| ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC
)
120 (OnigCodePoint
)'\\' /* esc */
121 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
122 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
123 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
124 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
125 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * OnigSyntaxGnuRegex: syntax table definition.
 * Only the meta-character list is visible here: backslash as the escape
 * character, every other meta character ONIG_INEFFECTIVE_META_CHAR.
 * NOTE(review): the flag/option entries that precede the meta-character
 * list and the closing brace are not visible -- confirm against the
 * complete file.
 */
129 const OnigSyntaxType OnigSyntaxGnuRegex
= {
136 (OnigCodePoint
)'\\' /* esc */
137 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
138 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
139 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
140 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
141 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * OnigSyntaxJava: syntax table definition.
 * Visible entries:
 *   - operator flags: SYN_GNU_REGEX_OP extended with non-greedy '?',
 *     escaped control characters, \c control, 3-digit octal and 2-digit
 *     hex escapes, then with ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END masked
 *     out via "& ~";
 *   - OP2 flags: \Q quoting, (?...) group effects, Perl-style options,
 *     possessive repeat and interval, character-class set operations,
 *     \v, \u four-digit hex, and \p{...} character properties;
 *   - SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND;
 *   - option: ONIG_OPTION_SINGLELINE;
 *   - meta-character list: backslash as escape, all others
 *     ONIG_INEFFECTIVE_META_CHAR.
 * NOTE(review): some initializer entries and the closing brace are not
 * visible here -- confirm against the complete file.
 */
145 const OnigSyntaxType OnigSyntaxJava
= {
146 (( SYN_GNU_REGEX_OP
| ONIG_SYN_OP_QMARK_NON_GREEDY
|
147 ONIG_SYN_OP_ESC_CONTROL_CHARS
| ONIG_SYN_OP_ESC_C_CONTROL
|
148 ONIG_SYN_OP_ESC_OCTAL3
| ONIG_SYN_OP_ESC_X_HEX2
)
149 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END
)
150 , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE
| ONIG_SYN_OP2_QMARK_GROUP_EFFECT
|
151 ONIG_SYN_OP2_OPTION_PERL
| ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT
|
152 ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL
| ONIG_SYN_OP2_CCLASS_SET_OP
|
153 ONIG_SYN_OP2_ESC_V_VTAB
| ONIG_SYN_OP2_ESC_U_HEX4
|
154 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY
)
155 , ( SYN_GNU_REGEX_BV
| ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
)
156 , ONIG_OPTION_SINGLELINE
159 (OnigCodePoint
)'\\' /* esc */
160 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
161 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
162 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
163 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
164 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * OnigSyntaxPerl: syntax table definition.
 * Visible entries:
 *   - operator flags: SYN_GNU_REGEX_OP extended with non-greedy '?',
 *     3-digit octal, 2-digit hex and \x{...} escapes, escaped control
 *     characters and \c control, then with
 *     ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END masked out via "& ~";
 *   - OP2 flags: \Q quoting, (?...) group effects, Perl-style options,
 *     \p{...} character properties and the \p{^...} negated form;
 *   - option: ONIG_OPTION_SINGLELINE;
 *   - meta-character list: backslash as escape, all others
 *     ONIG_INEFFECTIVE_META_CHAR.
 * NOTE(review): some initializer entries and the closing brace are not
 * visible here -- confirm against the complete file.
 */
168 const OnigSyntaxType OnigSyntaxPerl
= {
169 (( SYN_GNU_REGEX_OP
| ONIG_SYN_OP_QMARK_NON_GREEDY
|
170 ONIG_SYN_OP_ESC_OCTAL3
| ONIG_SYN_OP_ESC_X_HEX2
|
171 ONIG_SYN_OP_ESC_X_BRACE_HEX8
| ONIG_SYN_OP_ESC_CONTROL_CHARS
|
172 ONIG_SYN_OP_ESC_C_CONTROL
)
173 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END
)
174 , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE
|
175 ONIG_SYN_OP2_QMARK_GROUP_EFFECT
| ONIG_SYN_OP2_OPTION_PERL
|
176 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY
|
177 ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT
)
179 , ONIG_OPTION_SINGLELINE
182 (OnigCodePoint
)'\\' /* esc */
183 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
184 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
185 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
186 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
187 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * OnigSyntaxPerl_NG: syntax table definition (Perl plus named groups).
 * Visible entries:
 *   - operator flags: SYN_GNU_REGEX_OP extended with non-greedy '?',
 *     3-digit octal, 2-digit hex and \x{...} escapes, escaped control
 *     characters and \c control, then with
 *     ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END masked out via "& ~";
 *   - OP2 flags: \Q quoting, (?...) group effects, Perl-style options,
 *     \p{...} properties and \p{^...}, plus (?<name>...) named groups,
 *     \k named back-references and \g sub-expression calls;
 *   - SYN_GNU_REGEX_BV | ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
 *     ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME;
 *   - option: ONIG_OPTION_SINGLELINE;
 *   - meta-character list: backslash as escape, all others
 *     ONIG_INEFFECTIVE_META_CHAR.
 * NOTE(review): some initializer entries and the closing brace are not
 * visible here -- confirm against the complete file.
 */
191 /* Perl + named group */
192 const OnigSyntaxType OnigSyntaxPerl_NG
= {
193 (( SYN_GNU_REGEX_OP
| ONIG_SYN_OP_QMARK_NON_GREEDY
|
194 ONIG_SYN_OP_ESC_OCTAL3
| ONIG_SYN_OP_ESC_X_HEX2
|
195 ONIG_SYN_OP_ESC_X_BRACE_HEX8
| ONIG_SYN_OP_ESC_CONTROL_CHARS
|
196 ONIG_SYN_OP_ESC_C_CONTROL
)
197 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END
)
198 , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE
|
199 ONIG_SYN_OP2_QMARK_GROUP_EFFECT
| ONIG_SYN_OP2_OPTION_PERL
|
200 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY
|
201 ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT
|
202 ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP
|
203 ONIG_SYN_OP2_ESC_K_NAMED_BACKREF
|
204 ONIG_SYN_OP2_ESC_G_SUBEXP_CALL
)
205 , ( SYN_GNU_REGEX_BV
|
206 ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
|
207 ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME
)
208 , ONIG_OPTION_SINGLELINE
211 (OnigCodePoint
)'\\' /* esc */
212 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar '.' */
213 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anytime '*' */
214 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* zero or one time '?' */
215 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* one or more time '+' */
216 , (OnigCodePoint
)ONIG_INEFFECTIVE_META_CHAR
/* anychar anytime */
/*
 * onig_set_default_syntax: stores `syntax` in the global OnigDefaultSyntax.
 * Before the store, `syntax` may be replaced by ONIG_SYNTAX_RUBY.
 * NOTE(review): the condition guarding that replacement is not visible
 * here -- presumably a NULL check on `syntax`; confirm. The return type
 * and return statement are not visible either.
 */
223 onig_set_default_syntax(const OnigSyntaxType
* syntax
)
226 syntax
= ONIG_SYNTAX_RUBY
;
228 OnigDefaultSyntax
= syntax
;
/*
 * onig_copy_syntax: takes a writable destination `to` and a read-only
 * source `from`.
 * NOTE(review): the body is not visible here -- presumably it copies
 * *from into *to; confirm against the complete file.
 */
233 onig_copy_syntax(OnigSyntaxType
* to
, const OnigSyntaxType
* from
)
/*
 * onig_set_syntax_op: takes a syntax table and an unsigned flag word `op`.
 * NOTE(review): the body is not visible here -- presumably it stores `op`
 * into the table, like the behavior/options setters; confirm.
 */
239 onig_set_syntax_op(OnigSyntaxType
* syntax
, unsigned int op
)
/*
 * onig_set_syntax_op2: takes a syntax table and an unsigned flag word
 * `op2`.
 * NOTE(review): the body is not visible here -- presumably it stores
 * `op2` into the table, like the behavior/options setters; confirm.
 */
245 onig_set_syntax_op2(OnigSyntaxType
* syntax
, unsigned int op2
)
/*
 * onig_set_syntax_behavior: overwrites syntax->behavior with `behavior`.
 * The previous value is replaced, not OR-ed. `syntax` is dereferenced
 * without any visible NULL check.
 */
251 onig_set_syntax_behavior(OnigSyntaxType
* syntax
, unsigned int behavior
)
253 syntax
->behavior
= behavior
;
/*
 * onig_set_syntax_options: overwrites syntax->options with `options`.
 * The previous value is replaced, not OR-ed. `syntax` is dereferenced
 * without any visible NULL check.
 */
257 onig_set_syntax_options(OnigSyntaxType
* syntax
, OnigOptionType options
)
259 syntax
->options
= options
;
/*
 * onig_get_syntax_op: accessor taking a syntax table.
 * NOTE(review): the body is not visible here -- presumably it returns
 * the table's `op` flag word; confirm.
 */
263 onig_get_syntax_op(OnigSyntaxType
* syntax
)
/*
 * onig_get_syntax_op2: accessor taking a syntax table.
 * NOTE(review): the body is not visible here -- presumably it returns
 * the table's `op2` flag word; confirm.
 */
269 onig_get_syntax_op2(OnigSyntaxType
* syntax
)
/*
 * onig_get_syntax_behavior: returns syntax->behavior.
 * `syntax` is dereferenced without any visible NULL check.
 */
275 onig_get_syntax_behavior(OnigSyntaxType
* syntax
)
277 return syntax
->behavior
;
/*
 * onig_get_syntax_options: returns syntax->options as an OnigOptionType.
 * `syntax` is dereferenced without any visible NULL check.
 */
280 extern OnigOptionType
281 onig_get_syntax_options(OnigSyntaxType
* syntax
)
283 return syntax
->options
;
286 #ifdef USE_VARIABLE_META_CHARS
/*
 * onig_set_meta_char: sets one entry of a syntax table's meta_char_table.
 * Compiled only when USE_VARIABLE_META_CHARS is defined.
 *
 *   enc   the syntax table to modify (an OnigSyntaxType despite the
 *         parameter name)
 *   what  selects the entry: one of the ONIG_META_CHAR_* case labels below
 *   code  the code point stored into the selected entry
 *
 * Returns int; ONIGERR_INVALID_ARGUMENT is returned on one path.
 * NOTE(review): the switch header, the statements between cases
 * (presumably `break;`), the label guarding the error return (presumably
 * `default:`) and the success return are not visible here -- confirm
 * against the complete file.
 */
287 extern int onig_set_meta_char(OnigSyntaxType
* enc
,
288 unsigned int what
, OnigCodePoint code
)
291 case ONIG_META_CHAR_ESCAPE
:
292 enc
->meta_char_table
.esc
= code
;
294 case ONIG_META_CHAR_ANYCHAR
:
295 enc
->meta_char_table
.anychar
= code
;
297 case ONIG_META_CHAR_ANYTIME
:
298 enc
->meta_char_table
.anytime
= code
;
300 case ONIG_META_CHAR_ZERO_OR_ONE_TIME
:
301 enc
->meta_char_table
.zero_or_one_time
= code
;
303 case ONIG_META_CHAR_ONE_OR_MORE_TIME
:
304 enc
->meta_char_table
.one_or_more_time
= code
;
306 case ONIG_META_CHAR_ANYCHAR_ANYTIME
:
307 enc
->meta_char_table
.anychar_anytime
= code
;
310 return ONIGERR_INVALID_ARGUMENT
;
315 #endif /* USE_VARIABLE_META_CHARS */