3 /**********************************************************************
4 oniguruma.h - Oniguruma (regular expression library)
5 **********************************************************************/
7 * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 } /* satisfy cc-mode */
/* Library version, split into three numeric components (5.9.1 as defined here). */
40 #define ONIGURUMA_VERSION_MAJOR 5
41 #define ONIGURUMA_VERSION_MINOR 9
42 #define ONIGURUMA_VERSION_TEENY 1
45 # ifndef HAVE_PROTOTYPES
46 # define HAVE_PROTOTYPES 1
48 # ifndef HAVE_STDARG_PROTOTYPES
49 # define HAVE_STDARG_PROTOTYPES 1
53 /* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */
54 #if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4
55 # ifndef HAVE_STDARG_PROTOTYPES
56 # define HAVE_STDARG_PROTOTYPES 1
61 # ifndef HAVE_STDARG_PROTOTYPES
62 # define HAVE_STDARG_PROTOTYPES 1
67 #if defined(__STDC__) || defined(_WIN32)
68 # define P_(args) args
75 #ifdef HAVE_STDARG_PROTOTYPES
76 # define PV_(args) args
84 #define ONIG_EXTERN RUBY_EXTERN
86 #if defined(_WIN32) && !defined(__GNUC__)
87 #if defined(EXPORT) || defined(RUBY_EXPORT)
88 #define ONIG_EXTERN extern __declspec(dllexport)
90 #define ONIG_EXTERN extern __declspec(dllimport)
97 #define ONIG_EXTERN extern
100 /* PART: character encoding */
102 #ifndef ONIG_ESCAPE_UCHAR_COLLISION
103 #define UChar OnigUChar
/* Basic scalar types used by the encoding layer. */
/* One byte of encoded text. */
106 typedef unsigned char OnigUChar
;
/* A code point value (unsigned long). */
107 typedef unsigned long OnigCodePoint
;
/* Character-type identifier; see the ONIGENC_CTYPE_* constants. */
108 typedef unsigned int OnigCtype
;
/* Unsigned distance value. */
109 typedef unsigned int OnigDistance
;
/* All bits set in an OnigDistance, i.e. the largest value the type can hold. */
111 #define ONIG_INFINITE_DISTANCE ~((OnigDistance )0)
/* Bit-flag word selecting case-folding behaviour. */
113 typedef unsigned int OnigCaseFoldType
; /* case fold flag */
/* Default case-fold flag variable; declared here, defined elsewhere. */
115 ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag
;
/* The two flags below are commented out and therefore not available. */
117 /* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */
118 /* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */
119 #define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20)
120 #define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30)
/* ONIGENC_CASE_FOLD_MIN is an alias for the multi-char flag (bit 30). */
122 #define ONIGENC_CASE_FOLD_MIN INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR
/* Expands to the OnigDefaultCaseFoldFlag variable, so it is read at run time. */
123 #define ONIGENC_CASE_FOLD_DEFAULT OnigDefaultCaseFoldFlag
/* Capacity of the code[] array in OnigCaseFoldCodeItem. */
126 #define ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN 3
/* NOTE(review): presumably the maximum number of items returned by
   get_case_fold_codes_by_str -- confirm against the implementation. */
127 #define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM 13
128 /* 13 => Unicode:0x1ffc */
/*
 * Accessors for a code-range table laid out as
 *   { n, from_0, to_0, from_1, to_1, ... }
 * ONIGENC_CODE_RANGE_NUM reads element 0 (cast to int);
 * ONIGENC_CODE_RANGE_FROM / _TO read elements 2*i+1 and 2*i+2.
 *
 * `range` is parenthesized so that an expression argument such as
 * `tbl + 1` indexes the intended pointer instead of expanding to
 * `tbl + 1[0]`.
 */
#define ONIGENC_CODE_RANGE_NUM(range)     ((int )(range)[0])
#define ONIGENC_CODE_RANGE_FROM(range,i)  ((range)[((i)*2) + 1])
#define ONIGENC_CODE_RANGE_TO(range,i)    ((range)[((i)*2) + 2])
136 int byte_len
; /* argument(original) character(s) byte length */
137 int code_len
; /* number of code */
138 OnigCodePoint code
[ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN
];
139 } OnigCaseFoldCodeItem
;
143 OnigCodePoint anychar
;
144 OnigCodePoint anytime
;
145 OnigCodePoint zero_or_one_time
;
146 OnigCodePoint one_or_more_time
;
147 OnigCodePoint anychar_anytime
;
148 } OnigMetaCharTableType
;
150 typedef int (*OnigApplyAllCaseFoldFunc
)(OnigCodePoint from
, OnigCodePoint
* to
, int to_len
, void* arg
);
152 typedef struct OnigEncodingTypeST
{
153 int (*precise_mbc_enc_len
)(const OnigUChar
* p
,const OnigUChar
* e
, struct OnigEncodingTypeST
* enc
);
157 int (*is_mbc_newline
)(const OnigUChar
* p
, const OnigUChar
* end
, struct OnigEncodingTypeST
* enc
);
158 OnigCodePoint (*mbc_to_code
)(const OnigUChar
* p
, const OnigUChar
* end
, struct OnigEncodingTypeST
* enc
);
159 int (*code_to_mbclen
)(OnigCodePoint code
, struct OnigEncodingTypeST
* enc
);
160 int (*code_to_mbc
)(OnigCodePoint code
, OnigUChar
*buf
, struct OnigEncodingTypeST
* enc
);
161 int (*mbc_case_fold
)(OnigCaseFoldType flag
, const OnigUChar
** pp
, const OnigUChar
* end
, OnigUChar
* to
, struct OnigEncodingTypeST
* enc
);
162 int (*apply_all_case_fold
)(OnigCaseFoldType flag
, OnigApplyAllCaseFoldFunc f
, void* arg
, struct OnigEncodingTypeST
* enc
);
163 int (*get_case_fold_codes_by_str
)(OnigCaseFoldType flag
, const OnigUChar
* p
, const OnigUChar
* end
, OnigCaseFoldCodeItem acs
[], struct OnigEncodingTypeST
* enc
);
164 int (*property_name_to_ctype
)(struct OnigEncodingTypeST
* enc
, OnigUChar
* p
, OnigUChar
* end
);
165 int (*is_code_ctype
)(OnigCodePoint code
, OnigCtype ctype
, struct OnigEncodingTypeST
* enc
);
166 int (*get_ctype_code_range
)(OnigCtype ctype
, OnigCodePoint
* sb_out
, const OnigCodePoint
* ranges
[], struct OnigEncodingTypeST
* enc
);
167 OnigUChar
* (*left_adjust_char_head
)(const OnigUChar
* start
, const OnigUChar
* p
, struct OnigEncodingTypeST
* enc
);
168 int (*is_allowed_reverse_match
)(const OnigUChar
* p
, const OnigUChar
* end
, struct OnigEncodingTypeST
* enc
);
169 int ruby_encoding_index
;
172 typedef OnigEncodingType
* OnigEncoding
;
174 ONIG_EXTERN OnigEncodingType OnigEncodingASCII
;
176 #define ONIG_ENCODING_ASCII (&OnigEncodingASCII)
178 #define ONIG_ENCODING_UNDEF ((OnigEncoding )0)
/* NOTE(review): presumably upper bounds, in bytes, on the output of the
   code_to_mbc and mbc_case_fold encoding methods -- confirm with callers. */
182 #define ONIGENC_CODE_TO_MBC_MAXLEN 7
183 #define ONIGENC_MBC_CASE_FOLD_MAXLEN 18
184 /* 18: 6(max-byte) * 3(case-fold chars) */
/* Character-type identifiers, numbered consecutively from 0; used as the
   ctype argument of ONIGENC_IS_CODE_CTYPE(). */
186 /* character types */
187 #define ONIGENC_CTYPE_NEWLINE 0
188 #define ONIGENC_CTYPE_ALPHA 1
189 #define ONIGENC_CTYPE_BLANK 2
190 #define ONIGENC_CTYPE_CNTRL 3
191 #define ONIGENC_CTYPE_DIGIT 4
192 #define ONIGENC_CTYPE_GRAPH 5
193 #define ONIGENC_CTYPE_LOWER 6
194 #define ONIGENC_CTYPE_PRINT 7
195 #define ONIGENC_CTYPE_PUNCT 8
196 #define ONIGENC_CTYPE_SPACE 9
197 #define ONIGENC_CTYPE_UPPER 10
198 #define ONIGENC_CTYPE_XDIGIT 11
199 #define ONIGENC_CTYPE_WORD 12
200 #define ONIGENC_CTYPE_ALNUM 13 /* alpha || digit */
201 #define ONIGENC_CTYPE_ASCII 14
/* Highest standard ctype value (currently ONIGENC_CTYPE_ASCII == 14). */
202 #define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII
/*
 * Helper macros operating on an encoding handle `enc`.
 *
 * The first group consists of simple predicates and aliases; the second
 * group dispatches through the function pointers stored in the
 * encoding descriptor, passing `enc` itself as the trailing argument.
 *
 * Every macro argument forwarded to a dispatched call is parenthesized.
 * In particular the cast in ONIGENC_MBC_CASE_FOLD now applies to the
 * whole `pp` expression: previously an argument such as `c ? &a : &b`
 * expanded to `(const OnigUChar**)c ? &a : &b`.
 */
#define onig_enc_len(enc,p,e)          ONIGENC_MBC_ENC_LEN(enc, p, e)

/* True when `enc` is the null handle ONIG_ENCODING_UNDEF. */
#define ONIGENC_IS_UNDEF(enc)          ((enc) == ONIG_ENCODING_UNDEF)
/* True when the encoding's maximum character length is one byte. */
#define ONIGENC_IS_SINGLEBYTE(enc)     (ONIGENC_MBC_MAXLEN(enc) == 1)
/* True when the character length reported at p is anything other than 1. */
#define ONIGENC_IS_MBC_HEAD(enc,p,e)   (ONIGENC_MBC_ENC_LEN(enc,p,e) != 1)
/* True when the byte at p (resp. the code value) is below 128. */
#define ONIGENC_IS_MBC_ASCII(p)        (*(p) < 128)
#define ONIGENC_IS_CODE_ASCII(code)    ((code) < 128)
/* Decodes the character at s and tests it against the WORD ctype. */
#define ONIGENC_IS_MBC_WORD(enc,s,end) \
  ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end))

#define ONIGENC_NAME(enc)              ((enc)->name)

/* Dispatch through the encoding's function table. */
#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \
  (enc)->mbc_case_fold((flag),(const OnigUChar** )(pp),(end),(buf),(enc))
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
  (enc)->is_allowed_reverse_match((s),(end),(enc))
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
  (enc)->left_adjust_char_head((start),(s),(enc))
#define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \
  (enc)->apply_all_case_fold((case_fold_flag),(f),(arg),(enc))
#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \
  (enc)->get_case_fold_codes_by_str((case_fold_flag),(p),(end),(acs),(enc))
/* Forwards to the onigenc_step_back() function. */
#define ONIGENC_STEP_BACK(enc,start,s,n) \
  onigenc_step_back((enc),(start),(s),(n))
/*
 * Encoding of a character-length result r in a single int:
 *   r >  0  : a character was found; its length is r
 *   r == -1 : invalid
 *   r <  -1 : more input is needed; the count n is recovered as -1 - r
 * The CONSTRUCT_* macros build such a value, the *_P macros test it and
 * the *_LEN macros extract the length/count.
 */
231 #define ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) (n)
232 #define ONIGENC_MBCLEN_CHARFOUND_P(r) (0 < (r))
233 #define ONIGENC_MBCLEN_CHARFOUND_LEN(r) (r)
235 #define ONIGENC_CONSTRUCT_MBCLEN_INVALID() (-1)
236 #define ONIGENC_MBCLEN_INVALID_P(r) ((r) == -1)
238 #define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n) (-1-(n))
239 #define ONIGENC_MBCLEN_NEEDMORE_P(r) ((r) < -1)
240 #define ONIGENC_MBCLEN_NEEDMORE_LEN(r) (-1-(r))
/* Calls the encoding's precise_mbc_enc_len method on the range [p, e). */
242 #define ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e) (enc)->precise_mbc_enc_len(p,e,enc)
245 int onigenc_mbclen_approximate
P_((const OnigUChar
* p
,const OnigUChar
* e
, struct OnigEncodingTypeST
* enc
));
/*
 * Length and conversion helpers. ONIGENC_MBC_ENC_LEN goes through
 * onigenc_mbclen_approximate(); the MAXLEN/MINLEN macros read fields of
 * the descriptor; the rest call methods of the descriptor and pass
 * `enc` along.
 */
#define ONIGENC_MBC_ENC_LEN(enc,p,e)               onigenc_mbclen_approximate(p,e,enc)
#define ONIGENC_MBC_MAXLEN(enc)                    ((enc)->max_enc_len)
#define ONIGENC_MBC_MAXLEN_DIST(enc)               ONIGENC_MBC_MAXLEN(enc)
#define ONIGENC_MBC_MINLEN(enc)                    ((enc)->min_enc_len)
#define ONIGENC_IS_MBC_NEWLINE(enc,p,end)          (enc)->is_mbc_newline((p),(end),enc)
#define ONIGENC_MBC_TO_CODE(enc,p,end)             (enc)->mbc_to_code((p),(end),enc)
#define ONIGENC_CODE_TO_MBCLEN(enc,code)           (enc)->code_to_mbclen(code,enc)
#define ONIGENC_CODE_TO_MBC(enc,code,buf)          (enc)->code_to_mbc(code,buf,enc)
#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc,p,end)  (enc)->property_name_to_ctype(enc,p,end)

/* Generic ctype test; every ONIGENC_IS_CODE_xxx below is a fixed-ctype shorthand. */
#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype)      (enc)->is_code_ctype(code,ctype,enc)

#define ONIGENC_IS_CODE_NEWLINE(enc,code)  ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_NEWLINE)
#define ONIGENC_IS_CODE_GRAPH(enc,code)    ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH)
#define ONIGENC_IS_CODE_PRINT(enc,code)    ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT)
#define ONIGENC_IS_CODE_ALNUM(enc,code)    ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM)
#define ONIGENC_IS_CODE_ALPHA(enc,code)    ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA)
#define ONIGENC_IS_CODE_LOWER(enc,code)    ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER)
#define ONIGENC_IS_CODE_UPPER(enc,code)    ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER)
#define ONIGENC_IS_CODE_CNTRL(enc,code)    ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL)
#define ONIGENC_IS_CODE_PUNCT(enc,code)    ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT)
#define ONIGENC_IS_CODE_SPACE(enc,code)    ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE)
#define ONIGENC_IS_CODE_BLANK(enc,code)    ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK)
#define ONIGENC_IS_CODE_DIGIT(enc,code)    ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT)
#define ONIGENC_IS_CODE_XDIGIT(enc,code)   ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT)
#define ONIGENC_IS_CODE_WORD(enc,code)     ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)

#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbout,ranges) \
        (enc)->get_ctype_code_range(ctype,sbout,ranges,enc)
293 OnigUChar
* onigenc_step_back
P_((OnigEncoding enc
, const OnigUChar
* start
, const OnigUChar
* s
, int n
));
298 int onigenc_init
P_((void));
300 int onigenc_set_default_encoding
P_((OnigEncoding enc
));
302 OnigEncoding onigenc_get_default_encoding
P_((void));
304 void onigenc_set_default_caseconv_table
P_((const OnigUChar
* table
));
306 OnigUChar
* onigenc_get_right_adjust_char_head_with_prev
P_((OnigEncoding enc
, const OnigUChar
* start
, const OnigUChar
* s
, const OnigUChar
** prev
));
308 OnigUChar
* onigenc_get_prev_char_head
P_((OnigEncoding enc
, const OnigUChar
* start
, const OnigUChar
* s
));
310 OnigUChar
* onigenc_get_left_adjust_char_head
P_((OnigEncoding enc
, const OnigUChar
* start
, const OnigUChar
* s
));
312 OnigUChar
* onigenc_get_right_adjust_char_head
P_((OnigEncoding enc
, const OnigUChar
* start
, const OnigUChar
* s
));
314 int onigenc_strlen
P_((OnigEncoding enc
, const OnigUChar
* p
, const OnigUChar
* end
));
316 int onigenc_strlen_null
P_((OnigEncoding enc
, const OnigUChar
* p
));
318 int onigenc_str_bytelen_null
P_((OnigEncoding enc
, const OnigUChar
* p
));
322 /* PART: regular expression */
/* Compile-time limits and sizes. NOTE(review): the precise use of each
   value is decided by the implementation files -- confirm there. */
324 /* config parameters */
325 #define ONIG_NREGION 10
326 #define ONIG_MAX_BACKREF_NUM 1000
327 #define ONIG_MAX_REPEAT_NUM 100000
328 #define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000
330 #define ONIG_MAX_ERROR_MESSAGE_LEN 90
/* Bit set of ONIG_OPTION_* flags. */
332 typedef unsigned int OnigOptionType
;
334 #define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE
/* Each flag is the previous one shifted left by one bit, so the flags
   occupy consecutive bits starting at bit 0 (ONIG_OPTION_IGNORECASE). */
337 #define ONIG_OPTION_NONE 0U
338 #define ONIG_OPTION_IGNORECASE 1U
339 #define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1)
340 #define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1)
341 #define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1)
342 #define ONIG_OPTION_FIND_LONGEST (ONIG_OPTION_SINGLELINE << 1)
343 #define ONIG_OPTION_FIND_NOT_EMPTY (ONIG_OPTION_FIND_LONGEST << 1)
344 #define ONIG_OPTION_NEGATE_SINGLELINE (ONIG_OPTION_FIND_NOT_EMPTY << 1)
345 #define ONIG_OPTION_DONT_CAPTURE_GROUP (ONIG_OPTION_NEGATE_SINGLELINE << 1)
346 #define ONIG_OPTION_CAPTURE_GROUP (ONIG_OPTION_DONT_CAPTURE_GROUP << 1)
347 /* options (search time) */
348 #define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1)
349 #define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1)
350 #define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1)
351 #define ONIG_OPTION_MAXBIT ONIG_OPTION_POSIX_REGION /* limit */
/* Set, clear and test helpers. ON/OFF assign to `options`, so it must be
   a modifiable lvalue; IS_OPTION_ON yields the masked bits (non-zero when
   set), not a normalised 0/1. */
353 #define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
354 #define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
355 #define ONIG_IS_OPTION_ON(options,option) ((options) & (option))
361 unsigned int behavior
;
362 OnigOptionType options
; /* default option */
363 OnigMetaCharTableType meta_char_table
;
366 ONIG_EXTERN
const OnigSyntaxType OnigSyntaxASIS
;
367 ONIG_EXTERN
const OnigSyntaxType OnigSyntaxPosixBasic
;
368 ONIG_EXTERN
const OnigSyntaxType OnigSyntaxPosixExtended
;
369 ONIG_EXTERN
const OnigSyntaxType OnigSyntaxEmacs
;
370 ONIG_EXTERN
const OnigSyntaxType OnigSyntaxGrep
;
371 ONIG_EXTERN
const OnigSyntaxType OnigSyntaxGnuRegex
;
372 ONIG_EXTERN
const OnigSyntaxType OnigSyntaxJava
;
373 ONIG_EXTERN
const OnigSyntaxType OnigSyntaxPerl
;
374 ONIG_EXTERN
const OnigSyntaxType OnigSyntaxPerl_NG
;
375 ONIG_EXTERN
const OnigSyntaxType OnigSyntaxRuby
;
377 /* predefined syntaxes (see regsyntax.c) */
378 #define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS)
379 #define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
380 #define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
381 #define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
382 #define ONIG_SYNTAX_GREP (&OnigSyntaxGrep)
383 #define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex)
384 #define ONIG_SYNTAX_JAVA (&OnigSyntaxJava)
385 #define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
386 #define ONIG_SYNTAX_PERL_NG (&OnigSyntaxPerl_NG)
387 #define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
390 ONIG_EXTERN
const OnigSyntaxType
* OnigDefaultSyntax
;
391 #define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax
/*
 * Syntax flag bits. There are three independent groups -- ONIG_SYN_OP_*,
 * ONIG_SYN_OP2_* and the behavior flags -- and each group restarts at
 * bit 0, so values from different groups must not be mixed in one word.
 * The onig_get/set_syntax_op, _op2 and _behavior functions declared later
 * in this header take one word per group.
 */
393 /* syntax (operators) */
394 #define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1U<<0)
395 #define ONIG_SYN_OP_DOT_ANYCHAR (1U<<1) /* . */
396 #define ONIG_SYN_OP_ASTERISK_ZERO_INF (1U<<2) /* * */
397 #define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1U<<3)
398 #define ONIG_SYN_OP_PLUS_ONE_INF (1U<<4) /* + */
399 #define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1U<<5)
400 #define ONIG_SYN_OP_QMARK_ZERO_ONE (1U<<6) /* ? */
401 #define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1U<<7)
402 #define ONIG_SYN_OP_BRACE_INTERVAL (1U<<8) /* {lower,upper} */
403 #define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1U<<9) /* \{lower,upper\} */
404 #define ONIG_SYN_OP_VBAR_ALT (1U<<10) /* | */
405 #define ONIG_SYN_OP_ESC_VBAR_ALT (1U<<11) /* \| */
406 #define ONIG_SYN_OP_LPAREN_SUBEXP (1U<<12) /* (...) */
407 #define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1U<<13) /* \(...\) */
408 #define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1U<<14) /* \A, \Z, \z */
409 #define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1U<<15) /* \G */
410 #define ONIG_SYN_OP_DECIMAL_BACKREF (1U<<16) /* \num */
411 #define ONIG_SYN_OP_BRACKET_CC (1U<<17) /* [...] */
412 #define ONIG_SYN_OP_ESC_W_WORD (1U<<18) /* \w, \W */
413 #define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1U<<19) /* \<, \> */
414 #define ONIG_SYN_OP_ESC_B_WORD_BOUND (1U<<20) /* \b, \B */
415 #define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1U<<21) /* \s, \S */
416 #define ONIG_SYN_OP_ESC_D_DIGIT (1U<<22) /* \d, \D */
417 #define ONIG_SYN_OP_LINE_ANCHOR (1U<<23) /* ^, $ */
418 #define ONIG_SYN_OP_POSIX_BRACKET (1U<<24) /* [:xxxx:] */
419 #define ONIG_SYN_OP_QMARK_NON_GREEDY (1U<<25) /* ??,*?,+?,{n,m}? */
420 #define ONIG_SYN_OP_ESC_CONTROL_CHARS (1U<<26) /* \n,\r,\t,\a ... */
421 #define ONIG_SYN_OP_ESC_C_CONTROL (1U<<27) /* \cx */
422 #define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */
423 #define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */
424 #define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */
/* Second operator group (OP2); bit numbering restarts at 0. */
426 #define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */
427 #define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) */
428 #define ONIG_SYN_OP2_OPTION_PERL (1U<<2) /* (?imsx),(?-imsx) */
429 #define ONIG_SYN_OP2_OPTION_RUBY (1U<<3) /* (?imx), (?-imx) */
430 #define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1U<<4) /* ?+,*+,++ */
431 #define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1U<<5) /* {n,m}+ */
432 #define ONIG_SYN_OP2_CCLASS_SET_OP (1U<<6) /* [...&&..[..]..] */
433 #define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1U<<7) /* (?<name>...) */
434 #define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1U<<8) /* \k<name> */
435 #define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1U<<9) /* \g<name>, \g<n> */
436 #define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1U<<10) /* (?@..),(?@<x>..) */
437 #define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1U<<11) /* \C-x */
438 #define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1U<<12) /* \M-x */
439 #define ONIG_SYN_OP2_ESC_V_VTAB (1U<<13) /* \v as VTAB */
440 #define ONIG_SYN_OP2_ESC_U_HEX4 (1U<<14) /* \uHHHH */
441 #define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1U<<15) /* \`, \' */
442 #define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1U<<16) /* \p{...}, \P{...} */
443 #define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */
/* Bit 18 is unused: its definition below is commented out. */
444 /* #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) */
445 #define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */
446 #define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */
/* Behavior group; bit numbering restarts at 0 (bit 31 is listed first). */
448 /* syntax (behavior) */
449 #define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */
450 #define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1U<<0) /* ?, *, +, {n,m} */
451 #define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1U<<1) /* error or ignore */
452 #define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1U<<2) /* ...)... */
453 #define ONIG_SYN_ALLOW_INVALID_INTERVAL (1U<<3) /* {??? */
454 #define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1U<<4) /* {,n} => {0,n} */
455 #define ONIG_SYN_STRICT_CHECK_BACKREF (1U<<5) /* /(\1)/,/\1()/ ..*/
456 #define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1U<<6) /* (?<=a|bc) */
457 #define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1U<<7) /* see doc/RE */
458 #define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8) /* (?<x>)(?<x>) */
459 #define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1U<<9) /* a{n}?=(?:a{n})? */
/* The remaining behavior flags share the same word, using bits 20-25. */
461 /* syntax (behavior) in char class [...] */
462 #define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1U<<20) /* [^...] */
463 #define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. */
464 #define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22)
465 #define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */
466 /* syntax (behavior) warning */
467 #define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */
468 #define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */
/* Selector values for the `what` argument of onig_set_meta_char(). */
470 /* meta character specifiers (onig_set_meta_char()) */
471 #define ONIG_META_CHAR_ESCAPE 0
472 #define ONIG_META_CHAR_ANYCHAR 1
473 #define ONIG_META_CHAR_ANYTIME 2
474 #define ONIG_META_CHAR_ZERO_OR_ONE_TIME 3
475 #define ONIG_META_CHAR_ONE_OR_MORE_TIME 4
476 #define ONIG_META_CHAR_ANYCHAR_ANYTIME 5
/* NOTE(review): presumably the code value that disables a meta character
   -- confirm against onig_set_meta_char(). */
478 #define ONIG_INEFFECTIVE_META_CHAR 0
/*
 * Status and error codes. ONIG_NORMAL is 0 and every other code is
 * negative. ONIG_IS_PATTERN_ERROR(ecode) is true for codes in the range
 * -999 .. -100 inclusive, which covers the -1xx, -2xx and -4xx groups
 * below but not the small negative codes or the thread code (-1001).
 */
481 #define ONIG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -1000)
483 #define ONIG_NORMAL 0
484 #define ONIG_MISMATCH -1
485 #define ONIG_NO_SUPPORT_CONFIG -2
488 #define ONIGERR_MEMORY -5
489 #define ONIGERR_TYPE_BUG -6
490 #define ONIGERR_PARSER_BUG -11
491 #define ONIGERR_STACK_BUG -12
492 #define ONIGERR_UNDEFINED_BYTECODE -13
493 #define ONIGERR_UNEXPECTED_BYTECODE -14
494 #define ONIGERR_MATCH_STACK_LIMIT_OVER -15
495 #define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21
496 #define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22
498 #define ONIGERR_INVALID_ARGUMENT -30
500 #define ONIGERR_END_PATTERN_AT_LEFT_BRACE -100
501 #define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101
502 #define ONIGERR_EMPTY_CHAR_CLASS -102
503 #define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103
504 #define ONIGERR_END_PATTERN_AT_ESCAPE -104
505 #define ONIGERR_END_PATTERN_AT_META -105
506 #define ONIGERR_END_PATTERN_AT_CONTROL -106
507 #define ONIGERR_META_CODE_SYNTAX -108
508 #define ONIGERR_CONTROL_CODE_SYNTAX -109
509 #define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110
510 #define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111
511 #define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112
512 #define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113
513 #define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114
514 #define ONIGERR_NESTED_REPEAT_OPERATOR -115
515 #define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS -116
516 #define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117
517 #define ONIGERR_END_PATTERN_IN_GROUP -118
518 #define ONIGERR_UNDEFINED_GROUP_OPTION -119
519 #define ONIGERR_INVALID_POSIX_BRACKET_TYPE -121
520 #define ONIGERR_INVALID_LOOK_BEHIND_PATTERN -122
521 #define ONIGERR_INVALID_REPEAT_RANGE_PATTERN -123
522 /* values error (syntax error) */
523 #define ONIGERR_TOO_BIG_NUMBER -200
524 #define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201
525 #define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202
526 #define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS -203
527 #define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204
528 #define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES -205
529 #define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING -206
530 #define ONIGERR_TOO_BIG_BACKREF_NUMBER -207
531 #define ONIGERR_INVALID_BACKREF -208
532 #define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED -209
533 #define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE -212
534 #define ONIGERR_EMPTY_GROUP_NAME -214
535 #define ONIGERR_INVALID_GROUP_NAME -215
536 #define ONIGERR_INVALID_CHAR_IN_GROUP_NAME -216
537 #define ONIGERR_UNDEFINED_NAME_REFERENCE -217
538 #define ONIGERR_UNDEFINED_GROUP_REFERENCE -218
539 #define ONIGERR_MULTIPLEX_DEFINED_NAME -219
540 #define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL -220
541 #define ONIGERR_NEVER_ENDING_RECURSION -221
542 #define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222
543 #define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223
/* The next two names share the value -400. */
544 #define ONIGERR_INVALID_CODE_POINT_VALUE -400
545 #define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
546 #define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
547 #define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402
548 #define ONIGERR_INVALID_COMBINATION_OF_OPTIONS -403
550 /* errors related to thread */
551 #define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001
554 /* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */
555 #define ONIG_MAX_CAPTURE_HISTORY_GROUP 31
/* True when i does not exceed the limit above and both (r)->list and
   (r)->list[i] are non-null. There is no lower-bound check on i, and both
   r and i are evaluated more than once.
   NOTE(review): no `list` member is visible on the region type in this
   header -- confirm that this macro is still usable. */
556 #define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \
557 ((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i])
559 typedef struct OnigCaptureTreeNodeStruct
{
560 int group
; /* group number */
565 struct OnigCaptureTreeNodeStruct
** childs
;
566 } OnigCaptureTreeNode
;
568 /* match result region type */
569 struct re_registers
{
575 OnigCaptureTreeNode
* history_root
; /* capture history tree root */
/* Bit flags for the `at` argument of onig_capture_tree_traverse();
   AT_BOTH is the bitwise OR of the other two (value 3). */
578 /* capture tree traverse */
579 #define ONIG_TRAVERSE_CALLBACK_AT_FIRST 1
580 #define ONIG_TRAVERSE_CALLBACK_AT_LAST 2
581 #define ONIG_TRAVERSE_CALLBACK_AT_BOTH \
582 ( ONIG_TRAVERSE_CALLBACK_AT_FIRST | ONIG_TRAVERSE_CALLBACK_AT_LAST )
/* NOTE(review): presumably the marker stored for a group that has no
   position -- confirm against the region-handling code. */
585 #define ONIG_REGION_NOTPOS -1
/* OnigRegion is an alias for struct re_registers. */
587 typedef struct re_registers OnigRegion
;
600 typedef void (*OnigWarnFunc
) P_((const char* s
));
601 extern void onig_null_warn
P_((const char* s
));
602 #define ONIG_NULL_WARN onig_null_warn
/* Size of the per-byte map[] table in the compiled pattern. */
604 #define ONIG_CHAR_TABLE_SIZE 256
/* Values for the `state` member of a compiled pattern. */
607 #define ONIG_STATE_NORMAL 0
608 #define ONIG_STATE_SEARCHING 1
609 #define ONIG_STATE_COMPILING -1
610 #define ONIG_STATE_MODIFY -2
/* Collapses any positive state value to ONIG_STATE_SEARCHING; zero and
   negative values are returned unchanged. `reg` is evaluated twice. */
612 #define ONIG_STATE(reg) \
613 ((reg)->state > 0 ? ONIG_STATE_SEARCHING : (reg)->state)
615 typedef struct re_pattern_buffer
{
616 /* common members of BBuf(bytes-buffer) */
617 unsigned char* p
; /* compiled pattern */
618 unsigned int used
; /* used space for p */
619 unsigned int alloc
; /* allocated space for p */
621 int state
; /* normal, searching, compiling */
622 int num_mem
; /* used memory(...) num counted from 1 */
623 int num_repeat
; /* OP_REPEAT/OP_REPEAT_NG id-counter */
624 int num_null_check
; /* OP_NULL_CHECK_START/END id counter */
625 int num_comb_exp_check
; /* combination explosion check */
626 int num_call
; /* number of subexp call */
627 unsigned int capture_history
; /* (?@...) flag (1-31) */
628 unsigned int bt_mem_start
; /* need backtrack flag */
629 unsigned int bt_mem_end
; /* need backtrack flag */
631 int repeat_range_alloc
;
632 OnigRepeatRange
* repeat_range
;
635 OnigOptionType options
;
636 const OnigSyntaxType
* syntax
;
637 OnigCaseFoldType case_fold_flag
;
640 /* optimization info (string search, char-map and anchors) */
641 int optimize
; /* optimize flag */
642 int threshold_len
; /* search str-length for apply optimize */
643 int anchor
; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
644 OnigDistance anchor_dmin
; /* (SEMI_)END_BUF anchor distance */
645 OnigDistance anchor_dmax
; /* (SEMI_)END_BUF anchor distance */
646 int sub_anchor
; /* start-anchor for exact or map */
647 unsigned char *exact
;
648 unsigned char *exact_end
;
649 unsigned char map
[ONIG_CHAR_TABLE_SIZE
]; /* used as BM skip or char-map */
650 int *int_map
; /* BM skip for exact_len > 255 */
651 int *int_map_backward
; /* BM skip for backward search */
652 OnigDistance dmin
; /* min-distance of exact or map */
653 OnigDistance dmax
; /* max-distance of exact or map */
655 /* regex_t link chain */
656 struct re_pattern_buffer
* chain
; /* escape compile-conflict */
659 typedef OnigRegexType
* OnigRegex
;
661 #ifndef ONIG_ESCAPE_REGEX_T_COLLISION
662 typedef OnigRegexType regex_t
;
668 OnigEncoding pattern_enc
;
669 OnigEncoding target_enc
;
670 OnigSyntaxType
* syntax
;
671 OnigOptionType option
;
672 OnigCaseFoldType case_fold_flag
;
675 /* Oniguruma Native API */
677 int onig_init
P_((void));
679 int onig_error_code_to_str
PV_((OnigUChar
* s
, int err_code
, ...));
681 void onig_set_warn_func
P_((OnigWarnFunc f
));
683 void onig_set_verb_warn_func
P_((OnigWarnFunc f
));
685 int onig_new
P_((OnigRegex
*, const OnigUChar
* pattern
, const OnigUChar
* pattern_end
, OnigOptionType option
, OnigEncoding enc
, const OnigSyntaxType
* syntax
, OnigErrorInfo
* einfo
));
687 int onig_new_deluxe
P_((OnigRegex
* reg
, const OnigUChar
* pattern
, const OnigUChar
* pattern_end
, OnigCompileInfo
* ci
, OnigErrorInfo
* einfo
));
689 void onig_free
P_((OnigRegex
));
691 int onig_recompile
P_((OnigRegex
, const OnigUChar
* pattern
, const OnigUChar
* pattern_end
, OnigOptionType option
, OnigEncoding enc
, OnigSyntaxType
* syntax
, OnigErrorInfo
* einfo
));
693 int onig_recompile_deluxe
P_((OnigRegex reg
, const OnigUChar
* pattern
, const OnigUChar
* pattern_end
, OnigCompileInfo
* ci
, OnigErrorInfo
* einfo
));
695 int onig_search
P_((OnigRegex
, const OnigUChar
* str
, const OnigUChar
* end
, const OnigUChar
* start
, const OnigUChar
* range
, OnigRegion
* region
, OnigOptionType option
));
697 int onig_match
P_((OnigRegex
, const OnigUChar
* str
, const OnigUChar
* end
, const OnigUChar
* at
, OnigRegion
* region
, OnigOptionType option
));
699 OnigRegion
* onig_region_new
P_((void));
701 void onig_region_init
P_((OnigRegion
* region
));
703 void onig_region_free
P_((OnigRegion
* region
, int free_self
));
705 void onig_region_copy
P_((OnigRegion
* to
, OnigRegion
* from
));
707 void onig_region_clear
P_((OnigRegion
* region
));
709 int onig_region_resize
P_((OnigRegion
* region
, int n
));
711 int onig_region_set
P_((OnigRegion
* region
, int at
, int beg
, int end
));
713 int onig_name_to_group_numbers
P_((OnigRegex reg
, const OnigUChar
* name
, const OnigUChar
* name_end
, int** nums
));
715 int onig_name_to_backref_number
P_((OnigRegex reg
, const OnigUChar
* name
, const OnigUChar
* name_end
, OnigRegion
*region
));
717 int onig_foreach_name
P_((OnigRegex reg
, int (*func
)(const OnigUChar
*, const OnigUChar
*,int,int*,OnigRegex
,void*), void* arg
));
719 int onig_number_of_names
P_((OnigRegex reg
));
721 int onig_number_of_captures
P_((OnigRegex reg
));
723 int onig_number_of_capture_histories
P_((OnigRegex reg
));
725 OnigCaptureTreeNode
* onig_get_capture_tree
P_((OnigRegion
* region
));
727 int onig_capture_tree_traverse
P_((OnigRegion
* region
, int at
, int(*callback_func
)(int,int,int,int,int,void*), void* arg
));
729 int onig_noname_group_capture_is_active
P_((OnigRegex reg
));
731 OnigEncoding onig_get_encoding
P_((OnigRegex reg
));
733 OnigOptionType onig_get_options
P_((OnigRegex reg
));
735 OnigCaseFoldType onig_get_case_fold_flag
P_((OnigRegex reg
));
737 const OnigSyntaxType
* onig_get_syntax
P_((OnigRegex reg
));
739 int onig_set_default_syntax
P_((const OnigSyntaxType
* syntax
));
741 void onig_copy_syntax
P_((OnigSyntaxType
* to
, const OnigSyntaxType
* from
));
743 unsigned int onig_get_syntax_op
P_((OnigSyntaxType
* syntax
));
745 unsigned int onig_get_syntax_op2
P_((OnigSyntaxType
* syntax
));
747 unsigned int onig_get_syntax_behavior
P_((OnigSyntaxType
* syntax
));
749 OnigOptionType onig_get_syntax_options
P_((OnigSyntaxType
* syntax
));
751 void onig_set_syntax_op
P_((OnigSyntaxType
* syntax
, unsigned int op
));
753 void onig_set_syntax_op2
P_((OnigSyntaxType
* syntax
, unsigned int op2
));
755 void onig_set_syntax_behavior
P_((OnigSyntaxType
* syntax
, unsigned int behavior
));
757 void onig_set_syntax_options
P_((OnigSyntaxType
* syntax
, OnigOptionType options
));
759 int onig_set_meta_char
P_((OnigSyntaxType
* syntax
, unsigned int what
, OnigCodePoint code
));
761 void onig_copy_encoding
P_((OnigEncoding to
, OnigEncoding from
));
763 OnigCaseFoldType onig_get_default_case_fold_flag
P_((void));
765 int onig_set_default_case_fold_flag
P_((OnigCaseFoldType case_fold_flag
));
767 unsigned int onig_get_match_stack_limit_size
P_((void));
769 int onig_set_match_stack_limit_size
P_((unsigned int size
));
771 int onig_end
P_((void));
773 const char* onig_version
P_((void));
775 const char* onig_copyright
P_((void));
779 { /* satisfy cc-mode */
784 #endif /* ONIGURUMA_H */