1 /**********************************************************************
2 regenc.c - Oniguruma (regular expression library)
3 **********************************************************************/
5 * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
/* File-scope default encoding, initialised to ONIG_ENCODING_INIT_DEFAULT.
 * It is returned by onigenc_get_default_encoding() and assigned by
 * onigenc_set_default_encoding(). */
32 OnigEncoding OnigEncDefaultCharEncoding
= ONIG_ENCODING_INIT_DEFAULT
;
/* Return the current value of OnigEncDefaultCharEncoding. */
41 onigenc_get_default_encoding(void)
43 return OnigEncDefaultCharEncoding
;
/* Store `enc` in OnigEncDefaultCharEncoding.
 * NOTE(review): the return statement is not visible in this extract. */
47 onigenc_set_default_encoding(OnigEncoding enc
)
49 OnigEncDefaultCharEncoding
= enc
;
/* Compute a byte length for the character starting at `p`, with `e` as the
 * end of the available bytes.
 *
 * The result of ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e) is inspected:
 *   - "char found":  return the length reported for that character;
 *   - "need more":   return (e - p) plus the reported number of missing bytes.
 * NOTE(review): the remaining fallback branch is not visible in this extract. */
54 onigenc_mbclen_approximate(const OnigUChar
* p
,const OnigUChar
* e
, struct OnigEncodingTypeST
* enc
)
56 int ret
= ONIGENC_PRECISE_MBC_ENC_LEN(enc
,p
,e
);
57 if (ONIGENC_MBCLEN_CHARFOUND_P(ret
))
58 return ONIGENC_MBCLEN_CHARFOUND_LEN(ret
);
59 else if (ONIGENC_MBCLEN_NEEDMORE_P(ret
))
60 return e
-p
+ONIGENC_MBCLEN_NEEDMORE_LEN(ret
);
/* Start from p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s) and advance p
 * by enclen(enc, p, s).
 * NOTE(review): the condition guarding the advance and the return statement
 * are not visible in this extract -- presumably the advance happens only when
 * p < s, so the result is a character head at or after s; confirm. */
65 onigenc_get_right_adjust_char_head(OnigEncoding enc
, const UChar
* start
, const UChar
* s
)
67 UChar
* p
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc
, start
, s
);
69 p
+= enclen(enc
, p
, s
);
/* Variant of the right-adjust helper that can also report a "previous"
 * position through `prev` (ignored when `prev` is NULL).
 *
 * Visible behaviour: p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); on one
 * path *prev receives p before p is advanced by enclen(enc, p, s); on another
 * path *prev is set to NULL (the previous head is not computed there).
 * NOTE(review): the branch conditions separating the two paths and the return
 * statement are not visible in this extract. */
75 onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc
,
76 const UChar
* start
, const UChar
* s
, const UChar
** prev
)
78 UChar
* p
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc
, start
, s
);
81 if (prev
) *prev
= (const UChar
* )p
;
82 p
+= enclen(enc
, p
, s
);
85 if (prev
) *prev
= (const UChar
* )NULL
; /* Sorry */
/* Return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1), i.e. the character
 * head found by left-adjusting the byte just before `s`.
 * NOTE(review): any guard for s <= start is not visible in this extract. */
91 onigenc_get_prev_char_head(OnigEncoding enc
, const UChar
* start
, const UChar
* s
)
96 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc
, start
, s
- 1);
/* Move `s` backwards up to `n` times: while s is non-NULL and n-- > 0, s is
 * replaced by ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1).
 * The loop also contains a NULL return.
 * NOTE(review): the condition guarding that NULL return and the final return
 * are not visible in this extract. */
100 onigenc_step_back(OnigEncoding enc
, const UChar
* start
, const UChar
* s
, int n
)
102 while (ONIG_IS_NOT_NULL(s
) && n
-- > 0) {
104 return (UChar
* )NULL
;
106 s
= ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc
, start
, s
- 1);
/* Advance from `p`: q starts at p and is moved forward by
 * ONIGENC_MBC_ENC_LEN(enc, q, end). Returns q when q <= end, otherwise NULL.
 * NOTE(review): the loop header that uses `n` is not visible in this extract;
 * presumably the advance is repeated n times -- confirm. */
112 onigenc_step(OnigEncoding enc
, const UChar
* p
, const UChar
* end
, int n
)
114 UChar
* q
= (UChar
* )p
;
116 q
+= ONIGENC_MBC_ENC_LEN(enc
, q
, end
);
118 return (q
<= end
? q
: NULL
);
/* Walk the byte range starting at `p`: q starts at p and is advanced by
 * ONIGENC_MBC_ENC_LEN(enc, q, end).
 * NOTE(review): the loop condition, the counter and the return statement are
 * not visible in this extract -- presumably this counts the characters in
 * [p, end); confirm. */
122 onigenc_strlen(OnigEncoding enc
, const UChar
* p
, const UChar
* end
)
125 UChar
* q
= (UChar
* )p
;
128 q
+= ONIGENC_MBC_ENC_LEN(enc
, q
, end
);
/* Length computation over a NUL-terminated string `s`.
 *
 * Visible behaviour: p starts at s and e = p + strlen(s) is used as the end
 * bound for ONIGENC_MBC_ENC_LEN. `len` is ONIGENC_MBC_MINLEN(enc); when it is
 * 1 the running value `n` is returned. Otherwise further bytes (through `q`)
 * are tested against '\0', breaking out when a non-zero byte is seen.
 * p advances by ONIGENC_MBC_ENC_LEN(enc, p, e).
 * NOTE(review): the loop structure, the declaration/update of `n` and `q`,
 * and the condition that triggers the terminator check are not visible in
 * this extract. */
135 onigenc_strlen_null(OnigEncoding enc
, const UChar
* s
)
138 UChar
* p
= (UChar
* )s
;
139 UChar
* e
= p
+ strlen((const char *)s
);
144 int len
= ONIGENC_MBC_MINLEN(enc
);
146 if (len
== 1) return n
;
149 if (*q
!= '\0') break;
153 if (len
== 1) return n
;
155 p
+= ONIGENC_MBC_ENC_LEN(enc
, p
, e
);
/* Byte-length computation over a NUL-terminated string `s`.
 *
 * Visible behaviour: start and p both begin at s; e = p + strlen(s) is the
 * end bound passed to ONIGENC_MBC_ENC_LEN. `len` is ONIGENC_MBC_MINLEN(enc);
 * when it is 1 the function returns (int)(p - start). Otherwise further
 * bytes (through `q`) are tested against '\0', breaking out on a non-zero
 * byte. p advances by ONIGENC_MBC_ENC_LEN(enc, p, e).
 * NOTE(review): the loop structure, the handling of `q`, and the condition
 * that triggers the terminator check are not visible in this extract. */
161 onigenc_str_bytelen_null(OnigEncoding enc
, const UChar
* s
)
163 UChar
* start
= (UChar
* )s
;
164 UChar
* p
= (UChar
* )s
;
165 UChar
* e
= p
+ strlen((const char *)s
);
170 int len
= ONIGENC_MBC_MINLEN(enc
);
172 if (len
== 1) return (int )(p
- start
);
175 if (*q
!= '\0') break;
179 if (len
== 1) return (int )(p
- start
);
181 p
+= ONIGENC_MBC_ENC_LEN(enc
, p
, e
);
/* Byte -> lower-case byte lookup table (32 rows of 8 entries = 256 values).
 * Entries for '\101'..'\132' ('A'..'Z') hold '\141'..'\172' ('a'..'z');
 * every other entry maps to itself, including all bytes >= '\200'. */
185 const UChar OnigEncAsciiToLowerCaseTable
[] = {
186 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
187 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
188 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
189 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
190 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
191 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
192 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
193 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
194 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
195 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
196 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
197 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
198 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
199 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
200 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
201 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
202 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
203 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
204 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
205 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
206 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
207 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
208 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
209 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
210 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
211 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
212 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
213 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
214 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
215 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
216 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
217 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
/* Byte -> upper-case byte lookup table (256 entries), compiled only when
 * USE_UPPER_CASE_TABLE is defined.
 * Entries for '\141'..'\172' ('a'..'z') hold '\101'..'\132' ('A'..'Z');
 * every other entry maps to itself, including all bytes >= '\200'. */
220 #ifdef USE_UPPER_CASE_TABLE
221 const UChar OnigEncAsciiToUpperCaseTable
[256] = {
222 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
223 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
224 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
225 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
226 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
227 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
228 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
229 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
230 '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
231 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
232 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
233 '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
234 '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
235 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
236 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
237 '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
238 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
239 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
240 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
241 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
242 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
243 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
244 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
245 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
246 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
247 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
248 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
249 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
250 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
251 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
252 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
253 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
/* Per-byte character-class bit masks (256 entries, 8 per row, indexed by
 * byte value). Rows for bytes 0x80..0xff are all 0x0000, so no class bit is
 * set for non-ASCII bytes.
 * NOTE(review): the meaning of each bit is defined outside this extract
 * (presumably by the ONIGENC_CTYPE_* constants) -- confirm before editing. */
257 const unsigned short OnigEncAsciiCtypeTable
[256] = {
258 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
259 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
260 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
261 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
262 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
263 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
264 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
265 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
266 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
267 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
268 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
269 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
270 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
271 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
272 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
273 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
274 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
275 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
276 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
277 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
278 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
279 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
280 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
281 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
282 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
283 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
284 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
285 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
286 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
287 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
288 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
289 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
/* Byte -> lower-case byte lookup table for ISO-8859-1 (256 entries).
 * 'A'..'Z' ('\101'..'\132') map to 'a'..'z'. In the upper half,
 * '\300'..'\336' map to '\340'..'\376', except '\327' which maps to itself;
 * '\337' also maps to itself. All remaining entries are identity. */
292 const UChar OnigEncISO_8859_1_ToLowerCaseTable
[256] = {
293 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
294 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
295 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
296 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
297 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
298 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
299 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
300 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
301 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
302 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
303 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
304 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
305 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
306 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
307 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
308 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
309 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
310 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
311 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
312 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
313 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
314 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
315 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
316 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
317 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
318 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
319 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
320 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
321 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
322 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
323 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
324 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
/* Byte -> upper-case byte lookup table for ISO-8859-1 (256 entries), compiled
 * only when USE_UPPER_CASE_TABLE is defined.
 * 'a'..'z' ('\141'..'\172') map to 'A'..'Z'. In the upper half,
 * '\340'..'\376' map to '\300'..'\336', except '\367' which maps to itself;
 * '\377' also maps to itself. All remaining entries are identity. */
327 #ifdef USE_UPPER_CASE_TABLE
328 const UChar OnigEncISO_8859_1_ToUpperCaseTable
[256] = {
329 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
330 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
331 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
332 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
333 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
334 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
335 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
336 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
337 '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
338 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
339 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
340 '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
341 '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
342 '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
343 '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
344 '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
345 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
346 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
347 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
348 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
349 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
350 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
351 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
352 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
353 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
354 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
355 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
356 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
357 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
358 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
359 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
360 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
/* Takes a case-conversion table pointer that is marked ARG_UNUSED, so the
 * argument is not consulted.
 * NOTE(review): the body and return value are not visible in this extract. */
365 onigenc_set_default_caseconv_table(const UChar
* table ARG_UNUSED
)
/* Thin wrapper: return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s). */
372 onigenc_get_left_adjust_char_head(OnigEncoding enc
, const UChar
* start
, const UChar
* s
)
374 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc
, start
, s
);
/* Table of OnigPairCaseFoldCodes entries; each entry's .from and .to fields
 * are read by onigenc_ascii_apply_all_case_fold().
 * NOTE(review): the initialiser entries are not visible in this extract. */
377 const OnigPairCaseFoldCodes OnigAsciiLowerMap
[] = {
/* Report every pair in OnigAsciiLowerMap to the callback `f`, in both
 * directions.
 *
 * For each index i below sizeof(OnigAsciiLowerMap)/sizeof(entry):
 *   f(from, &to,   1, arg)
 *   f(to,   &from, 1, arg)
 * A non-zero result from `f` is returned immediately.
 * `flag` and `enc` are marked ARG_UNUSED.
 * NOTE(review): the loop header start and the final return are not visible
 * in this extract. */
407 onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED
,
408 OnigApplyAllCaseFoldFunc f
, void* arg
,
409 OnigEncoding enc ARG_UNUSED
)
415 i
< (int )(sizeof(OnigAsciiLowerMap
)/sizeof(OnigPairCaseFoldCodes
));
417 code
= OnigAsciiLowerMap
[i
].to
;
418 r
= (*f
)(OnigAsciiLowerMap
[i
].from
, &code
, 1, arg
);
419 if (r
!= 0) return r
;
421 code
= OnigAsciiLowerMap
[i
].from
;
422 r
= (*f
)(OnigAsciiLowerMap
[i
].to
, &code
, 1, arg
);
423 if (r
!= 0) return r
;
/* Fill items[0] with the opposite-case form of the ASCII letter at *p.
 *
 *   0x41..0x5a ('A'..'Z'): byte_len = 1, code_len = 1, code[0] = *p + 0x20
 *   0x61..0x7a ('a'..'z'): byte_len = 1, code_len = 1, code[0] = *p - 0x20
 * `flag`, `end` and `enc` are marked ARG_UNUSED.
 * NOTE(review): the return values (presumably the number of items filled)
 * and the non-letter branch are not visible in this extract. */
430 onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED
,
431 const OnigUChar
* p
, const OnigUChar
* end ARG_UNUSED
, OnigCaseFoldCodeItem items
[],
432 OnigEncoding enc ARG_UNUSED
)
434 if (0x41 <= *p
&& *p
<= 0x5a) {
435 items
[0].byte_len
= 1;
436 items
[0].code_len
= 1;
437 items
[0].code
[0] = (OnigCodePoint
)(*p
+ 0x20);
440 else if (0x61 <= *p
&& *p
<= 0x7a) {
441 items
[0].byte_len
= 1;
442 items
[0].code_len
= 1;
443 items
[0].code
[0] = (OnigCodePoint
)(*p
- 0x20);
/* Report the single expansion 0xdf -> { 0x73, 0x73 } (ASCII "ss") to the
 * callback `f` and return the callback's result. `flag` is ARG_UNUSED. */
451 ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED
,
452 OnigApplyAllCaseFoldFunc f
, void* arg
)
454 OnigCodePoint ss
[] = { 0x73, 0x73 };
456 return (*f
)((OnigCodePoint
)0xdf, ss
, 2, arg
);
/* Report all case-fold pairs for an encoding described by `map`.
 *
 * Steps visible here:
 *   1. onigenc_ascii_apply_all_case_fold(flag, f, arg, 0) handles the ASCII
 *      pairs; a non-zero result is returned immediately.
 *   2. For each of the `map_size` entries, `f` is called once with map[i].from
 *      and once with map[i].to, each time with &code and length 1; a non-zero
 *      result is returned immediately.
 *   3. When ess_tsett_flag != 0, the result of ss_apply_all_case_fold() is
 *      returned.
 * NOTE(review): the assignments to `code` before each callback and the final
 * return are not visible in this extract. */
460 onigenc_apply_all_case_fold_with_map(int map_size
,
461 const OnigPairCaseFoldCodes map
[],
462 int ess_tsett_flag
, OnigCaseFoldType flag
,
463 OnigApplyAllCaseFoldFunc f
, void* arg
)
468 r
= onigenc_ascii_apply_all_case_fold(flag
, f
, arg
, 0);
469 if (r
!= 0) return r
;
471 for (i
= 0; i
< map_size
; i
++) {
473 r
= (*f
)(map
[i
].from
, &code
, 1, arg
);
474 if (r
!= 0) return r
;
477 r
= (*f
)(map
[i
].to
, &code
, 1, arg
);
478 if (r
!= 0) return r
;
481 if (ess_tsett_flag
!= 0)
482 return ss_apply_all_case_fold(flag
, f
, arg
);
/* Fill `items` with the case-fold alternatives for the text at `p`, using the
 * ASCII rules plus an encoding-specific `map`.
 *
 * Branches visible here:
 *   - *p in 0x41..0x5a: items[0] = single byte, code *p + 0x20. If *p is 0x53
 *     ('S'), ess_tsett_flag is set, a second byte exists (end > p + 1) and it
 *     is 0x53 or 0x73, items[1] = two bytes folding to the single code 0xdf.
 *   - *p in 0x61..0x7a: items[0] = single byte, code *p - 0x20, with the same
 *     two-byte 0xdf alternative when *p is 0x73 ('s') followed by 's' or 'S'.
 *   - *p == 0xdf with ess_tsett_flag set: four items, each one byte wide with
 *     a two-code expansion: "ss", "SS", "sS", "Ss".
 *   - otherwise: scan `map`; a match on .from yields .to and a match on .to
 *     yields .from, as a single one-byte item.
 * `flag` is marked ARG_UNUSED.
 * NOTE(review): the return values (presumably the number of items written)
 * are not visible in this extract. */
488 onigenc_get_case_fold_codes_by_str_with_map(int map_size
,
489 const OnigPairCaseFoldCodes map
[],
490 int ess_tsett_flag
, OnigCaseFoldType flag ARG_UNUSED
,
491 const OnigUChar
* p
, const OnigUChar
* end
, OnigCaseFoldCodeItem items
[])
493 if (0x41 <= *p
&& *p
<= 0x5a) {
494 items
[0].byte_len
= 1;
495 items
[0].code_len
= 1;
496 items
[0].code
[0] = (OnigCodePoint
)(*p
+ 0x20);
497 if (*p
== 0x53 && ess_tsett_flag
!= 0 && end
> p
+ 1
498 && (*(p
+1) == 0x53 || *(p
+1) == 0x73)) {
500 items
[1].byte_len
= 2;
501 items
[1].code_len
= 1;
502 items
[1].code
[0] = (OnigCodePoint
)0xdf;
508 else if (0x61 <= *p
&& *p
<= 0x7a) {
509 items
[0].byte_len
= 1;
510 items
[0].code_len
= 1;
511 items
[0].code
[0] = (OnigCodePoint
)(*p
- 0x20);
512 if (*p
== 0x73 && ess_tsett_flag
!= 0 && end
> p
+ 1
513 && (*(p
+1) == 0x73 || *(p
+1) == 0x53)) {
515 items
[1].byte_len
= 2;
516 items
[1].code_len
= 1;
517 items
[1].code
[0] = (OnigCodePoint
)0xdf;
523 else if (*p
== 0xdf && ess_tsett_flag
!= 0) {
524 items
[0].byte_len
= 1;
525 items
[0].code_len
= 2;
526 items
[0].code
[0] = (OnigCodePoint
)'s';
527 items
[0].code
[1] = (OnigCodePoint
)'s';
529 items
[1].byte_len
= 1;
530 items
[1].code_len
= 2;
531 items
[1].code
[0] = (OnigCodePoint
)'S';
532 items
[1].code
[1] = (OnigCodePoint
)'S';
534 items
[2].byte_len
= 1;
535 items
[2].code_len
= 2;
536 items
[2].code
[0] = (OnigCodePoint
)'s';
537 items
[2].code
[1] = (OnigCodePoint
)'S';
539 items
[3].byte_len
= 1;
540 items
[3].code_len
= 2;
541 items
[3].code
[0] = (OnigCodePoint
)'S';
542 items
[3].code
[1] = (OnigCodePoint
)'s';
549 for (i
= 0; i
< map_size
; i
++) {
550 if (*p
== map
[i
].from
) {
551 items
[0].byte_len
= 1;
552 items
[0].code_len
= 1;
553 items
[0].code
[0] = map
[i
].to
;
556 else if (*p
== map
[i
].to
) {
557 items
[0].byte_len
= 1;
558 items
[0].code_len
= 1;
559 items
[0].code
[0] = map
[i
].from
;
/* Stub for encodings without ctype code-range support: returns
 * ONIG_NO_SUPPORT_CONFIG.
 * NOTE(review): the tail of the parameter list is not visible in this
 * extract. */
570 onigenc_not_support_get_ctype_code_range(OnigCtype ctype
,
571 OnigCodePoint
* sb_out
, const OnigCodePoint
* ranges
[],
574 return ONIG_NO_SUPPORT_CONFIG
;
/* Newline test: returns 1 when the byte at `p` is 0x0a. `enc` is ARG_UNUSED.
 * NOTE(review): the bounds check against `end` and the non-matching return
 * are not visible in this extract. */
578 onigenc_is_mbc_newline_0x0a(const UChar
* p
, const UChar
* end
, OnigEncoding enc ARG_UNUSED
)
581 if (*p
== 0x0a) return 1;
586 /* for single byte encodings */
/* Case-fold one single-byte character: writes
 * ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p) to *lower and returns 1, the byte
 * length of the folded character. `flag` and `enc` are ARG_UNUSED.
 * NOTE(review): the statement that advances *p is not visible in this
 * extract. */
588 onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED
, const UChar
** p
,
589 const UChar
*end
, UChar
* lower
, OnigEncoding enc ARG_UNUSED
)
591 *lower
= ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p
);
594 return 1; /* return byte length of converted char to lower */
/* Return ONIGENC_IS_ASCII_CODE_CASE_AMBIG() for the byte at *pp.
 * `flag` and `end` are ARG_UNUSED.
 * NOTE(review): any update of *pp is not visible in this extract. */
599 onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag ARG_UNUSED
,
600 const UChar
** pp
, const UChar
* end ARG_UNUSED
)
602 const UChar
* p
= *pp
;
605 return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p
);
/* Character-length callback for single-byte encodings. All three parameters
 * are marked ARG_UNUSED, so the result does not depend on the input.
 * NOTE(review): the body is not visible in this extract; presumably it
 * returns a constant length of 1 -- confirm. */
610 onigenc_single_byte_mbc_enc_len(const UChar
* p ARG_UNUSED
, const UChar
* e ARG_UNUSED
,
611 OnigEncoding enc ARG_UNUSED
)
/* Return the byte at `p` as an OnigCodePoint. `end` and `enc` are
 * ARG_UNUSED. */
617 onigenc_single_byte_mbc_to_code(const UChar
* p
, const UChar
* end ARG_UNUSED
,
618 OnigEncoding enc ARG_UNUSED
)
620 return (OnigCodePoint
)(*p
);
/* Encoded-length callback for single-byte encodings. Both parameters are
 * marked ARG_UNUSED.
 * NOTE(review): the body is not visible in this extract; presumably it
 * returns a constant length of 1 -- confirm. */
624 onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED
, OnigEncoding enc ARG_UNUSED
)
/* Encode `code` as one byte: *buf receives (code & 0xff).
 * An rb_raise(rb_eRangeError, ...) call reports codes that are out of char
 * range. `enc` is ARG_UNUSED.
 * NOTE(review): the condition guarding rb_raise and the return value are not
 * visible in this extract. */
630 onigenc_single_byte_code_to_mbc(OnigCodePoint code
, UChar
*buf
, OnigEncoding enc ARG_UNUSED
)
633 rb_raise(rb_eRangeError
, "%"PRIdVALUE
" out of char range", code
);
634 *buf
= (UChar
)(code
& 0xff);
/* Left-adjust callback for single-byte encodings. `start` and `enc` are
 * marked ARG_UNUSED; only `s` can influence the result.
 * NOTE(review): the body is not visible in this extract; presumably it
 * returns `s` unchanged -- confirm. */
639 onigenc_single_byte_left_adjust_char_head(const UChar
* start ARG_UNUSED
, const UChar
* s
,
640 OnigEncoding enc ARG_UNUSED
)
/* Reverse-match predicate whose parameters are all marked ARG_UNUSED.
 * NOTE(review): the body is not visible in this extract; going by the name
 * it presumably returns a constant true value -- confirm. */
646 onigenc_always_true_is_allowed_reverse_match(const UChar
* s ARG_UNUSED
, const UChar
* end ARG_UNUSED
,
647 OnigEncoding enc ARG_UNUSED
)
/* Reverse-match predicate whose parameters are all marked ARG_UNUSED.
 * NOTE(review): the body is not visible in this extract; going by the name
 * it presumably returns a constant false value -- confirm. */
653 onigenc_always_false_is_allowed_reverse_match(const UChar
* s ARG_UNUSED
, const UChar
* end ARG_UNUSED
,
654 OnigEncoding enc ARG_UNUSED
)
/* Return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype). `enc` is ARG_UNUSED.
 * NOTE(review): a range guard on `code` and the alternative return are not
 * visible in this extract. */
660 onigenc_ascii_is_code_ctype(OnigCodePoint code
, unsigned int ctype
,
661 OnigEncoding enc ARG_UNUSED
)
664 return ONIGENC_IS_ASCII_CODE_CTYPE(code
, ctype
);
/* Decode the multibyte character at `p` into a code point.
 *
 * len = enclen(enc, p, end); n starts as the first byte (p is advanced past
 * it). A one-byte character returns n directly; otherwise a loop runs for
 * i = 1 .. len-1 over the remaining bytes.
 * NOTE(review): the loop body (how each following byte is folded into n) and
 * the final return are not visible in this extract. */
670 onigenc_mbn_mbc_to_code(OnigEncoding enc
, const UChar
* p
, const UChar
* end
)
675 len
= enclen(enc
, p
, end
);
676 n
= (OnigCodePoint
)(*p
++);
677 if (len
== 1) return n
;
679 for (i
= 1; i
< len
; i
++) {
/* Case-fold one character of a multibyte encoding.
 *
 * If the character at *pp is ASCII (ONIGENC_IS_MBC_ASCII), *lower receives
 * its ASCII lower-case form. Otherwise len = enclen(enc, p, end), a loop runs
 * over the len bytes, and len is returned as the byte length of the result.
 * `flag` is ARG_UNUSED. `end` is marked ARG_UNUSED but is passed to enclen.
 * NOTE(review): the rest of the parameter list, the loop body, the ASCII
 * branch's return and the update of *pp are not visible in this extract. */
688 onigenc_mbn_mbc_case_fold(OnigEncoding enc
, OnigCaseFoldType flag ARG_UNUSED
,
689 const UChar
** pp
, const UChar
* end ARG_UNUSED
,
693 const UChar
*p
= *pp
;
695 if (ONIGENC_IS_MBC_ASCII(p
)) {
696 *lower
= ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p
);
703 len
= enclen(enc
, p
, end
);
704 for (i
= 0; i
< len
; i
++) {
708 return len
; /* return byte length of converted to lower char */
/* Case-ambiguity test for one character of a multibyte encoding.
 *
 * For an ASCII character the result is ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
 * otherwise *pp is advanced by the encoded length of the character.
 *
 * NOTE(review): enclen() is called here with two arguments (enc, p), while
 * the other calls in this file pass three (enc, p, end) -- this looks stale
 * and may only build if the block is compiled out; confirm.
 * NOTE(review): `pp` is marked ARG_UNUSED although it is dereferenced and
 * updated here.
 * NOTE(review): the update of *pp in the ASCII branch and the non-ASCII
 * return value are not visible in this extract. */
714 onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc
, OnigCaseFoldType flag
,
715 const UChar
** pp ARG_UNUSED
, const UChar
* end ARG_UNUSED
)
717 const UChar
* p
= *pp
;
719 if (ONIGENC_IS_MBC_ASCII(p
)) {
721 return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p
);
724 (*pp
) += enclen(enc
, p
);
/* Encoded length for a code point in an encoding of at most two bytes:
 * returns 2 when any bit of the second-lowest byte (0xff00) is set.
 * `enc` is ARG_UNUSED.
 * NOTE(review): the fallback return (presumably 1) is not visible in this
 * extract. */
730 onigenc_mb2_code_to_mbclen(OnigCodePoint code
, OnigEncoding enc ARG_UNUSED
)
732 if ((code
& 0xff00) != 0) return 2;
/* Encoded length for a code point in an encoding of at most four bytes,
 * chosen by the highest non-zero byte of `code`:
 *   0xff000000 set -> 4, else 0xff0000 set -> 3, else 0xff00 set -> 2.
 * `enc` is ARG_UNUSED.
 * NOTE(review): the fallback return (presumably 1) is not visible in this
 * extract. */
737 onigenc_mb4_code_to_mbclen(OnigCodePoint code
, OnigEncoding enc ARG_UNUSED
)
739 if ((code
& 0xff000000) != 0) return 4;
740 else if ((code
& 0xff0000) != 0) return 3;
741 else if ((code
& 0xff00) != 0) return 2;
/* Encode `code` into `buf` as one or two bytes, most significant byte first.
 *
 * The high byte (bits 8..15) is written only when it is non-zero; the low
 * byte is always written. The result is then validated: if enclen() of the
 * written bytes differs from the number of bytes written, the function
 * returns ONIGERR_INVALID_CODE_POINT_VALUE.
 * NOTE(review): the declaration of `p` and the success return (presumably
 * the byte count) are not visible in this extract. */
746 onigenc_mb2_code_to_mbc(OnigEncoding enc
, OnigCodePoint code
, UChar
*buf
)
750 if ((code
& 0xff00) != 0) {
751 *p
++ = (UChar
)((code
>> 8) & 0xff);
753 *p
++ = (UChar
)(code
& 0xff);
756 if (enclen(enc
, buf
, p
) != (p
- buf
))
757 return ONIGERR_INVALID_CODE_POINT_VALUE
;
/* Encode `code` into `buf` as one to four bytes, most significant byte first.
 *
 * Leading zero bytes are skipped. The `p != buf` tests make sure that once a
 * higher byte has been written, every lower byte is written too, even if it
 * is zero. The low byte is always written. The result is validated: if
 * enclen() of the written bytes differs from the number of bytes written,
 * the function returns ONIGERR_INVALID_CODE_POINT_VALUE.
 * NOTE(review): the declaration of `p` and the success return (presumably
 * the byte count) are not visible in this extract. */
763 onigenc_mb4_code_to_mbc(OnigEncoding enc
, OnigCodePoint code
, UChar
*buf
)
767 if ((code
& 0xff000000) != 0) {
768 *p
++ = (UChar
)((code
>> 24) & 0xff);
770 if ((code
& 0xff0000) != 0 || p
!= buf
) {
771 *p
++ = (UChar
)((code
>> 16) & 0xff);
773 if ((code
& 0xff00) != 0 || p
!= buf
) {
774 *p
++ = (UChar
)((code
>> 8) & 0xff);
776 *p
++ = (UChar
)(code
& 0xff);
779 if (enclen(enc
, buf
, p
) != (p
- buf
))
780 return ONIGERR_INVALID_CODE_POINT_VALUE
;
/* Map a property name in [p, end) to an ONIGENC_CTYPE_* value using the
 * fixed PBS table below (Alnum, Alpha, Blank, Cntrl, Digit, Graph, Lower,
 * Print, Punct, Space, Upper, XDigit, ASCII, Word).
 *
 * The character length of the input is computed with onigenc_strlen(); an
 * entry matches when that length equals the entry's len and
 * onigenc_with_ascii_strncmp() reports equality. The table is terminated by
 * an entry whose name is NULL. When no entry matches, the function returns
 * ONIGERR_INVALID_CHAR_PROPERTY_NAME.
 * NOTE(review): the return statement taken on a match (presumably the
 * entry's ctype) is not visible in this extract. */
786 onigenc_minimum_property_name_to_ctype(OnigEncoding enc
, UChar
* p
, UChar
* end
)
788 static const PosixBracketEntryType PBS
[] = {
789 { (UChar
* )"Alnum", ONIGENC_CTYPE_ALNUM
, 5 },
790 { (UChar
* )"Alpha", ONIGENC_CTYPE_ALPHA
, 5 },
791 { (UChar
* )"Blank", ONIGENC_CTYPE_BLANK
, 5 },
792 { (UChar
* )"Cntrl", ONIGENC_CTYPE_CNTRL
, 5 },
793 { (UChar
* )"Digit", ONIGENC_CTYPE_DIGIT
, 5 },
794 { (UChar
* )"Graph", ONIGENC_CTYPE_GRAPH
, 5 },
795 { (UChar
* )"Lower", ONIGENC_CTYPE_LOWER
, 5 },
796 { (UChar
* )"Print", ONIGENC_CTYPE_PRINT
, 5 },
797 { (UChar
* )"Punct", ONIGENC_CTYPE_PUNCT
, 5 },
798 { (UChar
* )"Space", ONIGENC_CTYPE_SPACE
, 5 },
799 { (UChar
* )"Upper", ONIGENC_CTYPE_UPPER
, 5 },
800 { (UChar
* )"XDigit", ONIGENC_CTYPE_XDIGIT
, 6 },
801 { (UChar
* )"ASCII", ONIGENC_CTYPE_ASCII
, 5 },
802 { (UChar
* )"Word", ONIGENC_CTYPE_WORD
, 4 },
803 { (UChar
* )NULL
, -1, 0 }
806 const PosixBracketEntryType
*pb
;
809 len
= onigenc_strlen(enc
, p
, end
);
810 for (pb
= PBS
; IS_NOT_NULL(pb
->name
); pb
++) {
811 if (len
== pb
->len
&&
812 onigenc_with_ascii_strncmp(enc
, p
, end
, pb
->name
, pb
->len
) == 0)
816 return ONIGERR_INVALID_CHAR_PROPERTY_NAME
;
/* Character-class test for a code point in a two-byte encoding.
 *
 * One path returns ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype). Another path,
 * taken when CTYPE_IS_WORD_GRAPH_PRINT(ctype) holds, returns TRUE when the
 * code point encodes to more than one byte and FALSE otherwise.
 * NOTE(review): the rest of the parameter list, the condition that selects
 * the ASCII path and the final fallback return are not visible in this
 * extract. */
820 onigenc_mb2_is_code_ctype(OnigEncoding enc
, OnigCodePoint code
,
824 return ONIGENC_IS_ASCII_CODE_CTYPE(code
, ctype
);
826 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype
)) {
827 return (ONIGENC_CODE_TO_MBCLEN(enc
, code
) > 1 ? TRUE
: FALSE
);
/* Character-class test for a code point in a four-byte encoding.
 *
 * One path returns ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype). Another path,
 * taken when CTYPE_IS_WORD_GRAPH_PRINT(ctype) holds, returns TRUE when the
 * code point encodes to more than one byte and FALSE otherwise.
 * NOTE(review): the rest of the parameter list, the condition that selects
 * the ASCII path and the final fallback return are not visible in this
 * extract. */
835 onigenc_mb4_is_code_ctype(OnigEncoding enc
, OnigCodePoint code
,
839 return ONIGENC_IS_ASCII_CODE_CTYPE(code
, ctype
);
841 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype
)) {
842 return (ONIGENC_CODE_TO_MBCLEN(enc
, code
) > 1 ? TRUE
: FALSE
);
/* Compare encoded text in [p, end) against the ASCII string `sascii`, for up
 * to `n` characters.
 *
 * When the encoded input is exhausted (p >= end) the current ASCII byte is
 * returned as an int. Otherwise the next code point is decoded with
 * ONIGENC_MBC_TO_CODE and p advances by enclen(enc, p, end).
 * NOTE(review): the loop header, the comparison of `c` against *sascii and
 * the final return are not visible in this extract. */
850 onigenc_with_ascii_strncmp(OnigEncoding enc
, const UChar
* p
, const UChar
* end
,
851 const UChar
* sascii
/* ascii */, int n
)
856 if (p
>= end
) return (int )(*sascii
);
858 c
= (int )ONIGENC_MBC_TO_CODE(enc
, p
, end
);
863 p
+= enclen(enc
, p
, end
);
868 /* Property management */
/* Allocate or grow the property list to hold `new_size` pointers.
 *
 * The byte size is sizeof(OnigCodePoint*) * new_size. The list is obtained
 * with xmalloc() on one path and xrealloc() of the existing list on the
 * other; a NULL result returns ONIGERR_MEMORY.
 * NOTE(review): the condition choosing between xmalloc and xrealloc, the
 * write-back to *plist / *psize and the success return are not visible in
 * this extract. */
870 resize_property_list(int new_size
, const OnigCodePoint
*** plist
, int* psize
)
873 const OnigCodePoint
**list
= *plist
;
875 size
= sizeof(OnigCodePoint
*) * new_size
;
877 list
= (const OnigCodePoint
** )xmalloc(size
);
880 list
= (const OnigCodePoint
** )xrealloc((void* )list
, size
);
883 if (IS_NULL(list
)) return ONIGERR_MEMORY
;
/* Register a named property: append `prop` to the property list and record
 * `name` in the name table.
 *
 * Steps visible here:
 *   1. If the list is full (*psize <= *pnum) it is grown via
 *      resize_property_list(): PROP_INIT_SIZE (16) entries the first time,
 *      double the current size afterwards. A non-zero result is returned.
 *   2. `prop` is stored at index *pnum.
 *   3. The name table is created on first use with
 *      onig_st_init_strend_table_with_size(PROP_INIT_SIZE); a NULL table
 *      returns ONIGERR_MEMORY.
 *   4. `name` (up to its NUL terminator) is inserted with the value
 *      *pnum + ONIGENC_MAX_STD_CTYPE.
 * NOTE(review): the rest of the parameter list (including `psize`), the
 * increment of *pnum and the final return are not visible in this extract. */
892 onigenc_property_list_add_property(UChar
* name
, const OnigCodePoint
* prop
,
893 hash_table_type
**table
, const OnigCodePoint
*** plist
, int *pnum
,
896 #define PROP_INIT_SIZE 16
900 if (*psize
<= *pnum
) {
901 int new_size
= (*psize
== 0 ? PROP_INIT_SIZE
: *psize
* 2);
902 r
= resize_property_list(new_size
, plist
, psize
);
903 if (r
!= 0) return r
;
906 (*plist
)[*pnum
] = prop
;
908 if (ONIG_IS_NULL(*table
)) {
909 *table
= onig_st_init_strend_table_with_size(PROP_INIT_SIZE
);
910 if (ONIG_IS_NULL(*table
)) return ONIGERR_MEMORY
;
914 onig_st_insert_strend(*table
, name
, name
+ strlen((char* )name
),
915 (hash_data_type
)(*pnum
+ ONIGENC_MAX_STD_CTYPE
));
920 onigenc_property_list_init(int (*f
)(void))