1 /**********************************************************************
2 euc_kr.c - Oniguruma (regular expression library)
3 **********************************************************************/
5 * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
/*
 * Encoded length of an EUC-KR character, indexed by its first byte.
 *
 * Bytes 0x00-0xa0 and 0xff map to 1; bytes 0xa1-0xfe map to 2.
 * euckr_mbc_enc_len() uses EncLen_EUCKR[firstbyte] - 1 as the number of
 * bytes still needed when the input ends right after the first byte.
 */
static const int EncLen_EUCKR[] = {
  /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xa0 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xb0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xe0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xf0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1
};
/*
 * States of the EUC-KR byte validator.
 *
 * Negative values are terminal: ACCEPT ends a well-formed character,
 * FAILURE marks an invalid byte.  Non-negative values (S0, S1) are row
 * indices into trans[].
 */
typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1 } state_t;

/* Short cell names, local to the table below. */
#define A ACCEPT
#define F FAILURE

/*
 * Transition table: trans[state][byte] is the state reached from `state`
 * after reading `byte`.
 *
 *   S0 (first byte):  0x00-0x7f -> ACCEPT, 0xa1-0xfe -> S1,
 *                     0x80-0xa0 and 0xff -> FAILURE.
 *   S1 (second byte): 0xa1-0xfe -> ACCEPT, everything else -> FAILURE.
 */
static const signed char trans[][0x100] = {
  { /* S0   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
    /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* a */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F
  },
  { /* S1   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
    /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* a */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F
  }
};

#undef A
#undef F
/*
 * euckr_mbc_enc_len: classify the character that starts at p by walking the
 * `trans` state table, and report the outcome through the
 * ONIGENC_CONSTRUCT_MBCLEN_* macros.
 *
 *   p   - current position in the input
 *   e   - compared against p to detect that no further byte is available
 *   enc - marked ARG_UNUSED
 *
 * NOTE(review): the return-type line, the braces, the declaration of
 * `firstbyte`, the `#define RETURN(n)` header and the end of the function
 * are not present in this text; the leading integers look like line-number
 * residue. The notes below cover only the visible statements.
 */
96 euckr_mbc_enc_len(const UChar
* p
, const UChar
* e
, OnigEncoding enc ARG_UNUSED
)
/* State reached from row 0 of `trans` on the first byte. */
99 state_t s
= trans
[0][firstbyte
];
/* Line-continued expression, presumably the body of RETURN(n) (its #define
 * line is not visible): CHARFOUND(n) when s is ACCEPT, INVALID otherwise. */
101 return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) : \
102 ONIGENC_CONSTRUCT_MBCLEN_INVALID()
/* Negative states (ACCEPT, FAILURE) are terminal after a single byte. */
103 if (s
< 0) RETURN(1);
/* p has reached e: report how many more bytes the first byte calls for. */
104 if (p
== e
) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_EUCKR
[firstbyte
]-1);
/*
 * euckr_mbc_to_code: forwards (enc, p, end) to onigenc_mbn_mbc_to_code()
 * and returns its result unchanged.
 *
 * NOTE(review): the return-type line and the braces are not present in this
 * text; the leading integers look like line-number residue.
 */
111 euckr_mbc_to_code(const UChar
* p
, const UChar
* end
, OnigEncoding enc
)
113 return onigenc_mbn_mbc_to_code(enc
, p
, end
);
/*
 * euckr_code_to_mbc: forwards (enc, code, buf) to onigenc_mb2_code_to_mbc()
 * and returns its result unchanged.
 *
 * NOTE(review): the return-type line and the braces are not present in this
 * text; the leading integers look like line-number residue.
 */
117 euckr_code_to_mbc(OnigCodePoint code
, UChar
*buf
, OnigEncoding enc
)
119 return onigenc_mb2_code_to_mbc(enc
, code
, buf
);
/*
 * euckr_mbc_case_fold: returns the result of onigenc_mbn_mbc_case_fold(),
 * called with enc and flag as its first two arguments.
 *
 * NOTE(review): the remaining arguments of that call, the return-type line
 * and the braces are not present in this text (presumably pp, end and lower
 * are passed through - confirm against the complete file).
 */
123 euckr_mbc_case_fold(OnigCaseFoldType flag
, const UChar
** pp
, const UChar
* end
,
124 UChar
* lower
, OnigEncoding enc
)
126 return onigenc_mbn_mbc_case_fold(enc
, flag
,
/*
 * euckr_is_mbc_ambiguous: forwards (enc, flag, pp, end) to
 * onigenc_mbn_is_mbc_ambiguous() and returns its result unchanged.
 *
 * NOTE(review): the return-type line and the braces are not present in this
 * text. This function is not among the visible entries of the
 * OnigEncodingDefine initializer; check whether it is still compiled in
 * and used.
 */
132 euckr_is_mbc_ambiguous(OnigCaseFoldType flag
,
133 const UChar
** pp
, const UChar
* end
, OnigEncoding enc
)
135 return onigenc_mbn_is_mbc_ambiguous(enc
, flag
, pp
, end
);
/*
 * euckr_is_code_ctype: forwards (enc, code, ctype) to
 * onigenc_mb2_is_code_ctype() and returns its result unchanged.
 *
 * NOTE(review): the return-type line and the braces are not present in this
 * text; the leading integers look like line-number residue.
 */
140 euckr_is_code_ctype(OnigCodePoint code
, unsigned int ctype
, OnigEncoding enc
)
142 return onigenc_mb2_is_code_ctype(enc
, code
, ctype
);
/*
 * True when byte c lies outside 0xa1..0xfe, i.e. c < 0xa1 or c == 0xff.
 * euckr_left_adjust_char_head() stops its backward scan at such a byte.
 * The argument is evaluated twice, so pass an expression without side
 * effects.
 */
#define euckr_islead(c)    ((c) < 0xa1 || (c) == 0xff)
/*
 * euckr_left_adjust_char_head: given a position s inside the buffer that
 * begins at start, return a pointer to the head of the character that
 * contains s.
 *
 * Visible steps:
 *   - s <= start: return s unchanged.
 *   - Move p backwards while the byte at p is not euckr_islead() and p is
 *     still above start.
 *   - len = enclen(enc, p, s); if p + len extends past s, p is the answer.
 *   - Otherwise return p plus (s - p) with its lowest bit cleared, i.e. p
 *     advanced by the distance to s rounded down to an even number.
 *
 * NOTE(review): the return-type line, the braces, the declarations of p and
 * len, and any assignments to p between the visible statements are not
 * present in this text. The comment opened inside the function has no
 * visible terminator either; confirm all of this against the complete file.
 */
148 euckr_left_adjust_char_head(const UChar
* start
, const UChar
* s
, OnigEncoding enc
)
150 /* Assumed in this encoding,
151 mb-trail bytes don't mix with single bytes.
156 if (s
<= start
) return (UChar
* )s
;
159 while (!euckr_islead(*p
) && p
> start
) p
--;
160 len
= enclen(enc
, p
, s
);
161 if (p
+ len
> s
) return (UChar
* )p
;
163 return (UChar
* )(p
+ ((s
- p
) & ~1));
/*
 * euckr_is_allowed_reverse_match: returns TRUE when c <= 0x7e.
 * The end and enc parameters are marked ARG_UNUSED.
 *
 * NOTE(review): the declaration of c (presumably the byte at s - confirm),
 * the result for c > 0x7e, the return-type line and the braces are not
 * present in this text.
 */
167 euckr_is_allowed_reverse_match(const UChar
* s
, const UChar
* end ARG_UNUSED
, OnigEncoding enc ARG_UNUSED
)
170 if (c
<= 0x7e) return TRUE
;
/*
 * Encoding descriptor for EUC-KR, declared through the
 * OnigEncodingDefine(euc_kr, EUC_KR) macro.
 *
 * Visible entries: maximum and minimum encoded length (2 and 1), a set of
 * shared onigenc_* helpers, and this file's euckr_left_adjust_char_head and
 * euckr_is_allowed_reverse_match.
 *
 * NOTE(review): several initializer entries and the closing `};` are not
 * present in this text, so the field that each visible entry fills cannot be
 * confirmed from here; check the order against the encoding struct
 * definition.
 */
174 OnigEncodingDefine(euc_kr
, EUC_KR
) = {
177 2, /* max enc length */
178 1, /* min enc length */
179 onigenc_is_mbc_newline_0x0a
,
181 onigenc_mb2_code_to_mbclen
,
184 onigenc_ascii_apply_all_case_fold
,
185 onigenc_ascii_get_case_fold_codes_by_str
,
186 onigenc_minimum_property_name_to_ctype
,
188 onigenc_not_support_get_ctype_code_range
,
189 euckr_left_adjust_char_head
,
190 euckr_is_allowed_reverse_match
/* Presumably registers "eucKR" as an alternative name for "EUC-KR";
 * ENC_ALIAS is defined elsewhere - confirm its argument order there. */
192 ENC_ALIAS("eucKR", "EUC-KR")