2 * Copyright 2013 Garrett D'Amore <garrett@damore.org>
3 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
4 * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
6 * The Regents of the University of California. All rights reserved.
8 * This code is derived from software contributed to Berkeley by
9 * Paul Borman at Krystal Technologies.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
42 #include <sys/types.h>
47 static size_t _EUC_mbrtowc_impl(wchar_t *_RESTRICT_KYWD
,
48 const char *_RESTRICT_KYWD
,
49 size_t, mbstate_t *_RESTRICT_KYWD
, uint8_t, uint8_t, uint8_t, uint8_t);
50 static size_t _EUC_wcrtomb_impl(char *_RESTRICT_KYWD
, wchar_t,
51 mbstate_t *_RESTRICT_KYWD
, uint8_t, uint8_t, uint8_t, uint8_t);
53 static size_t _EUC_CN_mbrtowc(wchar_t *_RESTRICT_KYWD
,
54 const char *_RESTRICT_KYWD
,
55 size_t, mbstate_t *_RESTRICT_KYWD
);
56 static size_t _EUC_JP_mbrtowc(wchar_t *_RESTRICT_KYWD
,
57 const char *_RESTRICT_KYWD
,
58 size_t, mbstate_t *_RESTRICT_KYWD
);
59 static size_t _EUC_KR_mbrtowc(wchar_t *_RESTRICT_KYWD
,
60 const char *_RESTRICT_KYWD
,
61 size_t, mbstate_t *_RESTRICT_KYWD
);
62 static size_t _EUC_TW_mbrtowc(wchar_t *_RESTRICT_KYWD
,
63 const char *_RESTRICT_KYWD
,
64 size_t, mbstate_t *_RESTRICT_KYWD
);
66 static size_t _EUC_CN_wcrtomb(char *_RESTRICT_KYWD
, wchar_t,
67 mbstate_t *_RESTRICT_KYWD
);
68 static size_t _EUC_JP_wcrtomb(char *_RESTRICT_KYWD
, wchar_t,
69 mbstate_t *_RESTRICT_KYWD
);
70 static size_t _EUC_KR_wcrtomb(char *_RESTRICT_KYWD
, wchar_t,
71 mbstate_t *_RESTRICT_KYWD
);
72 static size_t _EUC_TW_wcrtomb(char *_RESTRICT_KYWD
, wchar_t,
73 mbstate_t *_RESTRICT_KYWD
);
75 static size_t _EUC_CN_mbsnrtowcs(wchar_t *_RESTRICT_KYWD
,
76 const char **_RESTRICT_KYWD
, size_t, size_t,
77 mbstate_t *_RESTRICT_KYWD
);
78 static size_t _EUC_JP_mbsnrtowcs(wchar_t *_RESTRICT_KYWD
,
79 const char **_RESTRICT_KYWD
, size_t, size_t,
80 mbstate_t *_RESTRICT_KYWD
);
81 static size_t _EUC_KR_mbsnrtowcs(wchar_t *_RESTRICT_KYWD
,
82 const char **_RESTRICT_KYWD
, size_t, size_t,
83 mbstate_t *_RESTRICT_KYWD
);
84 static size_t _EUC_TW_mbsnrtowcs(wchar_t *_RESTRICT_KYWD
,
85 const char **_RESTRICT_KYWD
, size_t, size_t,
86 mbstate_t *_RESTRICT_KYWD
);
88 static size_t _EUC_CN_wcsnrtombs(char *_RESTRICT_KYWD
,
89 const wchar_t **_RESTRICT_KYWD
, size_t, size_t,
90 mbstate_t *_RESTRICT_KYWD
);
91 static size_t _EUC_JP_wcsnrtombs(char *_RESTRICT_KYWD
,
92 const wchar_t **_RESTRICT_KYWD
, size_t, size_t,
93 mbstate_t *_RESTRICT_KYWD
);
94 static size_t _EUC_KR_wcsnrtombs(char *_RESTRICT_KYWD
,
95 const wchar_t **_RESTRICT_KYWD
, size_t, size_t,
96 mbstate_t *_RESTRICT_KYWD
);
97 static size_t _EUC_TW_wcsnrtombs(char *_RESTRICT_KYWD
,
98 const wchar_t **_RESTRICT_KYWD
, size_t, size_t,
99 mbstate_t *_RESTRICT_KYWD
);
101 static int _EUC_mbsinit(const mbstate_t *);
110 _EUC_mbsinit(const mbstate_t *ps
)
113 return (ps
== NULL
|| ((const _EucState
*)ps
)->want
== 0);
117 * EUC-CN uses CS0, CS1 and CS2 (4 bytes).
120 _EUC_CN_init(struct lc_ctype
*lct
)
122 lct
->lc_mbrtowc
= _EUC_CN_mbrtowc
;
123 lct
->lc_wcrtomb
= _EUC_CN_wcrtomb
;
124 lct
->lc_mbsnrtowcs
= _EUC_CN_mbsnrtowcs
;
125 lct
->lc_wcsnrtombs
= _EUC_CN_wcsnrtombs
;
126 lct
->lc_mbsinit
= _EUC_mbsinit
;
128 lct
->lc_max_mblen
= 4;
129 lct
->lc_is_ascii
= 0;
133 _EUC_CN_mbrtowc(wchar_t *_RESTRICT_KYWD pwc
, const char *_RESTRICT_KYWD s
,
134 size_t n
, mbstate_t *_RESTRICT_KYWD ps
)
136 return (_EUC_mbrtowc_impl(pwc
, s
, n
, ps
, SS2
, 4, 0, 0));
140 _EUC_CN_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst
,
141 const char **_RESTRICT_KYWD src
,
142 size_t nms
, size_t len
, mbstate_t *_RESTRICT_KYWD ps
)
144 return (__mbsnrtowcs_std(dst
, src
, nms
, len
, ps
, _EUC_CN_mbrtowc
));
148 _EUC_CN_wcrtomb(char *_RESTRICT_KYWD s
, wchar_t wc
,
149 mbstate_t *_RESTRICT_KYWD ps
)
151 return (_EUC_wcrtomb_impl(s
, wc
, ps
, SS2
, 4, 0, 0));
155 _EUC_CN_wcsnrtombs(char *_RESTRICT_KYWD dst
, const wchar_t **_RESTRICT_KYWD src
,
156 size_t nwc
, size_t len
, mbstate_t *_RESTRICT_KYWD ps
)
158 return (__wcsnrtombs_std(dst
, src
, nwc
, len
, ps
, _EUC_CN_wcrtomb
));
162 * EUC-KR uses only CS0 and CS1.
165 _EUC_KR_init(struct lc_ctype
*lct
)
167 lct
->lc_mbrtowc
= _EUC_KR_mbrtowc
;
168 lct
->lc_wcrtomb
= _EUC_KR_wcrtomb
;
169 lct
->lc_mbsnrtowcs
= _EUC_KR_mbsnrtowcs
;
170 lct
->lc_wcsnrtombs
= _EUC_KR_wcsnrtombs
;
171 lct
->lc_mbsinit
= _EUC_mbsinit
;
173 lct
->lc_max_mblen
= 2;
174 lct
->lc_is_ascii
= 0;
178 _EUC_KR_mbrtowc(wchar_t *_RESTRICT_KYWD pwc
, const char *_RESTRICT_KYWD s
,
179 size_t n
, mbstate_t *_RESTRICT_KYWD ps
)
181 return (_EUC_mbrtowc_impl(pwc
, s
, n
, ps
, 0, 0, 0, 0));
185 _EUC_KR_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst
,
186 const char **_RESTRICT_KYWD src
,
187 size_t nms
, size_t len
, mbstate_t *_RESTRICT_KYWD ps
)
189 return (__mbsnrtowcs_std(dst
, src
, nms
, len
, ps
, _EUC_KR_mbrtowc
));
193 _EUC_KR_wcrtomb(char *_RESTRICT_KYWD s
, wchar_t wc
,
194 mbstate_t *_RESTRICT_KYWD ps
)
196 return (_EUC_wcrtomb_impl(s
, wc
, ps
, 0, 0, 0, 0));
200 _EUC_KR_wcsnrtombs(char *_RESTRICT_KYWD dst
, const wchar_t **_RESTRICT_KYWD src
,
201 size_t nwc
, size_t len
, mbstate_t *_RESTRICT_KYWD ps
)
203 return (__wcsnrtombs_std(dst
, src
, nwc
, len
, ps
, _EUC_KR_wcrtomb
));
207 * EUC-JP uses CS0, CS1, CS2, and CS3.
210 _EUC_JP_init(struct lc_ctype
*lct
)
212 lct
->lc_mbrtowc
= _EUC_JP_mbrtowc
;
213 lct
->lc_wcrtomb
= _EUC_JP_wcrtomb
;
214 lct
->lc_mbsnrtowcs
= _EUC_JP_mbsnrtowcs
;
215 lct
->lc_wcsnrtombs
= _EUC_JP_wcsnrtombs
;
216 lct
->lc_mbsinit
= _EUC_mbsinit
;
218 lct
->lc_max_mblen
= 3;
219 lct
->lc_is_ascii
= 0;
223 _EUC_JP_mbrtowc(wchar_t *_RESTRICT_KYWD pwc
, const char *_RESTRICT_KYWD s
,
224 size_t n
, mbstate_t *_RESTRICT_KYWD ps
)
226 return (_EUC_mbrtowc_impl(pwc
, s
, n
, ps
, SS2
, 2, SS3
, 3));
230 _EUC_JP_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst
,
231 const char **_RESTRICT_KYWD src
,
232 size_t nms
, size_t len
, mbstate_t *_RESTRICT_KYWD ps
)
234 return (__mbsnrtowcs_std(dst
, src
, nms
, len
, ps
, _EUC_JP_mbrtowc
));
238 _EUC_JP_wcrtomb(char *_RESTRICT_KYWD s
, wchar_t wc
,
239 mbstate_t *_RESTRICT_KYWD ps
)
241 return (_EUC_wcrtomb_impl(s
, wc
, ps
, SS2
, 2, SS3
, 3));
245 _EUC_JP_wcsnrtombs(char *_RESTRICT_KYWD dst
, const wchar_t **_RESTRICT_KYWD src
,
246 size_t nwc
, size_t len
, mbstate_t *_RESTRICT_KYWD ps
)
248 return (__wcsnrtombs_std(dst
, src
, nwc
, len
, ps
, _EUC_JP_wcrtomb
));
252 * EUC-TW uses CS0, CS1, and CS2.
255 _EUC_TW_init(struct lc_ctype
*lct
)
257 lct
->lc_mbrtowc
= _EUC_TW_mbrtowc
;
258 lct
->lc_wcrtomb
= _EUC_TW_wcrtomb
;
259 lct
->lc_mbsnrtowcs
= _EUC_TW_mbsnrtowcs
;
260 lct
->lc_wcsnrtombs
= _EUC_TW_wcsnrtombs
;
261 lct
->lc_mbsinit
= _EUC_mbsinit
;
263 lct
->lc_max_mblen
= 4;
264 lct
->lc_is_ascii
= 0;
268 _EUC_TW_mbrtowc(wchar_t *_RESTRICT_KYWD pwc
, const char *_RESTRICT_KYWD s
,
269 size_t n
, mbstate_t *_RESTRICT_KYWD ps
)
271 return (_EUC_mbrtowc_impl(pwc
, s
, n
, ps
, SS2
, 4, 0, 0));
275 _EUC_TW_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst
,
276 const char **_RESTRICT_KYWD src
,
277 size_t nms
, size_t len
, mbstate_t *_RESTRICT_KYWD ps
)
279 return (__mbsnrtowcs_std(dst
, src
, nms
, len
, ps
, _EUC_TW_mbrtowc
));
283 _EUC_TW_wcrtomb(char *_RESTRICT_KYWD s
, wchar_t wc
,
284 mbstate_t *_RESTRICT_KYWD ps
)
286 return (_EUC_wcrtomb_impl(s
, wc
, ps
, SS2
, 4, 0, 0));
290 _EUC_TW_wcsnrtombs(char *_RESTRICT_KYWD dst
, const wchar_t **_RESTRICT_KYWD src
,
291 size_t nwc
, size_t len
, mbstate_t *_RESTRICT_KYWD ps
)
293 return (__wcsnrtombs_std(dst
, src
, nwc
, len
, ps
, _EUC_TW_wcrtomb
));
301 _EUC_mbrtowc_impl(wchar_t *_RESTRICT_KYWD pwc
, const char *_RESTRICT_KYWD s
,
302 size_t n
, mbstate_t *_RESTRICT_KYWD ps
,
303 uint8_t cs2
, uint8_t cs2width
, uint8_t cs3
, uint8_t cs3width
)
308 unsigned char ch
, chs
;
310 es
= (_EucState
*)ps
;
312 if (es
->want
< 0 || es
->want
> MB_CUR_MAX
) {
324 /* Incomplete multibyte sequence */
328 /* Fast path for plain ASCII (CS0) */
329 if (((ch
= (unsigned char)*s
) & 0x80) == 0) {
332 return (ch
!= '\0' ? 1 : 0);
338 } else if (ch
== cs2
) {
340 } else if (ch
== cs3
) {
355 for (i
= 0; i
< MIN(want
, n
); i
++) {
362 /* Incomplete multibyte sequence */
370 return (wc
== L
'\0' ? 0 : want
);
374 _EUC_wcrtomb_impl(char *_RESTRICT_KYWD s
, wchar_t wc
,
375 mbstate_t *_RESTRICT_KYWD ps
,
376 uint8_t cs2
, uint8_t cs2width
, uint8_t cs3
, uint8_t cs3width
)
382 es
= (_EucState
*)ps
;
390 /* Reset to initial shift state (no-op) */
393 if ((wc
& ~0x7f) == 0) {
394 /* Fast path for plain ASCII (CS0) */
399 /* Determine the "length" */
400 if ((unsigned)wc
> 0xffffff) {
402 } else if ((unsigned)wc
> 0xffff) {
404 } else if ((unsigned)wc
> 0xff) {
410 if (len
> MB_CUR_MAX
) {
415 /* This first check excludes CS1, which is implicitly valid. */
416 if ((wc
< 0xa100) || (wc
> 0xffff)) {
417 /* Check for valid CS2 or CS3 */
418 nm
= (wc
>> ((len
- 1) * 8));
420 if (len
!= cs2width
) {
424 } else if (nm
== cs3
) {
425 if (len
!= cs3width
) {
435 /* Stash the bytes, least significant last */
436 for (i
= len
- 1; i
>= 0; i
--) {