11 _mbtowc_r (struct _reent
*r
,
12 wchar_t *__restrict pwc
,
13 const char *__restrict s
,
17 return __MBTOWC (r
, pwc
, s
, n
, state
);
21 __ascii_mbtowc (struct _reent
*r
,
28 unsigned char *t
= (unsigned char *)s
;
/* Character classes used as the column index of JIS_state_table and
   JIS_action_table.  The enumerator order must match the column order of
   those tables (ESCAPE, DOLLAR, BRACKET, AT, B, J, NUL, JIS_CHAR, OTHER).  */
typedef enum __packed
{
  ESCAPE,     /* column 0 */
  DOLLAR,     /* column 1 */
  BRACKET,    /* column 2 */
  AT,         /* column 3 */
  B,          /* column 4 */
  J,          /* column 5 */
  NUL,        /* column 6 */
  JIS_CHAR,   /* column 7 */
  OTHER,      /* column 8 */
  JIS_C_NUM   /* number of character classes; second table dimension */
} JIS_CHAR_TYPE;
/* Decoder states used as the row index of JIS_state_table and
   JIS_action_table.  The enumerator order must match the row order of
   those tables.  */
typedef enum __packed
{
  ASCII,      /* initial state; also re-entered from J_ESC_BR on B or J */
  JIS,        /* entered from A_ESC_DL on AT or B, and from JIS_1 */
  A_ESC,      /* entered from ASCII on ESCAPE */
  A_ESC_DL,   /* entered from A_ESC on DOLLAR */
  JIS_1,      /* entered from JIS once a first character has been taken */
  J_ESC,      /* entered from JIS on ESCAPE */
  J_ESC_BR,   /* entered from J_ESC on BRACKET */
  INV,        /* target of every transition whose action is ERROR */
  JIS_S_NUM   /* number of states; first table dimension */
} JIS_STATE;
/* Actions stored in JIS_action_table, selected by (current state,
   character class).  The per-action notes below describe where each value
   appears in that table; the code that carries out the actions lives in
   the JIS decoder.  */
typedef enum __packed
{
  COPY_A,     /* used in the ASCII, A_ESC and A_ESC_DL rows for ordinary input */
  COPY_J1,    /* used in the JIS row; paired with a move to JIS_1 */
  COPY_J2,    /* used in the JIS_1 row; paired with a move back to JIS */
  MAKE_A,     /* used in the J_ESC_BR row on B or J; paired with a move to ASCII */
  NOOP,       /* used while the bytes of an escape sequence are consumed */
  EMPTY,      /* used in the ASCII row for NUL */
  ERROR       /* used wherever the state table moves to INV */
} JIS_ACTION;
54 /**************************************************************************************
55 * state/action tables for processing JIS encoding
56 * Where possible, switches to JIS are grouped with following JIS characters and switches
57 * to ASCII are grouped with preceding JIS characters. Thus, maximum returned length
58 * is 2 (switch to JIS) + 2 (JIS characters) + 2 (switch back to ASCII) = 6.
59 *************************************************************************************/
62 static JIS_STATE JIS_state_table
[JIS_S_NUM
][JIS_C_NUM
] = {
63 /* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
64 /* ASCII */ { A_ESC
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
},
65 /* JIS */ { J_ESC
, JIS_1
, JIS_1
, JIS_1
, JIS_1
, JIS_1
, INV
, JIS_1
, INV
},
66 /* A_ESC */ { ASCII
, A_ESC_DL
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
, ASCII
},
67 /* A_ESC_DL */{ ASCII
, ASCII
, ASCII
, JIS
, JIS
, ASCII
, ASCII
, ASCII
, ASCII
},
68 /* JIS_1 */ { INV
, JIS
, JIS
, JIS
, JIS
, JIS
, INV
, JIS
, INV
},
69 /* J_ESC */ { INV
, INV
, J_ESC_BR
, INV
, INV
, INV
, INV
, INV
, INV
},
70 /* J_ESC_BR */{ INV
, INV
, INV
, INV
, ASCII
, ASCII
, INV
, INV
, INV
},
73 static JIS_ACTION JIS_action_table
[JIS_S_NUM
][JIS_C_NUM
] = {
74 /* ESCAPE DOLLAR BRACKET AT B J NUL JIS_CHAR OTHER */
75 /* ASCII */ { NOOP
, COPY_A
, COPY_A
, COPY_A
, COPY_A
, COPY_A
, EMPTY
, COPY_A
, COPY_A
},
76 /* JIS */ { NOOP
, COPY_J1
, COPY_J1
, COPY_J1
, COPY_J1
, COPY_J1
, ERROR
, COPY_J1
, ERROR
},
77 /* A_ESC */ { COPY_A
, NOOP
, COPY_A
, COPY_A
, COPY_A
, COPY_A
, COPY_A
, COPY_A
, COPY_A
},
78 /* A_ESC_DL */{ COPY_A
, COPY_A
, COPY_A
, NOOP
, NOOP
, COPY_A
, COPY_A
, COPY_A
, COPY_A
},
79 /* JIS_1 */ { ERROR
, COPY_J2
, COPY_J2
, COPY_J2
, COPY_J2
, COPY_J2
, ERROR
, COPY_J2
, ERROR
},
80 /* J_ESC */ { ERROR
, ERROR
, NOOP
, ERROR
, ERROR
, ERROR
, ERROR
, ERROR
, ERROR
},
81 /* J_ESC_BR */{ ERROR
, ERROR
, ERROR
, ERROR
, MAKE_A
, MAKE_A
, ERROR
, ERROR
, ERROR
},
83 #endif /* !__CYGWIN__ */
85 /* we override the mbstate_t __count field for more complex encodings and use it to store a state value */
86 #define __state __count
88 #ifdef _MB_EXTENDED_CHARSETS_ISO
90 ___iso_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
91 int iso_idx
, mbstate_t *state
)
94 unsigned char *t
= (unsigned char *)s
;
109 *pwc
= __iso_8859_conv
[iso_idx
][*t
- 0xa0];
110 if (*pwc
== 0) /* Invalid character */
112 _REENT_ERRNO(r
) = EILSEQ
;
128 __iso_8859_1_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
131 return ___iso_mbtowc (r
, pwc
, s
, n
, -1, state
);
135 __iso_8859_2_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
138 return ___iso_mbtowc (r
, pwc
, s
, n
, 0, state
);
142 __iso_8859_3_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
145 return ___iso_mbtowc (r
, pwc
, s
, n
, 1, state
);
149 __iso_8859_4_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
152 return ___iso_mbtowc (r
, pwc
, s
, n
, 2, state
);
156 __iso_8859_5_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
159 return ___iso_mbtowc (r
, pwc
, s
, n
, 3, state
);
163 __iso_8859_6_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
166 return ___iso_mbtowc (r
, pwc
, s
, n
, 4, state
);
170 __iso_8859_7_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
173 return ___iso_mbtowc (r
, pwc
, s
, n
, 5, state
);
177 __iso_8859_8_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
180 return ___iso_mbtowc (r
, pwc
, s
, n
, 6, state
);
184 __iso_8859_9_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
187 return ___iso_mbtowc (r
, pwc
, s
, n
, 7, state
);
191 __iso_8859_10_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
194 return ___iso_mbtowc (r
, pwc
, s
, n
, 8, state
);
198 __iso_8859_11_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
201 return ___iso_mbtowc (r
, pwc
, s
, n
, 9, state
);
205 __iso_8859_13_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
208 return ___iso_mbtowc (r
, pwc
, s
, n
, 10, state
);
212 __iso_8859_14_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
215 return ___iso_mbtowc (r
, pwc
, s
, n
, 11, state
);
219 __iso_8859_15_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
222 return ___iso_mbtowc (r
, pwc
, s
, n
, 12, state
);
226 __iso_8859_16_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
229 return ___iso_mbtowc (r
, pwc
, s
, n
, 13, state
);
232 static mbtowc_p __iso_8859_mbtowc
[17] = {
243 __iso_8859_10_mbtowc
,
244 __iso_8859_11_mbtowc
,
245 NULL
, /* No ISO 8859-12 */
246 __iso_8859_13_mbtowc
,
247 __iso_8859_14_mbtowc
,
248 __iso_8859_15_mbtowc
,
252 /* val *MUST* be valid! All checks for validity are supposed to be
253 performed before calling this function. */
255 __iso_mbtowc (int val
)
257 return __iso_8859_mbtowc
[val
];
259 #endif /* _MB_EXTENDED_CHARSETS_ISO */
261 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
263 ___cp_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
264 int cp_idx
, mbstate_t *state
)
267 unsigned char *t
= (unsigned char *)s
;
282 *pwc
= __cp_conv
[cp_idx
][*t
- 0x80];
283 if (*pwc
== 0) /* Invalid character */
285 _REENT_ERRNO(r
) = EILSEQ
;
301 __cp_437_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
304 return ___cp_mbtowc (r
, pwc
, s
, n
, 0, state
);
308 __cp_720_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
311 return ___cp_mbtowc (r
, pwc
, s
, n
, 1, state
);
315 __cp_737_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
318 return ___cp_mbtowc (r
, pwc
, s
, n
, 2, state
);
322 __cp_775_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
325 return ___cp_mbtowc (r
, pwc
, s
, n
, 3, state
);
329 __cp_850_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
332 return ___cp_mbtowc (r
, pwc
, s
, n
, 4, state
);
336 __cp_852_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
339 return ___cp_mbtowc (r
, pwc
, s
, n
, 5, state
);
343 __cp_855_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
346 return ___cp_mbtowc (r
, pwc
, s
, n
, 6, state
);
350 __cp_857_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
353 return ___cp_mbtowc (r
, pwc
, s
, n
, 7, state
);
357 __cp_858_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
360 return ___cp_mbtowc (r
, pwc
, s
, n
, 8, state
);
364 __cp_862_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
367 return ___cp_mbtowc (r
, pwc
, s
, n
, 9, state
);
371 __cp_866_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
374 return ___cp_mbtowc (r
, pwc
, s
, n
, 10, state
);
378 __cp_874_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
381 return ___cp_mbtowc (r
, pwc
, s
, n
, 11, state
);
385 __cp_1125_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
388 return ___cp_mbtowc (r
, pwc
, s
, n
, 12, state
);
392 __cp_1250_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
395 return ___cp_mbtowc (r
, pwc
, s
, n
, 13, state
);
399 __cp_1251_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
402 return ___cp_mbtowc (r
, pwc
, s
, n
, 14, state
);
406 __cp_1252_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
409 return ___cp_mbtowc (r
, pwc
, s
, n
, 15, state
);
413 __cp_1253_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
416 return ___cp_mbtowc (r
, pwc
, s
, n
, 16, state
);
420 __cp_1254_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
423 return ___cp_mbtowc (r
, pwc
, s
, n
, 17, state
);
427 __cp_1255_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
430 return ___cp_mbtowc (r
, pwc
, s
, n
, 18, state
);
434 __cp_1256_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
437 return ___cp_mbtowc (r
, pwc
, s
, n
, 19, state
);
441 __cp_1257_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
444 return ___cp_mbtowc (r
, pwc
, s
, n
, 20, state
);
448 __cp_1258_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
451 return ___cp_mbtowc (r
, pwc
, s
, n
, 21, state
);
455 __cp_20866_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
458 return ___cp_mbtowc (r
, pwc
, s
, n
, 22, state
);
462 __cp_21866_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
465 return ___cp_mbtowc (r
, pwc
, s
, n
, 23, state
);
469 __cp_101_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
472 return ___cp_mbtowc (r
, pwc
, s
, n
, 24, state
);
476 __cp_102_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
479 return ___cp_mbtowc (r
, pwc
, s
, n
, 25, state
);
483 __cp_103_mbtowc (struct _reent
*r
, wchar_t *pwc
, const char *s
, size_t n
,
486 return ___cp_mbtowc (r
, pwc
, s
, n
, 26, state
);
489 static mbtowc_p __cp_xxx_mbtowc
[27] = {
519 /* val *MUST* be valid! All checks for validity are supposed to be
520 performed before calling this function. */
522 __cp_mbtowc (int val
)
524 return __cp_xxx_mbtowc
[__cp_val_index (val
)];
526 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
529 __utf8_mbtowc (struct _reent
*r
,
536 unsigned char *t
= (unsigned char *)s
;
549 if (state
->__count
== 0)
552 ch
= state
->__value
.__wchb
[0];
558 return 0; /* s points to the null character */
563 /* single-byte sequence */
568 if (ch
>= 0xc0 && ch
<= 0xdf)
570 /* two-byte sequence */
571 state
->__value
.__wchb
[0] = ch
;
572 if (state
->__count
== 0)
574 else if (n
< (size_t)-1)
579 if (ch
< 0x80 || ch
> 0xbf)
581 _REENT_ERRNO(r
) = EILSEQ
;
584 if (state
->__value
.__wchb
[0] < 0xc2)
586 /* overlong UTF-8 sequence */
587 _REENT_ERRNO(r
) = EILSEQ
;
591 *pwc
= (wchar_t)((state
->__value
.__wchb
[0] & 0x1f) << 6)
592 | (wchar_t)(ch
& 0x3f);
595 if (ch
>= 0xe0 && ch
<= 0xef)
597 /* three-byte sequence */
599 state
->__value
.__wchb
[0] = ch
;
600 if (state
->__count
== 0)
602 else if (n
< (size_t)-1)
606 ch
= (state
->__count
== 1) ? t
[i
++] : state
->__value
.__wchb
[1];
607 if (state
->__value
.__wchb
[0] == 0xe0 && ch
< 0xa0)
609 /* overlong UTF-8 sequence */
610 _REENT_ERRNO(r
) = EILSEQ
;
613 if (ch
< 0x80 || ch
> 0xbf)
615 _REENT_ERRNO(r
) = EILSEQ
;
618 state
->__value
.__wchb
[1] = ch
;
619 if (state
->__count
== 1)
621 else if (n
< (size_t)-1)
626 if (ch
< 0x80 || ch
> 0xbf)
628 _REENT_ERRNO(r
) = EILSEQ
;
632 tmp
= (wchar_t)((state
->__value
.__wchb
[0] & 0x0f) << 12)
633 | (wchar_t)((state
->__value
.__wchb
[1] & 0x3f) << 6)
634 | (wchar_t)(ch
& 0x3f);
638 if (ch
>= 0xf0 && ch
<= 0xf4)
640 /* four-byte sequence */
642 state
->__value
.__wchb
[0] = ch
;
643 if (state
->__count
== 0)
645 else if (n
< (size_t)-1)
649 ch
= (state
->__count
== 1) ? t
[i
++] : state
->__value
.__wchb
[1];
650 if ((state
->__value
.__wchb
[0] == 0xf0 && ch
< 0x90)
651 || (state
->__value
.__wchb
[0] == 0xf4 && ch
>= 0x90))
653 /* overlong UTF-8 sequence or result is > 0x10ffff */
654 _REENT_ERRNO(r
) = EILSEQ
;
657 if (ch
< 0x80 || ch
> 0xbf)
659 _REENT_ERRNO(r
) = EILSEQ
;
662 state
->__value
.__wchb
[1] = ch
;
663 if (state
->__count
== 1)
665 else if (n
< (size_t)-1)
669 ch
= (state
->__count
== 2) ? t
[i
++] : state
->__value
.__wchb
[2];
670 if (ch
< 0x80 || ch
> 0xbf)
672 _REENT_ERRNO(r
) = EILSEQ
;
675 state
->__value
.__wchb
[2] = ch
;
676 if (state
->__count
== 2)
678 else if (n
< (size_t)-1)
680 if (state
->__count
== 3 && sizeof(wchar_t) == 2)
682 /* On systems which have wchar_t being UTF-16 values, the value
683 doesn't fit into a single wchar_t in this case. So what we
684 do here is to store the state with a special value of __count
685 and return the first half of a surrogate pair. The first
686 three bytes of a UTF-8 sequence are enough to generate the
687 first half of a UTF-16 surrogate pair. As return value we
688 choose to return the number of bytes actually read up to
690 The second half of the surrogate pair is returned in case we
691 recognize the special __count value of four, and the next
692 byte is actually a valid value. See below. */
693 tmp
= (wint_t)((state
->__value
.__wchb
[0] & 0x07) << 18)
694 | (wint_t)((state
->__value
.__wchb
[1] & 0x3f) << 12)
695 | (wint_t)((state
->__value
.__wchb
[2] & 0x3f) << 6);
697 *pwc
= 0xd800 | ((tmp
- 0x10000) >> 10);
703 if (ch
< 0x80 || ch
> 0xbf)
705 _REENT_ERRNO(r
) = EILSEQ
;
708 tmp
= (wint_t)((state
->__value
.__wchb
[0] & 0x07) << 18)
709 | (wint_t)((state
->__value
.__wchb
[1] & 0x3f) << 12)
710 | (wint_t)((state
->__value
.__wchb
[2] & 0x3f) << 6)
711 | (wint_t)(ch
& 0x3f);
712 if (state
->__count
== 4 && sizeof(wchar_t) == 2)
713 /* Create the second half of the surrogate pair for systems with
714 wchar_t == UTF-16 . */
715 *pwc
= 0xdc00 | (tmp
& 0x3ff);
722 _REENT_ERRNO(r
) = EILSEQ
;
726 /* Cygwin defines its own doublebyte charset conversion functions
727 because the underlying OS requires wchar_t == UTF-16. */
730 __sjis_mbtowc (struct _reent
*r
,
737 unsigned char *t
= (unsigned char *)s
;
745 return 0; /* not state-dependent */
751 if (state
->__count
== 0)
755 state
->__value
.__wchb
[0] = ch
;
762 if (state
->__count
== 1)
766 *pwc
= (((wchar_t)state
->__value
.__wchb
[0]) << 8) + (wchar_t)ch
;
772 _REENT_ERRNO(r
) = EILSEQ
;
786 __eucjp_mbtowc (struct _reent
*r
,
793 unsigned char *t
= (unsigned char *)s
;
807 if (state
->__count
== 0)
811 state
->__value
.__wchb
[0] = ch
;
818 if (state
->__count
== 1)
822 if (state
->__value
.__wchb
[0] == 0x8f)
824 state
->__value
.__wchb
[1] = ch
;
832 *pwc
= (((wchar_t)state
->__value
.__wchb
[0]) << 8) + (wchar_t)ch
;
839 _REENT_ERRNO(r
) = EILSEQ
;
843 if (state
->__count
== 2)
847 *pwc
= (((wchar_t)state
->__value
.__wchb
[1]) << 8)
848 + (wchar_t)(ch
& 0x7f);
854 _REENT_ERRNO(r
) = EILSEQ
;
868 __jis_mbtowc (struct _reent
*r
,
875 unsigned char *t
= (unsigned char *)s
;
876 JIS_STATE curr_state
;
888 state
->__state
= ASCII
;
889 return 1; /* state-dependent */
895 curr_state
= state
->__state
;
898 for (i
= 0; i
< n
; ++i
)
925 if (_isjis (curr_ch
))
931 action
= JIS_action_table
[curr_state
][ch
];
932 curr_state
= JIS_state_table
[curr_state
][ch
];
939 state
->__state
= ASCII
;
943 state
->__state
= ASCII
;
944 *pwc
= (wchar_t)*ptr
;
947 state
->__value
.__wchb
[0] = t
[i
];
950 state
->__state
= JIS
;
951 *pwc
= (((wchar_t)state
->__value
.__wchb
[0]) << 8) + (wchar_t)(t
[i
]);
954 ptr
= (unsigned char *)(t
+ i
+ 1);
958 _REENT_ERRNO(r
) = EILSEQ
;
964 state
->__state
= curr_state
;
965 return -2; /* n < bytes needed */
967 #endif /* !__CYGWIN__*/
968 #endif /* _MB_CAPABLE */