1 /* Convert multibyte character to wide character.
2 Copyright (C) 1999-2002, 2005-2024 Free Software Foundation, Inc.
4 This file is free software: you can redistribute it and/or modify
5 it under the terms of the GNU Lesser General Public License as
6 published by the Free Software Foundation; either version 2.1 of the
7 License, or (at your option) any later version.
9 This file is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2008. */
19 /* This file contains the part of the body of the mbrtowc and mbrtoc32 functions
20 that handles the special case of the UTF-8 encoding. */
22 /* Cf. unistr/u8-mbtouc.c. */
/* Read the byte at p[0], converted to unsigned char, into c.  */
23 unsigned char c
= (unsigned char) p
[0];
/* res becomes 0 when that byte is NUL and 1 otherwise.
   NOTE(review): the condition that selects this path is not visible here;
   presumably this is the single-byte case -- confirm against the
   surrounding code.  */
29 res
= (c
== 0 ? 0 : 1);
/* Read the byte at p[1], converted to unsigned char, into c2.  */
40 unsigned char c2
= (unsigned char) p
[1];
/* (c2 ^ 0x80) < 0x40 holds exactly when c2 lies in 0x80..0xBF, i.e. when
   its two most significant bits are 1 and 0.  */
42 if ((c2
^ 0x80) < 0x40)
/* Store through pwc the value formed from the low 5 bits of c (shifted
   left by 6) and the low 6 bits of c2; given the range check above,
   c2 ^ 0x80 is just c2 with its top bit cleared.  */
45 *pwc
= ((unsigned int) (c
& 0x1f) << 6)
46 | (unsigned int) (c2
^ 0x80);
/* Read the byte at p[1], converted to unsigned char, into c2.  */
58 unsigned char c2
= (unsigned char) p
[1];
/* Three conditions on the second byte:
     - c2 must lie in 0x80..0xBF;
     - if c is below 0xE1, c2 must be at least 0xA0; with c == 0xE0 a
       smaller c2 would make the value combined below less than 0x800;
     - if c is 0xED, c2 must be below 0xA0; otherwise the value combined
       below would fall in 0xD800..0xDFFF.
   NOTE(review): the guard restricting c on this path is not visible here;
   presumably c is in 0xE0..0xEF -- confirm.  */
60 if ((c2
^ 0x80) < 0x40
61 && (c
>= 0xe1 || c2
>= 0xa0)
62 && (c
!= 0xed || c2
< 0xa0))
/* Read the byte at p[2], converted to unsigned char, into c3.  */
68 unsigned char c3
= (unsigned char) p
[2];
/* c3 must lie in 0x80..0xBF as well.  */
70 if ((c3
^ 0x80) < 0x40)
/* Combine the low 4 bits of c (shifted left by 12), the low 6 bits of c2
   (shifted left by 6) and the low 6 bits of c3.
   NOTE(review): the left-hand side of this expression is not visible here;
   the test that follows reads wc, so presumably the result is assigned to
   wc -- confirm.  */
73 (((unsigned int) (c
& 0x0f) << 12)
74 | ((unsigned int) (c2
^ 0x80) << 6)
75 | (unsigned int) (c3
^ 0x80));
/* NOTE(review): FITS_IN_CHAR_TYPE is defined outside this view; presumably
   it tells whether wc is representable in the result character type --
   confirm.  */
77 if (FITS_IN_CHAR_TYPE (wc
))
/* Read the byte at p[1], converted to unsigned char, into c2.  */
95 unsigned char c2
= (unsigned char) p
[1];
/* Three conditions on the second byte:
     - c2 must lie in 0x80..0xBF;
     - if c is below 0xF1, c2 must be at least 0x90; with c == 0xF0 a
       smaller c2 would make the value combined below less than 0x10000;
     - if c is not below 0xF4, c2 must be below 0x90; with c == 0xF4 this
       keeps the value combined below at or under 0x10FFFF.
   NOTE(review): the guard restricting c on this path is not visible here;
   the inline remark in the last condition suggests c cannot exceed 0xF4
   at this point -- confirm.  */
97 if ((c2
^ 0x80) < 0x40
98 && (c
>= 0xf1 || c2
>= 0x90)
99 && (c
< 0xf4 || (/* c == 0xf4 && */ c2
< 0x90)))
/* Read the byte at p[2], converted to unsigned char, into c3.  */
105 unsigned char c3
= (unsigned char) p
[2];
/* c3 must lie in 0x80..0xBF.  */
107 if ((c3
^ 0x80) < 0x40)
/* Read the byte at p[3], converted to unsigned char, into c4.  */
113 unsigned char c4
= (unsigned char) p
[3];
/* c4 must lie in 0x80..0xBF.  */
115 if ((c4
^ 0x80) < 0x40)
/* Combine the low 3 bits of c (shifted left by 18) with the low 6 bits of
   c2, c3 and c4 (shifted left by 12, 6 and 0 respectively).
   NOTE(review): the left-hand side of this expression is not visible here;
   the test that follows reads wc, so presumably the result is assigned to
   wc -- confirm.  */
118 (((unsigned int) (c
& 0x07) << 18)
119 | ((unsigned int) (c2
^ 0x80) << 12)
120 | ((unsigned int) (c3
^ 0x80) << 6)
121 | (unsigned int) (c4
^ 0x80));
/* NOTE(review): FITS_IN_CHAR_TYPE is defined outside this view; presumably
   it tells whether wc is representable in the result character type --
   confirm.  */
123 if (FITS_IN_CHAR_TYPE (wc
))