Tue Jul 16 16:43:58 1996 Roland McGrath <roland@delasyd.gnu.ai.mit.edu>
[glibc/history.git] / wcsmbs / mbrtowc.c
blob9e70a0b2c9931f3ae60f5c743d72922861bab56e
1 /* Copyright (C) 1996 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If
17 not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
20 #include <errno.h>
21 #include <wchar.h>
23 #ifndef EILSEQ
24 #define EILSEQ EINVAL
25 #endif
28 static mbstate_t internal;
30 size_t
31 mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
33 wchar_t to_wide;
34 size_t used = 0;
36 if (ps == NULL)
37 ps = &internal;
39 if (s == NULL)
41 /* See first paragraph of description in 7.16.6.3.2. */
42 pwc = NULL;
43 s = "";
44 n = 1;
47 if (n > 0)
49 if (ps->count == 0)
51 unsigned char byte = (unsigned char) *s++;
52 ++used;
54 /* We must look for a possible first byte of a UTF8 sequence. */
55 if (byte < 0x80)
57 /* One byte sequence. */
58 if (pwc != NULL)
59 *pwc = (wchar_t) byte;
60 return byte ? used : 0;
63 if ((byte & 0xc0) == 0x80 || (byte & 0xfe) == 0xfe)
65 /* Oh, oh. An encoding error. */
66 errno = EILSEQ;
67 return (size_t) -1;
70 if ((byte & 0xe0) == 0xc0)
72 /* We expect two bytes. */
73 ps->count = 1;
74 ps->value = byte & 0x1f;
76 else if ((byte & 0xf0) == 0xe0)
78 /* We expect three bytes. */
79 ps->count = 2;
80 ps->value = byte & 0x0f;
82 else if ((byte & 0xf8) == 0xf0)
84 /* We expect four bytes. */
85 ps->count = 3;
86 ps->value = byte & 0x07;
88 else if ((byte & 0xfc) == 0xf8)
90 /* We expect five bytes. */
91 ps->count = 4;
92 ps->value = byte & 0x03;
94 else
96 /* We expect six bytes. */
97 ps->count = 5;
98 ps->value = byte & 0x01;
102 /* We know we have to handle a multibyte character and there are
103 some more bytes to read. */
104 while (used < n)
106 /* The second to sixths byte must be of the form 10xxxxxx. */
107 unsigned char byte = (unsigned char) *s++;
108 ++used;
110 if ((byte & 0xc0) != 0x80)
112 /* Oh, oh. An encoding error. */
113 errno = EILSEQ;
114 return (size_t) -1;
117 ps->value <<= 6;
118 ps->value |= byte & 0x3f;
120 if (--ps->count == 0)
122 /* The character is finished. */
123 if (pwc != NULL)
124 *pwc = (wchar_t) ps->value;
125 return ps->value ? used : 0;
130 return (size_t) -2;