Sync usage with man page.
[netbsd-mini2440.git] / gnu / dist / gettext / gettext-tools / lib / utf8-ucs4.h
blob3cee14b2922930ac22e3a3c4eccabe6bc703db4f
/* Conversion UTF-8 to UCS-4.
   Copyright (C) 2001-2002 Free Software Foundation, Inc.
   Written by Bruno Haible <haible@clisp.cons.org>, 2001.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
20 #include <stddef.h>
/* Return the length (number of units) of the first character in S, putting
   its 'ucs4_t' representation in *PUC.

   S points to N available bytes; S[0] is read unconditionally, so callers
   are expected to pass N >= 1.  This is the slow path for lead bytes that
   are not plain ASCII (u8_mbtouc handles bytes below 0x80 itself).

   Results:
     - Well-formed 2-, 3- or 4-byte sequence: *PUC is the decoded code
       point and the return value is 2, 3 or 4.
     - Lead byte of a multibyte sequence but fewer than the required number
       of bytes available ("incomplete"): *PUC is 0xfffd and the return
       value is N, i.e. the whole remaining input is consumed.
     - Anything else ("invalid"): *PUC is 0xfffd and the return value is 1.
       This covers stray continuation bytes, the lead bytes 0xc0 and 0xc1,
       overlong encodings, UTF-16 surrogates (U+D800..U+DFFF), values above
       U+10FFFF, and the lead bytes 0xf5..0xff (including the obsolete
       5- and 6-byte forms).  */
static int
u8_mbtouc_aux (unsigned int *puc, const unsigned char *s, size_t n)
{
  unsigned char c = *s;

  /* Lead bytes below 0xc2 can never start a valid multibyte sequence.  */
  if (c >= 0xc2)
    {
      if (c < 0xe0)
        {
          /* Two-byte sequence.  */
          if (n < 2)
            {
              /* incomplete multibyte character */
              *puc = 0xfffd;
              return (int) n;
            }
          /* (b ^ 0x80) < 0x40 is true exactly for continuation bytes
             0x80..0xbf, and yields their 6 payload bits.  */
          if ((s[1] ^ 0x80) < 0x40)
            {
              *puc = ((unsigned int) (c & 0x1f) << 6)
                     | (unsigned int) (s[1] ^ 0x80);
              return 2;
            }
          /* invalid multibyte character */
        }
      else if (c < 0xf0)
        {
          /* Three-byte sequence.  */
          if (n < 3)
            {
              /* incomplete multibyte character */
              *puc = 0xfffd;
              return (int) n;
            }
          if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
              /* Reject overlong forms: E0 must be followed by A0..BF.  */
              && (c >= 0xe1 || s[1] >= 0xa0)
              /* Reject surrogates: ED must be followed by 80..9F.  */
              && (c != 0xed || s[1] < 0xa0))
            {
              *puc = ((unsigned int) (c & 0x0f) << 12)
                     | ((unsigned int) (s[1] ^ 0x80) << 6)
                     | (unsigned int) (s[2] ^ 0x80);
              return 3;
            }
          /* invalid multibyte character */
        }
      else if (c < 0xf8)
        {
          /* Four-byte sequence.  */
          if (n < 4)
            {
              /* incomplete multibyte character */
              *puc = 0xfffd;
              return (int) n;
            }
          if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
              && (s[3] ^ 0x80) < 0x40
              /* Reject overlong forms: F0 must be followed by 90..BF.  */
              && (c >= 0xf1 || s[1] >= 0x90)
              /* Reject values above U+10FFFF.  */
              && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)))
            {
              *puc = ((unsigned int) (c & 0x07) << 18)
                     | ((unsigned int) (s[1] ^ 0x80) << 12)
                     | ((unsigned int) (s[2] ^ 0x80) << 6)
                     | (unsigned int) (s[3] ^ 0x80);
              return 4;
            }
          /* invalid multibyte character */
        }
      /* Lead bytes 0xf8..0xff (obsolete 5- and 6-byte forms) are not
         decoded; they fall through and are reported as invalid.  */
    }

  /* invalid multibyte character */
  *puc = 0xfffd;
  return 1;
}
/* Return the length (number of units) of the first character in S, putting
   its UCS-4 representation in *PUC.  S points to N available bytes; S[0] is
   read unconditionally, so callers are expected to pass N >= 1.

   Bytes below 0x80 are handled inline: the byte itself is stored in *PUC
   and 1 is returned.  Every other lead byte is delegated to u8_mbtouc_aux,
   whose return value is passed through unchanged.  */
static inline int
u8_mbtouc (unsigned int *puc, const unsigned char *s, size_t n)
{
  unsigned char c = *s;

  if (c < 0x80)
    {
      /* Fast path: a single ASCII unit is its own code point.  */
      *puc = c;
      return 1;
    }
  else
    return u8_mbtouc_aux (puc, s, n);
}