2005-02-16 Roland McGrath <roland@redhat.com>
[glibc/history.git] / iconvdata / tst-table-from.c
blob34ea79362d6ce3f68d7913c9fa2ffdc322d834b5
1 /* Copyright (C) 2000-2002 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Bruno Haible <haible@clisp.cons.org>, 2000.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307 USA. */
20 /* Create a table from CHARSET to Unicode.
21 This is a good test for CHARSET's iconv() module, in particular the
22 FROM_LOOP BODY macro. */
24 #include <stddef.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <iconv.h>
29 #include <errno.h>
/* If nonzero, ignore conversions outside Unicode plane 0.  main sets this
   flag when the charset under test is "UTF-8" or "GB18030"; utf8_decode
   reads it and returns NULL as soon as one decoded value formats to more
   than six characters.  */
static int bmp_only;
/* Format the BUFLEN bytes at BUF as the text "0x" followed by two
   uppercase hexadecimal digits per byte.  Only lengths 1 through 4 are
   accepted; any other length calls abort ().  The returned string lives
   in a static buffer that the next call overwrites.  */
static const char*
hexbuf (unsigned char buf[], unsigned int buflen)
{
  static char msg[50];
  char *cursor = msg;
  unsigned int idx;

  if (buflen < 1 || buflen > 4)
    abort ();

  /* sprintf returns the number of characters written, which lets the
     cursor walk forward through MSG one piece at a time.  */
  cursor += sprintf (cursor, "0x");
  for (idx = 0; idx < buflen; idx++)
    cursor += sprintf (cursor, "%02X", buf[idx]);

  return msg;
}
/* Run the BUFLEN bytes at BUF through the conversion descriptor CD and
   store the converted bytes in OUT, which must have room for 12 bytes.

   Returns the number of bytes written to OUT on success, 0 if iconv
   failed with EINVAL (the input is ambiguous so far), or -1 if iconv
   failed with EILSEQ (the input is invalid).  Any other iconv failure,
   or a successful conversion that left input bytes unconsumed, prints a
   diagnostic to stderr and exits with status 1.  */
static int
try (iconv_t cd, unsigned char buf[], unsigned int buflen, unsigned char *out)
{
  const char *src = (const char *) buf;
  char *dst = (char *) out;
  size_t src_left = buflen;
  size_t dst_left = 12;
  size_t rc;

  /* A call with all-null buffers comes first, so that each attempt is
     made right after a descriptor reset (see the iconv specification).  */
  iconv (cd, NULL, NULL, NULL, NULL);

  rc = iconv (cd, (char **) &src, &src_left, &dst, &dst_left);
  if (rc != (size_t) -1)
    /* Second call with a null input and a real output buffer; its
       result replaces RC, so a failure here is handled like a failure
       of the conversion itself.  */
    rc = iconv (cd, NULL, NULL, &dst, &dst_left);

  if (rc == (size_t) -1)
    {
      /* Capture errno before fprintf gets a chance to change it.  */
      int err = errno;

      switch (err)
        {
        case EILSEQ:
          return -1;

        case EINVAL:
          return 0;

        default:
          fprintf (stderr, "%s: iconv error: ", hexbuf (buf, buflen));
          errno = err;
          perror ("");
          exit (1);
        }
    }

  if (src_left != 0)
    {
      /* iconv reported success but did not consume the whole input.  */
      fprintf (stderr, "%s: inbytes = %ld, outbytes = %ld\n",
               hexbuf (buf, buflen),
               (long) (buflen - src_left),
               (long) (12 - dst_left));
      exit (1);
    }

  return 12 - dst_left;
}
110 /* Returns the out[] buffer as a Unicode value, formatted as 0x%04X. */
111 static const char *
112 utf8_decode (const unsigned char *out, unsigned int outlen)
114 static char hexbuf[84];
115 char *p = hexbuf;
117 while (outlen > 0)
119 if (p > hexbuf)
120 *p++ = ' ';
122 if (out[0] < 0x80)
124 sprintf (p, "0x%04X", out[0]);
125 out += 1; outlen -= 1;
127 else if (out[0] >= 0xc0 && out[0] < 0xe0 && outlen >= 2)
129 sprintf (p, "0x%04X", ((out[0] & 0x1f) << 6) + (out[1] & 0x3f));
130 out += 2; outlen -= 2;
132 else if (out[0] >= 0xe0 && out[0] < 0xf0 && outlen >= 3)
134 sprintf (p, "0x%04X", ((out[0] & 0x0f) << 12)
135 + ((out[1] & 0x3f) << 6) + (out[2] & 0x3f));
136 out += 3; outlen -= 3;
138 else if (out[0] >= 0xf0 && out[0] < 0xf8 && outlen >= 4)
140 sprintf (p, "0x%04X", ((out[0] & 0x07) << 18)
141 + ((out[1] & 0x3f) << 12)
142 + ((out[2] & 0x3f) << 6) + (out[3] & 0x3f));
143 out += 4; outlen -= 4;
145 else if (out[0] >= 0xf8 && out[0] < 0xfc && outlen >= 5)
147 sprintf (p, "0x%04X", ((out[0] & 0x03) << 24)
148 + ((out[1] & 0x3f) << 18)
149 + ((out[2] & 0x3f) << 12)
150 + ((out[3] & 0x3f) << 6) + (out[4] & 0x3f));
151 out += 5; outlen -= 5;
153 else if (out[0] >= 0xfc && out[0] < 0xfe && outlen >= 6)
155 sprintf (p, "0x%04X", ((out[0] & 0x01) << 30)
156 + ((out[1] & 0x3f) << 24)
157 + ((out[2] & 0x3f) << 18)
158 + ((out[3] & 0x3f) << 12)
159 + ((out[4] & 0x3f) << 6) + (out[5] & 0x3f));
160 out += 6; outlen -= 6;
162 else
164 sprintf (p, "0x????");
165 out += 1; outlen -= 1;
168 if (bmp_only && strlen (p) > 6)
169 /* Ignore conversions outside Unicode plane 0. */
170 return NULL;
172 p += strlen (p);
175 return hexbuf;
179 main (int argc, char *argv[])
181 const char *charset;
182 iconv_t cd;
183 int search_depth;
185 if (argc != 2)
187 fprintf (stderr, "Usage: tst-table-from charset\n");
188 exit (1);
190 charset = argv[1];
192 cd = iconv_open ("UTF-8", charset);
193 if (cd == (iconv_t)(-1))
195 perror ("iconv_open");
196 exit (1);
199 /* When testing UTF-8 or GB18030, stop at 0x10000, otherwise the output
200 file gets too big. */
201 bmp_only = (strcmp (charset, "UTF-8") == 0
202 || strcmp (charset, "GB18030") == 0);
203 search_depth = (strcmp (charset, "UTF-8") == 0 ? 3 : 4);
206 unsigned char out[12];
207 unsigned char buf[4];
208 unsigned int i0, i1, i2, i3;
209 int result;
211 for (i0 = 0; i0 < 0x100; i0++)
213 buf[0] = i0;
214 result = try (cd, buf, 1, out);
215 if (result < 0)
218 else if (result > 0)
220 const char *unicode = utf8_decode (out, result);
221 if (unicode != NULL)
222 printf ("0x%02X\t%s\n", i0, unicode);
224 else
226 for (i1 = 0; i1 < 0x100; i1++)
228 buf[1] = i1;
229 result = try (cd, buf, 2, out);
230 if (result < 0)
233 else if (result > 0)
235 const char *unicode = utf8_decode (out, result);
236 if (unicode != NULL)
237 printf ("0x%02X%02X\t%s\n", i0, i1, unicode);
239 else
241 for (i2 = 0; i2 < 0x100; i2++)
243 buf[2] = i2;
244 result = try (cd, buf, 3, out);
245 if (result < 0)
248 else if (result > 0)
250 const char *unicode = utf8_decode (out, result);
251 if (unicode != NULL)
252 printf ("0x%02X%02X%02X\t%s\n",
253 i0, i1, i2, unicode);
255 else if (search_depth > 3)
257 for (i3 = 0; i3 < 0x100; i3++)
259 buf[3] = i3;
260 result = try (cd, buf, 4, out);
261 if (result < 0)
264 else if (result > 0)
266 const char *unicode =
267 utf8_decode (out, result);
268 if (unicode != NULL)
269 printf ("0x%02X%02X%02X%02X\t%s\n",
270 i0, i1, i2, i3, unicode);
272 else
274 fprintf (stderr,
275 "%s: incomplete byte sequence\n",
276 hexbuf (buf, 4));
277 exit (1);
288 if (iconv_close (cd) < 0)
290 perror ("iconv_close");
291 exit (1);
294 if (ferror (stdin) || fflush (stdout) || ferror (stdout))
296 fprintf (stderr, "I/O error\n");
297 exit (1);
300 return 0;