Remove all .cvsignore files. Not needed any more since the switch to git.
[libiconv.git] / tests / table-from.c
blob03bf0d9e9e90c5967847d8d3917751719ff84b7e
1 /* Copyright (C) 2000-2002, 2004-2005 Free Software Foundation, Inc.
2 This file is part of the GNU LIBICONV Library.
4 The GNU LIBICONV Library is free software; you can redistribute it
5 and/or modify it under the terms of the GNU Library General Public
6 License as published by the Free Software Foundation; either version 2
7 of the License, or (at your option) any later version.
9 The GNU LIBICONV Library is distributed in the hope that it will be
10 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
14 You should have received a copy of the GNU Library General Public
15 License along with the GNU LIBICONV Library; see the file COPYING.LIB.
16 If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
17 Fifth Floor, Boston, MA 02110-1301, USA. */
19 /* Create a table from CHARSET to Unicode. */
21 #include "config.h"
23 #include <stddef.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <iconv.h>
28 #include <errno.h>
30 #include "binary-io.h"
32 /* If nonzero, ignore conversions outside Unicode plane 0. */
33 static int bmp_only;
/* Formats the first BUFLEN bytes of BUF as "0x" followed by two uppercase
   hexadecimal digits per byte, e.g. { 0x0A, 0xFF } -> "0x0AFF".
   BUFLEN must be between 1 and 4; any other value aborts the program.
   The result is stored in a static buffer that the next call overwrites. */
static const char* hexbuf (unsigned char buf[], unsigned int buflen)
{
  static char msg[50];
  char* cursor = msg;
  unsigned int k;

  if (buflen < 1 || buflen > 4)
    abort();

  cursor += sprintf(cursor,"0x");
  for (k = 0; k < buflen; k++)
    cursor += sprintf(cursor,"%02X",buf[k]);

  return msg;
}
48 static int try (iconv_t cd, unsigned char buf[], unsigned int buflen, unsigned int* out)
50 const char* inbuf = (const char*) buf;
51 size_t inbytesleft = buflen;
52 char* outbuf = (char*) out;
53 size_t outbytesleft = 3*sizeof(unsigned int);
54 size_t result;
55 iconv(cd,NULL,NULL,NULL,NULL);
56 result = iconv(cd,(ICONV_CONST char**)&inbuf,&inbytesleft,&outbuf,&outbytesleft);
57 if (result != (size_t)(-1))
58 result = iconv(cd,NULL,NULL,&outbuf,&outbytesleft);
59 if (result == (size_t)(-1)) {
60 if (errno == EILSEQ) {
61 return -1;
62 } else if (errno == EINVAL) {
63 return 0;
64 } else {
65 int saved_errno = errno;
66 fprintf(stderr,"%s: iconv error: ",hexbuf(buf,buflen));
67 errno = saved_errno;
68 perror("");
69 exit(1);
71 } else if (result > 0) /* ignore conversions with transliteration */ {
72 return -1;
73 } else {
74 if (inbytesleft != 0) {
75 fprintf(stderr,"%s: inbytes = %ld, outbytes = %ld\n",hexbuf(buf,buflen),(long)(buflen-inbytesleft),(long)(3*sizeof(unsigned int)-outbytesleft));
76 exit(1);
78 return (3*sizeof(unsigned int)-outbytesleft)/sizeof(unsigned int);
82 /* Returns the out[] buffer as a Unicode value, formatted as 0x%04X. */
83 static const char* ucs4_decode (const unsigned int* out, unsigned int outlen)
85 static char hexbuf[21];
86 char* p = hexbuf;
87 while (outlen > 0) {
88 if (p > hexbuf)
89 *p++ = ' ';
90 sprintf (p, "0x%04X", out[0]);
91 out += 1; outlen -= 1;
92 if (bmp_only && strlen(p) > 6)
93 return NULL;
94 p += strlen(p);
96 return hexbuf;
99 int main (int argc, char* argv[])
101 const char* charset;
102 iconv_t cd;
103 int search_depth;
105 if (argc != 2) {
106 fprintf(stderr,"Usage: table-from charset\n");
107 exit(1);
109 charset = argv[1];
111 #if O_BINARY
112 SET_BINARY(fileno(stdout));
113 #endif
115 cd = iconv_open("UCS-4-INTERNAL",charset);
116 if (cd == (iconv_t)(-1)) {
117 perror("iconv_open");
118 exit(1);
121 /* When testing UTF-8, stop at 0x10000, otherwise the output file gets too
122 big. */
123 bmp_only = (strcmp(charset,"UTF-8") == 0);
124 search_depth = (strcmp(charset,"UTF-8") == 0 ? 3 : 4);
127 unsigned int out[3];
128 unsigned char buf[4];
129 unsigned int i0, i1, i2, i3;
130 int result;
131 for (i0 = 0; i0 < 0x100; i0++) {
132 buf[0] = i0;
133 result = try(cd,buf,1,out);
134 if (result < 0) {
135 } else if (result > 0) {
136 const char* unicode = ucs4_decode(out,result);
137 if (unicode != NULL)
138 printf("0x%02X\t%s\n",i0,unicode);
139 } else {
140 for (i1 = 0; i1 < 0x100; i1++) {
141 buf[1] = i1;
142 result = try(cd,buf,2,out);
143 if (result < 0) {
144 } else if (result > 0) {
145 const char* unicode = ucs4_decode(out,result);
146 if (unicode != NULL)
147 printf("0x%02X%02X\t%s\n",i0,i1,unicode);
148 } else {
149 for (i2 = 0; i2 < 0x100; i2++) {
150 buf[2] = i2;
151 result = try(cd,buf,3,out);
152 if (result < 0) {
153 } else if (result > 0) {
154 const char* unicode = ucs4_decode(out,result);
155 if (unicode != NULL)
156 printf("0x%02X%02X%02X\t%s\n",i0,i1,i2,unicode);
157 } else if (search_depth > 3) {
158 for (i3 = 0; i3 < 0x100; i3++) {
159 buf[3] = i3;
160 result = try(cd,buf,4,out);
161 if (result < 0) {
162 } else if (result > 0) {
163 const char* unicode = ucs4_decode(out,result);
164 if (unicode != NULL)
165 printf("0x%02X%02X%02X%02X\t%s\n",i0,i1,i2,i3,unicode);
166 } else {
167 fprintf(stderr,"%s: incomplete byte sequence\n",hexbuf(buf,4));
168 exit(1);
179 if (iconv_close(cd) < 0) {
180 perror("iconv_close");
181 exit(1);
184 if (ferror(stdin) || ferror(stdout) || fclose(stdout)) {
185 fprintf(stderr,"I/O error\n");
186 exit(1);
189 exit(0);