Fix a bug in transliteration.
[libiconv.git] / extras / iconv_string.c
blob9220d73f96ac7fccc468fe1f4105316e6b4d9d2a
1 /* Copyright (C) 1999-2001 Bruno Haible.
2 This file is not part of the GNU LIBICONV Library.
3 This file is put into the public domain. */
5 #include "iconv_string.h"
6 #include <iconv.h>
7 #include <errno.h>
8 #include <stdlib.h>
9 #include <string.h>
#define tmpbufsize 4096

/* Closes CD without losing the errno value of the failure that led here.
   Always returns -1, so that error paths can simply write
   "return close_keep_errno(cd);". */
static int close_keep_errno (iconv_t cd)
{
  int saved_errno = errno;
  iconv_close(cd);
  errno = saved_errno;
  return -1;
}

/* Converts the bytes in the range [start,end) from encoding FROMCODE to
   encoding TOCODE.

   tocode, fromcode
           Encoding names, handed to iconv_open(). When iconv_open() rejects
           them with EINVAL, FROMCODE may also be one of the pseudo encodings
           "autodetect_utf8", "autodetect_jp" or "autodetect_kr"; a fixed list
           of candidate encodings is then tried in order, and the first one
           that does not fail with EILSEQ determines the result.
   start, end
           Delimit the input; END points just past the last input byte.
   resultp If NULL, nothing is converted for real; only the length is
           computed. Otherwise, if *resultp is NULL a buffer is obtained from
           malloc(), else *resultp is resized with realloc(). On success
           *resultp is the buffer that holds the converted bytes. It is not
           NUL terminated.
   lengthp If non-NULL, *lengthp receives the number of converted bytes.

   Returns 0 on success. Returns -1 with errno set on failure; in that case
   *resultp still has the value it had upon entry. An incomplete multibyte
   sequence at the very end of the input (iconv() failing with EINVAL) is
   not an error: conversion stops there and everything converted before it
   is returned. */
int iconv_string (const char* tocode, const char* fromcode,
                  const char* start, const char* end,
                  char** resultp, size_t* lengthp)
{
  iconv_t cd = iconv_open(tocode,fromcode);
  size_t length;
  size_t alloc_size;
  char* result;
  if (cd == (iconv_t)(-1)) {
    if (errno != EINVAL)
      return -1;
    /* Unsupported fromcode or tocode. Check whether the caller requested
       autodetection. */
    if (!strcmp(fromcode,"autodetect_utf8")) {
      int ret;
      /* Try UTF-8 first. There are very few ISO-8859-1 inputs that would
         be valid UTF-8, but many UTF-8 inputs are valid ISO-8859-1. */
      ret = iconv_string(tocode,"UTF-8",start,end,resultp,lengthp);
      if (!(ret < 0 && errno == EILSEQ))
        return ret;
      ret = iconv_string(tocode,"ISO-8859-1",start,end,resultp,lengthp);
      return ret;
    }
    if (!strcmp(fromcode,"autodetect_jp")) {
      int ret;
      /* Try 7-bit encoding first. If the input contains bytes >= 0x80,
         it will fail. */
      ret = iconv_string(tocode,"ISO-2022-JP-2",start,end,resultp,lengthp);
      if (!(ret < 0 && errno == EILSEQ))
        return ret;
      /* Try EUC-JP next. Short SHIFT_JIS inputs may come out wrong. This
         is unavoidable. People will condemn SHIFT_JIS.
         If we tried SHIFT_JIS first, then some short EUC-JP inputs would
         come out wrong, and people would condemn EUC-JP and Unix, which
         would not be good. */
      ret = iconv_string(tocode,"EUC-JP",start,end,resultp,lengthp);
      if (!(ret < 0 && errno == EILSEQ))
        return ret;
      /* Finally try SHIFT_JIS. */
      ret = iconv_string(tocode,"SHIFT_JIS",start,end,resultp,lengthp);
      return ret;
    }
    if (!strcmp(fromcode,"autodetect_kr")) {
      int ret;
      /* Try 7-bit encoding first. If the input contains bytes >= 0x80,
         it will fail. */
      ret = iconv_string(tocode,"ISO-2022-KR",start,end,resultp,lengthp);
      if (!(ret < 0 && errno == EILSEQ))
        return ret;
      /* Finally try EUC-KR. */
      ret = iconv_string(tocode,"EUC-KR",start,end,resultp,lengthp);
      return ret;
    }
    errno = EINVAL;
    return -1;
  }
  /* Determine the length we need, by converting into a scratch buffer that
     is reused for every chunk. */
  {
    size_t count = 0;
    char tmpbuf[tmpbufsize];
    const char* inptr = start;
    size_t insize = end-start;
    while (insize > 0) {
      char* outptr = tmpbuf;
      size_t outsize = tmpbufsize;
      /* The second parameter of iconv() is declared 'char **' on some
         systems and 'const char **' on others; 'void *' converts
         implicitly to both. */
      size_t res = iconv(cd,(void*)&inptr,&insize,&outptr,&outsize);
      /* Whatever the outcome, the bytes produced by this call are part of
         the result. */
      count += outptr-tmpbuf;
      if (res == (size_t)(-1)) {
        /* Incomplete multibyte sequence at the end of the input. */
        if (errno == EINVAL)
          break;
        /* E2BIG only means that tmpbuf is full: go on with the next chunk.
           Without any progress, however, we would loop forever. */
        if (errno != E2BIG || outptr == tmpbuf)
          return close_keep_errno(cd);
      }
    }
    /* Account for the bytes needed to get back to the initial shift
       state. */
    {
      char* outptr = tmpbuf;
      size_t outsize = tmpbufsize;
      size_t res = iconv(cd,NULL,NULL,&outptr,&outsize);
      if (res == (size_t)(-1))
        return close_keep_errno(cd);
      count += outptr-tmpbuf;
    }
    length = count;
  }
  if (lengthp != NULL)
    *lengthp = length;
  if (resultp == NULL) {
    /* The caller is only interested in the length. */
    iconv_close(cd);
    return 0;
  }
  /* Never request zero bytes: what malloc(0) and realloc(p,0) do is
     implementation-defined. */
  alloc_size = (length > 0 ? length : 1);
  result = (*resultp == NULL ? malloc(alloc_size)
                             : realloc(*resultp,alloc_size));
  if (result == NULL) {
    /* *resultp is left alone: after a failed realloc() the old buffer is
       still valid and still belongs to the caller. */
    iconv_close(cd);
    errno = ENOMEM;
    return -1;
  }
  *resultp = result;
  if (length == 0) {
    iconv_close(cd);
    return 0;
  }
  iconv(cd,NULL,NULL,NULL,NULL); /* return to the initial state */
  /* Do the conversion for real. */
  {
    const char* inptr = start;
    size_t insize = end-start;
    char* outptr = result;
    size_t outsize = length;
    while (insize > 0) {
      size_t res = iconv(cd,(void*)&inptr,&insize,&outptr,&outsize);
      if (res == (size_t)(-1)) {
        if (errno == EINVAL)
          break;
        /* The buffer has exactly the size computed above, therefore even
           E2BIG is a real failure here. */
        return close_keep_errno(cd);
      }
    }
    {
      size_t res = iconv(cd,NULL,NULL,&outptr,&outsize);
      if (res == (size_t)(-1))
        return close_keep_errno(cd);
    }
    /* Both passes must have produced the same number of bytes. */
    if (outsize != 0) abort();
  }
  iconv_close(cd);
  return 0;
}