2 * Copyright 2003-2008, Haiku, Inc. All Rights Reserved.
3 * Distributed under the terms of the MIT License.
10 #include <CharacterSet.h>
11 #include <CharacterSetRoster.h>
// Debug switch: the define below is commented out, so DEBUG_CONV is not
// defined by this file as shown.
19 //#define DEBUG_CONV 1
// Two alternative DEBPRINT definitions follow: the first forwards its
// (parenthesised) argument list to printf, the second expands to an empty
// statement. The preprocessor conditional that selects between them is not
// visible in this excerpt (presumably keyed on DEBUG_CONV -- confirm).
22 # define DEBPRINT(ARGS) printf ARGS;
24 # define DEBPRINT(ARGS) ;
// Make the names in namespace BPrivate usable unqualified below.
27 using namespace BPrivate
;
// Forward declaration of iconvctl; no call to it appears in the visible part
// of this file.
29 int iconvctl(iconv_t icd
, int request
, void* argument
);
// Probes the input at *inputBuffer with a growing byte count to find out how
// long the sequence rejected by iconv is.
// NOTE(review): the end of the parameter list, the declarations of `left`
// and `outputBuffer`, and everything after the loop are not visible in this
// excerpt; the comments below describe only the statements shown.
33 discard_invalid_input_character(iconv_t
* conversion
, char** inputBuffer
,
41 // skip the invalid input character only
// The loop header increments `left` after each pass and stops once it
// exceeds *inputLeft.
43 for (; left
<= *inputLeft
; left
++) {
44 // reset internal state
45 iconv(*conversion
, NULL
, NULL
, NULL
, NULL
);
// Work on a copy of the caller's input pointer so this trial conversion does
// not advance *inputBuffer itself.
47 char* buffer
= *inputBuffer
;
48 char* output
= outputBuffer
;
// The trial conversion is given room for a single output byte only.
49 size_t outputLeft
= 1;
50 size_t size
= iconv(*conversion
, &buffer
, &left
,
51 &output
, &outputLeft
);
// (size_t)-1 is iconv's failure value; anything else means the bytes
// converted, which is unexpected for input already reported as invalid.
53 if (size
!= (size_t)-1) {
54 // should not reach here
58 if (errno
== EINVAL
) {
59 // too few input bytes provided,
60 // increase input buffer size and try again
64 if (errno
== EILSEQ
) {
65 // minimal size of input buffer found
69 // should not reach here
// Converts *srcLen bytes at `src` from encoding `from` to encoding `to` with
// iconv, writing the result to `dst`, which has room for *dstLen bytes.
// On the normal exit path *srcLen and *dstLen are reduced by the number of
// input bytes left unconsumed and output bytes left unused, so they end up
// holding the amounts actually consumed and produced.
// Input that iconv rejects with EILSEQ is skipped and `substitute` is
// converted into the target encoding in its place (best effort).
// NOTE(review): the end of the parameter list, the return statements, the
// loop and switch headers and several braces are not visible in this
// excerpt; the comments describe only the statements shown.
78 convert_encoding(const char* from
, const char* to
, const char* src
,
79 int32
* srcLen
, char* dst
, int32
* dstLen
, int32
* state
,
88 // TODO: this doesn't work, as the state is reset every time!
// A fresh conversion descriptor is opened on every call.
89 iconv_t conversion
= iconv_open(to
, from
);
// iconv_open reports failure with (iconv_t)-1.
90 if (conversion
== (iconv_t
)-1) {
91 DEBPRINT(("iconv_open failed\n"));
// Free space remaining in `dst`; iconv decrements it as it writes.
95 size_t outputLeft
= *dstLen
;
// No state supplied, or state still zero: call iconv with a NULL input so it
// can write any initial sequence the target encoding needs to `dst`.
97 if (state
== NULL
|| *state
== 0) {
101 iconv(conversion
, NULL
, NULL
, &dst
, &outputLeft
);
// iconv takes a non-const char**; the const is cast away from the address of
// the local `src` pointer, which iconv then advances as input is consumed.
104 char** inputBuffer
= const_cast<char**>(&src
);
105 size_t inputLeft
= *srcLen
;
107 size_t nonReversibleConversions
= iconv(conversion
, inputBuffer
,
108 &inputLeft
, &dst
, &outputLeft
);
// (size_t)-1 signals an error; errno says which one.
109 if (nonReversibleConversions
== (size_t)-1) {
110 if (errno
== E2BIG
) {
111 // Not enough room in the output buffer for the next converted character
112 // This is not a "real" error, we just quit out.
117 case EILSEQ
: // unable to generate a corresponding character
// Step over the offending input bytes before emitting the substitute.
119 discard_invalid_input_character(&conversion
, inputBuffer
,
122 // prepare to convert the substitute character to target encoding
123 char original
= substitute
;
// iconv needs a modifiable pointer to the input, hence the extra `copy`.
125 char* copy
= &original
;
127 // Perform the conversion
128 // We ignore any errors during this as part of robustness/best-effort
129 // We use ISO-8859-1 as a source because it is a single byte encoding
130 // It also overlaps UTF-8 for the lower 128 characters. It is also
131 // likely to have a mapping to almost any target encoding.
132 iconv_t iso8859_1to
= iconv_open(to
,"ISO-8859-1");
133 if (iso8859_1to
!= (iconv_t
)-1) {
// Reset the temporary descriptor, then convert the single substitute byte
// straight into the caller's output buffer.
134 iconv(iso8859_1to
, 0, 0, 0, 0);
// The input argument is the address of `copy` (the char** iconv expects).
135 iconv(iso8859_1to
, &copy
, &len
, &dst
, &outputLeft
);
136 iconv_close(iso8859_1to
);
141 case EINVAL
: // incomplete multibyte sequence at the end of the input
142 // TODO inputLeft bytes from inputBuffer should
143 // be stored in state variable, so that conversion
144 // can continue when the caller provides the missing
145 // bytes with the next call of this method
147 // we just eat bad bytes, as part of robustness/best-effort
153 // unknown error, completely bail
// Capture errno before iconv_close, which could overwrite it.
154 status_t status
= errno
;
155 iconv_close(conversion
);
// Keep converting while there is both input left and output space left.
159 } while (inputLeft
> 0 && outputLeft
> 0);
// Report back how much input was consumed and how much output was produced.
161 *srcLen
-= inputLeft
;
162 *dstLen
-= outputLeft
;
163 iconv_close(conversion
);
// Converts `src` to UTF-8 by looking up a BCharacterSet through
// BCharacterSetRoster::GetCharacterSetByConversionID and handing its name to
// convert_encoding as the source encoding; the remaining arguments are
// forwarded unchanged and convert_encoding's result is returned.
// NOTE(review): the argument of the lookup (presumably srcEncoding) and any
// lines between the lookup and the fprintf calls are not visible in this
// excerpt.
170 convert_to_utf8(uint32 srcEncoding
, const char* src
, int32
* srcLen
,
171 char* dst
, int32
* dstLen
, int32
* state
, char substitute
)
173 const BCharacterSet
* charset
= BCharacterSetRoster::GetCharacterSetByConversionID(
// Diagnostic dump to stderr: the character set name followed by the raw
// source bytes in quotes.
// NOTE(review): `charset` is dereferenced below -- confirm the elided lines
// check it for NULL and guard this dump with the debug switch.
179 fprintf(stderr
, "convert_to_utf8(%s) : \"", charset
->GetName());
180 for (int i
= 0 ; i
< *srcLen
; i
++) {
181 fprintf(stderr
, "%c", src
[i
]);
183 fprintf(stderr
, "\"\n");
// Delegate the actual work: from the character set's name to "UTF-8".
186 return convert_encoding(charset
->GetName(), "UTF-8", src
, srcLen
,
187 dst
, dstLen
, state
, substitute
);
// Converts UTF-8 `src` to another encoding by looking up a BCharacterSet
// through BCharacterSetRoster::GetCharacterSetByConversionID and handing its
// name to convert_encoding as the target encoding; the remaining arguments
// are forwarded unchanged and convert_encoding's result is returned.
// NOTE(review): the argument of the lookup (presumably dstEncoding) and any
// lines between the lookup and the fprintf calls are not visible in this
// excerpt.
192 convert_from_utf8(uint32 dstEncoding
, const char* src
, int32
* srcLen
,
193 char* dst
, int32
* dstLen
, int32
* state
, char substitute
)
195 const BCharacterSet
* charset
= BCharacterSetRoster::GetCharacterSetByConversionID(
// Diagnostic dump to stderr: the character set name followed by the raw
// source bytes in quotes.
// NOTE(review): `charset` is dereferenced below -- confirm the elided lines
// check it for NULL and guard this dump with the debug switch.
201 fprintf(stderr
, "convert_from_utf8(%s) : \"", charset
->GetName());
202 for (int i
= 0 ; i
< *srcLen
; i
++) {
203 fprintf(stderr
, "%c", src
[i
]);
205 fprintf(stderr
, "\"\n");
// Delegate the actual work: from "UTF-8" to the character set's name.
208 return convert_encoding("UTF-8", charset
->GetName(), src
, srcLen
,
209 dst
, dstLen
, state
, substitute
);