/* Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.

   As a special exception, if you link the code in this file with
   files compiled with a GNU compiler to produce an executable,
   that does not cause the resulting executable to be covered by
   the GNU Lesser General Public License.  This exception does not
   however invalidate any other reasons why the executable file
   might be covered by the GNU Lesser General Public License.
   This exception applies to code released by its copyright holders
   in files containing the exception.  */
38 # include <langinfo.h>
39 # include <locale/localeinfo.h>
40 # include <wcsmbs/wcsmbsload.h>
41 # include <iconv/gconv_int.h>
42 # include <shlib-compat.h>
45 /* Prototypes of libio's codecvt functions. */
46 static enum __codecvt_result
do_out (struct _IO_codecvt
*codecvt
,
48 const wchar_t *from_start
,
49 const wchar_t *from_end
,
50 const wchar_t **from_stop
, char *to_start
,
51 char *to_end
, char **to_stop
);
52 static enum __codecvt_result
do_unshift (struct _IO_codecvt
*codecvt
,
53 __mbstate_t
*statep
, char *to_start
,
54 char *to_end
, char **to_stop
);
55 static enum __codecvt_result
do_in (struct _IO_codecvt
*codecvt
,
57 const char *from_start
,
59 const char **from_stop
, wchar_t *to_start
,
60 wchar_t *to_end
, wchar_t **to_stop
);
61 static int do_encoding (struct _IO_codecvt
*codecvt
);
62 static int do_length (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
63 const char *from_start
,
64 const char *from_end
, _IO_size_t max
);
65 static int do_max_length (struct _IO_codecvt
*codecvt
);
66 static int do_always_noconv (struct _IO_codecvt
*codecvt
);
69 /* The functions used in `codecvt' for libio are always the same. */
70 struct _IO_codecvt __libio_codecvt
=
72 .__codecvt_destr
= NULL
, /* Destructor, never used. */
73 .__codecvt_do_out
= do_out
,
74 .__codecvt_do_unshift
= do_unshift
,
75 .__codecvt_do_in
= do_in
,
76 .__codecvt_do_encoding
= do_encoding
,
77 .__codecvt_do_always_noconv
= do_always_noconv
,
78 .__codecvt_do_length
= do_length
,
79 .__codecvt_do_max_length
= do_max_length
84 struct __gconv_trans_data __libio_translit attribute_hidden
=
90 /* Return orientation of stream. If mode is nonzero try to change
91 * the orientation first.
97 _IO_fwide(_IO_FILE
*fp
, int mode
)
99 /* Normalize the value. */
100 mode
= mode
< 0 ? -1 : (mode
== 0 ? 0 : 1);
103 /* The caller simply wants to know about the current orientation. */
107 #if defined SHARED && defined _LIBC \
108 && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1)
109 if (__builtin_expect (&_IO_stdin_used
== NULL
, 0)
110 && (fp
== _IO_stdin
|| fp
== _IO_stdout
|| fp
== _IO_stderr
))
111 /* This is for a stream in the glibc 2.0 format. */
115 if (fp
->_mode
!= 0) {
116 /* The orientation already has been determined. */
121 struct _IO_codecvt
*cc
= fp
->_codecvt
= &fp
->_wide_data
->_codecvt
;
123 fp
->_wide_data
->_IO_read_ptr
= fp
->_wide_data
->_IO_read_end
;
124 fp
->_wide_data
->_IO_write_ptr
= fp
->_wide_data
->_IO_write_base
;
126 /* Get the character conversion functions based on the currently
127 * selected locale for LC_CTYPE.
131 struct gconv_fcts fcts
;
133 /* Clear the state. We start all over again. */
134 memset (&fp
->_wide_data
->_IO_state
, '\0', sizeof (__mbstate_t
));
135 memset (&fp
->_wide_data
->_IO_last_state
, '\0', sizeof (__mbstate_t
));
137 __wcsmbs_clone_conv (&fcts
);
138 assert (fcts
.towc_nsteps
== 1);
139 assert (fcts
.tomb_nsteps
== 1);
141 /* The functions are always the same. */
142 *cc
= __libio_codecvt
;
144 cc
->__cd_in
.__cd
.__nsteps
= fcts
.towc_nsteps
;
145 cc
->__cd_in
.__cd
.__steps
= fcts
.towc
;
147 cc
->__cd_in
.__cd
.__data
[0].__invocation_counter
= 0;
148 cc
->__cd_in
.__cd
.__data
[0].__internal_use
= 1;
149 cc
->__cd_in
.__cd
.__data
[0].__flags
= __GCONV_IS_LAST
;
150 cc
->__cd_in
.__cd
.__data
[0].__statep
= &fp
->_wide_data
->_IO_state
;
152 /* XXX For now no transliteration. */
153 cc
->__cd_in
.__cd
.__data
[0].__trans
= NULL
;
155 cc
->__cd_out
.__cd
.__nsteps
= fcts
.tomb_nsteps
;
156 cc
->__cd_out
.__cd
.__steps
= fcts
.tomb
;
158 cc
->__cd_out
.__cd
.__data
[0].__invocation_counter
= 0;
159 cc
->__cd_out
.__cd
.__data
[0].__internal_use
= 1;
160 cc
->__cd_out
.__cd
.__data
[0].__flags
= __GCONV_IS_LAST
;
161 cc
->__cd_out
.__cd
.__data
[0].__statep
= &fp
->_wide_data
->_IO_state
;
163 /* And now the transliteration. */
164 cc
->__cd_out
.__cd
.__data
[0].__trans
= &__libio_translit
;
167 # ifdef _GLIBCPP_USE_WCHAR_T
169 /* Determine internal and external character sets.
171 XXX For now we make our life easy: we assume a fixed internal
172 encoding (as most sane systems have; hi HP/UX!). If somebody
173 cares about systems which changing internal charsets they
174 should come up with a solution for the determination of the
175 currently used internal character set. */
176 const char *internal_ccs
= _G_INTERNAL_CCS
;
177 const char *external_ccs
= NULL
;
179 # ifdef HAVE_NL_LANGINFO
180 external_ccs
= nl_langinfo (CODESET
);
182 if (external_ccs
== NULL
)
183 external_ccs
= "ISO-8859-1";
185 cc
->__cd_in
= iconv_open (internal_ccs
, external_ccs
);
186 if (cc
->__cd_in
!= (iconv_t
) -1)
187 cc
->__cd_out
= iconv_open (external_ccs
, internal_ccs
);
189 if (cc
->__cd_in
== (iconv_t
) -1 || cc
->__cd_out
== (iconv_t
) -1)
191 if (cc
->__cd_in
!= (iconv_t
) -1)
192 iconv_close (cc
->__cd_in
);
198 # error "somehow determine this from LC_CTYPE"
202 /* From now on use the wide character callback functions. */
203 ((struct _IO_FILE_plus
*) fp
)->vtable
= fp
->_wide_data
->_wide_vtable
;
205 /* One last twist: we get the current stream position. The wide
206 char streams have much more problems with not knowing the
207 current position and so we should disable the optimization
208 which allows the functions without knowing the position. */
209 fp
->_offset
= _IO_SYSSEEK (fp
, 0, _IO_seek_cur
);
211 /* Set the mode now. */
217 static enum __codecvt_result
218 do_out (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
219 const wchar_t *from_start
, const wchar_t *from_end
,
220 const wchar_t **from_stop
, char *to_start
, char *to_end
,
223 enum __codecvt_result result
;
226 struct __gconv_step
*gs
= codecvt
->__cd_out
.__cd
.__steps
;
229 const unsigned char *from_start_copy
= (unsigned char *) from_start
;
231 codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
= to_start
;
232 codecvt
->__cd_out
.__cd
.__data
[0].__outbufend
= to_end
;
233 codecvt
->__cd_out
.__cd
.__data
[0].__statep
= statep
;
235 status
= DL_CALL_FCT (gs
->__fct
,
236 (gs
, codecvt
->__cd_out
.__cd
.__data
, &from_start_copy
,
237 (const unsigned char *) from_end
, NULL
,
240 *from_stop
= (wchar_t *) from_start_copy
;
241 *to_stop
= codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
;
246 case __GCONV_EMPTY_INPUT
:
247 result
= __codecvt_ok
;
250 case __GCONV_FULL_OUTPUT
:
251 case __GCONV_INCOMPLETE_INPUT
:
252 result
= __codecvt_partial
;
256 result
= __codecvt_error
;
260 # ifdef _GLIBCPP_USE_WCHAR_T
262 const char *from_start_copy
= (const char *) from_start
;
263 size_t from_len
= from_end
- from_start
;
264 char *to_start_copy
= to_start
;
265 size_t to_len
= to_end
- to_start
;
266 res
= iconv (codecvt
->__cd_out
, &from_start_copy
, &from_len
,
267 &to_start_copy
, &to_len
);
269 if (res
== 0 || from_len
== 0)
270 result
= __codecvt_ok
;
271 else if (to_len
< codecvt
->__codecvt_do_max_length (codecvt
))
272 result
= __codecvt_partial
;
274 result
= __codecvt_error
;
277 /* Decide what to do. */
278 result
= __codecvt_error
;
286 static enum __codecvt_result
287 do_unshift (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
288 char *to_start
, char *to_end
, char **to_stop
)
290 enum __codecvt_result result
;
293 struct __gconv_step
*gs
= codecvt
->__cd_out
.__cd
.__steps
;
297 codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
= to_start
;
298 codecvt
->__cd_out
.__cd
.__data
[0].__outbufend
= to_end
;
299 codecvt
->__cd_out
.__cd
.__data
[0].__statep
= statep
;
301 status
= DL_CALL_FCT (gs
->__fct
,
302 (gs
, codecvt
->__cd_out
.__cd
.__data
, NULL
, NULL
,
303 NULL
, &dummy
, 1, 0));
305 *to_stop
= codecvt
->__cd_out
.__cd
.__data
[0].__outbuf
;
310 case __GCONV_EMPTY_INPUT
:
311 result
= __codecvt_ok
;
314 case __GCONV_FULL_OUTPUT
:
315 case __GCONV_INCOMPLETE_INPUT
:
316 result
= __codecvt_partial
;
320 result
= __codecvt_error
;
324 # ifdef _GLIBCPP_USE_WCHAR_T
326 char *to_start_copy
= (char *) to_start
;
327 size_t to_len
= to_end
- to_start
;
329 res
= iconv (codecvt
->__cd_out
, NULL
, NULL
, &to_start_copy
, &to_len
);
332 result
= __codecvt_ok
;
333 else if (to_len
< codecvt
->__codecvt_do_max_length (codecvt
))
334 result
= __codecvt_partial
;
336 result
= __codecvt_error
;
338 /* Decide what to do. */
339 result
= __codecvt_error
;
347 static enum __codecvt_result
348 do_in (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
349 const char *from_start
, const char *from_end
, const char **from_stop
,
350 wchar_t *to_start
, wchar_t *to_end
, wchar_t **to_stop
)
352 enum __codecvt_result result
;
355 struct __gconv_step
*gs
= codecvt
->__cd_in
.__cd
.__steps
;
358 const unsigned char *from_start_copy
= (unsigned char *) from_start
;
360 codecvt
->__cd_in
.__cd
.__data
[0].__outbuf
= (char *) to_start
;
361 codecvt
->__cd_in
.__cd
.__data
[0].__outbufend
= (char *) to_end
;
362 codecvt
->__cd_in
.__cd
.__data
[0].__statep
= statep
;
364 status
= DL_CALL_FCT (gs
->__fct
,
365 (gs
, codecvt
->__cd_in
.__cd
.__data
, &from_start_copy
,
366 from_end
, NULL
, &dummy
, 0, 0));
368 *from_stop
= from_start_copy
;
369 *to_stop
= (wchar_t *) codecvt
->__cd_in
.__cd
.__data
[0].__outbuf
;
374 case __GCONV_EMPTY_INPUT
:
375 result
= __codecvt_ok
;
378 case __GCONV_FULL_OUTPUT
:
379 case __GCONV_INCOMPLETE_INPUT
:
380 result
= __codecvt_partial
;
384 result
= __codecvt_error
;
388 # ifdef _GLIBCPP_USE_WCHAR_T
390 const char *from_start_copy
= (const char *) from_start
;
391 size_t from_len
= from_end
- from_start
;
392 char *to_start_copy
= (char *) from_start
;
393 size_t to_len
= to_end
- to_start
;
395 res
= iconv (codecvt
->__cd_in
, &from_start_copy
, &from_len
,
396 &to_start_copy
, &to_len
);
399 result
= __codecvt_ok
;
400 else if (to_len
== 0)
401 result
= __codecvt_partial
;
402 else if (from_len
< codecvt
->__codecvt_do_max_length (codecvt
))
403 result
= __codecvt_partial
;
405 result
= __codecvt_error
;
407 /* Decide what to do. */
408 result
= __codecvt_error
;
/* Describe the external encoding of the input conversion:
   -1 if it is stateful, 0 if the number of bytes per wide character
   varies, otherwise that constant number of bytes.

   NOTE(review): the `return -1' / `return 0' statements and the
   conditionals were re-created from the surviving comments; confirm
   against upstream.  */
static int
do_encoding (struct _IO_codecvt *codecvt)
{
#ifdef _LIBC
  /* See whether the encoding is stateful.  */
  if (codecvt->__cd_in.__cd.__steps[0].__stateful)
    return -1;

  /* Fortunately not.  Now determine the input bytes for the conversion
     necessary for each wide character.  */
  if (codecvt->__cd_in.__cd.__steps[0].__min_needed_from
      != codecvt->__cd_in.__cd.__steps[0].__max_needed_from)
    /* Not a constant value.  */
    return 0;

  return codecvt->__cd_in.__cd.__steps[0].__min_needed_from;
#else
  /* Worst case scenario.  */
  return -1;
#endif
}
439 do_always_noconv (struct _IO_codecvt
*codecvt
)
446 do_length (struct _IO_codecvt
*codecvt
, __mbstate_t
*statep
,
447 const char *from_start
, const char *from_end
, _IO_size_t max
)
451 const unsigned char *cp
= (const unsigned char *) from_start
;
453 struct __gconv_step
*gs
= codecvt
->__cd_in
.__cd
.__steps
;
457 codecvt
->__cd_in
.__cd
.__data
[0].__outbuf
= (char *) to_buf
;
458 codecvt
->__cd_in
.__cd
.__data
[0].__outbufend
= (char *) &to_buf
[max
];
459 codecvt
->__cd_in
.__cd
.__data
[0].__statep
= statep
;
461 status
= DL_CALL_FCT (gs
->__fct
,
462 (gs
, codecvt
->__cd_in
.__cd
.__data
, &cp
, from_end
,
463 NULL
, &dummy
, 0, 0));
465 result
= cp
- (const unsigned char *) from_start
;
467 # ifdef _GLIBCPP_USE_WCHAR_T
468 const char *from_start_copy
= (const char *) from_start
;
469 size_t from_len
= from_end
- from_start
;
472 char *to_start
= (char *) to_buf
;
474 res
= iconv (codecvt
->__cd_in
, &from_start_copy
, &from_len
,
477 result
= from_start_copy
- (char *) from_start
;
479 /* Decide what to do. */
489 do_max_length (struct _IO_codecvt
*codecvt
)
492 return codecvt
->__cd_in
.__cd
.__steps
[0].__max_needed_from
;