1 /* Text conversion from one charset to another.
3 Copyright (C) 2001 Walery Studennikov <despair@sama.ru>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * \brief Source: Text conversion from one charset to another
34 #include "strutil.h" /* utf-8 functions */
36 #include "util.h" /* concat_dir_and_file() */
41 struct codepage_desc
*codepages
;
43 unsigned char conv_displ
[256];
44 unsigned char conv_input
[256];
46 const char *cp_display
= NULL
;
47 const char *cp_source
= NULL
;
51 load_codepages_list (void)
57 char *default_codepage
= NULL
;
59 fname
= concat_dir_and_file (mc_home
, CHARSETS_INDEX
);
60 if (!(f
= fopen (fname
, "r"))) {
61 fprintf (stderr
, _("Warning: file %s not found\n"), fname
);
64 fname
= concat_dir_and_file (mc_home_alt
, CHARSETS_INDEX
);
65 if (!(f
= fopen (fname
, "r"))) {
66 fprintf (stderr
, _("Warning: file %s not found\n"), fname
);
69 /* file is not found, add default codepage */
71 codepages
= g_new0 (struct codepage_desc
, n_codepages
+ 1);
72 codepages
[0].id
= g_strdup ("ASCII");
73 codepages
[0].name
= g_strdup (_("7-bit ASCII"));
79 for (n_codepages
= 0; fgets (buf
, sizeof (buf
), f
);)
80 if (buf
[0] != '\n' && buf
[0] != '\0' && buf
[0] != '#')
84 codepages
= g_new0 (struct codepage_desc
, n_codepages
+ 1);
86 for (n_codepages
= 0; fgets (buf
, sizeof buf
, f
);) {
87 /* split string into id and cpname */
89 size_t buflen
= strlen (buf
);
91 if (*p
== '\n' || *p
== '\0' || *p
== '#')
94 if (buflen
> 0 && buf
[buflen
- 1] == '\n')
95 buf
[buflen
- 1] = '\0';
96 while (*p
!= '\t' && *p
!= ' ' && *p
!= '\0')
106 if (strcmp (buf
, "default") == 0)
107 default_codepage
= g_strdup (p
);
109 codepages
[n_codepages
].id
= g_strdup (buf
);
110 codepages
[n_codepages
].name
= g_strdup (p
);
115 if (default_codepage
!= NULL
) {
116 display_codepage
= get_codepage_index (default_codepage
);
117 g_free (default_codepage
);
120 result
= n_codepages
;
127 free_codepages_list (void)
129 if (n_codepages
> 0) {
131 for (i
= 0; i
< n_codepages
; i
++) {
132 g_free (codepages
[i
].id
);
133 g_free (codepages
[i
].name
);
141 #define OTHER_8BIT "Other_8_bit"
144 get_codepage_id (const int n
)
146 return (n
< 0) ? OTHER_8BIT
: codepages
[n
].id
;
150 get_codepage_index (const char *id
)
153 if (strcmp (id
, OTHER_8BIT
) == 0)
155 if (codepages
== NULL
)
157 for (i
= 0; codepages
[i
].id
; ++i
)
158 if (strcmp (id
, codepages
[i
].id
) == 0)
164 translate_character (GIConv cd
, char c
)
166 gchar
*tmp_buff
= NULL
;
167 gsize bytes_read
, bytes_written
= 0;
168 const char *ibuf
= &c
;
173 tmp_buff
= g_convert_with_iconv (ibuf
, ibuflen
, cd
, &bytes_read
, &bytes_written
, NULL
);
183 * FIXME: This assumes that ASCII is always the first encoding
189 init_translation_table (int cpsource
, int cpdisplay
)
194 /* Fill input <-> display tables */
196 if (cpsource
< 0 || cpdisplay
< 0 || cpsource
== cpdisplay
) {
197 for (i
= 0; i
<= 255; ++i
) {
200 cp_source
= cp_display
;
205 for (i
= 0; i
<= 127; ++i
) {
209 cp_source
= (char *) codepages
[cpsource
].id
;
210 cp_display
= (char *) codepages
[cpdisplay
].id
;
212 /* display <- input table */
214 cd
= g_iconv_open (cp_display
, cp_source
);
215 if (cd
== INVALID_CONV
) {
216 g_snprintf (errbuf
, sizeof (errbuf
),
217 _("Cannot translate from %s to %s"), cp_source
, cp_display
);
221 for (i
= 128; i
<= 255; ++i
)
222 conv_displ
[i
] = translate_character (cd
, i
);
226 /* input <- display table */
228 cd
= g_iconv_open (cp_source
, cp_display
);
229 if (cd
== INVALID_CONV
) {
230 g_snprintf (errbuf
, sizeof (errbuf
),
231 _("Cannot translate from %s to %s"), cp_display
, cp_source
);
235 for (i
= 128; i
<= 255; ++i
) {
237 ch
= translate_character (cd
, i
);
238 conv_input
[i
] = (ch
== UNKNCHAR
) ? i
: ch
;
247 convert_to_display (char *str
)
253 *str
= conv_displ
[(unsigned char) *str
];
259 str_convert_to_display (char *str
)
261 return str_nconvert_to_display (str
, -1);
266 str_nconvert_to_display (char *str
, int len
)
272 return g_string_new("");
274 if (cp_display
== cp_source
)
275 return g_string_new(str
);
277 conv
= str_crt_conv_from (cp_source
);
279 buff
= g_string_new("");
280 str_nconvert (conv
, str
, len
, buff
);
281 str_close_conv (conv
);
286 convert_from_input (char *str
)
292 *str
= conv_input
[(unsigned char) *str
];
298 str_convert_to_input (char *str
)
300 return str_nconvert_to_input (str
, -1);
304 str_nconvert_to_input (char *str
, int len
)
310 return g_string_new("");
312 if (cp_display
== cp_source
)
313 return g_string_new(str
);
315 conv
= str_crt_conv_to (cp_source
);
317 buff
= g_string_new("");
318 str_nconvert (conv
, str
, len
, buff
);
319 str_close_conv (conv
);
324 convert_from_utf_to_current (const char *str
)
326 unsigned char buf_ch
[6 + 1];
327 unsigned char ch
= '.';
334 cp_to
= get_codepage_id ( source_codepage
);
335 conv
= str_crt_conv_to ( cp_to
);
337 if (conv
!= INVALID_CONV
) {
338 switch (str_translate_char (conv
, str
, -1, (char *)buf_ch
, sizeof(buf_ch
))) {
347 str_close_conv (conv
);
355 convert_from_utf_to_current_c (const int input_char
, GIConv conv
)
357 unsigned char str
[6 + 1];
358 unsigned char buf_ch
[6 + 1];
359 unsigned char ch
= '.';
363 res
= g_unichar_to_utf8 (input_char
, (char *)str
);
369 switch (str_translate_char (conv
, (char *)str
, -1, (char *)buf_ch
, sizeof(buf_ch
))) {
382 convert_from_8bit_to_utf_c (const char input_char
, GIConv conv
)
384 unsigned char str
[2];
385 unsigned char buf_ch
[6 + 1];
389 str
[0] = (unsigned char) input_char
;
392 switch (str_translate_char (conv
, (char *)str
, -1, (char *)buf_ch
, sizeof(buf_ch
))) {
394 res
= g_utf8_get_char_validated ((char *)buf_ch
, -1);
410 convert_from_8bit_to_utf_c2 (const char input_char
)
412 unsigned char str
[2];
413 unsigned char buf_ch
[6 + 1];
419 str
[0] = (unsigned char) input_char
;
422 cp_from
= get_codepage_id ( source_codepage
);
423 conv
= str_crt_conv_to (cp_from
);
425 if (conv
!= INVALID_CONV
) {
426 switch (str_translate_char (conv
, (char *) str
, -1, (char *) buf_ch
, sizeof(buf_ch
))) {
428 res
= g_utf8_get_char_validated ((char *) buf_ch
, -1);
440 str_close_conv (conv
);
445 #endif /* HAVE_CHARSET */