1 /* Text conversion from one charset to another.
3 Copyright (C) 2001 Walery Studennikov <despair@sama.ru>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
32 #include "strutil.h" /* utf-8 functions */
37 struct codepage_desc
*codepages
;
39 unsigned char conv_displ
[256];
40 unsigned char conv_input
[256];
43 load_codepages_list (void)
50 extern int display_codepage
;
51 char *default_codepage
= NULL
;
53 fname
= concat_dir_and_file (mc_home
, CHARSETS_INDEX
);
54 if (!(f
= fopen (fname
, "r"))) {
55 fprintf (stderr
, _("Warning: file %s not found\n"), fname
);
61 for (n_codepages
= 0; fgets (buf
, sizeof (buf
), f
);)
62 if (buf
[0] != '\n' && buf
[0] != '\0' && buf
[0] != '#')
66 codepages
= g_new0 (struct codepage_desc
, n_codepages
+ 1);
68 for (n_codepages
= 0; fgets (buf
, sizeof buf
, f
);) {
69 /* split string into id and cpname */
71 int buflen
= strlen (buf
);
73 if (*p
== '\n' || *p
== '\0' || *p
== '#')
76 if (buflen
> 0 && buf
[buflen
- 1] == '\n')
77 buf
[buflen
- 1] = '\0';
78 while (*p
!= '\t' && *p
!= ' ' && *p
!= '\0')
85 while (*p
== '\t' || *p
== ' ')
90 if (strcmp (buf
, "default") == 0) {
91 default_codepage
= g_strdup (p
);
95 codepages
[n_codepages
].id
= g_strdup (buf
);
96 codepages
[n_codepages
].name
= g_strdup (p
);
100 if (default_codepage
) {
101 display_codepage
= get_codepage_index (default_codepage
);
102 g_free (default_codepage
);
105 result
= n_codepages
;
112 free_codepages_list (void)
114 if (n_codepages
> 0) {
116 for (i
= 0; i
< n_codepages
; i
++) {
117 g_free (codepages
[i
].id
);
118 g_free (codepages
[i
].name
);
126 #define OTHER_8BIT "Other_8_bit"
129 get_codepage_id (int n
)
131 return (n
< 0) ? OTHER_8BIT
: codepages
[n
].id
;
135 get_codepage_index (const char *id
)
138 if (strcmp (id
, OTHER_8BIT
) == 0)
140 for (i
= 0; codepages
[i
].id
; ++i
)
141 if (strcmp (id
, codepages
[i
].id
) == 0)
147 translate_character (iconv_t cd
, char c
)
149 char outbuf
[4], *obuf
;
150 size_t ibuflen
, obuflen
, count
;
152 ICONV_CONST
char *ibuf
= &c
;
157 count
= iconv (cd
, &ibuf
, &ibuflen
, &obuf
, &obuflen
);
158 if (count
!= ((size_t) -1) && ibuflen
== 0)
167 * FIXME: This assumes that ASCII is always the first encoding
173 init_translation_table (int cpsource
, int cpdisplay
)
177 const char *cpsour
, *cpdisp
;
179 /* Fill input <-> display tables */
181 if (cpsource
< 0 || cpdisplay
< 0 || cpsource
== cpdisplay
) {
182 for (i
= 0; i
<= 255; ++i
) {
189 for (i
= 0; i
<= 127; ++i
) {
194 cpsour
= codepages
[cpsource
].id
;
195 cpdisp
= codepages
[cpdisplay
].id
;
197 /* display <- input table */
199 cd
= iconv_open (cpdisp
, cpsour
);
200 if (cd
== (iconv_t
) - 1) {
201 g_snprintf (errbuf
, sizeof (errbuf
),
202 _("Cannot translate from %s to %s"), cpsour
, cpdisp
);
206 for (i
= 128; i
<= 255; ++i
)
207 conv_displ
[i
] = translate_character (cd
, i
);
211 /* input <- display table */
213 cd
= iconv_open (cpsour
, cpdisp
);
214 if (cd
== (iconv_t
) - 1) {
215 g_snprintf (errbuf
, sizeof (errbuf
),
216 _("Cannot translate from %s to %s"), cpdisp
, cpsour
);
220 for (i
= 128; i
<= 255; ++i
) {
222 ch
= translate_character (cd
, i
);
223 conv_input
[i
] = (ch
== UNKNCHAR
) ? i
: ch
;
232 convert_to_display (char *str
)
238 *str
= conv_displ
[(unsigned char) *str
];
244 convert_from_input (char *str
)
250 *str
= conv_input
[(unsigned char) *str
];
256 convert_from_utf_to_current (const char *str
)
262 char *cp_from
= NULL
;
264 GString
*translated_data
;
266 translated_data
= g_string_new ("");
267 cp_from
= get_codepage_id ( source_codepage
);
268 conv
= str_crt_conv_from (cp_from
);
270 if (conv
!= INVALID_CONV
) {
271 if (str_convert (conv
, (char *) str
, translated_data
) != ESTR_FAILURE
) {
272 ch
= translated_data
->str
[0];
276 str_close_conv (conv
);
278 g_string_free (translated_data
, TRUE
);
284 convert_from_utf_to_current_c (const int input_char
)
286 unsigned char str
[6 + 1];
287 unsigned char buf_ch
[6 + 1];
288 unsigned char ch
= '.';
290 char *cp_from
= NULL
;
292 GString
*translated_data
;
295 res
= g_unichar_to_utf8 (input_char
, str
);
301 cp_from
= get_codepage_id ( source_codepage
);
302 conv
= str_crt_conv_from (cp_from
);
304 if (conv
!= INVALID_CONV
) {
305 switch (str_translate_char (conv
, str
, res
, buf_ch
, sizeof(buf_ch
))) {
315 str_close_conv (conv
);
321 #endif /* HAVE_CHARSET */