Ticket #1809 (invalid length of nonprintable chars)
[kaloumi3.git] / src / charsets.c
blobf12d1753a23dd2b2b493ed7006e564f5f1c08ee4
/* Text conversion from one charset to another.

   Copyright (C) 2001 Walery Studennikov <despair@sama.ru>

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

/** \file charsets.c
 *  \brief Source: Text conversion from one charset to another
 */
24 #include <config.h>
26 #ifdef HAVE_CHARSET
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
32 #include "global.h"
33 #include "charsets.h"
34 #include "strutil.h" /* utf-8 functions */
35 #include "main.h"
36 #include "util.h" /* concat_dir_and_file() */
37 #include "fileloc.h"
/* Codepage table: `codepages` holds `n_codepages` entries followed by one
 * zero-filled terminator entry (see load_codepages_list ()). */
struct codepage_desc *codepages = NULL;
int n_codepages = 0;

/* Names of the currently selected display and source codepages;
 * set by init_translation_table (). */
const char *cp_display = NULL;
const char *cp_source = NULL;

/* Byte-to-byte translation tables, indexed by the byte being translated:
 * conv_displ is applied by convert_to_display (),
 * conv_input is applied by convert_from_input (). */
unsigned char conv_displ[256];
unsigned char conv_input[256];
50 int
51 load_codepages_list (void)
53 int result = -1;
54 FILE *f;
55 char *fname;
56 char buf[BUF_MEDIUM];
57 char *default_codepage = NULL;
59 fname = concat_dir_and_file (mc_home, CHARSETS_INDEX);
60 if (!(f = fopen (fname, "r"))) {
61 fprintf (stderr, _("Warning: file %s not found\n"), fname);
62 g_free (fname);
64 fname = concat_dir_and_file (mc_home_alt, CHARSETS_INDEX);
65 if (!(f = fopen (fname, "r"))) {
66 fprintf (stderr, _("Warning: file %s not found\n"), fname);
67 g_free (fname);
69 /* file is not found, add defaullt codepage */
70 n_codepages = 1;
71 codepages = g_new0 (struct codepage_desc, n_codepages + 1);
72 codepages[0].id = g_strdup ("ASCII");
73 codepages[0].name = g_strdup (_("7-bit ASCII"));
74 return n_codepages;
77 g_free (fname);
79 for (n_codepages = 0; fgets (buf, sizeof (buf), f);)
80 if (buf[0] != '\n' && buf[0] != '\0' && buf[0] != '#')
81 ++n_codepages;
82 rewind (f);
84 codepages = g_new0 (struct codepage_desc, n_codepages + 1);
86 for (n_codepages = 0; fgets (buf, sizeof buf, f);) {
87 /* split string into id and cpname */
88 char *p = buf;
89 size_t buflen = strlen (buf);
91 if (*p == '\n' || *p == '\0' || *p == '#')
92 continue;
94 if (buflen > 0 && buf[buflen - 1] == '\n')
95 buf[buflen - 1] = '\0';
96 while (*p != '\t' && *p != ' ' && *p != '\0')
97 ++p;
98 if (*p == '\0')
99 goto fail;
101 *p++ = '\0';
102 g_strstrip (p);
103 if (*p == '\0')
104 goto fail;
106 if (strcmp (buf, "default") == 0)
107 default_codepage = g_strdup (p);
108 else {
109 codepages[n_codepages].id = g_strdup (buf);
110 codepages[n_codepages].name = g_strdup (p);
111 ++n_codepages;
115 if (default_codepage != NULL) {
116 display_codepage = get_codepage_index (default_codepage);
117 g_free (default_codepage);
120 result = n_codepages;
121 fail:
122 fclose (f);
123 return result;
126 void
127 free_codepages_list (void)
129 if (n_codepages > 0) {
130 int i;
131 for (i = 0; i < n_codepages; i++) {
132 g_free (codepages[i].id);
133 g_free (codepages[i].name);
135 n_codepages = 0;
136 g_free (codepages);
137 codepages = 0;
141 #define OTHER_8BIT "Other_8_bit"
143 const char *
144 get_codepage_id (const int n)
146 return (n < 0) ? OTHER_8BIT : codepages[n].id;
150 get_codepage_index (const char *id)
152 int i;
153 if (strcmp (id, OTHER_8BIT) == 0)
154 return -1;
155 if (codepages == NULL)
156 return -1;
157 for (i = 0; codepages[i].id; ++i)
158 if (strcmp (id, codepages[i].id) == 0)
159 return i;
160 return -1;
163 static char
164 translate_character (GIConv cd, char c)
166 gchar *tmp_buff = NULL;
167 gsize bytes_read, bytes_written = 0;
168 const char *ibuf = &c;
169 char ch = UNKNCHAR;
171 int ibuflen = 1;
173 tmp_buff = g_convert_with_iconv (ibuf, ibuflen, cd, &bytes_read, &bytes_written, NULL);
174 if ( tmp_buff )
175 ch = tmp_buff[0];
176 g_free (tmp_buff);
177 return ch;
180 char errbuf[255];
183 * FIXME: This assumes that ASCII is always the first encoding
184 * in mc.charsets
186 #define CP_ASCII 0
188 const char *
189 init_translation_table (int cpsource, int cpdisplay)
191 int i;
192 GIConv cd;
194 /* Fill inpit <-> display tables */
196 if (cpsource < 0 || cpdisplay < 0 || cpsource == cpdisplay) {
197 for (i = 0; i <= 255; ++i) {
198 conv_displ[i] = i;
199 conv_input[i] = i;
200 cp_source = cp_display;
202 return NULL;
205 for (i = 0; i <= 127; ++i) {
206 conv_displ[i] = i;
207 conv_input[i] = i;
209 cp_source = (char *) codepages[cpsource].id;
210 cp_display = (char *) codepages[cpdisplay].id;
212 /* display <- inpit table */
214 cd = g_iconv_open (cp_display, cp_source);
215 if (cd == INVALID_CONV) {
216 g_snprintf (errbuf, sizeof (errbuf),
217 _("Cannot translate from %s to %s"), cp_source, cp_display);
218 return errbuf;
221 for (i = 128; i <= 255; ++i)
222 conv_displ[i] = translate_character (cd, i);
224 g_iconv_close (cd);
226 /* inpit <- display table */
228 cd = g_iconv_open (cp_source, cp_display);
229 if (cd == INVALID_CONV) {
230 g_snprintf (errbuf, sizeof (errbuf),
231 _("Cannot translate from %s to %s"), cp_display, cp_source);
232 return errbuf;
235 for (i = 128; i <= 255; ++i) {
236 unsigned char ch;
237 ch = translate_character (cd, i);
238 conv_input[i] = (ch == UNKNCHAR) ? i : ch;
241 g_iconv_close (cd);
243 return NULL;
246 void
247 convert_to_display (char *str)
249 if (!str)
250 return;
252 while (*str) {
253 *str = conv_displ[(unsigned char) *str];
254 str++;
258 GString *
259 str_convert_to_display (char *str)
261 return str_nconvert_to_display (str, -1);
265 GString *
266 str_nconvert_to_display (char *str, int len)
268 GString *buff;
269 GIConv conv;
271 if (!str)
272 return g_string_new("");
274 if (cp_display == cp_source)
275 return g_string_new(str);
277 conv = str_crt_conv_from (cp_source);
279 buff = g_string_new("");
280 str_nconvert (conv, str, len, buff);
281 str_close_conv (conv);
282 return buff;
285 void
286 convert_from_input (char *str)
288 if (!str)
289 return;
291 while (*str) {
292 *str = conv_input[(unsigned char) *str];
293 str++;
297 GString *
298 str_convert_to_input (char *str)
300 return str_nconvert_to_input (str, -1);
303 GString *
304 str_nconvert_to_input (char *str, int len)
306 GString *buff;
307 GIConv conv;
309 if (!str)
310 return g_string_new("");
312 if (cp_display == cp_source)
313 return g_string_new(str);
315 conv = str_crt_conv_to (cp_source);
317 buff = g_string_new("");
318 str_nconvert (conv, str, len, buff);
319 str_close_conv (conv);
320 return buff;
323 unsigned char
324 convert_from_utf_to_current (const char *str)
326 unsigned char buf_ch[6 + 1];
327 unsigned char ch = '.';
328 GIConv conv;
329 const char *cp_to;
331 if (!str)
332 return '.';
334 cp_to = get_codepage_id ( source_codepage );
335 conv = str_crt_conv_to ( cp_to );
337 if (conv != INVALID_CONV) {
338 switch (str_translate_char (conv, str, -1, (char *)buf_ch, sizeof(buf_ch))) {
339 case ESTR_SUCCESS:
340 ch = buf_ch[0];
341 break;
342 case ESTR_PROBLEM:
343 case ESTR_FAILURE:
344 ch = '.';
345 break;
347 str_close_conv (conv);
350 return ch;
354 unsigned char
355 convert_from_utf_to_current_c (const int input_char, GIConv conv)
357 unsigned char str[6 + 1];
358 unsigned char buf_ch[6 + 1];
359 unsigned char ch = '.';
361 int res = 0;
363 res = g_unichar_to_utf8 (input_char, (char *)str);
364 if ( res == 0 ) {
365 return ch;
367 str[res] = '\0';
369 switch (str_translate_char (conv, (char *)str, -1, (char *)buf_ch, sizeof(buf_ch))) {
370 case ESTR_SUCCESS:
371 ch = buf_ch[0];
372 break;
373 case ESTR_PROBLEM:
374 case ESTR_FAILURE:
375 ch = '.';
376 break;
378 return ch;
382 convert_from_8bit_to_utf_c (const char input_char, GIConv conv)
384 unsigned char str[2];
385 unsigned char buf_ch[6 + 1];
386 int ch = '.';
387 int res = 0;
389 str[0] = (unsigned char) input_char;
390 str[1] = '\0';
392 switch (str_translate_char (conv, (char *)str, -1, (char *)buf_ch, sizeof(buf_ch))) {
393 case ESTR_SUCCESS:
394 res = g_utf8_get_char_validated ((char *)buf_ch, -1);
395 if ( res < 0 ) {
396 ch = buf_ch[0];
397 } else {
398 ch = res;
400 break;
401 case ESTR_PROBLEM:
402 case ESTR_FAILURE:
403 ch = '.';
404 break;
406 return ch;
410 convert_from_8bit_to_utf_c2 (const char input_char)
412 unsigned char str[2];
413 unsigned char buf_ch[6 + 1];
414 int ch = '.';
415 int res = 0;
416 GIConv conv;
417 const char *cp_from;
419 str[0] = (unsigned char) input_char;
420 str[1] = '\0';
422 cp_from = get_codepage_id ( source_codepage );
423 conv = str_crt_conv_to (cp_from);
425 if (conv != INVALID_CONV) {
426 switch (str_translate_char (conv, (char *) str, -1, (char *) buf_ch, sizeof(buf_ch))) {
427 case ESTR_SUCCESS:
428 res = g_utf8_get_char_validated ((char *) buf_ch, -1);
429 if ( res < 0 ) {
430 ch = buf_ch[0];
431 } else {
432 ch = res;
434 break;
435 case ESTR_PROBLEM:
436 case ESTR_FAILURE:
437 ch = '.';
438 break;
440 str_close_conv (conv);
442 return ch;
445 #endif /* HAVE_CHARSET */