Merge branch 'trivia'
[empathy-mirror.git] / libempathy-gtk / totem-subtitle-encoding.c
blobf376ea7287ce8b24a8dc2ee6821c270ff23df02f
1 /*
2 * Copyright (C) 2001-2006 Bastien Nocera <hadess@hadess.net>
4 * encoding list copied from gnome-terminal/encoding.c
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 * The Totem project hereby grant permission for non-gpl compatible GStreamer
21 * plugins to be used and distributed together with GStreamer and Totem. This
22 * permission are above and beyond the permissions granted by the GPL license
23 * Totem is covered by.
25 * Monday 7th February 2005: Christian Schaller: Add exception clause.
26 * See license_change file for details.
30 #include "config.h"
31 #include <glib/gi18n-lib.h>
32 #include "totem-subtitle-encoding.h"
33 #include <string.h>
/*
 * Row numbers of the encodings[] table.  subtitle_encoding_init() asserts
 * that each table row's index equals its position, so the enumerators below
 * must stay in the same order as the table rows.  SUBTITLE_ENCODING_LAST is
 * not an encoding; it is the number of rows.
 */
typedef enum
{
  SUBTITLE_ENCODING_CURRENT_LOCALE,

  /* Arabic */
  SUBTITLE_ENCODING_ISO_8859_6,
  SUBTITLE_ENCODING_IBM_864,
  SUBTITLE_ENCODING_MAC_ARABIC,
  SUBTITLE_ENCODING_WINDOWS_1256,

  /* Armenian */
  SUBTITLE_ENCODING_ARMSCII_8,

  /* Baltic */
  SUBTITLE_ENCODING_ISO_8859_4,
  SUBTITLE_ENCODING_ISO_8859_13,
  SUBTITLE_ENCODING_WINDOWS_1257,

  /* Celtic */
  SUBTITLE_ENCODING_ISO_8859_14,

  /* Central European */
  SUBTITLE_ENCODING_ISO_8859_2,
  SUBTITLE_ENCODING_IBM_852,
  SUBTITLE_ENCODING_MAC_CE,
  SUBTITLE_ENCODING_WINDOWS_1250,

  /* Chinese Simplified */
  SUBTITLE_ENCODING_GB18030,
  SUBTITLE_ENCODING_GB2312,
  SUBTITLE_ENCODING_GBK,
  SUBTITLE_ENCODING_HZ,

  /* Chinese Traditional */
  SUBTITLE_ENCODING_BIG5,
  SUBTITLE_ENCODING_BIG5_HKSCS,
  SUBTITLE_ENCODING_EUC_TW,

  /* Croatian */
  SUBTITLE_ENCODING_MAC_CROATIAN,

  /* Cyrillic */
  SUBTITLE_ENCODING_ISO_8859_5,
  SUBTITLE_ENCODING_IBM_855,
  SUBTITLE_ENCODING_ISO_IR_111,
  SUBTITLE_ENCODING_KOI8_R,
  SUBTITLE_ENCODING_MAC_CYRILLIC,
  SUBTITLE_ENCODING_WINDOWS_1251,

  /* Cyrillic/Russian */
  SUBTITLE_ENCODING_CP_866,

  /* Cyrillic/Ukrainian */
  SUBTITLE_ENCODING_MAC_UKRAINIAN,
  SUBTITLE_ENCODING_KOI8_U,

  /* Georgian */
  SUBTITLE_ENCODING_GEOSTD8,

  /* Greek */
  SUBTITLE_ENCODING_ISO_8859_7,
  SUBTITLE_ENCODING_MAC_GREEK,
  SUBTITLE_ENCODING_WINDOWS_1253,

  /* Gujarati */
  SUBTITLE_ENCODING_MAC_GUJARATI,

  /* Gurmukhi */
  SUBTITLE_ENCODING_MAC_GURMUKHI,

  /* Hebrew */
  SUBTITLE_ENCODING_ISO_8859_8_I,
  SUBTITLE_ENCODING_IBM_862,
  SUBTITLE_ENCODING_MAC_HEBREW,
  SUBTITLE_ENCODING_WINDOWS_1255,

  /* Hebrew Visual */
  SUBTITLE_ENCODING_ISO_8859_8,

  /* Hindi */
  SUBTITLE_ENCODING_MAC_DEVANAGARI,

  /* Icelandic */
  SUBTITLE_ENCODING_MAC_ICELANDIC,

  /* Japanese */
  SUBTITLE_ENCODING_EUC_JP,
  SUBTITLE_ENCODING_ISO_2022_JP,
  SUBTITLE_ENCODING_SHIFT_JIS,

  /* Korean */
  SUBTITLE_ENCODING_EUC_KR,
  SUBTITLE_ENCODING_ISO_2022_KR,
  SUBTITLE_ENCODING_JOHAB,
  SUBTITLE_ENCODING_UHC,

  /* Nordic */
  SUBTITLE_ENCODING_ISO_8859_10,

  /* Persian */
  SUBTITLE_ENCODING_MAC_FARSI,

  /* Romanian */
  SUBTITLE_ENCODING_ISO_8859_16,
  SUBTITLE_ENCODING_MAC_ROMANIAN,

  /* South European */
  SUBTITLE_ENCODING_ISO_8859_3,

  /* Thai */
  SUBTITLE_ENCODING_TIS_620,

  /* Turkish */
  SUBTITLE_ENCODING_ISO_8859_9,
  SUBTITLE_ENCODING_IBM_857,
  SUBTITLE_ENCODING_MAC_TURKISH,
  SUBTITLE_ENCODING_WINDOWS_1254,

  /* Unicode */
  SUBTITLE_ENCODING_UTF_7,
  SUBTITLE_ENCODING_UTF_8,
  SUBTITLE_ENCODING_UTF_16,
  SUBTITLE_ENCODING_UCS_2,
  SUBTITLE_ENCODING_UCS_4,

  /* Western */
  SUBTITLE_ENCODING_ISO_8859_1,
  SUBTITLE_ENCODING_ISO_8859_15,
  SUBTITLE_ENCODING_IBM_850,
  SUBTITLE_ENCODING_MAC_ROMAN,
  SUBTITLE_ENCODING_WINDOWS_1252,

  /* Vietnamese */
  SUBTITLE_ENCODING_TCVN,
  SUBTITLE_ENCODING_VISCII,
  SUBTITLE_ENCODING_WINDOWS_1258,

  SUBTITLE_ENCODING_LAST
} SubtitleEncodingIndex;
146 typedef struct
148 int index;
149 gboolean valid;
150 const char *charset;
151 const char *name;
152 } SubtitleEncoding;
155 static SubtitleEncoding encodings[] = {
157 {SUBTITLE_ENCODING_CURRENT_LOCALE, TRUE,
158 NULL, N_("Current Locale")},
160 {SUBTITLE_ENCODING_ISO_8859_6, FALSE,
161 "ISO-8859-6", N_("Arabic")},
162 {SUBTITLE_ENCODING_IBM_864, FALSE,
163 "IBM864", N_("Arabic")},
164 {SUBTITLE_ENCODING_MAC_ARABIC, FALSE,
165 "MAC_ARABIC", N_("Arabic")},
166 {SUBTITLE_ENCODING_WINDOWS_1256, FALSE,
167 "WINDOWS-1256", N_("Arabic")},
169 {SUBTITLE_ENCODING_ARMSCII_8, FALSE,
170 "ARMSCII-8", N_("Armenian")},
172 {SUBTITLE_ENCODING_ISO_8859_4, FALSE,
173 "ISO-8859-4", N_("Baltic")},
174 {SUBTITLE_ENCODING_ISO_8859_13, FALSE,
175 "ISO-8859-13", N_("Baltic")},
176 {SUBTITLE_ENCODING_WINDOWS_1257, FALSE,
177 "WINDOWS-1257", N_("Baltic")},
179 {SUBTITLE_ENCODING_ISO_8859_14, FALSE,
180 "ISO-8859-14", N_("Celtic")},
182 {SUBTITLE_ENCODING_ISO_8859_2, FALSE,
183 "ISO-8859-2", N_("Central European")},
184 {SUBTITLE_ENCODING_IBM_852, FALSE,
185 "IBM852", N_("Central European")},
186 {SUBTITLE_ENCODING_MAC_CE, FALSE,
187 "MAC_CE", N_("Central European")},
188 {SUBTITLE_ENCODING_WINDOWS_1250, FALSE,
189 "WINDOWS-1250", N_("Central European")},
191 {SUBTITLE_ENCODING_GB18030, FALSE,
192 "GB18030", N_("Chinese Simplified")},
193 {SUBTITLE_ENCODING_GB2312, FALSE,
194 "GB2312", N_("Chinese Simplified")},
195 {SUBTITLE_ENCODING_GBK, FALSE,
196 "GBK", N_("Chinese Simplified")},
197 {SUBTITLE_ENCODING_HZ, FALSE,
198 "HZ", N_("Chinese Simplified")},
200 {SUBTITLE_ENCODING_BIG5, FALSE,
201 "BIG5", N_("Chinese Traditional")},
202 {SUBTITLE_ENCODING_BIG5_HKSCS, FALSE,
203 "BIG5-HKSCS", N_("Chinese Traditional")},
204 {SUBTITLE_ENCODING_EUC_TW, FALSE,
205 "EUC-TW", N_("Chinese Traditional")},
207 {SUBTITLE_ENCODING_MAC_CROATIAN, FALSE,
208 "MAC_CROATIAN", N_("Croatian")},
210 {SUBTITLE_ENCODING_ISO_8859_5, FALSE,
211 "ISO-8859-5", N_("Cyrillic")},
212 {SUBTITLE_ENCODING_IBM_855, FALSE,
213 "IBM855", N_("Cyrillic")},
214 {SUBTITLE_ENCODING_ISO_IR_111, FALSE,
215 "ISO-IR-111", N_("Cyrillic")},
216 {SUBTITLE_ENCODING_KOI8_R, FALSE,
217 "KOI8-R", N_("Cyrillic")},
218 {SUBTITLE_ENCODING_MAC_CYRILLIC, FALSE,
219 "MAC-CYRILLIC", N_("Cyrillic")},
220 {SUBTITLE_ENCODING_WINDOWS_1251, FALSE,
221 "WINDOWS-1251", N_("Cyrillic")},
223 {SUBTITLE_ENCODING_CP_866, FALSE,
224 "CP866", N_("Cyrillic/Russian")},
226 {SUBTITLE_ENCODING_MAC_UKRAINIAN, FALSE,
227 "MAC_UKRAINIAN", N_("Cyrillic/Ukrainian")},
228 {SUBTITLE_ENCODING_KOI8_U, FALSE,
229 "KOI8-U", N_("Cyrillic/Ukrainian")},
231 {SUBTITLE_ENCODING_GEOSTD8, FALSE,
232 "GEORGIAN-PS", N_("Georgian")},
234 {SUBTITLE_ENCODING_ISO_8859_7, FALSE,
235 "ISO-8859-7", N_("Greek")},
236 {SUBTITLE_ENCODING_MAC_GREEK, FALSE,
237 "MAC_GREEK", N_("Greek")},
238 {SUBTITLE_ENCODING_WINDOWS_1253, FALSE,
239 "WINDOWS-1253", N_("Greek")},
241 {SUBTITLE_ENCODING_MAC_GUJARATI, FALSE,
242 "MAC_GUJARATI", N_("Gujarati")},
244 {SUBTITLE_ENCODING_MAC_GURMUKHI, FALSE,
245 "MAC_GURMUKHI", N_("Gurmukhi")},
247 {SUBTITLE_ENCODING_ISO_8859_8_I, FALSE,
248 "ISO-8859-8-I", N_("Hebrew")},
249 {SUBTITLE_ENCODING_IBM_862, FALSE,
250 "IBM862", N_("Hebrew")},
251 {SUBTITLE_ENCODING_MAC_HEBREW, FALSE,
252 "MAC_HEBREW", N_("Hebrew")},
253 {SUBTITLE_ENCODING_WINDOWS_1255, FALSE,
254 "WINDOWS-1255", N_("Hebrew")},
256 {SUBTITLE_ENCODING_ISO_8859_8, FALSE,
257 "ISO-8859-8", N_("Hebrew Visual")},
259 {SUBTITLE_ENCODING_MAC_DEVANAGARI, FALSE,
260 "MAC_DEVANAGARI", N_("Hindi")},
262 {SUBTITLE_ENCODING_MAC_ICELANDIC, FALSE,
263 "MAC_ICELANDIC", N_("Icelandic")},
265 {SUBTITLE_ENCODING_EUC_JP, FALSE,
266 "EUC-JP", N_("Japanese")},
267 {SUBTITLE_ENCODING_ISO_2022_JP, FALSE,
268 "ISO2022JP", N_("Japanese")},
269 {SUBTITLE_ENCODING_SHIFT_JIS, FALSE,
270 "SHIFT-JIS", N_("Japanese")},
272 {SUBTITLE_ENCODING_EUC_KR, FALSE,
273 "EUC-KR", N_("Korean")},
274 {SUBTITLE_ENCODING_ISO_2022_KR, FALSE,
275 "ISO2022KR", N_("Korean")},
276 {SUBTITLE_ENCODING_JOHAB, FALSE,
277 "JOHAB", N_("Korean")},
278 {SUBTITLE_ENCODING_UHC, FALSE,
279 "UHC", N_("Korean")},
281 {SUBTITLE_ENCODING_ISO_8859_10, FALSE,
282 "ISO-8859-10", N_("Nordic")},
284 {SUBTITLE_ENCODING_MAC_FARSI, FALSE,
285 "MAC_FARSI", N_("Persian")},
287 {SUBTITLE_ENCODING_ISO_8859_16, FALSE,
288 "ISO-8859-16", N_("Romanian")},
289 {SUBTITLE_ENCODING_MAC_ROMANIAN, FALSE,
290 "MAC_ROMANIAN", N_("Romanian")},
292 {SUBTITLE_ENCODING_ISO_8859_3, FALSE,
293 "ISO-8859-3", N_("South European")},
295 {SUBTITLE_ENCODING_TIS_620, FALSE,
296 "TIS-620", N_("Thai")},
298 {SUBTITLE_ENCODING_ISO_8859_9, FALSE,
299 "ISO-8859-9", N_("Turkish")},
300 {SUBTITLE_ENCODING_IBM_857, FALSE,
301 "IBM857", N_("Turkish")},
302 {SUBTITLE_ENCODING_MAC_TURKISH, FALSE,
303 "MAC_TURKISH", N_("Turkish")},
304 {SUBTITLE_ENCODING_WINDOWS_1254, FALSE,
305 "WINDOWS-1254", N_("Turkish")},
307 {SUBTITLE_ENCODING_UTF_7, FALSE,
308 "UTF-7", N_("Unicode")},
309 {SUBTITLE_ENCODING_UTF_8, FALSE,
310 "UTF-8", N_("Unicode")},
311 {SUBTITLE_ENCODING_UTF_16, FALSE,
312 "UTF-16", N_("Unicode")},
313 {SUBTITLE_ENCODING_UCS_2, FALSE,
314 "UCS-2", N_("Unicode")},
315 {SUBTITLE_ENCODING_UCS_4, FALSE,
316 "UCS-4", N_("Unicode")},
318 {SUBTITLE_ENCODING_ISO_8859_1, FALSE,
319 "ISO-8859-1", N_("Western")},
320 {SUBTITLE_ENCODING_ISO_8859_15, FALSE,
321 "ISO-8859-15", N_("Western")},
322 {SUBTITLE_ENCODING_IBM_850, FALSE,
323 "IBM850", N_("Western")},
324 {SUBTITLE_ENCODING_MAC_ROMAN, FALSE,
325 "MAC_ROMAN", N_("Western")},
326 {SUBTITLE_ENCODING_WINDOWS_1252, FALSE,
327 "WINDOWS-1252", N_("Western")},
329 {SUBTITLE_ENCODING_TCVN, FALSE,
330 "TCVN", N_("Vietnamese")},
331 {SUBTITLE_ENCODING_VISCII, FALSE,
332 "VISCII", N_("Vietnamese")},
333 {SUBTITLE_ENCODING_WINDOWS_1258, FALSE,
334 "WINDOWS-1258", N_("Vietnamese")}
337 static const SubtitleEncoding *
338 find_encoding_by_charset (const char *charset)
340 int i;
342 i = 1; /* skip current locale */
343 while (i < SUBTITLE_ENCODING_LAST) {
344 if (strcasecmp (charset, encodings[i].charset) == 0)
345 return &encodings[i];
347 ++i;
350 if (strcasecmp (charset,
351 encodings[SUBTITLE_ENCODING_CURRENT_LOCALE].charset) == 0)
352 return &encodings[SUBTITLE_ENCODING_CURRENT_LOCALE];
354 return NULL;
357 static void
358 subtitle_encoding_init (void)
360 int i;
361 gsize bytes_read, bytes_written;
362 gchar *converted;
363 gchar ascii_sample[96];
365 g_get_charset ((const char **)
366 &encodings[SUBTITLE_ENCODING_CURRENT_LOCALE].charset);
368 g_assert (G_N_ELEMENTS (encodings) == SUBTITLE_ENCODING_LAST);
370 /* Initialize the sample text with all of the printing ASCII characters
371 * from space (32) to the tilde (126), 95 in all. */
372 for (i = 0; i < (int) sizeof (ascii_sample); i++)
373 ascii_sample[i] = i + 32;
375 ascii_sample[sizeof (ascii_sample) - 1] = '\0';
377 i = 0;
378 while (i < SUBTITLE_ENCODING_LAST) {
379 bytes_read = 0;
380 bytes_written = 0;
382 g_assert (encodings[i].index == i);
384 /* Translate the names */
385 encodings[i].name = _(encodings[i].name);
387 /* Test that the encoding is a proper superset of ASCII (which naive
388 * apps are going to use anyway) by attempting to validate the text
389 * using the current encoding. This also flushes out any encodings
390 * which the underlying GIConv implementation can't support.
392 converted = g_convert (ascii_sample, sizeof (ascii_sample) - 1,
393 encodings[i].charset, encodings[i].charset,
394 &bytes_read, &bytes_written, NULL);
396 /* The encoding is only valid if ASCII passes through cleanly. */
397 if (i == SUBTITLE_ENCODING_CURRENT_LOCALE)
398 encodings[i].valid = TRUE;
399 else
400 encodings[i].valid =
401 (bytes_read == (sizeof (ascii_sample) - 1)) &&
402 (converted != NULL) && (strcmp (converted, ascii_sample) == 0);
404 #ifdef DEBUG_ENCODINGS
405 if (!encodings[i].valid) {
406 g_print ("Rejecting encoding %s as invalid:\n", encodings[i].charset);
407 g_print (" input \"%s\"\n", ascii_sample);
408 g_print (" output \"%s\"\n\n", converted ? converted : "(null)");
410 #endif
412 /* Discard the converted string. */
413 g_free (converted);
415 ++i;
419 static int
420 subtitle_encoding_get_index (const char *charset)
422 const SubtitleEncoding *e;
424 e = find_encoding_by_charset (charset);
425 if (e != NULL)
426 return e->index;
427 else
428 return SUBTITLE_ENCODING_CURRENT_LOCALE;
431 static const char *
432 subtitle_encoding_get_charset (int index_)
434 const SubtitleEncoding *e;
436 if (index_ >= SUBTITLE_ENCODING_LAST)
437 e = &encodings[SUBTITLE_ENCODING_CURRENT_LOCALE];
438 else if (index_ < SUBTITLE_ENCODING_CURRENT_LOCALE)
439 e = &encodings[SUBTITLE_ENCODING_CURRENT_LOCALE];
440 else if (!encodings[index_].valid)
441 e = &encodings[SUBTITLE_ENCODING_CURRENT_LOCALE];
442 else
443 e = &encodings[index_];
444 return e->charset;
/* Column numbers of the tree store shown in the encoding combo box. */
enum
{
  INDEX_COL = 0,  /* int column: table index, or -1 on language heading rows */
  NAME_COL = 1    /* string column: the text displayed for the row */
};
453 static gint
454 compare (GtkTreeModel * model, GtkTreeIter * a, GtkTreeIter * b, gpointer data)
456 gchar *str_a, *str_b;
457 gint result;
459 gtk_tree_model_get (model, a, NAME_COL, &str_a, -1);
460 gtk_tree_model_get (model, b, NAME_COL, &str_b, -1);
462 result = strcmp (str_a, str_b);
464 g_free (str_a);
465 g_free (str_b);
467 return result;
470 static void
471 is_encoding_sensitive (GtkCellLayout * cell_layout,
472 GtkCellRenderer * cell,
473 GtkTreeModel * tree_model, GtkTreeIter * iter, gpointer data)
476 gboolean sensitive;
478 sensitive = !gtk_tree_model_iter_has_child (tree_model, iter);
479 g_object_set (cell, "sensitive", sensitive, NULL);
482 static GtkTreeModel *
483 subtitle_encoding_create_store (void)
485 gchar *label;
486 const gchar *lastlang = "";
487 GtkTreeIter iter, iter2;
488 GtkTreeStore *store;
489 int i;
491 store = gtk_tree_store_new (2, G_TYPE_INT, G_TYPE_STRING);
493 for (i = 0; i < SUBTITLE_ENCODING_LAST; i++) {
494 if (encodings[i].valid) {
495 if (strcmp (lastlang, encodings[i].name)) {
496 lastlang = encodings[i].name;
497 gtk_tree_store_append (store, &iter, NULL);
498 gtk_tree_store_set (store, &iter, INDEX_COL,
499 -1, NAME_COL, lastlang, -1);
501 label = g_strdup_printf("%s (%s)", lastlang, encodings[i].charset);
502 gtk_tree_store_append (store, &iter2, &iter);
503 gtk_tree_store_set (store, &iter2, INDEX_COL,
504 encodings[i].index, NAME_COL, label, -1);
505 g_free(label);
508 gtk_tree_sortable_set_default_sort_func (GTK_TREE_SORTABLE (store),
509 compare, NULL, NULL);
510 gtk_tree_sortable_set_sort_column_id (GTK_TREE_SORTABLE (store),
511 NAME_COL, GTK_SORT_ASCENDING);
512 return GTK_TREE_MODEL (store);
515 static void
516 subtitle_encoding_combo_render (GtkComboBox * combo)
518 GtkCellRenderer *renderer;
520 renderer = gtk_cell_renderer_text_new ();
521 gtk_cell_layout_pack_start (GTK_CELL_LAYOUT (combo), renderer, TRUE);
522 gtk_cell_layout_set_attributes (GTK_CELL_LAYOUT (combo), renderer,
523 "text", NAME_COL, NULL);
524 gtk_cell_layout_set_cell_data_func (GTK_CELL_LAYOUT (combo),
525 renderer, is_encoding_sensitive, NULL, NULL);
528 const char *
529 totem_subtitle_encoding_get_selected (GtkComboBox * combo)
531 GtkTreeModel *model;
532 GtkTreeIter iter;
533 gint index_ = -1;
535 model = gtk_combo_box_get_model (combo);
536 if (gtk_combo_box_get_active_iter (combo, &iter)) {
537 gtk_tree_model_get (model, &iter, INDEX_COL, &index_, -1);
539 if (index_ == -1)
540 return NULL;
541 return subtitle_encoding_get_charset (index_);
544 void
545 totem_subtitle_encoding_set (GtkComboBox * combo, const char *encoding)
547 GtkTreeModel *model;
548 GtkTreeIter iter, iter2;
549 gint index_, i;
551 g_return_if_fail (encoding != NULL);
553 model = gtk_combo_box_get_model (combo);
554 index_ = subtitle_encoding_get_index (encoding);
555 gtk_tree_model_get_iter_first (model, &iter);
556 do {
557 if (!gtk_tree_model_iter_has_child (model, &iter))
558 continue;
559 if (!gtk_tree_model_iter_children (model, &iter2, &iter))
560 continue;
561 do {
562 gtk_tree_model_get (model, &iter2, INDEX_COL, &i, -1);
563 if (i == index_)
564 break;
565 } while (gtk_tree_model_iter_next (model, &iter2));
566 if (i == index_)
567 break;
568 } while (gtk_tree_model_iter_next (model, &iter));
569 gtk_combo_box_set_active_iter (combo, &iter2);
572 void
573 totem_subtitle_encoding_init (GtkComboBox *combo)
575 GtkTreeModel *model;
576 subtitle_encoding_init ();
577 model = subtitle_encoding_create_store ();
578 gtk_combo_box_set_model (combo, model);
579 g_object_unref (model);
580 subtitle_encoding_combo_render (combo);
584 * vim: sw=2 ts=8 cindent noai bs=2