grub2: bring back build of aros-side grub2 tools
[AROS.git] / workbench / libs / codesets / src / codesets.c
blob30d40c281259f810204629aaf88d38c10d5bc86b
1 /***************************************************************************
3 codesets.library - Amiga shared library for handling different codesets
4 Copyright (C) 2001-2005 by Alfonso [alfie] Ranieri <alforan@tin.it>.
5 Copyright (C) 2005-2014 codesets.library Open Source Team
7 This library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
12 This library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 codesets.library project: http://sourceforge.net/projects/codesetslib/
19 Most of the code included in this file was relicensed from GPL to LGPL
20 from the source code of SimpleMail (http://www.sf.net/projects/simplemail)
21 with full permissions by its authors.
23 $Id$
25 ***************************************************************************/
27 #include "lib.h"
29 #include <clib/alib_protos.h>
31 #include <diskfont/glyph.h>
32 #include <diskfont/diskfonttag.h>
33 #include <proto/diskfont.h>
34 #include <ctype.h>
35 #include <limits.h>
37 #ifdef __MORPHOS__
38 #include <proto/keymap.h>
39 #include <proto/locale.h>
40 #endif
42 #include "codesets_table.h"
43 #include "convertUTF.h"
44 #include "codepages.h"
46 #include "SDI_stdarg.h"
48 #include "debug.h"
50 #define __NOLIBBASE__
51 #include <proto/codesets.h>
53 /**************************************************************************/
55 // a union used for various type casts while avoiding the annoying "dereferencing
56 // type punned pointer is breaking strict alias rules" warnings of GCC4+
57 union TypeAliases
59 void **voidptr;
60 char **schar;
61 unsigned char **uchar;
62 STRPTR *strptr;
63 UTF8 **utf8;
64 const UTF8 **cutf8;
65 UTF16 **utf16;
66 const UTF16 **cutf16;
67 UTF32 **utf32;
68 const UTF32 **cutf32;
/// BIN_SEARCH()
// search a sorted array in O(log n)
//
//   array   - the sorted array to search
//   low     - index of the first element to take into account (inclusive)
//   high    - index of the last element to take into account (inclusive, so
//             this is the number of elements minus one, NOT the number itself)
//   compare - expression yielding <0, 0 or >0 if the key is smaller than, equal
//             to or larger than the probed element; the index of the probed
//             element is available to the expression as 'm'
//   result  - lvalue receiving a pointer to the matching element or NULL
//
// e.g.
// BIN_SEARCH(strings,0,sizeof(strings)/sizeof(strings[0])-1,strcmp(key,strings[m]),res);
//
// All arguments are parenthesized and the bounds are evaluated exactly once.
// The middle index is computed as l+(h-l)/2 so that the sum cannot overflow.
#define BIN_SEARCH(array,low,high,compare,result) \
  {\
    int l = (low);\
    int h = (high);\
    int m = l + (h - l)/2;\
    (result) = NULL;\
    while (l<=h)\
    {\
      int d = (compare);\
      if (!d){ (result) = &(array)[m]; break; }\
      if (d < 0) h = m - 1;\
      else l = m + 1;\
      m = l + (h - l)/2;\
    }\
  }

///
91 /// mystrdup()
92 static STRPTR mystrdup(const char *str)
94 STRPTR newStr = NULL;
96 ENTER();
98 if(str != NULL)
100 int len;
102 if((len = strlen(str)) > 0)
104 if((newStr = allocArbitrateVecPooled(len+1)) != NULL)
105 strlcpy(newStr, str, len+1);
109 RETURN(newStr);
110 return newStr;
114 /// mystrndup()
115 static STRPTR mystrndup(const char *str1, int n)
117 STRPTR dest;
119 ENTER();
121 if((dest = allocArbitrateVecPooled(n+1)) != NULL)
123 if(str1 != NULL)
124 strlcpy(dest, str1, n+1);
125 else
126 dest[0] = '\0';
129 RETURN(dest);
130 return dest;
134 /// readLine()
135 static BOOL readLine(BPTR fh, char *buf, ULONG size)
137 BOOL success = FALSE;
138 char *c;
140 ENTER();
142 if((c = FGets(fh, buf, size)) != NULL)
144 // we succeeded in reading something
145 success = TRUE;
147 // now find the end of the line and strip the LF/CR character
148 for(; *c; c++)
150 if(*c == '\n' || *c == '\r')
152 *c = '\0';
153 break;
158 RETURN(success);
159 return success;
/// getConfigItem()
// Checks whether the line in buf has the form "<item> = <value>" (the item
// name is compared case insensitively, spaces around the '=' are optional).
//
// Returns a pointer to the first character of the value within buf, or NULL
// if the line does not start with item or if no '=' follows the item name.
static const char *getConfigItem(const char *buf, const char *item)
{
  const char *configItem = NULL;
  int len;

  ENTER();

  len = strlen(item);

  if(strnicmp(buf, item, len) == 0)
  {
    buf += len;

    // skip spaces
    // the cast is required as isspace() must not be fed with negative values
    while(*buf != '\0' && isspace((unsigned char)*buf))
      buf++;

    if(*buf == '=')
    {
      buf++;

      // skip spaces
      while(*buf != '\0' && isspace((unsigned char)*buf))
        buf++;

      configItem = buf;
    }
  }

  RETURN(configItem);
  return configItem;
}

///
200 /// parseUtf8()
201 static int parseUtf8(CONST_STRPTR *ps)
203 CONST_STRPTR s = *ps;
204 int wc, n, i;
206 ENTER();
208 if(*s<0x80)
210 *ps = s+1;
212 RETURN(*s);
213 return *s;
216 if(*s<0xc2)
218 RETURN(-1);
219 return -1;
221 else
223 if(*s<0xe0)
225 if((s[1] & 0xc0)!=0x80)
227 RETURN(-1);
228 return -1;
231 *ps = s+2;
233 RETURN(((s[0] & 0x1f)<<6) | (s[1] & 0x3f));
234 return ((s[0] & 0x1f)<<6) | (s[1] & 0x3f);
236 else
238 if(*s<0xf0)
240 n = 3;
242 else
244 if(*s<0xf8)
246 n = 4;
248 else
250 if(*s<0xfc)
252 n = 5;
254 else
256 if(*s<0xfe)
258 n = 6;
260 else
262 RETURN(-1);
263 return -1;
271 wc = *s++ & ((1<<(7-n))-1);
273 for(i = 1; i<n; i++)
275 if((*s & 0xc0) != 0x80)
277 RETURN(-1);
278 return -1;
281 wc = (wc << 6) | (*s++ & 0x3f);
284 if(wc < (1 << (5 * n - 4)))
286 RETURN(-1);
287 return -1;
290 *ps = s;
292 RETURN(wc);
293 return wc;
297 /// countCodesets()
298 static int countCodesets(struct codesetList *csList, BOOL allowMultibyte)
300 struct Node *node;
301 int num = 0;
303 for(node = GetHead((struct List *)csList); node != NULL; node = GetSucc(node))
305 struct codeset *cs = (struct codeset *)node;
307 if(allowMultibyte == TRUE ||
308 (cs != CodesetsBase->utf8Codeset && cs != CodesetsBase->utf16Codeset && cs != CodesetsBase->utf32Codeset))
310 num++;
314 return num;
318 /// mapUTF8toASCII()
319 // in case some UTF8 sequences can not be converted during CodesetsUTF8ToStrA(), this
320 // function is used to replace these unknown sequences with lookalike characters that
321 // still make the text more readable. For more replacement see
322 // http://www.utf8-zeichentabelle.de/unicode-utf8-table.pl
324 // The conversion table in this function is partly borrowed from the awebcharset plugin
325 // written by Frank Weber. See http://cvs.sunsite.dk/viewcvs.cgi/aweb/plugins/charset/awebcharset.c
struct UTF8Replacement
{
  const char *utf8;   // the original UTF8 string we are going to replace
  const int utf8len;  // the length of the UTF8 string
  const char *rep;    // pointer to the replacement string
  const int replen;   // the length of the replacement string (minus for signalling an UTF8 string)
};

// Comparison function for two struct UTF8Replacement items, suitable for
// bsearch()/qsort(). Shorter UTF8 strings sort before longer ones, strings of
// the same length are ordered by their bytes.
static int compareUTF8Replacements(const void *p1, const void *p2)
{
  // no casts needed, and the const qualifier of the arguments is preserved
  const struct UTF8Replacement *key = p1;
  const struct UTF8Replacement *rep = p2;
  int cmp;

  // compare the length first, after that compare the strings
  cmp = key->utf8len - rep->utf8len;
  if(cmp == 0)
    cmp = memcmp(key->utf8, rep->utf8, key->utf8len);

  return cmp;
}
349 static int mapUTF8toASCII(const char **dst, const unsigned char *src, const int utf8len)
351 int len = 0;
352 struct UTF8Replacement key = { (char *)src, utf8len, NULL, 0 };
353 struct UTF8Replacement *rep;
355 static struct UTF8Replacement const utf8map[] =
357 // U+0100 ... U+017F (Latin Extended-A)
358 { "\xC4\x80", 2, "A", 1 }, // U+0100 -> A (LATIN CAPITAL LETTER A WITH MACRON)
359 { "\xC4\x81", 2, "a", 1 }, // U+0101 -> a (LATIN SMALL LETTER A WITH MACRON)
360 { "\xC4\x82", 2, "A", 1 }, // U+0102 -> A (LATIN CAPITAL LETTER A WITH BREVE)
361 { "\xC4\x83", 2, "a", 1 }, // U+0103 -> a (LATIN SMALL LETTER A WITH BREVE)
362 { "\xC4\x84", 2, "A", 1 }, // U+0104 -> A (LATIN CAPITAL LETTER A WITH OGONEK)
363 { "\xC4\x85", 2, "a", 1 }, // U+0105 -> a (LATIN SMALL LETTER A WITH OGONEK)
364 { "\xC4\x86", 2, "C", 1 }, // U+0106 -> C (LATIN CAPITAL LETTER C WITH ACUTE)
365 { "\xC4\x87", 2, "c", 1 }, // U+0107 -> c (LATIN SMALL LETTER C WITH ACUTE)
366 { "\xC4\x88", 2, "C", 1 }, // U+0108 -> C (LATIN CAPITAL LETTER C WITH CIRCUMFLEX)
367 { "\xC4\x89", 2, "c", 1 }, // U+0109 -> c (LATIN SMALL LETTER C WITH CIRCUMFLEX)
368 { "\xC4\x8A", 2, "C", 1 }, // U+010A -> C (LATIN CAPITAL LETTER C WITH DOT ABOVE)
369 { "\xC4\x8B", 2, "c", 1 }, // U+010B -> c (LATIN SMALL LETTER C WITH DOT ABOVE)
370 { "\xC4\x8C", 2, "C", 1 }, // U+010C -> C (LATIN CAPITAL LETTER C WITH CARON)
371 { "\xC4\x8D", 2, "c", 1 }, // U+010D -> c (LATIN SMALL LETTER C WITH CARON)
372 { "\xC4\x8E", 2, "D", 1 }, // U+010E -> D (LATIN CAPITAL LETTER D WITH CARON)
373 { "\xC4\x8F", 2, "d", 1 }, // U+010F -> d (LATIN SMALL LETTER D WITH CARON)
374 { "\xC4\x90", 2, "D", 1 }, // U+0110 -> D (LATIN CAPITAL LETTER D WITH STROKE)
375 { "\xC4\x91", 2, "d", 1 }, // U+0111 -> d (LATIN SMALL LETTER D WITH STROKE)
376 { "\xC4\x92", 2, "E", 1 }, // U+0112 -> E (LATIN CAPITAL LETTER E WITH MACRON)
377 { "\xC4\x93", 2, "e", 1 }, // U+0113 -> e (LATIN SMALL LETTER E WITH MACRON)
378 { "\xC4\x94", 2, "E", 1 }, // U+0114 -> E (LATIN CAPITAL LETTER E WITH BREVE)
379 { "\xC4\x95", 2, "e", 1 }, // U+0115 -> e (LATIN SMALL LETTER E WITH BREVE)
380 { "\xC4\x96", 2, "E", 1 }, // U+0116 -> E (LATIN CAPITAL LETTER E WITH DOT ABOVE)
381 { "\xC4\x97", 2, "e", 1 }, // U+0117 -> e (LATIN SMALL LETTER E WITH DOT ABOVE)
382 { "\xC4\x98", 2, "E", 1 }, // U+0118 -> E (LATIN CAPITAL LETTER E WITH OGONEK)
383 { "\xC4\x99", 2, "e", 1 }, // U+0119 -> e (LATIN SMALL LETTER E WITH OGONEK)
384 { "\xC4\x9A", 2, "E", 1 }, // U+011A -> E (LATIN CAPITAL LETTER E WITH CARON)
385 { "\xC4\x9B", 2, "e", 1 }, // U+011B -> e (LATIN SMALL LETTER E WITH CARON)
386 { "\xC4\x9C", 2, "G", 1 }, // U+011C -> G (LATIN CAPITAL LETTER G WITH CIRCUMFLEX)
387 { "\xC4\x9D", 2, "g", 1 }, // U+011D -> g (LATIN SMALL LETTER G WITH CIRCUMFLEX)
388 { "\xC4\x9E", 2, "G", 1 }, // U+011E -> G (LATIN CAPITAL LETTER G WITH BREVE)
389 { "\xC4\x9F", 2, "g", 1 }, // U+011F -> g (LATIN SMALL LETTER G WITH BREVE)
390 { "\xC4\xA0", 2, "G", 1 }, // U+0120 -> G (LATIN CAPITAL LETTER G WITH DOT ABOVE)
391 { "\xC4\xA1", 2, "g", 1 }, // U+0121 -> g (LATIN SMALL LETTER G WITH DOT ABOVE)
392 { "\xC4\xA2", 2, "G", 1 }, // U+0122 -> G (LATIN CAPITAL LETTER G WITH CEDILLA)
393 { "\xC4\xA3", 2, "g", 1 }, // U+0123 -> g (LATIN SMALL LETTER G WITH CEDILLA)
394 { "\xC4\xA4", 2, "H", 1 }, // U+0124 -> H (LATIN CAPITAL LETTER H WITH CIRCUMFLEX)
395 { "\xC4\xA5", 2, "h", 1 }, // U+0125 -> h (LATIN SMALL LETTER H WITH CIRCUMFLEX)
396 { "\xC4\xA6", 2, "H", 1 }, // U+0126 -> H (LATIN CAPITAL LETTER H WITH STROKE)
397 { "\xC4\xA7", 2, "h", 1 }, // U+0127 -> h (LATIN SMALL LETTER H WITH STROKE)
398 { "\xC4\xA8", 2, "I", 1 }, // U+0128 -> I (LATIN CAPITAL LETTER I WITH TILDE)
399 { "\xC4\xA9", 2, "i", 1 }, // U+0129 -> i (LATIN SMALL LETTER I WITH TILDE)
400 { "\xC4\xAA", 2, "I", 1 }, // U+012A -> I (LATIN CAPITAL LETTER I WITH MACRON)
401 { "\xC4\xAB", 2, "i", 1 }, // U+012B -> i (LATIN SMALL LETTER I WITH MACRON)
402 { "\xC4\xAC", 2, "I", 1 }, // U+012C -> I (LATIN CAPITAL LETTER I WITH BREVE)
403 { "\xC4\xAD", 2, "i", 1 }, // U+012D -> i (LATIN SMALL LETTER I WITH BREVE)
404 { "\xC4\xAE", 2, "I", 1 }, // U+012E -> I (LATIN CAPITAL LETTER I WITH OGONEK)
405 { "\xC4\xAF", 2, "i", 1 }, // U+012F -> i (LATIN SMALL LETTER I WITH OGONEK)
406 { "\xC4\xB0", 2, "I", 1 }, // U+0130 -> I (LATIN CAPITAL LETTER I WITH DOT ABOVE)
407 { "\xC4\xB1", 2, "i", 1 }, // U+0131 -> i (LATIN SMALL LETTER DOTLESS I)
408 { "\xC4\xB2", 2, "Ij", 2 }, // U+0132 -> Ij (LATIN CAPITAL LIGATURE IJ)
409 { "\xC4\xB3", 2, "ij", 2 }, // U+0133 -> ij (LATIN SMALL LIGATURE IJ)
410 { "\xC4\xB4", 2, "J", 1 }, // U+0134 -> J (LATIN CAPITAL LETTER J WITH CIRCUMFLEX)
411 { "\xC4\xB5", 2, "j", 1 }, // U+0135 -> j (LATIN SMALL LETTER J WITH CIRCUMFLEX)
412 { "\xC4\xB6", 2, "K", 1 }, // U+0136 -> K (LATIN CAPITAL LETTER K WITH CEDILLA)
413 { "\xC4\xB7", 2, "k", 1 }, // U+0137 -> k (LATIN SMALL LETTER K WITH CEDILLA)
414 { "\xC4\xB8", 2, "k", 1 }, // U+0138 -> k (LATIN SMALL LETTER KRA)
415 { "\xC4\xB9", 2, "L", 1 }, // U+0139 -> L (LATIN CAPITAL LETTER L WITH ACUTE)
416 { "\xC4\xBA", 2, "l", 1 }, // U+013A -> l (LATIN SMALL LETTER L WITH ACUTE)
417 { "\xC4\xBB", 2, "L", 1 }, // U+013B -> L (LATIN CAPITAL LETTER L WITH CEDILLA)
418 { "\xC4\xBC", 2, "l", 1 }, // U+013C -> l (LATIN SMALL LETTER L WITH CEDILLA)
419 { "\xC4\xBD", 2, "L", 1 }, // U+013D -> L (LATIN CAPITAL LETTER L WITH CARON)
420 { "\xC4\xBE", 2, "l", 1 }, // U+013E -> l (LATIN SMALL LETTER L WITH CARON)
421 { "\xC4\xBF", 2, "L", 1 }, // U+013F -> L (LATIN CAPITAL LETTER L WITH MIDDLE DOT)
422 { "\xC5\x80", 2, "l", 1 }, // U+0140 -> l (LATIN SMALL LETTER L WITH MIDDLE DOT)
423 { "\xC5\x81", 2, "L", 1 }, // U+0141 -> L (LATIN CAPITAL LETTER L WITH STROKE)
424 { "\xC5\x82", 2, "l", 1 }, // U+0142 -> l (LATIN SMALL LETTER L WITH STROKE)
425 { "\xC5\x83", 2, "N", 1 }, // U+0143 -> N (LATIN CAPITAL LETTER N WITH ACUTE)
426 { "\xC5\x84", 2, "n", 1 }, // U+0144 -> n (LATIN SMALL LETTER N WITH ACUTE)
427 { "\xC5\x85", 2, "N", 1 }, // U+0145 -> N (LATIN CAPITAL LETTER N WITH CEDILLA)
428 { "\xC5\x86", 2, "n", 1 }, // U+0146 -> n (LATIN SMALL LETTER N WITH CEDILLA)
429 { "\xC5\x87", 2, "N", 1 }, // U+0147 -> N (LATIN CAPITAL LETTER N WITH CARON)
430 { "\xC5\x88", 2, "n", 1 }, // U+0148 -> n (LATIN SMALL LETTER N WITH CARON)
431 { "\xC5\x89", 2, "'n", 2 }, // U+0149 -> 'n (LATIN SMALL LETTER N PRECEDED BY APOSTROPHE)
432 { "\xC5\x8A", 2, "Ng", 2 }, // U+014A -> Ng (LATIN CAPITAL LETTER ENG)
433 { "\xC5\x8B", 2, "ng", 2 }, // U+014B -> ng (LATIN SMALL LETTER ENG)
434 { "\xC5\x8C", 2, "O", 1 }, // U+014C -> O (LATIN CAPITAL LETTER O WITH MACRON)
435 { "\xC5\x8D", 2, "o", 1 }, // U+014D -> o (LATIN SMALL LETTER O WITH MACRON)
436 { "\xC5\x8E", 2, "O", 1 }, // U+014E -> O (LATIN CAPITAL LETTER O WITH BREVE)
437 { "\xC5\x8F", 2, "o", 1 }, // U+014F -> o (LATIN SMALL LETTER O WITH BREVE)
438 { "\xC5\x90", 2, "O", 1 }, // U+0150 -> O (LATIN CAPITAL LETTER O WITH DOUBLE ACUTE)
439 { "\xC5\x91", 2, "o", 1 }, // U+0151 -> o (LATIN SMALL LETTER O WITH DOUBLE ACUTE)
440 { "\xC5\x92", 2, "Oe", 2 }, // U+0152 -> Oe (LATIN CAPITAL LIGATURE OE)
441 { "\xC5\x93", 2, "oe", 2 }, // U+0153 -> oe (LATIN SMALL LIGATURE OE)
442 { "\xC5\x94", 2, "R", 1 }, // U+0154 -> R (LATIN CAPITAL LETTER R WITH ACUTE)
443 { "\xC5\x95", 2, "r", 1 }, // U+0155 -> r (LATIN SMALL LETTER R WITH ACUTE)
444 { "\xC5\x96", 2, "R", 1 }, // U+0156 -> R (LATIN CAPITAL LETTER R WITH CEDILLA)
445 { "\xC5\x97", 2, "r", 1 }, // U+0157 -> r (LATIN SMALL LETTER R WITH CEDILLA)
446 { "\xC5\x98", 2, "R", 1 }, // U+0158 -> R (LATIN CAPITAL LETTER R WITH CARON)
447 { "\xC5\x99", 2, "r", 1 }, // U+0159 -> r (LATIN SMALL LETTER R WITH CARON)
448 { "\xC5\x9A", 2, "S", 1 }, // U+015A -> S (LATIN CAPITAL LETTER S WITH ACUTE)
449 { "\xC5\x9B", 2, "s", 1 }, // U+015B -> s (LATIN SMALL LETTER S WITH ACUTE)
450 { "\xC5\x9C", 2, "S", 1 }, // U+015C -> S (LATIN CAPITAL LETTER S WITH CIRCUMFLEX)
451 { "\xC5\x9D", 2, "s", 1 }, // U+015D -> s (LATIN SMALL LETTER S WITH CIRCUMFLEX)
452 { "\xC5\x9E", 2, "S", 1 }, // U+015E -> S (LATIN CAPITAL LETTER S WITH CEDILLA)
453 { "\xC5\x9F", 2, "s", 1 }, // U+015F -> s (LATIN SMALL LETTER S WITH CEDILLA)
454 { "\xC5\xA0", 2, "S", 1 }, // U+0160 -> S (LATIN CAPITAL LETTER S WITH CARON)
455 { "\xC5\xA1", 2, "s", 1 }, // U+0161 -> s (LATIN SMALL LETTER S WITH CARON)
456 { "\xC5\xA2", 2, "T", 1 }, // U+0162 -> T (LATIN CAPITAL LETTER T WITH CEDILLA)
457 { "\xC5\xA3", 2, "t", 1 }, // U+0163 -> t (LATIN SMALL LETTER T WITH CEDILLA)
458 { "\xC5\xA4", 2, "T", 1 }, // U+0164 -> T (LATIN CAPITAL LETTER T WITH CARON)
459 { "\xC5\xA5", 2, "t", 1 }, // U+0165 -> t (LATIN SMALL LETTER T WITH CARON)
460 { "\xC5\xA6", 2, "T", 1 }, // U+0166 -> T (LATIN CAPITAL LETTER T WITH STROKE)
461 { "\xC5\xA7", 2, "t", 1 }, // U+0167 -> t (LATIN SMALL LETTER T WITH STROKE)
462 { "\xC5\xA8", 2, "U", 1 }, // U+0168 -> U (LATIN CAPITAL LETTER U WITH TILDE)
463 { "\xC5\xA9", 2, "u", 1 }, // U+0169 -> u (LATIN SMALL LETTER U WITH TILDE)
464 { "\xC5\xAA", 2, "U", 1 }, // U+016A -> U (LATIN CAPITAL LETTER U WITH MACRON)
465 { "\xC5\xAB", 2, "u", 1 }, // U+016B -> u (LATIN SMALL LETTER U WITH MACRON)
466 { "\xC5\xAC", 2, "U", 1 }, // U+016C -> U (LATIN CAPITAL LETTER U WITH BREVE)
467 { "\xC5\xAD", 2, "u", 1 }, // U+016D -> u (LATIN SMALL LETTER U WITH BREVE)
468 { "\xC5\xAE", 2, "U", 1 }, // U+016E -> U (LATIN CAPITAL LETTER U WITH RING ABOVE)
469 { "\xC5\xAF", 2, "u", 1 }, // U+016F -> u (LATIN SMALL LETTER U WITH RING ABOVE)
470 { "\xC5\xB0", 2, "U", 1 }, // U+0170 -> U (LATIN CAPITAL LETTER U WITH DOUBLE ACUTE)
471 { "\xC5\xB1", 2, "u", 1 }, // U+0171 -> u (LATIN SMALL LETTER U WITH DOUBLE ACUTE)
472 { "\xC5\xB2", 2, "U", 1 }, // U+0172 -> U (LATIN CAPITAL LETTER U WITH OGONEK)
473 { "\xC5\xB3", 2, "u", 1 }, // U+0173 -> u (LATIN SMALL LETTER U WITH OGONEK)
474 { "\xC5\xB4", 2, "W", 1 }, // U+0174 -> W (LATIN CAPITAL LETTER W WITH CIRCUMFLEX)
475 { "\xC5\xB5", 2, "w", 1 }, // U+0175 -> w (LATIN SMALL LETTER W WITH CIRCUMFLEX)
476 { "\xC5\xB6", 2, "Y", 1 }, // U+0176 -> Y (LATIN CAPITAL LETTER Y WITH CIRCUMFLEX)
477 { "\xC5\xB7", 2, "y", 1 }, // U+0177 -> y (LATIN SMALL LETTER Y WITH CIRCUMFLEX)
478 { "\xC5\xB8", 2, "Y", 1 }, // U+0178 -> Y (LATIN CAPITAL LETTER Y WITH DIAERESIS)
479 { "\xC5\xB9", 2, "Z", 1 }, // U+0179 -> Z (LATIN CAPITAL LETTER Z WITH ACUTE)
480 { "\xC5\xBA", 2, "z", 1 }, // U+017A -> z (LATIN SMALL LETTER Z WITH ACUTE)
481 { "\xC5\xBB", 2, "Z", 1 }, // U+017B -> Z (LATIN CAPITAL LETTER Z WITH DOT ABOVE)
482 { "\xC5\xBC", 2, "z", 1 }, // U+017C -> z (LATIN SMALL LETTER Z WITH DOT ABOVE)
483 { "\xC5\xBD", 2, "Z", 1 }, // U+017D -> Z (LATIN CAPITAL LETTER Z WITH CARON)
484 { "\xC5\xBE", 2, "z", 1 }, // U+017E -> z (LATIN SMALL LETTER Z WITH CARON)
485 { "\xC5\xBF", 2, "s", 1 }, // U+017F -> s (LATIN SMALL LETTER LONG S
487 // U+2000 ... U+206F (General Punctuation)
488 { "\xE2\x80\x90", 3, "-", 1 }, // U+2010 -> - (HYPHEN)
489 { "\xE2\x80\x91", 3, "-", 1 }, // U+2011 -> - (NON-BREAKING HYPHEN)
490 { "\xE2\x80\x92", 3, "--", 2 }, // U+2012 -> -- (FIGURE DASH)
491 { "\xE2\x80\x93", 3, "--", 2 }, // U+2013 -> -- (EN DASH)
492 { "\xE2\x80\x94", 3, "---", 3 }, // U+2014 -> --- (EM DASH)
493 { "\xE2\x80\x95", 3, "---", 3 }, // U+2015 -> --- (HORIZONTAL BAR)
494 { "\xE2\x80\x96", 3, "||", 2 }, // U+2016 -> || (DOUBLE VERTICAL LINE)
495 { "\xE2\x80\x97", 3, "_", 1 }, // U+2017 -> _ (DOUBLE LOW LINE)
496 { "\xE2\x80\x98", 3, "`", 1 }, // U+2018 -> ` (LEFT SINGLE QUOTATION MARK)
497 { "\xE2\x80\x99", 3, "'", 1 }, // U+2019 -> ' (RIGHT SINGLE QUOTATION MARK)
498 { "\xE2\x80\x9A", 3, ",", 1 }, // U+201A -> , (SINGLE LOW-9 QUOTATION MARK)
499 { "\xE2\x80\x9B", 3, "'", 1 }, // U+201B -> ' (SINGLE HIGH-REVERSED-9 QUOTATION MARK)
500 { "\xE2\x80\x9C", 3, "\"", 1 }, // U+201C -> " (LEFT DOUBLE QUOTATION MARK)
501 { "\xE2\x80\x9D", 3, "\"", 1 }, // U+201D -> " (RIGHT DOUBLE QUOTATION MARK)
502 { "\xE2\x80\x9E", 3, ",,", 2 }, // U+201E -> ,, (DOUBLE LOW-9 QUOTATION MARK)
503 { "\xE2\x80\x9F", 3, "``", 2 }, // U+201F -> `` (DOUBLE HIGH-REVERSED-9 QUOTATION MARK)
504 { "\xE2\x80\xA0", 3, "+", 1 }, // U+2020 -> + (DAGGER)
505 { "\xE2\x80\xA1", 3, "+", 1 }, // U+2021 -> + (DOUBLE DAGGER)
506 { "\xE2\x80\xA2", 3, "\xC2\xB7", -2 }, // U+2022 -> U+00B7 (BULLET) -> (MIDDLE POINT)
507 { "\xE2\x80\xA3", 3, ".", 1 }, // U+2023 -> . (TRIANGULAR BULLET)
508 { "\xE2\x80\xA4", 3, ".", 1 }, // U+2024 -> . (ONE DOT LEADER)
509 { "\xE2\x80\xA5", 3, "..", 2 }, // U+2025 -> .. (TWO DOT LEADER)
510 { "\xE2\x80\xA6", 3, "...", 3 }, // U+2026 -> ... (HORIZONTAL ELLIPSIS)
511 { "\xE2\x80\xA7", 3, "\xC2\xB7", -2 }, // U+2027 -> U+00B7 (HYPHENATION POINT) -> (MIDDLE POINT)
512 { "\xE2\x80\xB0", 3, "%.", 2 }, // U+2030 -> %. (PER MILLE SIGN)
513 { "\xE2\x80\xB1", 3, "%..", 3 }, // U+2031 -> %.. (PER TEN THOUSAND SIGN)
514 { "\xE2\x80\xB2", 3, "'", 1 }, // U+2032 -> ` (PRIME)
515 { "\xE2\x80\xB3", 3, "''", 2 }, // U+2033 -> '' (DOUBLE PRIME)
516 { "\xE2\x80\xB4", 3, "'''", 3 }, // U+2034 -> ''' (TRIPLE PRIME)
517 { "\xE2\x80\xB5", 3, "`", 1 }, // U+2035 -> ` (REVERSED PRIME)
518 { "\xE2\x80\xB6", 3, "``", 2 }, // U+2036 -> `` (REVERSED DOUBLE PRIME)
519 { "\xE2\x80\xB7", 3, "```", 3 }, // U+2037 -> ``` (REVERSED TRIPLE PRIME)
520 { "\xE2\x80\xB8", 3, "^", 1 }, // U+2038 -> ^ (CARET)
521 { "\xE2\x80\xB9", 3, "<", 1 }, // U+2039 -> < (SINGLE LEFT-POINTING ANGLE QUOTATION MARK)
522 { "\xE2\x80\xBA", 3, ">", 1 }, // U+203A -> > (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK)
523 { "\xE2\x80\xBB", 3, "\xC3\x97", -2 }, // U+203B -> U+00D7 (REFERENCE MARK) -> (MULTIPLICATION SIGN)
524 { "\xE2\x80\xBC", 3, "!!", 2 }, // U+203C -> !! (DOUBLE EXCLAMATION MARK)
525 { "\xE2\x80\xBD", 3, "?", 1 }, // U+203D -> ? (INTERROBANG)
526 { "\xE2\x81\x82", 3, "*", 1 }, // U+2042 -> * (ASTERISM)
527 { "\xE2\x81\x83", 3, ".", 1 }, // U+2043 -> . (HYPHEN BULLET)
528 { "\xE2\x81\x84", 3, "/", 1 }, // U+2044 -> / (FRACTION SLASH)
529 { "\xE2\x81\x87", 3, "??", 2 }, // U+2047 -> ?? (DOUBLE QUESTION MARK)
530 { "\xE2\x81\x88", 3, "?!", 2 }, // U+2048 -> ?! (QUESTION EXCLAMATION MARK)
531 { "\xE2\x81\x89", 3, "!?", 2 }, // U+2049 -> !? (EXCLAMATION QUESTION MARK)
532 { "\xE2\x81\x8E", 3, "*", 1 }, // U+204E -> * (LOW ASTERISK)
533 { "\xE2\x81\x8F", 3, ";", 1 }, // U+204F -> ; (REVERSED SEMICOLON)
534 { "\xE2\x81\x91", 3, "*", 1 }, // U+2051 -> * (TWO ASTERISKS ALIGNED VERTICALLY)
535 { "\xE2\x81\x92", 3, "-", 1 }, // U+2052 -> - (COMMERCIAL MINUS SIGN)
536 { "\xE2\x81\x93", 3, "~", 1 }, // U+2053 -> ~ (SWUNG DASH)
537 { "\xE2\x81\x95", 3, "*", 1 }, // U+2055 -> * (FLOWER PUNCTUATION MARK)
538 { "\xE2\x81\x97", 3, "''''", 4 }, // U+2057 -> '''' (QUADRUPLE PRIME)
539 { "\xE2\x81\x9A", 3, ":", 1 }, // U+205A -> : (TWO DOT PUNCTUATION)
540 { "\xE2\x81\x9C", 3, "+", 1 }, // U+205C -> + (DOTTED CROSS)
542 // U+20A0 ... U+20CF (Currency Symbols)
543 { "\xE2\x82\xA0", 3, "ECU", 3 }, // U+20A0 -> ECU (EURO-CURRENCY SIGN)
544 { "\xE2\x82\xA1", 3, "CRC", 3 }, // U+20A1 -> CRC (COLON SIGN)
545 { "\xE2\x82\xA2", 3, "BRC", 3 }, // U+20A2 -> BRC (CRUZEIRO SIGN)
546 { "\xE2\x82\xA3", 3, "BEF", 3 }, // U+20A3 -> BEF (FRENCH FRANC SIGN)
547 { "\xE2\x82\xA4", 3, "ITL", 3 }, // U+20A4 -> ITL (LIRA SIGN)
548 { "\xE2\x82\xA6", 3, "NGN", 3 }, // U+20A6 -> NGN (NEIRA SIGN)
549 { "\xE2\x82\xA7", 3, "ESP", 3 }, // U+20A7 -> ESP (PESETA SIGN)
550 { "\xE2\x82\xA8", 3, "MVQ", 3 }, // U+20A8 -> MVQ (RUPEE SIGN)
551 { "\xE2\x82\xA9", 3, "KPW", 3 }, // U+20A9 -> KPW (WON SIGN)
552 { "\xE2\x82\xAA", 3, "ILS", 3 }, // U+20AA -> ILS (NEW SHEQEL SIGN)
553 { "\xE2\x82\xAB", 3, "VNC", 3 }, // U+20AB -> VNC (DONG SIGN)
554 { "\xE2\x82\xAC", 3, "EUR", 3 }, // U+20AC -> EUR (EURO SIGN)
555 { "\xE2\x82\xAD", 3, "LAK", 3 }, // U+20AD -> LAK (KIP SIGN)
556 { "\xE2\x82\xAE", 3, "MNT", 3 }, // U+20AE -> MNT (TUGRIK SIGN)
557 { "\xE2\x82\xAF", 3, "GRD", 3 }, // U+20AF -> GRD (DRACHMA SIGN)
558 { "\xE2\x82\xB0", 3, "Pf", 2 }, // U+20B0 -> Pf (GERMAN PENNY SIGN)
559 { "\xE2\x82\xB1", 3, "P", 1 }, // U+20B1 -> P (PESO SIGN)
560 { "\xE2\x82\xB2", 3, "PYG", 3 }, // U+20B2 -> PYG (GUARANI SIGN)
561 { "\xE2\x82\xB3", 3, "ARA", 3 }, // U+20B3 -> ARA (AUSTRAL SIGN)
562 { "\xE2\x82\xB4", 3, "UAH", 3 }, // U+20B4 -> UAH (HRYVNIA SIGN)
563 { "\xE2\x82\xB5", 3, "GHS", 3 }, // U+20B5 -> GHS (CEDI SIGN)
565 // U+2190 ... U+21FF (Arrows)
566 { "\xE2\x86\x90", 3, "<-", 2 }, // U+2190 -> <- (LEFTWARDS ARROW)
567 { "\xE2\x86\x92", 3, "->", 2 }, // U+2192 -> -> (RIGHTWARDS ARROW)
570 ENTER();
572 // start with no replacement string
573 *dst = NULL;
575 // perform a binary search in the lookup table
576 if((rep = bsearch(&key, utf8map, sizeof(utf8map) / sizeof(utf8map[0]), sizeof(utf8map[0]), compareUTF8Replacements)) != NULL)
578 // if we found something, then copy this over to the result variables
579 *dst = rep->rep;
580 len = rep->replen;
583 RETURN(len);
584 return len;
588 /// matchCodesetAlias()
struct CodesetAliases
{
  const char *MIMEname; // The official and correct MIME name for a codeset
  const char *Aliases;  // A space separated array with well-known aliases
};

// The list is terminated by an entry with a NULL MIMEname. Each MIME name must
// be listed only once as a lookup stops at the first matching entry anyway.
const struct CodesetAliases codesetAliases[] =
{
  // MIME name       Aliases
  { "Amiga-1251",   "Ami1251 Amiga1251" },
  { "AmigaPL",      "AmiPL Amiga-PL" },
  { "ISO-8859-1",   "ISO8859-1 8859-1" },
  { "ISO-8859-2",   "ISO8859-2 8859-2" },
  { "ISO-8859-3",   "ISO8859-3 8859-3" },
  { "ISO-8859-4",   "ISO8859-4 8859-4" },
  { "ISO-8859-5",   "ISO8859-5 8859-5" },
  { "ISO-8859-6",   "ISO8859-6 8859-6" },
  { "ISO-8859-7",   "ISO8859-7 8859-7" },
  { "ISO-8859-8",   "ISO8859-8 8859-8" },
  { "ISO-8859-9",   "ISO8859-9 8859-9" },
  { "ISO-8859-10",  "ISO8859-10 8859-10" },
  { "ISO-8859-11",  "ISO8859-11 8859-11" },
  { "ISO-8859-12",  "ISO8859-12 8859-12" },
  { "ISO-8859-13",  "ISO8859-13 8859-13" },
  { "ISO-8859-14",  "ISO8859-14 8859-14" },
  { "ISO-8859-15",  "ISO8859-15 8859-15" },
  { "ISO-8859-16",  "ISO8859-16 8859-16" },
  { "KOI8-R",       "KOI8R" },
  { "US-ASCII",     "ASCII" },
  { "UTF-8",        "UTF8 UTF" },
  { "UTF-16",       "UTF16" },
  { "UTF-32",       "UTF32" },
  { "windows-1250", "cp1250 windows1250" },
  { "windows-1251", "cp1251 windows1251" },
  { "windows-1252", "cp1252 windows1252" },
  { "windows-1253", "cp1253 windows1253" },
  { "windows-1254", "cp1254 windows1254" },
  { "windows-1255", "cp1255 windows1255" },
  { "windows-1256", "cp1256 windows1256" },
  { "windows-1257", "cp1257 windows1257" },
  { NULL,           NULL, }
};
634 static const char *matchCodesetAlias(const char *search)
636 const char *result = NULL;
637 size_t len = strlen(search);
638 int i;
640 ENTER();
642 for(i=0; codesetAliases[i].MIMEname != NULL; i++)
644 BOOL found = FALSE;
646 // search the MIMEname first
647 if(stricmp(search, codesetAliases[i].MIMEname) == 0)
648 found = TRUE;
649 else
651 const char *s = codesetAliases[i].Aliases;
653 // loop through space separated list of aliases
654 while(s != NULL && *s != '\0')
656 if(strnicmp(search, s, len) == 0)
658 found = TRUE;
659 break;
662 if((s = strpbrk(s, " ")) != NULL)
663 s++;
667 if(found == TRUE)
669 result = codesetAliases[i].MIMEname;
671 break;
675 RETURN(result);
676 return result;
681 /**************************************************************************/
683 /// defaultCodeset()
684 static struct codeset *defaultCodeset(BOOL useSemaphore)
686 char buf[256];
687 struct codeset *codeset;
689 ENTER();
691 if(useSemaphore == TRUE)
692 ObtainSemaphoreShared(&CodesetsBase->libSem);
694 buf[0] = '\0';
695 GetVar("codeset_default" ,buf, sizeof(buf), GVF_GLOBAL_ONLY);
697 if(buf[0] == '\0' || (codeset = codesetsFind(&CodesetsBase->codesets, buf)) == NULL)
698 codeset = CodesetsBase->systemCodeset;
700 if(useSemaphore == TRUE)
701 ReleaseSemaphore(&CodesetsBase->libSem);
703 RETURN(codeset);
704 return codeset;
708 /// codesetsCmpUnicode()
709 // The compare function
710 static int codesetsCmpUnicode(const void *a1, const void *a2)
712 struct single_convert *arg1 = (struct single_convert *)a1;
713 struct single_convert *arg2 = (struct single_convert *)a2;
715 return strcmp((char*)&arg1->utf8[1], (char*)&arg2->utf8[1]);
719 /// codesetsReadTable()
721 #define ITEM_STANDARD "Standard"
722 #define ITEM_ALTSTANDARD "AltStandard"
723 #define ITEM_READONLY "ReadOnly"
724 #define ITEM_CHARACTERIZATION "Characterization"
726 // Reads a coding table and adds it
727 static BOOL codesetsReadTable(struct codesetList *csList, STRPTR name)
729 BPTR fh;
730 BOOL res = FALSE;
732 ENTER();
734 D(DBF_STARTUP, "trying to read charset file '%s'...", name);
736 if((fh = Open(name, MODE_OLDFILE)) != (BPTR)NULL)
738 struct codeset *codeset;
740 if((codeset = (struct codeset *)allocArbitrateVecPooled(sizeof(*codeset))) != NULL)
742 int i;
743 char buf[512];
745 memset(codeset, 0, sizeof(*codeset));
747 for(i = 0; i<256; i++)
749 codeset->table[i].code = i;
750 codeset->table[i].ucs4 = i;
753 while(readLine(fh, buf, sizeof(buf)) == TRUE)
755 const char *result;
757 if(buf[0] != '#')
759 if((result = getConfigItem(buf, ITEM_STANDARD)) != NULL)
760 codeset->name = mystrdup(result);
761 else if(codeset->name == NULL) // a valid file starts with "Standard" and nothing else!!
762 break;
763 else if((result = getConfigItem(buf, ITEM_ALTSTANDARD)) != NULL)
764 codeset->alt_name = mystrdup(result);
765 else if((result = getConfigItem(buf, ITEM_READONLY)) != NULL)
766 codeset->read_only = (atoi(result) == 0) ? 0 : 1;
767 else if((result = getConfigItem(buf, ITEM_CHARACTERIZATION)) != NULL)
769 if(result[0] == '_' && result[1] == '(' && result[2] == '"')
771 char *end = strchr(result + 3, '"');
773 if(end != NULL)
774 codeset->characterization = mystrndup(result+3, end-(result+3));
776 else
777 codeset->characterization = mystrdup(result);
779 else
781 char *p = buf;
782 int fmt2 = 0;
784 if(*p == '=' || (fmt2 = ((*p=='0') || (*(p+1)=='x'))))
786 p++;
787 p += fmt2;
789 i = strtol(p, &p, 16);
790 if(i>0 && i<256)
792 while(isspace(*p))
793 p++;
795 if(strnicmp(p, "U+", 2) == 0)
797 p += 2;
798 codeset->table[i].ucs4 = strtol(p, &p, 16);
800 else if(*p != '#')
802 codeset->table[i].ucs4 = strtol(p, &p, 0);
810 // check if there is not already codeset with the same name in here
811 if(codeset->name != NULL && codesetsFind(csList, codeset->name) == NULL)
813 for(i=0; i<256; i++)
815 UTF32 src = codeset->table[i].ucs4;
816 UTF32 *src_ptr = &src;
817 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
819 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr, src_ptr+1, &dest_ptr, dest_ptr+6, CSF_StrictConversion);
820 *dest_ptr = 0;
821 codeset->table[i].utf8[0] = (IPTR)dest_ptr-(IPTR)(&codeset->table[i].utf8[1]);
824 memcpy(codeset->table_sorted, codeset->table, sizeof(codeset->table));
825 qsort(codeset->table_sorted, 256, sizeof(codeset->table[0]), codesetsCmpUnicode);
826 D(DBF_STARTUP, "adding external codeset '%s'", codeset->name);
827 AddTail((struct List *)csList, (struct Node *)&codeset->node);
829 res = TRUE;
831 else
833 // cleanup
834 if(codeset->name != NULL)
835 freeArbitrateVecPooled(codeset->name);
836 if(codeset->alt_name != NULL)
837 freeArbitrateVecPooled(codeset->alt_name);
838 if(codeset->characterization != NULL)
839 freeArbitrateVecPooled(codeset->characterization);
840 freeArbitrateVecPooled(codeset);
844 Close(fh);
847 RETURN(res);
848 return res;
851 /// codesetsScanDir()
852 static void codesetsScanDir(struct codesetList *csList, const char *dirPath)
854 ENTER();
856 if(dirPath != NULL && dirPath[0] != '\0')
858 #if defined(__amigaos4__)
859 APTR dirContext;
861 if((dirContext = ObtainDirContextTags(EX_StringNameInput, dirPath,
862 EX_DataFields, EXF_NAME|EXF_TYPE,
863 TAG_END)) != NULL)
865 struct ExamineData *exd;
867 D(DBF_STARTUP, "scanning directory '%s' for codesets tables", dirPath);
869 while((exd = ExamineDir(dirContext)) != NULL)
871 if(EXD_IS_FILE(exd))
873 char filePath[620];
875 strlcpy(filePath, dirPath, sizeof(filePath));
876 AddPart(filePath, exd->Name, sizeof(filePath));
878 D(DBF_STARTUP, "about to read codeset table '%s'", filePath);
880 codesetsReadTable(csList, filePath);
884 ReleaseDirContext(dirContext);
886 #else
887 BPTR dirLock;
889 if((dirLock = Lock(dirPath, ACCESS_READ)))
891 struct ExAllControl *eac;
893 D(DBF_STARTUP, "scanning directory '%s' for codesets tables", dirPath);
895 if((eac = AllocDosObject(DOS_EXALLCONTROL, NULL)) != NULL)
897 struct ExAllData *ead;
898 struct ExAllData *eabuffer;
899 LONG more;
901 eac->eac_LastKey = 0;
902 eac->eac_MatchString = NULL;
903 eac->eac_MatchFunc = NULL;
905 if((eabuffer = allocVecPooled(CodesetsBase->pool, 10*sizeof(struct ExAllData))) != NULL)
907 char filePath[620];
911 more = ExAll(dirLock, eabuffer, 10*sizeof(struct ExAllData), ED_TYPE, eac);
912 if(!more && IoErr() != ERROR_NO_MORE_ENTRIES)
913 break;
915 if(eac->eac_Entries == 0)
916 continue;
918 ead = (struct ExAllData *)eabuffer;
921 // we only take that ead if it is a file (ed_Type < 0)
922 if(ead->ed_Type < 0)
924 strlcpy(filePath, dirPath, sizeof(filePath));
925 AddPart(filePath, (char *)ead->ed_Name, sizeof(filePath));
927 D(DBF_STARTUP, "about to read codeset table '%s'", filePath);
929 codesetsReadTable(csList, filePath);
931 ead = ead->ed_Next;
933 while(ead != NULL);
935 while(more);
937 freeVecPooled(CodesetsBase->pool, eabuffer);
940 FreeDosObject(DOS_EXALLCONTROL, eac);
943 UnLock(dirLock);
945 #endif
948 LEAVE();
952 /// codesetsInit()
953 // Initialized and loads the codesets
954 BOOL codesetsInit(struct codesetList *csList)
956 BOOL success = FALSE;
957 struct codeset *codeset;
958 UTF32 src;
959 int i;
960 #if defined(__amigaos4__)
961 ULONG nextMIB = 3;
962 #endif
964 ENTER();
966 NewList((struct List *)csList);
968 // to make the list of the supported codesets complete we also add fake
969 // 'UTF-8', 'UTF-16' and 'UTF-32' only so that our users can query for those codesets as well.
970 if((codeset = allocArbitrateVecPooled(sizeof(*codeset))) == NULL)
971 goto end;
973 memset(codeset, 0, sizeof(*codeset));
974 codeset->name = mystrdup("UTF-8");
975 codeset->alt_name = mystrdup("UTF8");
976 codeset->characterization = mystrdup("Unicode");
977 codeset->read_only = 0;
978 D(DBF_STARTUP, "adding internal codeset 'UTF-8'");
979 AddTail((struct List *)csList, (struct Node *)&codeset->node);
980 CodesetsBase->utf8Codeset = codeset;
982 if((codeset = allocArbitrateVecPooled(sizeof(*codeset))) == NULL)
983 goto end;
985 memset(codeset, 0, sizeof(*codeset));
986 codeset->name = mystrdup("UTF-16");
987 codeset->alt_name = mystrdup("UTF16");
988 codeset->characterization = mystrdup("16-bit Unicode");
989 codeset->read_only = 0;
990 D(DBF_STARTUP, "adding internal codeset 'UTF-16'");
991 AddTail((struct List *)csList, (struct Node *)&codeset->node);
992 CodesetsBase->utf16Codeset = codeset;
994 if((codeset = allocArbitrateVecPooled(sizeof(*codeset))) == NULL)
995 goto end;
997 memset(codeset, 0, sizeof(*codeset));
998 codeset->name = mystrdup("UTF-32");
999 codeset->alt_name = mystrdup("UTF32");
1000 codeset->characterization = mystrdup("32-bit Unicode");
1001 codeset->read_only = 0;
1002 D(DBF_STARTUP, "adding internal codeset 'UTF-32'");
1003 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1004 CodesetsBase->utf32Codeset = codeset;
1006 // on AmigaOS4 we can use diskfont.library to inquire charset information as
1007 // it comes with a quite rich implementation of different charsets.
1008 #if defined(__amigaos4__)
1009 D(DBF_STARTUP, "OS4, asking diskfont.library for codesets");
1012 char *mimename;
1013 char *ianaName;
1014 ULONG *mapTable;
1015 ULONG curMIB = nextMIB;
1017 nextMIB = ObtainCharsetInfo(DFCS_NUMBER, curMIB, DFCS_NEXTNUMBER);
1018 if(nextMIB == 0)
1019 break;
1021 mapTable = (ULONG *)ObtainCharsetInfo(DFCS_NUMBER, curMIB, DFCS_MAPTABLE);
1022 mimename = (char *)ObtainCharsetInfo(DFCS_NUMBER, curMIB, DFCS_MIMENAME);
1023 ianaName = (char *)ObtainCharsetInfo(DFCS_NUMBER, curMIB, DFCS_NAME);
1024 if(mapTable != NULL && mimename != NULL && codesetsFind(csList, mimename) == NULL)
1026 D(DBF_STARTUP, "loading charset '%s' from diskfont.library...", mimename);
1028 if((codeset = allocArbitrateVecPooled(sizeof(*codeset))) == NULL)
1029 goto end;
1031 codeset->name = mystrdup(mimename);
1032 codeset->alt_name = NULL;
1033 codeset->characterization = mystrdup(ianaName);
1034 codeset->read_only = 0;
1036 for(i=0; i<256; i++)
1038 UTF32 *src_ptr = &src;
1039 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1041 src = mapTable[i];
1043 codeset->table[i].code = i;
1044 codeset->table[i].ucs4 = src;
1045 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr, src_ptr+1, &dest_ptr, dest_ptr+6, CSF_StrictConversion);
1046 *dest_ptr = 0;
1047 codeset->table[i].utf8[0] = (IPTR)dest_ptr-(IPTR)&codeset->table[i].utf8[1];
1050 memcpy(codeset->table_sorted, codeset->table, sizeof(codeset->table));
1051 qsort(codeset->table_sorted, 256, sizeof(codeset->table[0]), codesetsCmpUnicode);
1053 D(DBF_STARTUP, "adding diskfont.library codeset '%s'", codeset->name);
1054 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1057 while(TRUE);
1058 #endif
1060 #if defined(__MORPHOS__)
1062 struct Library *KeymapBase;
1063 struct Library *LocaleBase;
1064 // assume success at first
1065 BOOL success = TRUE;
1067 D(DBF_STARTUP, "MorphOS, asking keymap.library for codesets");
1068 if((KeymapBase = OpenLibrary("keymap.library", 51)) != NULL)
1070 if((LocaleBase = OpenLibrary("locale.library", 51)) != NULL)
1072 struct KeyMap *keymap = AskKeyMapDefault();
1073 // it doesn't matter if this call fails, as we don't depend on the system codesets
1074 CONST_STRPTR name = GetKeyMapCodepage(keymap);
1076 // legacy keymaps dont have codepage or Unicode mappings
1077 if(name != NULL && keymap != NULL)
1079 D(DBF_STARTUP, "loading charset '%s' from keymap.library...", name);
1081 if((codeset = allocArbitrateVecPooled(sizeof(*codeset))) != NULL)
1083 codeset->name = mystrdup(name);
1084 codeset->alt_name = NULL;
1085 codeset->characterization = mystrdup(name); // No further information available
1086 codeset->read_only = 0;
1088 for(i=0; i<256; i++)
1090 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1091 LONG rc;
1093 codeset->table[i].code = i;
1094 codeset->table[i].ucs4 = src = ToUCS4(i, keymap);
1096 // here we use UTF8_Encode() instead of ConvertUCS4ToUTF8() because
1097 // of an internal bug in MorphOS 2.2.
1098 rc = UTF8_Encode(src, dest_ptr);
1099 rc = rc > 0 ? rc : 1;
1101 dest_ptr[rc] = '\0';
1102 codeset->table[i].utf8[0] = rc;
1105 memcpy(codeset->table_sorted, codeset->table, sizeof(codeset->table));
1106 qsort(codeset->table_sorted, 256, sizeof(codeset->table[0]), codesetsCmpUnicode);
1108 D(DBF_STARTUP, "adding keymap.library codeset '%s'", codeset->name);
1109 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1111 else
1113 // only failed memory allocations are treated as error
1114 success = FALSE;
1118 CloseLibrary(LocaleBase);
1121 CloseLibrary(KeymapBase);
1124 if(success == FALSE)
1125 goto end;
1127 #endif
1129 D(DBF_STARTUP, "loading charsets from LIBS:Charsets...");
1131 // we try to walk to the LIBS:Charsets directory on our own and readin our
1132 // own charset tables
1133 codesetsScanDir(csList, "LIBS:Charsets");
1136 // now we go and initialize our internally supported codesets but only if
1137 // we have not already loaded a charset with the same name
1139 D(DBF_STARTUP, "initializing internal charsets...");
1141 // ISO-8859-1 + EURO
1142 if(codesetsFind(csList, "ISO-8859-1 + Euro") == NULL)
1144 if((codeset = allocArbitrateVecPooled(sizeof(*codeset))) == NULL)
1145 goto end;
1147 codeset->name = mystrdup("ISO-8859-1 + Euro");
1148 codeset->alt_name = NULL;
1149 codeset->characterization = mystrdup("West European (with EURO)");
1150 codeset->read_only = 1;
1152 for(i = 0; i<256; i++)
1154 UTF32 *src_ptr = &src;
1155 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1157 if(i==164)
1158 src = 0x20AC; // the EURO sign
1159 else
1160 src = i;
1162 codeset->table[i].code = i;
1163 codeset->table[i].ucs4 = src;
1164 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr, src_ptr+1, &dest_ptr, dest_ptr+6, CSF_StrictConversion);
1165 *dest_ptr = 0;
1166 codeset->table[i].utf8[0] = (IPTR)dest_ptr-(IPTR)&codeset->table[i].utf8[1];
1168 memcpy(codeset->table_sorted, codeset->table, sizeof(codeset->table));
1169 qsort(codeset->table_sorted, 256, sizeof(codeset->table[0]), codesetsCmpUnicode);
1171 D(DBF_STARTUP, "adding internal codeset '%s'", codeset->name);
1172 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1175 // ISO-8859-1
1176 if(codesetsFind(csList, "ISO-8859-1") == NULL)
1178 if((codeset = allocArbitrateVecPooled(sizeof(*codeset))) == NULL)
1179 goto end;
1181 codeset->name = mystrdup("ISO-8859-1");
1182 codeset->alt_name = mystrdup("ISO8859-1");
1183 codeset->characterization = mystrdup("West European");
1184 codeset->read_only = 0;
1186 for(i = 0; i<256; i++)
1188 UTF32 *src_ptr = &src;
1189 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1191 src = i;
1193 codeset->table[i].code = i;
1194 codeset->table[i].ucs4 = src;
1195 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr, src_ptr+1, &dest_ptr, dest_ptr+6, CSF_StrictConversion);
1196 *dest_ptr = 0;
1197 codeset->table[i].utf8[0] = (IPTR)dest_ptr-(IPTR)&codeset->table[i].utf8[1];
1199 memcpy(codeset->table_sorted, codeset->table, sizeof(codeset->table));
1200 qsort(codeset->table_sorted, 256, sizeof(codeset->table[0]), codesetsCmpUnicode);
1202 D(DBF_STARTUP, "adding internal codeset '%s'", codeset->name);
1203 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1206 // ISO-8859-2
1207 if(codesetsFind(csList, "ISO-8859-2") == NULL)
1209 if((codeset = allocArbitrateVecPooled(sizeof(*codeset))) == NULL)
1210 goto end;
1212 codeset->name = mystrdup("ISO-8859-2");
1213 codeset->alt_name = mystrdup("ISO8859-2");
1214 codeset->characterization = mystrdup("Central/East European");
1215 codeset->read_only = 0;
1217 for(i = 0; i<256; i++)
1219 UTF32 *src_ptr = &src;
1220 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1222 if(i<0xa0)
1223 src = i;
1224 else
1225 src = iso_8859_2_to_ucs4[i-0xa0];
1227 codeset->table[i].code = i;
1228 codeset->table[i].ucs4 = src;
1229 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr, src_ptr+1, &dest_ptr,dest_ptr+6, CSF_StrictConversion);
1230 *dest_ptr = 0;
1231 codeset->table[i].utf8[0] = (IPTR)dest_ptr-(IPTR)&codeset->table[i].utf8[1];
1233 memcpy(codeset->table_sorted, codeset->table, sizeof(codeset->table));
1234 qsort(codeset->table_sorted, 256, sizeof(codeset->table[0]), codesetsCmpUnicode);
1236 D(DBF_STARTUP, "adding internal codeset '%s'", codeset->name);
1237 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1240 // ISO-8859-3
1241 if(codesetsFind(csList, "ISO-8859-3") == NULL)
1243 if((codeset = allocArbitrateVecPooled(sizeof(*codeset))) == NULL)
1244 goto end;
1246 codeset->name = mystrdup("ISO-8859-3");
1247 codeset->alt_name = mystrdup("ISO8859-3");
1248 codeset->characterization = mystrdup("South European");
1249 codeset->read_only = 0;
1251 for(i = 0; i<256; i++)
1253 UTF32 *src_ptr = &src;
1254 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1256 if(i<0xa0)
1257 src = i;
1258 else
1259 src = iso_8859_3_to_ucs4[i-0xa0];
1261 codeset->table[i].code = i;
1262 codeset->table[i].ucs4 = src;
1263 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1264 *dest_ptr = 0;
1265 codeset->table[i].utf8[0] = (IPTR)dest_ptr-(IPTR)&codeset->table[i].utf8[1];
1267 memcpy(codeset->table_sorted, codeset->table, sizeof(codeset->table));
1268 qsort(codeset->table_sorted, 256, sizeof(codeset->table[0]), codesetsCmpUnicode);
1270 D(DBF_STARTUP, "adding internal codeset '%s'", codeset->name);
1271 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1274 // ISO-8859-4
1275 if(codesetsFind(csList, "ISO-8859-4") == NULL)
1277 if((codeset = allocArbitrateVecPooled(sizeof(*codeset))) == NULL)
1278 goto end;
1280 codeset->name = mystrdup("ISO-8859-4");
1281 codeset->alt_name = mystrdup("ISO8859-4");
1282 codeset->characterization = mystrdup("North European");
1283 codeset->read_only = 0;
1285 for(i = 0; i<256; i++)
1287 UTF32 *src_ptr = &src;
1288 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1290 if(i<0xa0)
1291 src = i;
1292 else
1293 src = iso_8859_4_to_ucs4[i-0xa0];
1295 codeset->table[i].code = i;
1296 codeset->table[i].ucs4 = src;
1297 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1298 *dest_ptr = 0;
1299 codeset->table[i].utf8[0] = (IPTR)dest_ptr-(IPTR)&codeset->table[i].utf8[1];
1301 memcpy(codeset->table_sorted, codeset->table, sizeof(codeset->table));
1302 qsort(codeset->table_sorted, 256, sizeof(codeset->table[0]), codesetsCmpUnicode);
1304 D(DBF_STARTUP, "adding internal codeset '%s'", codeset->name);
1305 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1308 // ISO-8859-5
1309 if(codesetsFind(csList, "ISO-8859-5") == NULL)
1311 if((codeset = allocArbitrateVecPooled(sizeof(*codeset))) == NULL)
1312 goto end;
1314 codeset->name = mystrdup("ISO-8859-5");
1315 codeset->alt_name = mystrdup("ISO8859-5");
1316 codeset->characterization = mystrdup("Slavic languages");
1317 codeset->read_only = 0;
1319 for(i = 0; i<256; i++)
1321 UTF32 *src_ptr = &src;
1322 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1324 if(i<0xa0)
1325 src = i;
1326 else
1327 src = iso_8859_5_to_ucs4[i-0xa0];
1329 codeset->table[i].code = i;
1330 codeset->table[i].ucs4 = src;
1331 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1332 *dest_ptr = 0;
1333 codeset->table[i].utf8[0] = (IPTR)dest_ptr-(IPTR)&codeset->table[i].utf8[1];
1335 memcpy(codeset->table_sorted, codeset->table, sizeof(codeset->table));
1336 qsort(codeset->table_sorted, 256, sizeof(codeset->table[0]), codesetsCmpUnicode);
1338 D(DBF_STARTUP, "adding internal codeset '%s'", codeset->name);
1339 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1342 // ISO-8859-9
1343 if(codesetsFind(csList, "ISO-8859-9") == NULL)
1345 if((codeset = allocArbitrateVecPooled(sizeof(*codeset))) == NULL)
1346 goto end;
1348 codeset->name = mystrdup("ISO-8859-9");
1349 codeset->alt_name = mystrdup("ISO8859-9");
1350 codeset->characterization = mystrdup("Turkish");
1351 codeset->read_only = 0;
1353 for(i = 0; i<256; i++)
1355 UTF32 *src_ptr = &src;
1356 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1358 if(i<0xa0)
1359 src = i;
1360 else
1361 src = iso_8859_9_to_ucs4[i-0xa0];
1363 codeset->table[i].code = i;
1364 codeset->table[i].ucs4 = src;
1365 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1366 *dest_ptr = 0;
1367 codeset->table[i].utf8[0] = (IPTR)dest_ptr-(IPTR)&codeset->table[i].utf8[1];
1369 memcpy(codeset->table_sorted, codeset->table, sizeof(codeset->table));
1370 qsort(codeset->table_sorted, 256, sizeof(codeset->table[0]), codesetsCmpUnicode);
1372 D(DBF_STARTUP, "adding internal codeset '%s'", codeset->name);
1373 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1376 // ISO-8859-15
1377 if(codesetsFind(csList, "ISO-8859-15") == NULL)
1379 if((codeset = allocArbitrateVecPooled(sizeof(*codeset))) == NULL)
1380 goto end;
1382 codeset->name = mystrdup("ISO-8859-15");
1383 codeset->alt_name = mystrdup("ISO8859-15");
1384 codeset->characterization = mystrdup("West European II");
1385 codeset->read_only = 0;
1387 for(i = 0; i<256; i++)
1389 UTF32 *src_ptr = &src;
1390 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1392 if(i<0xa0)
1393 src = i;
1394 else
1395 src = iso_8859_15_to_ucs4[i-0xa0];
1397 codeset->table[i].code = i;
1398 codeset->table[i].ucs4 = src;
1399 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1400 *dest_ptr = 0;
1401 codeset->table[i].utf8[0] = (IPTR)dest_ptr-(IPTR)&codeset->table[i].utf8[1];
1403 memcpy(codeset->table_sorted,codeset->table,sizeof (codeset->table));
1404 qsort(codeset->table_sorted, 256, sizeof(codeset->table[0]), codesetsCmpUnicode);
1406 D(DBF_STARTUP, "adding internal codeset '%s'", codeset->name);
1407 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1410 // ISO-8859-16
1411 if(codesetsFind(csList, "ISO-8859-16") == NULL)
1413 if((codeset = allocArbitrateVecPooled(sizeof(*codeset))) == NULL)
1414 goto end;
1416 codeset->name = mystrdup("ISO-8859-16");
1417 codeset->alt_name = mystrdup("ISO8869-16");
1418 codeset->characterization = mystrdup("South-Eastern European");
1419 codeset->read_only = 0;
1421 for(i=0;i<256;i++)
1423 UTF32 *src_ptr = &src;
1424 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1426 if(i < 0xa0)
1427 src = i;
1428 else
1429 src = iso_8859_16_to_ucs4[i-0xa0];
1431 codeset->table[i].code = i;
1432 codeset->table[i].ucs4 = src;
1433 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr, src_ptr+1, &dest_ptr, dest_ptr+6, CSF_StrictConversion);
1434 *dest_ptr = 0;
1435 codeset->table[i].utf8[0] = (IPTR)dest_ptr - (IPTR)&codeset->table[i].utf8[1];
1437 memcpy(codeset->table_sorted, codeset->table, sizeof(codeset->table));
1438 qsort(codeset->table_sorted, 256, sizeof(codeset->table[0]), codesetsCmpUnicode);
1440 D(DBF_STARTUP, "adding internal codeset '%s'", codeset->name);
1441 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1444 // KOI8-R
1445 if(codesetsFind(csList, "KOI8-R") == NULL)
1447 if((codeset = allocArbitrateVecPooled(sizeof(*codeset))) == NULL)
1448 goto end;
1450 codeset->name = mystrdup("KOI8-R");
1451 codeset->alt_name = mystrdup("KOI8R");
1452 codeset->characterization = mystrdup("Russian");
1453 codeset->read_only = 0;
1455 for(i = 0; i<256; i++)
1457 UTF32 *src_ptr = &src;
1458 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1460 if(i<0x80)
1461 src = i;
1462 else
1463 src = koi8r_to_ucs4[i-0x80];
1465 codeset->table[i].code = i;
1466 codeset->table[i].ucs4 = src;
1467 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1468 *dest_ptr = 0;
1469 codeset->table[i].utf8[0] = (IPTR)dest_ptr-(IPTR)&codeset->table[i].utf8[1];
1471 memcpy(codeset->table_sorted, codeset->table, sizeof(codeset->table));
1472 qsort(codeset->table_sorted, 256, sizeof(codeset->table[0]), codesetsCmpUnicode);
1474 D(DBF_STARTUP, "adding internal codeset '%s'", codeset->name);
1475 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1478 // AmigaPL
1479 if(codesetsFind(csList, "AmigaPL") == NULL)
1481 if((codeset = allocArbitrateVecPooled(sizeof(*codeset))) == NULL)
1482 goto end;
1484 codeset->name = mystrdup("AmigaPL");
1485 codeset->alt_name = mystrdup("AmiPL");
1486 codeset->characterization = mystrdup("Polish (Amiga)");
1487 codeset->read_only = 1;
1489 for(i=0; i<256; i++)
1491 UTF32 *src_ptr = &src;
1492 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1494 if(i<0xa0)
1495 src = i;
1496 else
1497 src = amigapl_to_ucs4[i-0xa0];
1499 codeset->table[i].code = i;
1500 codeset->table[i].ucs4 = src;
1501 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1502 *dest_ptr = 0;
1503 codeset->table[i].utf8[0] = (IPTR)dest_ptr-(IPTR)&codeset->table[i].utf8[1];
1505 memcpy(codeset->table_sorted, codeset->table, sizeof(codeset->table));
1506 qsort(codeset->table_sorted, 256, sizeof(codeset->table[0]), codesetsCmpUnicode);
1508 D(DBF_STARTUP, "adding internal codeset '%s'", codeset->name);
1509 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1512 // Amiga-1251
1513 if(codesetsFind(csList, "Amiga-1251") == NULL)
1515 if((codeset = allocArbitrateVecPooled(sizeof(*codeset))) == NULL)
1516 goto end;
1518 codeset->name = mystrdup("Amiga-1251");
1519 codeset->alt_name = mystrdup("Ami1251");
1520 codeset->characterization = mystrdup("Cyrillic (Amiga)");
1521 codeset->read_only = 1;
1523 for(i=0; i<256; i++)
1525 UTF32 *src_ptr = &src;
1526 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1528 if(i < 0xa0)
1529 src = i;
1530 else
1531 src = amiga1251_to_ucs4[i-0xa0];
1533 codeset->table[i].code = i;
1534 codeset->table[i].ucs4 = src;
1535 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr, src_ptr+1, &dest_ptr, dest_ptr+6, CSF_StrictConversion);
1536 *dest_ptr = 0;
1537 codeset->table[i].utf8[0] = (char*)dest_ptr - (char*)&codeset->table[i].utf8[1];
1539 memcpy(codeset->table_sorted, codeset->table, sizeof(codeset->table));
1540 qsort(codeset->table_sorted, 256, sizeof(codeset->table[0]), codesetsCmpUnicode);
1542 D(DBF_STARTUP, "adding internal codeset '%s'", codeset->name);
1543 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1546 success = TRUE;
1548 end:
1549 RETURN(success);
1550 return success;
1554 /// codesetsCleanup()
1555 // Cleanup the memory for the codeset
1556 void codesetsCleanup(struct codesetList *csList)
1558 struct codeset *code;
1560 ENTER();
1562 while((code = (struct codeset *)RemHead((struct List *)csList)) != NULL)
1564 if(code->name != NULL)
1565 freeArbitrateVecPooled(code->name);
1566 if(code->alt_name != NULL)
1567 freeArbitrateVecPooled(code->alt_name);
1568 if(code->characterization != NULL)
1569 freeArbitrateVecPooled(code->characterization);
1571 freeArbitrateVecPooled(code);
1574 LEAVE();
1578 /// codesetsFind()
1579 // Returns the given codeset.
1580 struct codeset *codesetsFind(struct codesetList *csList, const char *name)
1582 struct codeset *res = NULL;
1584 ENTER();
1586 if(name != NULL && name[0] != '\0')
1588 struct Node *node;
1589 const char *matchedName;
1591 if((matchedName = matchCodesetAlias(name)) != NULL)
1592 name = matchedName;
1594 for(node = GetHead((struct List *)csList); node != NULL; node = GetSucc(node))
1596 struct codeset *mstate = (struct codeset *)node;
1598 if(stricmp(name, mstate->name) == 0 ||
1599 (mstate->alt_name != NULL && stricmp(name, mstate->alt_name) == 0))
1601 // break out
1602 res = mstate;
1603 break;
1608 RETURN(res);
1609 return res;
1613 /// checkTextAgainstSingleCodeset
1614 // check how good a text can be represented by a specific codeset
1615 static int checkTextAgainstSingleCodeset(CONST_STRPTR text, ULONG textLen, struct codeset *codeset)
1617 int errors = textLen;
1619 ENTER();
1621 if(codeset->read_only == 0 &&
1622 codeset != CodesetsBase->utf8Codeset &&
1623 codeset != CodesetsBase->utf16Codeset &&
1624 codeset != CodesetsBase->utf32Codeset)
1626 CONST_STRPTR text_ptr = text;
1627 ULONG i;
1629 errors = 0;
1631 // the following identification/detection routine is NOT really smart.
1632 // we just see how each UTF8 string is the representation of each char
1633 // in our source text and then check if they are valid or not. As said,
1634 // not very smart, but we don't have anything better right now :(
1635 for(i=0; i < textLen; i++)
1637 unsigned char c = *text_ptr++;
1639 if(c != '\0')
1641 struct single_convert *f = &codeset->table[c];
1643 if(f->utf8[0] == 0x00 || f->utf8[1] == 0x00)
1644 errors++;
1646 else
1647 break;
1650 else
1651 W(DBF_STARTUP, "codeset '%s' is either read-only (%ld) or UTF8/16/32 (%ld)", codeset->name, codeset->read_only, codeset == CodesetsBase->utf8Codeset || codeset == CodesetsBase->utf16Codeset || codeset == CodesetsBase->utf32Codeset);
1653 D(DBF_STARTUP, "tried to identify text as '%s' text with %ld of %ld errors", codeset->name, errors, textLen);
1655 RETURN(errors);
1656 return errors;
1660 /// checkTextAgainstCodesetList
1661 static int checkTextAgainstCodesetList(CONST_STRPTR text, ULONG textLen, struct codesetList *csList, struct codeset **bestCodeset)
1663 struct Node *node;
1664 int bestErrors = textLen;
1666 ENTER();
1668 *bestCodeset = NULL;
1670 for(node = GetHead((struct List *)csList); node != NULL; node = GetSucc(node))
1672 struct codeset *codeset = (struct codeset *)node;
1673 int errors;
1675 errors = checkTextAgainstSingleCodeset(text, textLen, codeset);
1676 if(errors < bestErrors)
1678 *bestCodeset = codeset;
1679 bestErrors = errors;
1681 if(bestErrors == 0)
1682 break;
1686 RETURN(bestErrors);
1687 return bestErrors;
1691 /// codesetsFindBest()
1692 // Returns the best codeset for the given text
1693 static struct codeset *codesetsFindBest(struct TagItem *attrs, ULONG csFamily, CONST_STRPTR text, ULONG textLen, int *errorPtr)
1695 struct codeset *bestCodeset = NULL;
1696 int bestErrors = textLen;
1697 BOOL found = FALSE;
1699 ENTER();
1701 ObtainSemaphoreShared(&CodesetsBase->libSem);
1703 // in case the user specified the codeset family as a
1704 // cyrillic one we go and do our cyrillic specific analysis first
1705 if(csFamily == CSV_CodesetFamily_Cyrillic)
1707 #define NUM_CYRILLIC 3
1709 struct CodesetSearch
1711 const char *name;
1712 const char *data;
1715 struct CodesetSearch search[NUM_CYRILLIC];
1716 unsigned char *p;
1717 unsigned char *tp;
1718 int ctr[NUM_CYRILLIC];
1719 int Nmax;
1720 int NGlob = 1;
1721 int max;
1722 int gr = 0;
1723 int lr = 0;
1725 D(DBF_STARTUP, "performing cyrillic analysis");
1727 search[0].name = "windows-1251";
1728 search[0].data = cp1251_data;
1729 search[1].name = "IBM866";
1730 search[1].data = cp866_data;
1731 search[2].name = "KOI8-R";
1732 search[2].data = koi8r_data;
1734 memset(&ctr, 0, sizeof(ctr));
1736 tp = (unsigned char *)text;
1740 int n;
1741 int mid = max = -466725766; // TODO: what's the magic behind this constant?
1742 Nmax = 0;
1744 for(n=0; n < NUM_CYRILLIC; n++)
1746 unsigned char la = 0;
1747 unsigned char *tptr = (unsigned char *)search[n].data;
1749 p = tp;
1753 unsigned char lb = (*p++) ^ 128;
1755 if(!((la | lb) & 128))
1756 ctr[n] += (signed char)tptr[(la << 7) + lb];
1758 la = lb;
1760 while(*p);
1762 if(max < ctr[n])
1764 mid = max;
1765 max = ctr[n];
1766 Nmax = n+1;
1770 tp = p;
1771 if((max >= 500) && ((max-mid) >= 1000))
1773 lr = gr = 1;
1774 NGlob = Nmax;
1777 while((*p) && (!gr));
1779 if(gr || ((!(*p)) && lr))
1780 Nmax = NGlob;
1782 // if our analysis found something, we go and try
1783 // to find the corresponding codeset in out codeset list
1784 if(max != 0)
1786 struct TagItem *tstate = attrs;
1787 struct TagItem *tag;
1789 D(DBF_STARTUP, "identified text as '%s", search[Nmax-1].name);
1791 // now we walk through our taglist and check if the user
1792 // supplied
1793 while((tag = NextTagItem((APTR)&tstate)) != NULL)
1795 if(tag->ti_Tag == CSA_CodesetList && tag->ti_Data != 0)
1797 struct codesetList *csList = (struct codesetList *)tag->ti_Data;
1799 if((bestCodeset = codesetsFind(csList, search[Nmax-1].name)) != NULL)
1800 break;
1804 // if we still haven't found the matching codeset
1805 // we search the internal list
1806 if(bestCodeset == NULL)
1807 bestCodeset = codesetsFind(&CodesetsBase->codesets, search[Nmax-1].name);
1809 bestErrors = 0;
1811 found = TRUE;
1815 // if we haven't found the best codeset (through the cyrillic analysis)
1816 // we go and do the dumb latin search in our codesetlist
1817 if(found == FALSE)
1819 struct TagItem *tstate = attrs;
1820 struct TagItem *tag;
1822 // check text against all codesets in all supplied lists of codesets
1823 while((tag = NextTagItem((APTR)&tstate)) != NULL)
1825 switch(tag->ti_Tag)
1827 case CSA_CodesetList:
1829 struct codesetList *csList = (struct codesetList *)tag->ti_Data;
1830 struct codeset *bestCodesetInList;
1831 int bestErrorsInList;
1833 D(DBF_STARTUP, "checking against external codeset list");
1834 bestErrorsInList = checkTextAgainstCodesetList(text, textLen, csList, &bestCodesetInList);
1835 if(bestErrorsInList < bestErrors && bestCodesetInList != NULL)
1837 bestCodeset = bestCodesetInList;
1838 bestErrors = bestErrorsInList;
1840 if(bestErrors == 0)
1841 break;
1844 break;
1848 // we didn't find a "best" codeset in the supplied codesets lists so far,
1849 // so now we check against our internal list
1850 if(bestErrors != 0)
1852 struct codeset *bestCodesetInList;
1853 int bestErrorsInList;
1855 D(DBF_STARTUP, "checking against internal codeset list");
1856 bestErrorsInList = checkTextAgainstCodesetList(text, textLen, &CodesetsBase->codesets, &bestCodesetInList);
1857 if(bestErrorsInList < bestErrors && bestCodesetInList != NULL)
1859 bestCodeset = bestCodesetInList;
1860 bestErrors = bestErrorsInList;
1865 ReleaseSemaphore(&CodesetsBase->libSem);
1867 if(errorPtr != NULL)
1868 *errorPtr = bestErrors;
1870 RETURN(bestCodeset);
1871 return bestCodeset;
1876 /**************************************************************************/
1878 /// CodesetsSupportedA()
1879 LIBPROTO(CodesetsSupportedA, STRPTR *, REG(a6, UNUSED __BASE_OR_IFACE), REG(a0, struct TagItem *attrs))
1881 STRPTR *array = NULL;
1882 struct TagItem *tstate = attrs;
1883 struct TagItem *tag;
1884 BOOL allowMultibyte;
1885 int numCodesets;
1887 ENTER();
1889 allowMultibyte = GetTagData(CSA_AllowMultibyteCodesets, TRUE, attrs);
1891 ObtainSemaphoreShared(&CodesetsBase->libSem);
1893 // first we need to check how many codesets our supplied
1894 // lists carry.
1895 numCodesets = countCodesets(&CodesetsBase->codesets, allowMultibyte);
1896 while((tag = NextTagItem((APTR)&tstate)) != NULL)
1898 switch(tag->ti_Tag)
1900 case CSA_CodesetList:
1902 numCodesets += countCodesets((struct codesetList *)tag->ti_Data, allowMultibyte);
1904 break;
1908 // now that we know how many codesets we have in our lists we
1909 // can put their names into our string arrays
1910 if(numCodesets > 0)
1912 if((array = allocArbitrateVecPooled((numCodesets+1)*sizeof(STRPTR))) != NULL)
1914 struct Node *node;
1915 int i=0;
1917 // first we walk through the internal codesets list and
1918 // add the names
1919 for(node = GetHead((struct List *)&CodesetsBase->codesets); node != NULL; node = GetSucc(node))
1921 struct codeset *code = (struct codeset *)node;
1923 if(allowMultibyte == TRUE ||
1924 (code != CodesetsBase->utf8Codeset && code != CodesetsBase->utf16Codeset && code != CodesetsBase->utf32Codeset))
1926 array[i] = code->name;
1927 i++;
1931 // reset the tstate
1932 tstate = attrs;
1934 // then we also iterate through our private codesets list
1935 while((tag = NextTagItem((APTR)&tstate)) != NULL)
1937 switch(tag->ti_Tag)
1939 case CSA_CodesetList:
1941 for(node = GetHead((struct List *)tag->ti_Data); node != NULL; node = GetSucc(node))
1943 struct codeset *code = (struct codeset *)node;
1945 if(allowMultibyte == TRUE ||
1946 (code != CodesetsBase->utf8Codeset && code != CodesetsBase->utf16Codeset && code != CodesetsBase->utf32Codeset))
1948 array[i] = code->name;
1949 i++;
1953 break;
1957 array[i] = NULL;
1961 ReleaseSemaphore(&CodesetsBase->libSem);
1963 RETURN(array);
1964 return array;
#if defined(__amigaos4__)
// varargs variant of CodesetsSupportedA() for the AmigaOS4 interface:
// the taglist is fetched from the variable arguments and passed on.
LIBPROTOVA(CodesetsSupported, STRPTR *, REG(a6, UNUSED __BASE_OR_IFACE), ...)
{
  VA_LIST ap;
  STRPTR *names;

  VA_START(ap, ICodesets);
  names = CodesetsSupportedA(VA_ARG(ap, struct TagItem *));
  VA_END(ap);

  return names;
}
#endif

///
1982 /// CodesetsFreeA()
1983 LIBPROTO(CodesetsFreeA, void, REG(a6, UNUSED __BASE_OR_IFACE), REG(a0, APTR obj), REG(a1, UNUSED struct TagItem *attrs))
1985 ENTER();
1987 if(obj != NULL)
1988 freeArbitrateVecPooled(obj);
1990 LEAVE();
#if defined(__amigaos4__)
// varargs variant of CodesetsFreeA() for the AmigaOS4 interface:
// the taglist is fetched from the variable arguments and passed on.
LIBPROTOVA(CodesetsFree, void, REG(a6, UNUSED __BASE_OR_IFACE), REG(a0, APTR obj), ...)
{
  VA_LIST ap;

  VA_START(ap, obj);
  CodesetsFreeA(obj, VA_ARG(ap, struct TagItem *));
  VA_END(ap);
}
#endif

///
2005 /// CodesetsSetDefaultA()
2006 LIBPROTO(CodesetsSetDefaultA, struct codeset *, REG(a6, UNUSED __BASE_OR_IFACE), REG(a0, STRPTR name), REG(a1, struct TagItem *attrs))
2008 struct codeset *codeset;
2010 ENTER();
2012 ObtainSemaphoreShared(&CodesetsBase->libSem);
2014 if((codeset = codesetsFind(&CodesetsBase->codesets, name)) != NULL)
2016 ULONG flags;
2018 flags = GVF_SAVE_VAR;
2019 if(GetTagData(CSA_Save, FALSE, attrs))
2020 SET_FLAG(flags, GVF_GLOBAL_ONLY);
2022 SetVar("codeset_default", codeset->name, strlen(codeset->name), flags);
2025 ReleaseSemaphore(&CodesetsBase->libSem);
2027 RETURN(codeset);
2028 return codeset;
#if defined(__amigaos4__)
// varargs variant of CodesetsSetDefaultA() for the AmigaOS4 interface:
// the taglist is fetched from the variable arguments and passed on.
LIBPROTOVA(CodesetsSetDefault, struct codeset *, REG(a6, UNUSED __BASE_OR_IFACE), REG(a0, STRPTR name), ...)
{
  VA_LIST ap;
  struct codeset *result;

  VA_START(ap, name);
  result = CodesetsSetDefaultA(name, VA_ARG(ap, struct TagItem *));
  VA_END(ap);

  return result;
}
#endif

///
2046 /// CodesetsFindA()
2047 LIBPROTO(CodesetsFindA, struct codeset *, REG(a6, UNUSED __BASE_OR_IFACE), REG(a0, STRPTR name), REG(a1, struct TagItem *attrs))
2049 struct codeset *codeset = NULL;
2051 ENTER();
2053 ObtainSemaphoreShared(&CodesetsBase->libSem);
2055 // if no name pointer was supplied we have to return
2056 // the default codeset only.
2057 if(name != NULL)
2059 // we first walk through our internal list and check if we
2060 // can find the requested codeset
2061 codeset = codesetsFind(&CodesetsBase->codesets, name);
2063 if(codeset == NULL)
2065 struct TagItem *tstate = attrs;
2066 struct TagItem *tag;
2068 // now we walk through our taglist and check if the user
2069 // supplied
2070 while((tag = NextTagItem((APTR)&tstate)) != NULL)
2072 if(tag->ti_Tag == CSA_CodesetList && tag->ti_Data != 0)
2074 struct codesetList *csList = (struct codesetList *)tag->ti_Data;
2076 if((codeset = codesetsFind(csList, name)) != NULL)
2077 break;
2083 // check if we found something or not.
2084 if(codeset == NULL && GetTagData(CSA_FallbackToDefault, TRUE, attrs))
2085 codeset = defaultCodeset(FALSE);
2087 ReleaseSemaphore(&CodesetsBase->libSem);
2089 RETURN(codeset);
2090 return codeset;
#if defined(__amigaos4__)
// AmigaOS4 varargs front end: takes the tag list pointer from the variable
// arguments and forwards it to CodesetsFindA().
LIBPROTOVA(CodesetsFind, struct codeset *, REG(a6, UNUSED __BASE_OR_IFACE), REG(a0, STRPTR name), ...)
{
  VA_LIST va;
  struct codeset *result;

  VA_START(va, name);
  result = CodesetsFindA(name, VA_ARG(va, struct TagItem *));
  VA_END(va);

  return result;
}
#endif
2108 /// CodesetsFindBestA()
2109 LIBPROTO(CodesetsFindBestA, struct codeset *, REG(a6, UNUSED __BASE_OR_IFACE), REG(a0, struct TagItem *attrs))
2111 struct codeset *codeset = NULL;
2112 char *text;
2113 ULONG textLen;
2115 ENTER();
2117 ObtainSemaphoreShared(&CodesetsBase->libSem);
2119 text = (char *)GetTagData(CSA_Source, 0, attrs);
2120 textLen = GetTagData(CSA_SourceLen, text != NULL ? strlen(text) : 0, attrs);
2122 if(text != NULL && textLen != 0)
2124 int numErrors = 0;
2125 ULONG csFamily = GetTagData(CSA_CodesetFamily, CSV_CodesetFamily_Latin, attrs);
2126 int *errorPtr = (int *)GetTagData(CSA_ErrPtr, 0, attrs);
2128 codeset = codesetsFindBest(attrs, csFamily, text, textLen, &numErrors);
2130 if(errorPtr != NULL)
2131 *errorPtr = numErrors;
2133 // if we still haven't got the codeset we fallback to the default
2134 if(codeset == NULL && GetTagData(CSA_FallbackToDefault, FALSE, attrs))
2135 codeset = defaultCodeset(FALSE);
2138 ReleaseSemaphore(&CodesetsBase->libSem);
2140 RETURN(codeset);
2141 return codeset;
#if defined(__amigaos4__)
// AmigaOS4 varargs front end: takes the tag list pointer from the variable
// arguments and forwards it to CodesetsFindBestA().
LIBPROTOVA(CodesetsFindBest, struct codeset *, REG(a6, UNUSED __BASE_OR_IFACE), ...)
{
  VA_LIST va;
  struct codeset *result;

  VA_START(va, ICodesets);
  result = CodesetsFindBestA(VA_ARG(va, struct TagItem *));
  VA_END(va);

  return result;
}
#endif
2159 /// CodesetsUTF8Len()
2160 // Returns the number of characters a utf8 string has. This is not
2161 // identically with the size of memory is required to hold the string.
2162 LIBPROTO(CodesetsUTF8Len, ULONG, REG(a6, UNUSED __BASE_OR_IFACE), REG(a0, const UTF8 *str))
2164 int len = 0;
2165 unsigned char c;
2167 ENTER();
2169 if(str != NULL)
2171 while((c = *str++))
2173 len++;
2174 str += trailingBytesForUTF8[c];
2178 RETURN((ULONG)len);
2179 return (ULONG)len;
2183 /// CodesetsStrLenA()
2184 LIBPROTO(CodesetsStrLenA, ULONG, REG(a6, UNUSED __BASE_OR_IFACE), REG(a0, STRPTR str), REG(a1, struct TagItem *attrs))
2186 ULONG res = 0;
2188 ENTER();
2190 if(str != NULL)
2192 struct codeset *codeset;
2193 int len;
2194 STRPTR src;
2195 int utf;
2197 if((codeset = (struct codeset *)GetTagData(CSA_SourceCodeset, 0, attrs)) == NULL)
2198 codeset = defaultCodeset(TRUE);
2200 if(codeset == CodesetsBase->utf32Codeset)
2202 utf = 32;
2203 len = utf32_strlen((UTF32 *)str);
2205 else if(codeset == CodesetsBase->utf16Codeset)
2207 utf = 16;
2208 len = utf16_strlen((UTF16 *)str);
2210 else
2212 utf = 0;
2213 len = strlen(str);
2216 len = GetTagData(CSA_SourceLen, len, attrs);
2218 src = str;
2220 if(utf != 0)
2222 void *srcend = src + len;
2223 UTF8 *dstlen = NULL;
2224 union TypeAliases srcAlias;
2225 union TypeAliases dstAlias;
2227 srcAlias.strptr = &src;
2228 dstAlias.utf8 = &dstlen;
2230 switch(utf)
2232 case 16:
2233 CodesetsConvertUTF16toUTF8(srcAlias.cutf16, srcend, dstAlias.utf8, NULL, 0);
2234 break;
2236 case 32:
2237 CodesetsConvertUTF32toUTF8(srcAlias.cutf32, srcend, dstAlias.utf8, NULL, 0);
2238 break;
2240 res = (IPTR)dstlen;
2242 else
2244 UBYTE c;
2246 res = 0;
2248 while((c = *src++) != '\0' && len != 0)
2250 res += codeset->table[c].utf8[0];
2251 len--;
2256 RETURN(res);
2257 return res;
#if defined(__amigaos4__)
// AmigaOS4 varargs front end: takes the tag list pointer from the variable
// arguments and forwards it to CodesetsStrLenA().
LIBPROTOVA(CodesetsStrLen, ULONG, REG(a6, UNUSED __BASE_OR_IFACE), REG(a0, STRPTR str), ...)
{
  VA_LIST va;
  ULONG length;

  VA_START(va, str);
  length = CodesetsStrLenA(str, VA_ARG(va, struct TagItem *));
  VA_END(va);

  return length;
}
#endif
2275 /// CodesetsUTF8ToStrA()
2276 // Converts an UTF8 string to a given charset. Return the number of bytes
2277 // written to dest excluding the NULL byte (which is always ensured by this
2278 // function; it means a NULL str will produce "" as dest; anyway you should
2279 // check NULL str to not waste your time!).
//
// NOTE(review): the text above talks about a byte count, but the value
// handed to RETURN()/return below is the destination pointer 'dest'. It is
// initialised to NULL and only assigned when no CSA_DestHook is supplied,
// so NULL comes back for hook based conversions and for a missing/empty
// source.
//
// Tags read by this function: CSA_Source, CSA_SourceLen, CSA_DestHook,
// CSA_DestLen, CSA_ErrPtr, CSA_MapForeignChars, CSA_MapForeignCharsHook,
// CSA_DestCodeset, CSA_Dest, CSA_AllocIfNeeded, CSA_Pool, CSA_PoolSem and
// CSA_DestLenPtr.
2280 LIBPROTO(CodesetsUTF8ToStrA, STRPTR, REG(a6, UNUSED __BASE_OR_IFACE), REG(a0, struct TagItem *attrs))
2282 UTF8 *src;
2283 ULONG srcLen;
2284 ULONG destLen = 0;
2285 ULONG *destLenPtr;
2286 ULONG n = 0;
2287 STRPTR dest = NULL;
2289 ENTER();
// nothing is converted unless a source with a non-zero length was given;
// the length defaults to the distance to the terminator
2291 if((src = (UTF8 *)GetTagData(CSA_Source, 0, attrs)) != NULL &&
2292 (srcLen = GetTagData(CSA_SourceLen, src != NULL ? strlen((char *)src) : 0, attrs)) > 0)
2294 struct convertMsg msg;
2295 struct codeset *codeset;
2296 struct Hook *destHook;
2297 struct Hook *mapForeignCharsHook;
2298 char buf[256];
2299 STRPTR destIter = NULL;
2300 char *b = NULL;
2301 int i = 0;
2302 unsigned char *s = src;
2303 unsigned char *e = (src+srcLen);
2304 int numConvErrors = 0;
2305 int *numConvErrorsPtr;
2306 BOOL mapForeignChars;
2307 APTR pool = NULL;
2308 struct SignalSemaphore *sem = NULL;
2309 int utf;
2310 ULONG char_size;
2312 // get some more optional attributes
2313 destHook = (struct Hook *)GetTagData(CSA_DestHook, 0, attrs);
2314 destLen = GetTagData(CSA_DestLen, 0, attrs);
2315 numConvErrorsPtr = (int *)GetTagData(CSA_ErrPtr, 0, attrs);
2316 mapForeignChars = (BOOL)GetTagData(CSA_MapForeignChars, FALSE, attrs);
2317 mapForeignCharsHook = (struct Hook *)GetTagData(CSA_MapForeignCharsHook, 0, attrs);
2319 // get the destination codeset pointer
2320 if((codeset = (struct codeset *)GetTagData(CSA_DestCodeset, 0, attrs)) == NULL)
2321 codeset = defaultCodeset(TRUE);
// 'utf' selects the conversion path (32, 16 or 0 for a table based
// codeset), 'char_size' is the size in bytes of one destination unit
2322 if(codeset == CodesetsBase->utf32Codeset)
2324 utf = 32;
2325 char_size = 4;
2327 else if(codeset == CodesetsBase->utf16Codeset)
2329 utf = 16;
2330 char_size = 2;
2332 else
2334 utf = 0;
2335 char_size = 1;
2338 // first we make sure we allocate enough memory
2339 // for our destination buffer
2340 if(destHook != NULL)
// hook mode: the output is produced in chunks inside the local 'buf';
// a chunk size below 16 or above sizeof(buf) is replaced by sizeof(buf)
2342 if(destLen < 16 || destLen > sizeof(buf))
2343 destLen = sizeof(buf);
2345 msg.state = CSV_Translating;
2346 b = buf;
2347 i = 0;
2349 else
2351 // in case the user wants us to dynamically generate the
2352 // destination buffer we do it right now
2353 if((dest = (STRPTR)GetTagData(CSA_Dest, 0, attrs)) == NULL ||
2354 GetTagData(CSA_AllocIfNeeded, TRUE, attrs) != FALSE)
2356 ULONG len = 0;
2358 // calculate the destLen
2359 if(utf)
2361 void *dstlen = NULL;
2362 union TypeAliases srcAlias;
2363 union TypeAliases dstAlias;
2365 srcAlias.uchar = &s;
2366 dstAlias.voidptr = &dstlen;
// the converter is called without a destination end; the value it
// leaves in 'dstlen' is taken as the required length
2368 switch(utf)
2370 case 16:
2371 CodesetsConvertUTF8toUTF16(srcAlias.cutf8, e, dstAlias.utf16, NULL, 0);
2372 break;
2374 case 32:
2375 CodesetsConvertUTF8toUTF32(srcAlias.cutf8, e, dstAlias.utf32, NULL, 0);
2376 break;
2378 len = (IPTR)dstlen;
2380 else
// table based codeset: one destination byte per UTF-8 sequence
2382 while(s < e)
2384 unsigned char c = *s++;
2386 len++;
2387 s += trailingBytesForUTF8[c];
// a new buffer is allocated if the caller gave none or if the given
// one is shorter than len+1
2391 if(dest == NULL || (destLen < len+1))
2393 if((pool = (APTR)GetTagData(CSA_Pool, 0, attrs)) != NULL)
2395 if((sem = (struct SignalSemaphore *)GetTagData(CSA_PoolSem, 0, attrs)) != NULL)
2396 ObtainSemaphore(sem);
2398 // allocate the destination buffer
2399 dest = allocVecPooled(pool, len+char_size);
2401 if(sem != NULL)
2402 ReleaseSemaphore(sem);
2404 else
2405 dest = allocArbitrateVecPooled(len+char_size);
2407 destLen = len+char_size;
2410 if(dest == NULL)
2412 RETURN(NULL);
2413 return NULL;
2417 destIter = dest;
2420 // now we convert the src string to the
2421 // destination buffer.
2422 s = src;
2423 if(utf != 0)
2425 void *dstend;
2427 if(destHook != NULL)
2429 ULONG r = CSR_TargetExhausted;
2431 dstend = b + destLen - char_size;
// hook mode for UTF-16/32: convert one chunk into 'buf', terminate
// it, pass it to the hook and repeat for as long as the converter
// reports CSR_TargetExhausted
2434 union TypeAliases srcAlias;
2435 union TypeAliases dstAlias;
2437 srcAlias.uchar = &s;
2438 dstAlias.schar = &b;
2440 switch(utf)
2442 case 16:
2443 r = CodesetsConvertUTF8toUTF16(srcAlias.cutf8, e, dstAlias.utf16, dstend, 0);
2444 break;
2446 case 32:
2447 r = CodesetsConvertUTF8toUTF32(srcAlias.cutf8, e, dstAlias.utf32, dstend, 0);
2448 break;
2450 b[0] = 0;
2451 if(char_size > 1)
2452 b[1] = 0;
2453 if(r != CSR_TargetExhausted)
2454 msg.state = CSV_End;
2455 msg.len = b-buf;
2456 CallHookPkt(destHook,&msg,buf);
2458 b = buf;
2459 n += msg.len;
2461 while(r == CSR_TargetExhausted);
2463 else
// buffer mode for UTF-16/32: a single converter call into 'dest'
2465 union TypeAliases srcAlias;
2466 union TypeAliases dstAlias;
2468 srcAlias.uchar = &s;
2469 dstAlias.strptr = &destIter;
2470 dstend = destIter + destLen - char_size;
2471 switch(utf)
2473 case 16:
2474 CodesetsConvertUTF8toUTF16(srcAlias.cutf8, e, dstAlias.utf16, dstend, 0);
2475 break;
2477 case 32:
2478 CodesetsConvertUTF8toUTF32(srcAlias.cutf8, e, dstAlias.utf32, dstend, 0);
2479 break;
2481 n = destIter-dest;
2484 else
// table based codeset: one source character per loop iteration
2486 for(;;n++)
// NOTE(review): destLen-1 is an unsigned expression; with a caller
// buffer and CSA_DestLen left at 0 this limit would wrap - confirm
// that callers always pass a length together with CSA_Dest
2488 if(destHook == NULL && n >= destLen-1)
2489 break;
2491 // convert until we reach the end of the
2492 // source buffer.
2493 if(s < e)
2495 unsigned char c = *s;
2496 unsigned char d = '?';
2497 const char *repstr = NULL;
2498 int replen = 0;
2500 // check if the char is a >7bit char
2501 if(c > 127)
2503 struct single_convert *f;
2504 int lenAdd = trailingBytesForUTF8[c];
2505 int lenStr = lenAdd+1;
2506 unsigned char *src = s;
// this local 'src' shadows the function level 'src'; it is replaced
// by the replacement sequence whenever a mapping returns a negative
// length, which makes the lookup below run again
2510 // start each iteration with "no replacement found yet"
2511 repstr = NULL;
2512 replen = 0;
2514 // search in the UTF8 conversion table of the current charset if
2515 // we have a replacement character for the char sequence starting at s
2516 BIN_SEARCH(codeset->table_sorted, 0, 255, strncmp((char *)src, (char *)codeset->table_sorted[m].utf8+1, lenStr), f);
2518 if(f != NULL)
2520 d = f->code;
2521 replen = -1;
2523 break;
2525 else
2527 // the analysed char sequence (s) is not convertable to a
2528 // single visible char replacement, so we normally have to put
2529 // a ? sign as a "unknown char" sign at the very position.
2531 // For convienence we, however, allow users to replace these
2532 // UTF8 characters with char sequences that "looklike" the
2533 // original char.
2534 if(mapForeignChars == TRUE)
2535 replen = mapUTF8toASCII(&repstr, src, lenStr);
2537 // call the hook only, if the internal table yielded no suitable
2538 // replacement
2539 if(replen == 0 && mapForeignCharsHook != NULL)
2541 struct replaceMsg rmsg;
2543 rmsg.dst = (char **)&repstr;
2544 rmsg.src = src;
2545 rmsg.srclen = lenStr;
2546 replen = CallHookPkt(mapForeignCharsHook, &rmsg, NULL);
2549 if(replen < 0)
2551 D(DBF_UTF, "got UTF8 replacement (%ld)", replen);
2553 // stay in the loop as long as one replacement function delivers
2554 // further UTF8 replacement sequences
2555 src = (unsigned char *)repstr;
2556 // remember the length of the replaced string, as we might do another
2557 // iteration in the loop which might result in a further replacement
2558 lenStr = -replen;
2560 else if(replen == 0)
2562 D(DBF_UTF, "found no ASCII replacement for UTF8 string (%ld)", replen);
2563 repstr = NULL;
2565 else
2566 D(DBF_UTF, "got replacement string '%s' (%ld)", repstr ? repstr : "<null>", replen);
2569 while(replen < 0);
// replen is -1 here when the table lookup succeeded ('d' holds the
// code), 0 when nothing was found and > 0 when a replacement string
// is available; only the "nothing found" case counts as an error
2571 if(repstr == NULL || replen == 0)
2573 if(replen >= 0)
2575 d = '?';
2576 numConvErrors++;
2580 s += lenAdd;
2582 else
2583 d = c;
2585 if(destHook != NULL)
// hook mode: collect bytes in 'buf' and hand the buffer to the hook
// every time destLen-1 bytes have been gathered
2587 if(replen > 1)
2589 while(replen > 0)
2591 *b++ = *repstr;
2592 repstr++;
2593 i++;
2594 replen--;
2596 if(i%(destLen-1)==0)
2598 *b = '\0';
2599 msg.len = i;
2600 CallHookPkt(destHook, &msg, buf);
2602 b = buf;
2603 *b = '\0';
2604 i = 0;
2608 else
2610 *b++ = replen > 0 ? *repstr : d;
2611 i++;
2614 if(i%(destLen-1)==0)
2616 *b = '\0';
2617 msg.len = i;
2618 CallHookPkt(destHook, &msg, buf);
2620 b = buf;
2621 *b = '\0';
2622 i = 0;
2625 else
// buffer mode: a replacement longer than one byte needs replen-1
// bytes more than calculated, so the buffer is grown first
// NOTE(review): 'dest' may be the caller's own CSA_Dest buffer at this
// point (when it was large enough or CSA_AllocIfNeeded was FALSE); it
// is passed to the realloc helpers regardless - verify that this is
// intended. Also confirm whether the old buffer stays allocated when
// the realloc helper returns NULL.
2627 if(replen > 1)
2629 ULONG destPos = destIter-dest;
2631 if(pool != NULL)
2633 if(sem != NULL)
2634 ObtainSemaphore(sem);
2636 // allocate the destination buffer
2637 dest = reallocVecPooled(pool, dest, destLen, destLen+replen-1);
2639 if(sem != NULL)
2640 ReleaseSemaphore(sem);
2642 else
2643 dest = reallocArbitrateVecPooled(dest, destLen, destLen+replen-1);
2645 if(dest == NULL)
2647 RETURN(NULL);
2648 return NULL;
2651 destIter = dest+destPos;
2652 memcpy(destIter, repstr, replen);
2654 // adjust our loop pointer and destination length
2655 destIter += replen;
2656 destLen += replen-1;
2658 else if(replen == 1)
2659 *destIter++ = *repstr;
2660 else
2661 *destIter++ = d;
2664 s++;
2666 else
2667 break;
// flush the last (possibly partial) chunk to the hook, or terminate the
// destination buffer
2670 if(destHook != NULL)
2672 msg.state = CSV_End;
2673 msg.len = i;
2674 *b = '\0';
2675 CallHookPkt(destHook,&msg,buf);
2677 else
2678 *destIter = '\0';
2681 // let us write the number of conversion errors
2682 // to the proper variable pointer, if wanted
2683 if(numConvErrorsPtr != NULL)
2684 *numConvErrorsPtr = numConvErrors;
2687 // put the final length of our destination buffer
2688 // into the destLenPtr
// NOTE(review): the value stored is destLen-1, i.e. derived from the size
// of the destination buffer (or of the hook chunk), not from the counter
// 'n' maintained above - confirm that this is what callers expect,
// especially for caller supplied buffers and UTF-16/32 destinations.
2689 if((destLenPtr = (ULONG *)GetTagData(CSA_DestLenPtr, 0, attrs)) != NULL)
2691 if(destLen > 0)
2692 *destLenPtr = destLen-1;
2693 else
2694 *destLenPtr = 0;
2697 RETURN(dest);
2698 return dest;
#if defined(__amigaos4__)
// AmigaOS4 varargs front end: takes the tag list pointer from the variable
// arguments and forwards it to CodesetsUTF8ToStrA().
LIBPROTOVA(CodesetsUTF8ToStr, STRPTR, REG(a6, UNUSED __BASE_OR_IFACE), ...)
{
  VA_LIST va;
  STRPTR converted;

  VA_START(va, ICodesets);
  converted = CodesetsUTF8ToStrA(VA_ARG(va, struct TagItem *));
  VA_END(va);

  return converted;
}
#endif
2716 /// CodesetsUTF8CreateA()
2717 // Converts a string and a charset to an UTF8. Returns the UTF8.
2718 // If a destination hook is supplied always return 0.
2719 // If from is NULL, it returns NULL and doesn't call the hook.
2720 LIBPROTO(CodesetsUTF8CreateA, UTF8 *, REG(a6, UNUSED __BASE_OR_IFACE), REG(a0, struct TagItem *attrs))
2722 UTF8 *from;
2723 UTF8 *dest;
2724 struct codeset *codeset;
2725 ULONG fromLen, *destLenPtr;
2726 ULONG n;
2727 int utf;
2729 ENTER();
2731 dest = NULL;
2732 n = 0;
2734 if((codeset = (struct codeset *)GetTagData(CSA_SourceCodeset, 0, attrs)) == NULL)
2735 codeset = defaultCodeset(TRUE);
2736 if(codeset == CodesetsBase->utf32Codeset)
2737 utf = 32;
2738 else if(codeset == CodesetsBase->utf16Codeset)
2739 utf = 16;
2740 else
2741 utf = 0;
2743 from = (UTF8 *)GetTagData(CSA_Source, 0, attrs);
2744 if(from != NULL)
2746 switch(utf)
2748 case 32:
2749 fromLen = utf32_strlen((UTF32 *)from);
2750 break;
2752 case 16:
2753 fromLen = utf16_strlen((UTF16 *)from);
2754 break;
2756 default:
2757 fromLen = strlen((char *)from);
2758 break;
2761 else
2762 fromLen = 0;
2763 fromLen = GetTagData(CSA_SourceLen, fromLen, attrs);
2765 if(from != NULL && fromLen != 0)
2767 struct convertMsg msg;
2768 struct Hook *hook;
2769 ULONG destLen;
2770 int i = 0;
2771 TEXT buf[256];
2772 STRPTR src, destPtr = NULL, b = NULL;
2773 ULONG c;
2775 hook = (struct Hook *)GetTagData(CSA_DestHook, 0, attrs);
2776 destLen = GetTagData(CSA_DestLen, 0, attrs);
2778 if(hook != NULL)
2780 if(destLen<16 || destLen>sizeof(buf))
2781 destLen = sizeof(buf);
2783 msg.state = CSV_Translating;
2784 b = buf;
2785 i = 0;
2787 else
2789 if((dest = (UTF8 *)GetTagData(CSA_Dest, 0, attrs)) != NULL ||
2790 GetTagData(CSA_AllocIfNeeded, TRUE, attrs))
2792 ULONG len;
2794 src = (STRPTR)from;
2796 if(utf != 0)
2798 void *srcend = src + fromLen;
2799 UTF8 *dstlen = NULL;
2800 union TypeAliases srcAlias;
2801 union TypeAliases dstAlias;
2803 srcAlias.strptr = &src;
2804 dstAlias.utf8 = &dstlen;
2806 switch(utf)
2808 case 16:
2809 CodesetsConvertUTF16toUTF8(srcAlias.cutf16, srcend, dstAlias.utf8, NULL, 0);
2810 break;
2812 case 32:
2813 CodesetsConvertUTF32toUTF8(srcAlias.cutf32, srcend, dstAlias.utf8, NULL, 0);
2814 break;
2816 len = (IPTR)dstlen;
2818 else
2820 ULONG flen = fromLen;
2822 len = 0;
2823 while((c = *src++) != '\0' && flen != 0)
2825 len += codeset->table[c].utf8[0];
2826 flen--;
2829 D(DBF_UTF, "Calculated output UTF-8 buffer length: %lu", len);
2831 if(dest == NULL || (destLen<len+1))
2833 APTR pool;
2834 struct SignalSemaphore *sem;
2836 if((pool = (APTR)GetTagData(CSA_Pool, 0, attrs)) != NULL)
2838 if((sem = (struct SignalSemaphore *)GetTagData(CSA_PoolSem, 0, attrs)) != NULL)
2839 ObtainSemaphore(sem);
2841 // allocate the destination buffer
2842 dest = allocVecPooled(pool,len+1);
2844 if(sem != NULL)
2845 ReleaseSemaphore(sem);
2847 else
2848 dest = allocArbitrateVecPooled(len+1);
2850 destLen = len;
2853 if(dest == NULL)
2855 RETURN(NULL);
2856 return NULL;
2860 destPtr = (STRPTR)dest;
2863 src = (STRPTR)from;
2864 if(utf != 0)
2866 void *srcend = src + fromLen;
2867 UTF8 *dstend;
2869 if(hook != NULL)
2871 ULONG r = CSR_TargetExhausted;
2872 union TypeAliases srcAlias;
2873 union TypeAliases dstAlias;
2875 srcAlias.strptr = &src;
2876 dstAlias.strptr = &b;
2877 dstend = (UTF8 *)(b + destLen - 1);
2880 switch(utf)
2882 case 16:
2883 r = CodesetsConvertUTF16toUTF8(srcAlias.cutf16, srcend, dstAlias.utf8, dstend, 0);
2884 break;
2886 case 32:
2887 r = CodesetsConvertUTF32toUTF8(srcAlias.cutf32, srcend, dstAlias.utf8, dstend, 0);
2888 break;
2890 *b = 0;
2891 if(r != CSR_TargetExhausted)
2892 msg.state = CSV_End;
2893 msg.len = b-buf;
2894 CallHookPkt(hook,&msg,buf);
2896 b = buf;
2897 n += msg.len;
2899 while(r == CSR_TargetExhausted);
2901 else
2903 union TypeAliases srcAlias;
2904 union TypeAliases dstAlias;
2906 srcAlias.strptr = &src;
2907 dstAlias.strptr = &destPtr;
2908 dstend = (UTF8 *)(destPtr + destLen);
2909 switch(utf)
2911 case 16:
2912 CodesetsConvertUTF16toUTF8(srcAlias.cutf16, srcend, dstAlias.utf8, dstend, 0);
2913 break;
2915 case 32:
2916 CodesetsConvertUTF32toUTF8(srcAlias.cutf32, srcend, dstAlias.utf8, dstend, 0);
2917 break;
2919 n = destPtr-(STRPTR)dest;
2922 else
2924 for(; fromLen && (c = *src); src++, fromLen--)
2926 UTF8 *utf8_seq;
2928 for(utf8_seq = &codeset->table[c].utf8[1]; (c = *utf8_seq); utf8_seq++)
2930 if(hook != NULL)
2932 *b++ = c;
2933 i++;
2935 if(i%(destLen-1)==0)
2937 *b = 0;
2938 msg.len = i;
2939 CallHookPkt(hook,&msg,buf);
2941 b = buf;
2942 *b = 0;
2943 i = 0;
2946 else
2948 if(n>=destLen)
2949 break;
2951 *destPtr++ = c;
2954 n++;
2958 if(hook != NULL)
2960 msg.state = CSV_End;
2961 msg.len = i;
2962 *b = 0;
2963 CallHookPkt(hook,&msg,buf);
2965 else
2967 *destPtr = 0;
2972 if((destLenPtr = (ULONG *)GetTagData(CSA_DestLenPtr, 0, attrs)) != NULL)
2973 *destLenPtr = n;
2975 RETURN(dest);
2976 return dest;
#if defined(__amigaos4__)
// AmigaOS4 varargs front end: takes the tag list pointer from the variable
// arguments and forwards it to CodesetsUTF8CreateA().
LIBPROTOVA(CodesetsUTF8Create, UTF8 *, REG(a6, UNUSED __BASE_OR_IFACE), ...)
{
  VA_LIST va;
  UTF8 *created;

  VA_START(va, ICodesets);
  created = CodesetsUTF8CreateA(VA_ARG(va, struct TagItem *));
  VA_END(va);

  return created;
}
#endif
2994 /// CodesetsIsValidUTF8()
2995 #define GOOD_UCS(c) \
2996 ((c) >= 160 && ((c) & ~0x3ff) != 0xd800 && \
2997 (c) != 0xfeff && (c) != 0xfffe && (c) != 0xffff)
2999 LIBPROTO(CodesetsIsValidUTF8, BOOL, REG(a6, UNUSED __BASE_OR_IFACE), REG(a0, CONST_STRPTR s))
3001 CONST_STRPTR t = s;
3002 int n;
3004 ENTER();
3006 while((n = parseUtf8(&t)) != 0)
3008 if(!GOOD_UCS(n))
3010 RETURN(FALSE);
3011 return FALSE;
3015 RETURN(TRUE);
3016 return TRUE;
3020 /// CodesetsConvertStrA()
3021 // Converts a given string from one source Codeset to a given destination
3022 // codeset and returns the convert string
3023 LIBPROTO(CodesetsConvertStrA, STRPTR, REG(a6, UNUSED __BASE_OR_IFACE), REG(a0, struct TagItem *attrs))
3025 struct codeset *srcCodeset;
3026 STRPTR srcStr = NULL;
3027 STRPTR dstStr = NULL;
3028 ULONG srcLen = 0;
3029 ULONG dstLen = 0;
3030 ULONG charSize = 0;
3032 ENTER();
3034 // get the ptr to the src string we want to convert
3035 // from the source codeset to the dest codeset.
3036 srcStr = (STRPTR)GetTagData(CSA_Source, 0, attrs);
3038 // get the pointer to the codeset in which the src string is encoded
3039 if((srcCodeset = (struct codeset *)GetTagData(CSA_SourceCodeset, 0, attrs)) == NULL)
3040 srcCodeset = defaultCodeset(TRUE);
3042 if(srcStr != NULL)
3044 if(srcCodeset == CodesetsBase->utf32Codeset)
3046 srcLen = utf32_strlen((UTF32 *)srcStr);
3047 charSize = sizeof(UTF32);
3049 else if(srcCodeset == CodesetsBase->utf16Codeset)
3051 srcLen = utf16_strlen((UTF16 *)srcStr);
3052 charSize = sizeof(UTF16);
3054 else
3056 srcLen = strlen(srcStr);
3057 charSize = sizeof(char);
3060 else
3061 srcLen = 0;
3062 srcLen = GetTagData(CSA_SourceLen, srcLen, attrs);
3064 if(srcStr != NULL && srcLen > 0)
3066 struct codeset *dstCodeset;
3068 // get the pointer to the codeset in which the dst string should be encoded
3069 if((dstCodeset = (struct codeset *)GetTagData(CSA_DestCodeset, 0, attrs)) == NULL)
3070 dstCodeset = defaultCodeset(TRUE);
3072 D(DBF_UTF, "srcCodeset: '%s' dstCodeset: '%s'", srcCodeset->name, dstCodeset->name);
3074 if(srcCodeset != NULL && dstCodeset != NULL)
3076 // check that the user didn't supplied the very same codeset
3077 // or otherwise a conversion is not required.
3078 if(srcCodeset != dstCodeset)
3080 BOOL utf8Create = FALSE;
3081 BOOL strCreate = FALSE;
3082 UTF8 *utf8str;
3083 ULONG utf8strLen = 0;
3084 ULONG *destLenPtr = NULL;
3085 BOOL mapForeignChars;
3086 struct Hook *mapForeignCharsHook;
3088 mapForeignChars = (BOOL)GetTagData(CSA_MapForeignChars, FALSE, attrs);
3089 mapForeignCharsHook = (struct Hook *)GetTagData(CSA_MapForeignCharsHook, 0, attrs);
3091 // if the source codeset is UTF-8 we don't have to use the UTF8Create()
3092 // function and can directly call the UTF8ToStr() function
3093 if(srcCodeset != CodesetsBase->utf8Codeset)
3095 struct TagItem tags[] = { { CSA_SourceCodeset, (IPTR)srcCodeset },
3096 { CSA_Source, (IPTR)srcStr },
3097 { CSA_SourceLen, srcLen },
3098 { CSA_DestLenPtr, (IPTR)&utf8strLen },
3099 { TAG_DONE, 0 } };
3101 utf8str = CodesetsUTF8CreateA((struct TagItem *)&tags[0]);
3103 utf8Create = TRUE;
3105 else
3107 utf8str = (UTF8 *)srcStr;
3108 utf8strLen = srcLen;
3111 // in case the destination codeset is UTF-8 we don't have to actually
3112 // use the UTF8ToStr() function and can immediately return our
3113 // UTF8 string
3114 if(utf8str != NULL && utf8strLen > 0 && dstCodeset != CodesetsBase->utf8Codeset)
3116 struct TagItem tags[] = { { CSA_DestCodeset, (IPTR)dstCodeset },
3117 { CSA_Source, (IPTR)utf8str },
3118 { CSA_SourceLen, utf8strLen },
3119 { CSA_DestLenPtr, (IPTR)&dstLen },
3120 { CSA_MapForeignChars, mapForeignChars },
3121 { CSA_MapForeignCharsHook, (IPTR)mapForeignCharsHook },
3122 { TAG_DONE, 0 } };
3124 dstStr = CodesetsUTF8ToStrA((struct TagItem *)&tags[0]);
3126 strCreate = TRUE;
3128 else
3130 dstStr = (STRPTR)utf8str;
3131 dstLen = utf8strLen;
3134 D(DBF_UTF, "srcStr: %lx srcLen: %ld dstStr: %lx dstLen: %ld utf8create: %ld strCreate: %ld", srcStr, srcLen,
3135 dstStr, dstLen,
3136 utf8Create,
3137 strCreate);
3139 // if everything was successfull we can go and finalize everything
3140 if(dstStr != NULL && utf8str != NULL)
3142 // as the conversion was a two way pass we have to either free the
3143 // memory of the utf8 string or not
3144 if(utf8Create == TRUE && strCreate == TRUE)
3145 CodesetsFreeA(utf8str, NULL);
3147 // if the user wants to be informed abour the length
3148 // of our destination string we store the length now in the supplied ptr.
3149 if((destLenPtr = (ULONG *)GetTagData(CSA_DestLenPtr, 0, attrs)) != NULL)
3150 *destLenPtr = dstLen;
3152 D(DBF_UTF, "successfully converted string with len %ld", dstLen);
3154 else
3156 W(DBF_ALWAYS, "an error occurred while trying to convert a string");
3158 // free all memory in case the conversion didn't work out
3159 if(utf8Create == TRUE && utf8str != NULL)
3160 CodesetsFreeA(utf8str, NULL);
3162 if(strCreate == TRUE && dstStr != NULL)
3163 CodesetsFreeA(dstStr, NULL);
3165 dstStr = NULL;
3168 else
3170 // we got the same source and destination codesets passed in
3171 // instead of failing silently we just create a copy of the source string
3172 ULONG *destLenPtr = NULL;
3174 // allocate memory for the destination string, including a trailing NUL byte
3175 if((dstStr = allocArbitrateVecPooled(srcLen + charSize)) != NULL)
3177 // just copy the source string without any further modification
3178 // we must use memcpy() as the source string could be UTF16/32 encoded and
3179 // thus strcpy() would not do what we want.
3180 memcpy(dstStr, srcStr, srcLen + charSize);
3181 dstLen = srcLen;
3182 D(DBF_UTF, "successfully copied string with len %ld", dstLen);
3184 else
3185 W(DBF_ALWAYS, "no memory for dest string");
3187 // if the user wants to be informed abour the length
3188 // of our destination string we store the length now in the supplied ptr.
3189 if((destLenPtr = (ULONG *)GetTagData(CSA_DestLenPtr, 0, attrs)) != NULL)
3190 *destLenPtr = dstLen;
3195 RETURN(dstStr);
3196 return dstStr;
#if defined(__amigaos4__)
// AmigaOS4 varargs front end: takes the tag list pointer from the variable
// arguments and forwards it to CodesetsConvertStrA().
LIBPROTOVA(CodesetsConvertStr, STRPTR, REG(a6, UNUSED __BASE_OR_IFACE), ...)
{
  VA_LIST va;
  STRPTR converted;

  VA_START(va, ICodesets);
  converted = CodesetsConvertStrA(VA_ARG(va, struct TagItem *));
  VA_END(va);

  return converted;
}
#endif
3214 /// CodesetsFreeVecPooledA()
3215 LIBPROTO(CodesetsFreeVecPooledA, void, REG(a6, UNUSED __BASE_OR_IFACE), REG(a0, APTR pool), REG(a1, APTR mem), REG(a2, struct TagItem *attrs))
3217 ENTER();
3219 if(pool != NULL && mem != NULL)
3221 struct SignalSemaphore *sem;
3223 if((sem = (struct SignalSemaphore *)GetTagData(CSA_PoolSem, 0, attrs)) != NULL)
3224 ObtainSemaphore(sem);
3226 freeVecPooled(pool,mem);
3228 if(sem != NULL)
3229 ReleaseSemaphore(sem);
3232 LEAVE();
#if defined(__amigaos4__)
// AmigaOS4 varargs front end: takes the tag list pointer from the variable
// arguments and forwards it to CodesetsFreeVecPooledA().
LIBPROTOVA(CodesetsFreeVecPooled, void, REG(a6, UNUSED __BASE_OR_IFACE), REG(a0, APTR pool), REG(a1, APTR mem), ...)
{
  VA_LIST va;

  VA_START(va, mem);
  CodesetsFreeVecPooledA(pool, mem, VA_ARG(va, struct TagItem *));
  VA_END(va);
}
#endif
3247 /// CodesetsListCreateA()
3248 LIBPROTO(CodesetsListCreateA, struct codesetList *, REG(a6, UNUSED __BASE_OR_IFACE), REG(a0, struct TagItem *attrs))
3250 struct codesetList *csList = NULL;
3252 ENTER();
3254 // no matter what, we create a codesets list we will return to the user
3255 if((csList = allocArbitrateVecPooled(sizeof(struct codesetList))) != NULL)
3257 BOOL scanProgDir = TRUE;
3258 struct TagItem *tstate = attrs;
3259 struct TagItem *tag;
3261 // initialize the new private codeset list and put it into a separate list
3262 NewList((struct List *)csList);
3264 // first we get the path of the directory from which we go
3265 // and scan for charset tables from
3266 while((tag = NextTagItem((APTR)&tstate)) != NULL)
3268 switch(tag->ti_Tag)
3270 case CSA_CodesetDir:
3272 codesetsScanDir(csList, (STRPTR)tag->ti_Data);
3274 scanProgDir = FALSE;
3276 break;
3278 case CSA_CodesetFile:
3280 codesetsReadTable(csList, (STRPTR)tag->ti_Data);
3282 scanProgDir = FALSE;
3284 break;
3286 case CSA_SourceCodeset:
3288 struct codeset *cs = (struct codeset *)tag->ti_Data;
3290 AddTail((struct List *)csList, (struct Node *)&cs->node);
3292 scanProgDir = FALSE;
3294 break;
3298 // in case the user also wants us to scan PROGDIR:
3299 // we do so
3300 if(scanProgDir == TRUE)
3301 codesetsScanDir(csList, "PROGDIR:Charsets");
3304 RETURN(csList);
3305 return csList;
#if defined(__amigaos4__)
// AmigaOS4 varargs front end: takes the tag list pointer from the variable
// arguments and forwards it to CodesetsListCreateA().
LIBPROTOVA(CodesetsListCreate, struct codesetList *, REG(a6, UNUSED __BASE_OR_IFACE), ...)
{
  VA_LIST va;
  struct codesetList *created;

  VA_START(va, ICodesets);
  created = CodesetsListCreateA(VA_ARG(va, struct TagItem *));
  VA_END(va);

  return created;
}
#endif
3323 /// CodesetsListDeleteA()
3324 LIBPROTO(CodesetsListDeleteA, BOOL, REG(a6, UNUSED __BASE_OR_IFACE), REG(a0, struct TagItem *attrs))
3326 BOOL result = FALSE;
3327 struct TagItem *tstate = attrs;
3328 struct TagItem *tag;
3329 BOOL freeCodesets;
3331 ENTER();
3333 // check if the caller wants us also to free the codesets
3334 freeCodesets = (BOOL)GetTagData(CSA_FreeCodesets, TRUE, attrs);
3336 // now we iterate through or tagItems and see what the
3337 // user wants to remove from the list
3338 while((tag = NextTagItem((APTR)&tstate)) != NULL)
3340 switch(tag->ti_Tag)
3342 case CSA_CodesetList:
3344 struct codesetList *csList = (struct codesetList *)tag->ti_Data;
3346 if(csList != NULL)
3348 // cleanup the codesets within the list
3349 if(freeCodesets == TRUE)
3350 codesetsCleanup(csList);
3352 // then free the list itICodesets
3353 freeArbitrateVecPooled(csList);
3355 result = TRUE;
3358 break;
3362 RETURN(result);
3363 return result;
#if defined(__amigaos4__)
// AmigaOS4 varargs front end: takes the tag list pointer from the variable
// arguments and forwards it to CodesetsListDeleteA().
LIBPROTOVA(CodesetsListDelete, BOOL, REG(a6, UNUSED __BASE_OR_IFACE), ...)
{
  VA_LIST va;
  BOOL success;

  VA_START(va, ICodesets);
  success = CodesetsListDeleteA(VA_ARG(va, struct TagItem *));
  VA_END(va);

  return success;
}
#endif
3381 /// CodesetsListAddA()
3382 LIBPROTO(CodesetsListAddA, BOOL, REG(a6, UNUSED __BASE_OR_IFACE), REG(a0, struct codesetList *csList), REG(a1, struct TagItem *attrs))
3384 BOOL result = FALSE;
3386 ENTER();
3388 if(csList != NULL)
3390 struct TagItem *tstate = attrs;
3391 struct TagItem *tag;
3393 // now we iterate through or tagItems and see if the user
3394 // wants to scan a whole directory or just adds a file.
3395 while((tag = NextTagItem((APTR)&tstate)) != NULL)
3397 switch(tag->ti_Tag)
3399 case CSA_CodesetDir:
3401 codesetsScanDir(csList, (STRPTR)tag->ti_Data);
3402 result = TRUE;
3404 break;
3406 case CSA_CodesetFile:
3408 codesetsReadTable(csList, (STRPTR)tag->ti_Data);
3409 result = TRUE;
3411 break;
3413 case CSA_SourceCodeset:
3415 struct codeset *cs = (struct codeset *)tag->ti_Data;
3417 AddTail((struct List *)csList, (struct Node *)&cs->node);
3418 result = TRUE;
3420 break;
3425 RETURN(result);
3426 return result;
#if defined(__amigaos4__)
// AmigaOS4 varargs front end: takes the tag list pointer from the variable
// arguments and forwards it to CodesetsListAddA().
LIBPROTOVA(CodesetsListAdd, BOOL, REG(a6, UNUSED __BASE_OR_IFACE), REG(a0, struct codesetList *csList), ...)
{
  VA_LIST va;
  BOOL success;

  VA_START(va, csList);
  success = CodesetsListAddA(csList, VA_ARG(va, struct TagItem *));
  VA_END(va);

  return success;
}
#endif
3444 /// CodesetsListRemoveA()
3445 LIBPROTO(CodesetsListRemoveA, BOOL, REG(a6, UNUSED __BASE_OR_IFACE), REG(a0, struct TagItem *attrs))
3447 BOOL result = FALSE;
3448 struct TagItem *tstate = attrs;
3449 struct TagItem *tag;
3450 BOOL freeCodesets;
3452 ENTER();
3454 // check if the caller wants us also to free the codesets
3455 freeCodesets = (BOOL)GetTagData(CSA_FreeCodesets, TRUE, attrs);
3457 // now we iterate through or tagItems and see what the
3458 // user wants to remove from the list
3459 while((tag = NextTagItem((APTR)&tstate)) != NULL)
3461 switch(tag->ti_Tag)
3463 case CSA_SourceCodeset:
3465 struct codeset *removeCS = (struct codeset *)tag->ti_Data;
3467 if(removeCS != NULL)
3469 struct Node *node;
3470 BOOL isExternalNode = TRUE;
3472 ObtainSemaphore(&CodesetsBase->libSem);
3474 // iterate over our internal list an check whether the given
3475 // node is part of that list
3476 for(node = GetHead((struct List *)&CodesetsBase->codesets); node != NULL; node = GetSucc(node))
3478 if((struct codeset *)node == removeCS)
3480 isExternalNode = FALSE;
3481 break;
3485 ReleaseSemaphore(&CodesetsBase->libSem);
3487 if(isExternalNode == TRUE)
3489 Remove((struct Node *)removeCS);
3491 // free all codesets data if requested
3492 if(freeCodesets == TRUE)
3494 if(removeCS->name != NULL)
3495 freeArbitrateVecPooled(removeCS->name);
3496 if(removeCS->alt_name != NULL)
3497 freeArbitrateVecPooled(removeCS->alt_name);
3498 if(removeCS->characterization != NULL)
3499 freeArbitrateVecPooled(removeCS->characterization);
3501 freeArbitrateVecPooled(removeCS);
3504 result = TRUE;
3506 else
3507 W(DBF_ALWAYS, "user tried to remove an internal codeset!");
3510 break;
3514 RETURN(result);
3515 return result;
#if defined(__amigaos4__)
// AmigaOS4 varargs front end: takes the tag list pointer from the variable
// arguments and forwards it to CodesetsListRemoveA().
LIBPROTOVA(CodesetsListRemove, BOOL, REG(a6, UNUSED __BASE_OR_IFACE), ...)
{
  VA_LIST va;
  BOOL success;

  VA_START(va, ICodesets);
  success = CodesetsListRemoveA(VA_ARG(va, struct TagItem *));
  VA_END(va);

  return success;
}
#endif
3534 /**************************************************************************/