workbench/libs/codesetslib/src/codesets.c

   1 /***************************************************************************
   2
   3  codesets.library - Amiga shared library for handling different codesets
   4  Copyright (C) 2001-2005 by Alfonso [alfie] Ranieri <alforan@tin.it>.
   5  Copyright (C) 2005-2009 by codesets.library Open Source Team
   6
   7  This library is free software; you can redistribute it and/or
   8  modify it under the terms of the GNU Lesser General Public
   9  License as published by the Free Software Foundation; either
  10  version 2.1 of the License, or (at your option) any later version.
  11
  12  This library is distributed in the hope that it will be useful,
  13  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  Lesser General Public License for more details.
  16
  17  codesets.library project: http://sourceforge.net/projects/codesetslib/
  18
  19  Most of the code included in this file was relicensed from GPL to LGPL
  20  from the source code of SimpleMail (http://www.sf.net/projects/simplemail)
  21  with full permissions by its authors.
  22
  23  $Id$
  24
  25 ***************************************************************************/
  26
  27 #include "lib.h"
  28
  29 #include <clib/alib_protos.h>
  30
  31 #include <diskfont/glyph.h>
  32 #include <diskfont/diskfonttag.h>
  33 #include <proto/diskfont.h>
  34 #include <ctype.h>
  35 #include <limits.h>
  36
  37 #ifdef __MORPHOS__
  38 #include <proto/keymap.h>
  39 #include <proto/locale.h>
  40 #endif
  41
  42 #include "codesets_table.h"
  43 #include "convertUTF.h"
  44 #include "codepages.h"
  45
  46 #include "SDI_stdarg.h"
  47
  48 #include "debug.h"
  49
  50 /**************************************************************************/
  51
  52 /// BIN_SEARCH()
  53 // search a sorted array in O(log n) e.g.
  54 // BIN_SEARCH(strings,0,sizeof(strings)/sizeof(strings[0]),strcmp(key,array[mid]),res);
  55 #define BIN_SEARCH(array,low,high,compare,result) \
  56   {\
  57     int l = low;\
  58     int h = high;\
  59     int m = (low+high)/2;\
  60     result = NULL;\
  61     while (l<=h)\
  62     {\
  63       int d = compare;\
  64       if (!d){ result = &array[m]; break; }\
  65       if (d < 0) h = m - 1;\
  66       else l = m + 1;\
  67       m = (l + h)/2;\
  68     }\
  69   }
  70
  71 ///
  72 /// mystrdup()
  73 static STRPTR
  74 mystrdup(const char *str)
  75 {
  76   STRPTR newStr = NULL;
  77
  78   ENTER();
  79
  80   if(str != NULL)
  81   {
  82     int len;
  83
  84     if((len = strlen(str)) > 0)
  85     {
  86       if((newStr = allocArbitrateVecPooled(len+1)) != NULL)
  87         strlcpy(newStr, str, len+1);
  88     }
  89   }
  90
  91   RETURN(newStr);
  92   return newStr;
  93 }
  94 ///
  95 /// mystrndup()
  96 static STRPTR
  97 mystrndup(const char *str1, int n)
  98 {
  99   STRPTR dest;
 100
 101   ENTER();
 102
 103   if((dest = allocArbitrateVecPooled(n+1)) != NULL)
 104   {
 105     if(str1 != NULL)
 106       strlcpy(dest, str1, n+1);
 107     else
 108       dest[0] = '\0';
 109
 110     dest[n] = '\0';
 111   }
 112
 113   RETURN(dest);
 114   return dest;
 115 }
 116 ///
 117 /// readLine()
 118 static ULONG
 119 readLine(BPTR fh, char *buf, ULONG size)
 120 {
 121   char *c;
 122
 123   ENTER();
 124
 125   if((c = FGets(fh, buf, size)) == NULL)
 126   {
 127     RETURN(FALSE);
 128     return FALSE;
 129   }
 130
 131   for(; *c; c++)
 132   {
 133     if(*c == '\n' || *c == '\r')
 134     {
 135       *c = '\0';
 136       break;
 137     }
 138   }
 139
 140   RETURN(TRUE);
 141   return TRUE;
 142 }
 143 ///
 144 /// getConfigItem()
 145 static const char * getConfigItem(const char *buf, const char *item, int len)
 146 {
 147   ENTER();
 148
 149   if(strnicmp(buf, item, len) == 0)
 150   {
 151     UBYTE c;
 152
 153     buf += len;
 154
 155     /* skip spaces */
 156     while((c = *buf) != '\0' && isspace(c))
 157       buf++;
 158
 159     if(*buf != '=')
 160     {
 161       RETURN(NULL);
 162       return NULL;
 163     }
 164
 165     buf++;
 166
 167     /* skip spaces */
 168     while((c = *buf) != '\0'  && isspace(c))
 169       buf++;
 170
 171     RETURN(buf);
 172     return buf;
 173   }
 174
 175   RETURN(NULL);
 176   return NULL;
 177 }
 178 ///
 179 /// parseUtf8()
 180 static int
 181 parseUtf8(STRPTR *ps)
 182 {
 183   STRPTR s = *ps;
 184   int    wc, n, i;
 185
 186   ENTER();
 187
 188   if(*s<0x80)
 189   {
 190     *ps = s+1;
 191
 192     RETURN(*s);
 193     return *s;
 194   }
 195
 196   if(*s<0xc2)
 197   {
 198     RETURN(-1);
 199     return -1;
 200   }
 201   else
 202   {
 203     if(*s<0xe0)
 204     {
 205       if((s[1] & 0xc0)!=0x80)
 206       {
 207         RETURN(-1);
 208         return -1;
 209       }
 210
 211       *ps = s+2;
 212
 213       RETURN(((s[0] & 0x1f)<<6) | (s[1] & 0x3f));
 214       return ((s[0] & 0x1f)<<6) | (s[1] & 0x3f);
 215     }
 216     else
 217     {
 218       if(*s<0xf0)
 219       {
 220         n = 3;
 221       }
 222       else
 223       {
 224         if(*s<0xf8)
 225         {
 226           n = 4;
 227         }
 228         else
 229         {
 230           if(*s<0xfc)
 231           {
 232             n = 5;
 233           }
 234           else
 235           {
 236             if(*s<0xfe)
 237             {
 238               n = 6;
 239             }
 240             else
 241             {
 242               RETURN(-1);
 243               return -1;
 244             }
 245           }
 246         }
 247       }
 248     }
 249   }
 250
 251   wc = *s++ & ((1<<(7-n))-1);
 252
 253   for(i = 1; i<n; i++)
 254   {
 255     if((*s & 0xc0) != 0x80)
 256     {
 257       RETURN(-1);
 258       return -1;
 259     }
 260
 261     wc = (wc << 6) | (*s++ & 0x3f);
 262   }
 263
 264   if(wc < (1 << (5 * n - 4)))
 265   {
 266     RETURN(-1);
 267     return -1;
 268   }
 269
 270   *ps = s;
 271
 272   RETURN(wc);
 273   return wc;
 274 }
 275
 276 ///
 277 /// countCodesets()
 278 static int
 279 countCodesets(struct codesetList *csList)
 280 {
 281   struct MinNode *node, *succ;
 282   int num;
 283
 284   for(node = csList->list.mlh_Head, num = 0; (succ = node->mln_Succ); node = succ)
 285     ++num;
 286
 287   return num;
 288 }
 289
 290 ///
 291 /// mapUTF8toASCII()
 292 // in case some UTF8 sequences can not be converted during CodesetsUTF8ToStrA(), this
 293 // function is used to replace these unknown sequences with lookalike characters that
 294 // still make the text more readable. For more replacement see
 295 // http://www.utf8-zeichentabelle.de/unicode-utf8-table.pl
 296 //
 297 // The conversion table in this function is partly borrowed from the awebcharset plugin
 298 // written by Frank Weber. See http://cvs.sunsite.dk/viewcvs.cgi/aweb/plugins/charset/awebcharset.c
 299 //
 300 struct UTF8Replacement
 301 {
 302   const char *utf8;     // the original UTF8 string we are going to replace
 303   const int utf8len;    // the length of the UTF8 string
 304   const char *rep;      // pointer to the replacement string
 305   const int replen;     // the length of the replacement string (minus for signalling an UTF8 string)
 306 };
 307
 308 static int compareUTF8Replacements(const void *p1, const void *p2)
 309 {
 310   struct UTF8Replacement *key = (struct UTF8Replacement *)p1;
 311   struct UTF8Replacement *rep = (struct UTF8Replacement *)p2;
 312   int cmp;
 313
 314   // compare the length first, after that compare the strings
 315   cmp = key->utf8len - rep->utf8len;
 316   if(cmp == 0)
 317     cmp = memcmp(key->utf8, rep->utf8, key->utf8len);
 318
 319   return cmp;
 320 }
 321
 322 static int mapUTF8toASCII(const char **dst, const unsigned char *src, const int utf8len)
 323 {
 324   int len = 0;
 325   struct UTF8Replacement key = { (char *)src, utf8len, NULL, 0 };
 326   struct UTF8Replacement *rep;
 327
 328   static struct UTF8Replacement const utf8map[] =
 329   {
 330     // U+0100 ... U+017F (Latin Extended-A)
 331     { "\xC4\x80", 2, "A",         1 }, // U+0100 -> A       (LATIN CAPITAL LETTER A WITH MACRON)
 332     { "\xC4\x81", 2, "a",         1 }, // U+0101 -> a       (LATIN SMALL LETTER A WITH MACRON)
 333     { "\xC4\x82", 2, "A",         1 }, // U+0102 -> A       (LATIN CAPITAL LETTER A WITH BREVE)
 334     { "\xC4\x83", 2, "a",         1 }, // U+0103 -> a       (LATIN SMALL LETTER A WITH BREVE)
 335     { "\xC4\x84", 2, "A",         1 }, // U+0104 -> A       (LATIN CAPITAL LETTER A WITH OGONEK)
 336     { "\xC4\x85", 2, "a",         1 }, // U+0105 -> a       (LATIN SMALL LETTER A WITH OGONEK)
 337     { "\xC4\x86", 2, "C",         1 }, // U+0106 -> C       (LATIN CAPITAL LETTER C WITH ACUTE)
 338     { "\xC4\x87", 2, "c",         1 }, // U+0107 -> c       (LATIN SMALL LETTER C WITH ACUTE)
 339     { "\xC4\x88", 2, "C",         1 }, // U+0108 -> C       (LATIN CAPITAL LETTER C WITH CIRCUMFLEX)
 340     { "\xC4\x89", 2, "c",         1 }, // U+0109 -> c       (LATIN SMALL LETTER C WITH CIRCUMFLEX)
 341     { "\xC4\x8A", 2, "C",         1 }, // U+010A -> C       (LATIN CAPITAL LETTER C WITH DOT ABOVE)
 342     { "\xC4\x8B", 2, "c",         1 }, // U+010B -> c       (LATIN SMALL LETTER C WITH DOT ABOVE)
 343     { "\xC4\x8C", 2, "C",         1 }, // U+010C -> C       (LATIN CAPITAL LETTER C WITH CARON)
 344     { "\xC4\x8D", 2, "c",         1 }, // U+010D -> c       (LATIN SMALL LETTER C WITH CARON)
 345     { "\xC4\x8E", 2, "D",         1 }, // U+010E -> D       (LATIN CAPITAL LETTER D WITH CARON)
 346     { "\xC4\x8F", 2, "d",         1 }, // U+010F -> d       (LATIN SMALL LETTER D WITH CARON)
 347     { "\xC4\x90", 2, "D",         1 }, // U+0110 -> D       (LATIN CAPITAL LETTER D WITH STROKE)
 348     { "\xC4\x91", 2, "d",         1 }, // U+0111 -> d       (LATIN SMALL LETTER D WITH STROKE)
 349     { "\xC4\x92", 2, "E",         1 }, // U+0112 -> E       (LATIN CAPITAL LETTER E WITH MACRON)
 350     { "\xC4\x93", 2, "e",         1 }, // U+0113 -> e       (LATIN SMALL LETTER E WITH MACRON)
 351     { "\xC4\x94", 2, "E",         1 }, // U+0114 -> E       (LATIN CAPITAL LETTER E WITH BREVE)
 352     { "\xC4\x95", 2, "e",         1 }, // U+0115 -> e       (LATIN SMALL LETTER E WITH BREVE)
 353     { "\xC4\x96", 2, "E",         1 }, // U+0116 -> E       (LATIN CAPITAL LETTER E WITH DOT ABOVE)
 354     { "\xC4\x97", 2, "e",         1 }, // U+0117 -> e       (LATIN SMALL LETTER E WITH DOT ABOVE)
 355     { "\xC4\x98", 2, "E",         1 }, // U+0118 -> E       (LATIN CAPITAL LETTER E WITH OGONEK)
 356     { "\xC4\x99", 2, "e",         1 }, // U+0119 -> e       (LATIN SMALL LETTER E WITH OGONEK)
 357     { "\xC4\x9A", 2, "E",         1 }, // U+011A -> E       (LATIN CAPITAL LETTER E WITH CARON)
 358     { "\xC4\x9B", 2, "e",         1 }, // U+011B -> e       (LATIN SMALL LETTER E WITH CARON)
 359     { "\xC4\x9C", 2, "G",         1 }, // U+011C -> G       (LATIN CAPITAL LETTER G WITH CIRCUMFLEX)
 360     { "\xC4\x9D", 2, "g",         1 }, // U+011D -> g       (LATIN SMALL LETTER G WITH CIRCUMFLEX)
 361     { "\xC4\x9E", 2, "G",         1 }, // U+011E -> G       (LATIN CAPITAL LETTER G WITH BREVE)
 362     { "\xC4\x9F", 2, "g",         1 }, // U+011F -> g       (LATIN SMALL LETTER G WITH BREVE)
 363     { "\xC4\xA0", 2, "G",         1 }, // U+0120 -> G       (LATIN CAPITAL LETTER G WITH DOT ABOVE)
 364     { "\xC4\xA1", 2, "g",         1 }, // U+0121 -> g       (LATIN SMALL LETTER G WITH DOT ABOVE)
 365     { "\xC4\xA2", 2, "G",         1 }, // U+0122 -> G       (LATIN CAPITAL LETTER G WITH CEDILLA)
 366     { "\xC4\xA3", 2, "g",         1 }, // U+0123 -> g       (LATIN SMALL LETTER G WITH CEDILLA)
 367     { "\xC4\xA4", 2, "H",         1 }, // U+0124 -> H       (LATIN CAPITAL LETTER H WITH CIRCUMFLEX)
 368     { "\xC4\xA5", 2, "h",         1 }, // U+0125 -> h       (LATIN SMALL LETTER H WITH CIRCUMFLEX)
 369     { "\xC4\xA6", 2, "H",         1 }, // U+0126 -> H       (LATIN CAPITAL LETTER H WITH STROKE)
 370     { "\xC4\xA7", 2, "h",         1 }, // U+0127 -> h       (LATIN SMALL LETTER H WITH STROKE)
 371     { "\xC4\xA8", 2, "I",         1 }, // U+0128 -> I       (LATIN CAPITAL LETTER I WITH TILDE)
 372     { "\xC4\xA9", 2, "i",         1 }, // U+0129 -> i       (LATIN SMALL LETTER I WITH TILDE)
 373     { "\xC4\xAA", 2, "I",         1 }, // U+012A -> I       (LATIN CAPITAL LETTER I WITH MACRON)
 374     { "\xC4\xAB", 2, "i",         1 }, // U+012B -> i       (LATIN SMALL LETTER I WITH MACRON)
 375     { "\xC4\xAC", 2, "I",         1 }, // U+012C -> I       (LATIN CAPITAL LETTER I WITH BREVE)
 376     { "\xC4\xAD", 2, "i",         1 }, // U+012D -> i       (LATIN SMALL LETTER I WITH BREVE)
 377     { "\xC4\xAE", 2, "I",         1 }, // U+012E -> I       (LATIN CAPITAL LETTER I WITH OGONEK)
 378     { "\xC4\xAF", 2, "i",         1 }, // U+012F -> i       (LATIN SMALL LETTER I WITH OGONEK)
 379     { "\xC4\xB0", 2, "I",         1 }, // U+0130 -> I       (LATIN CAPITAL LETTER I WITH DOT ABOVE)
 380     { "\xC4\xB1", 2, "i",         1 }, // U+0131 -> i       (LATIN SMALL LETTER DOTLESS I)
 381     { "\xC4\xB2", 2, "Ij",        2 }, // U+0132 -> Ij      (LATIN CAPITAL LIGATURE IJ)
 382     { "\xC4\xB3", 2, "ij",        2 }, // U+0133 -> ij      (LATIN SMALL LIGATURE IJ)
 383     { "\xC4\xB4", 2, "J",         1 }, // U+0134 -> J       (LATIN CAPITAL LETTER J WITH CIRCUMFLEX)
 384     { "\xC4\xB5", 2, "j",         1 }, // U+0135 -> j       (LATIN SMALL LETTER J WITH CIRCUMFLEX)
 385     { "\xC4\xB6", 2, "K",         1 }, // U+0136 -> K       (LATIN CAPITAL LETTER K WITH CEDILLA)
 386     { "\xC4\xB7", 2, "k",         1 }, // U+0137 -> k       (LATIN SMALL LETTER K WITH CEDILLA)
 387     { "\xC4\xB8", 2, "k",         1 }, // U+0138 -> k       (LATIN SMALL LETTER KRA)
 388     { "\xC4\xB9", 2, "L",         1 }, // U+0139 -> L       (LATIN CAPITAL LETTER L WITH ACUTE)
 389     { "\xC4\xBA", 2, "l",         1 }, // U+013A -> l       (LATIN SMALL LETTER L WITH ACUTE)
 390     { "\xC4\xBB", 2, "L",         1 }, // U+013B -> L       (LATIN CAPITAL LETTER L WITH CEDILLA)
 391     { "\xC4\xBC", 2, "l",         1 }, // U+013C -> l       (LATIN SMALL LETTER L WITH CEDILLA)
 392     { "\xC4\xBD", 2, "L",         1 }, // U+013D -> L       (LATIN CAPITAL LETTER L WITH CARON)
 393     { "\xC4\xBE", 2, "l",         1 }, // U+013E -> l       (LATIN SMALL LETTER L WITH CARON)
 394     { "\xC4\xBF", 2, "L",         1 }, // U+013F -> L       (LATIN CAPITAL LETTER L WITH MIDDLE DOT)
 395     { "\xC5\x80", 2, "l",         1 }, // U+0140 -> l       (LATIN SMALL LETTER L WITH MIDDLE DOT)
 396     { "\xC5\x81", 2, "L",         1 }, // U+0141 -> L       (LATIN CAPITAL LETTER L WITH STROKE)
 397     { "\xC5\x82", 2, "l",         1 }, // U+0142 -> l       (LATIN SMALL LETTER L WITH STROKE)
 398     { "\xC5\x83", 2, "N",         1 }, // U+0143 -> N       (LATIN CAPITAL LETTER N WITH ACUTE)
 399     { "\xC5\x84", 2, "n",         1 }, // U+0144 -> n       (LATIN SMALL LETTER N WITH ACUTE)
 400     { "\xC5\x85", 2, "N",         1 }, // U+0145 -> N       (LATIN CAPITAL LETTER N WITH CEDILLA)
 401     { "\xC5\x86", 2, "n",         1 }, // U+0146 -> n       (LATIN SMALL LETTER N WITH CEDILLA)
 402     { "\xC5\x87", 2, "N",         1 }, // U+0147 -> N       (LATIN CAPITAL LETTER N WITH CARON)
 403     { "\xC5\x88", 2, "n",         1 }, // U+0148 -> n       (LATIN SMALL LETTER N WITH CARON)
 404     { "\xC5\x89", 2, "'n",        2 }, // U+0149 -> 'n      (LATIN SMALL LETTER N PRECEDED BY APOSTROPHE)
 405     { "\xC5\x8A", 2, "Ng",        2 }, // U+014A -> Ng      (LATIN CAPITAL LETTER ENG)
 406     { "\xC5\x8B", 2, "ng",        2 }, // U+014B -> ng      (LATIN SMALL LETTER ENG)
 407     { "\xC5\x8C", 2, "O",         1 }, // U+014C -> O       (LATIN CAPITAL LETTER O WITH MACRON)
 408     { "\xC5\x8D", 2, "o",         1 }, // U+014D -> o       (LATIN SMALL LETTER O WITH MACRON)
 409     { "\xC5\x8E", 2, "O",         1 }, // U+014E -> O       (LATIN CAPITAL LETTER O WITH BREVE)
 410     { "\xC5\x8F", 2, "o",         1 }, // U+014F -> o       (LATIN SMALL LETTER O WITH BREVE)
 411     { "\xC5\x90", 2, "O",         1 }, // U+0150 -> O       (LATIN CAPITAL LETTER O WITH DOUBLE ACUTE)
 412     { "\xC5\x91", 2, "o",         1 }, // U+0151 -> o       (LATIN SMALL LETTER O WITH DOUBLE ACUTE)
 413     { "\xC5\x92", 2, "Oe",        2 }, // U+0152 -> Oe      (LATIN CAPITAL LIGATURE OE)
 414     { "\xC5\x93", 2, "oe",        2 }, // U+0153 -> oe      (LATIN SMALL LIGATURE OE)
 415     { "\xC5\x94", 2, "R",         1 }, // U+0154 -> R       (LATIN CAPITAL LETTER R WITH ACUTE)
 416     { "\xC5\x95", 2, "r",         1 }, // U+0155 -> r       (LATIN SMALL LETTER R WITH ACUTE)
 417     { "\xC5\x96", 2, "R",         1 }, // U+0156 -> R       (LATIN CAPITAL LETTER R WITH CEDILLA)
 418     { "\xC5\x97", 2, "r",         1 }, // U+0157 -> r       (LATIN SMALL LETTER R WITH CEDILLA)
 419     { "\xC5\x98", 2, "R",         1 }, // U+0158 -> R       (LATIN CAPITAL LETTER R WITH CARON)
 420     { "\xC5\x99", 2, "r",         1 }, // U+0159 -> r       (LATIN SMALL LETTER R WITH CARON)
 421     { "\xC5\x9A", 2, "S",         1 }, // U+015A -> S       (LATIN CAPITAL LETTER S WITH ACUTE)
 422     { "\xC5\x9B", 2, "s",         1 }, // U+015B -> s       (LATIN SMALL LETTER S WITH ACUTE)
 423     { "\xC5\x9C", 2, "S",         1 }, // U+015C -> S       (LATIN CAPITAL LETTER S WITH CIRCUMFLEX)
 424     { "\xC5\x9D", 2, "s",         1 }, // U+015D -> s       (LATIN SMALL LETTER S WITH CIRCUMFLEX)
 425     { "\xC5\x9E", 2, "S",         1 }, // U+015E -> S       (LATIN CAPITAL LETTER S WITH CEDILLA)
 426     { "\xC5\x9F", 2, "s",         1 }, // U+015F -> s       (LATIN SMALL LETTER S WITH CEDILLA)
 427     { "\xC5\xA0", 2, "S",         1 }, // U+0160 -> S       (LATIN CAPITAL LETTER S WITH CARON)
 428     { "\xC5\xA1", 2, "s",         1 }, // U+0161 -> s       (LATIN SMALL LETTER S WITH CARON)
 429     { "\xC5\xA2", 2, "T",         1 }, // U+0162 -> T       (LATIN CAPITAL LETTER T WITH CEDILLA)
 430     { "\xC5\xA3", 2, "t",         1 }, // U+0163 -> t       (LATIN SMALL LETTER T WITH CEDILLA)
 431     { "\xC5\xA4", 2, "T",         1 }, // U+0164 -> T       (LATIN CAPITAL LETTER T WITH CARON)
 432     { "\xC5\xA5", 2, "t",         1 }, // U+0165 -> t       (LATIN SMALL LETTER T WITH CARON)
 433     { "\xC5\xA6", 2, "T",         1 }, // U+0166 -> T       (LATIN CAPITAL LETTER T WITH STROKE)
 434     { "\xC5\xA7", 2, "t",         1 }, // U+0167 -> t       (LATIN SMALL LETTER T WITH STROKE)
 435     { "\xC5\xA8", 2, "U",         1 }, // U+0168 -> U       (LATIN CAPITAL LETTER U WITH TILDE)
 436     { "\xC5\xA9", 2, "u",         1 }, // U+0169 -> u       (LATIN SMALL LETTER U WITH TILDE)
 437     { "\xC5\xAA", 2, "U",         1 }, // U+016A -> U       (LATIN CAPITAL LETTER U WITH MACRON)
 438     { "\xC5\xAB", 2, "u",         1 }, // U+016B -> u       (LATIN SMALL LETTER U WITH MACRON)
 439     { "\xC5\xAC", 2, "U",         1 }, // U+016C -> U       (LATIN CAPITAL LETTER U WITH BREVE)
 440     { "\xC5\xAD", 2, "u",         1 }, // U+016D -> u       (LATIN SMALL LETTER U WITH BREVE)
 441     { "\xC5\xAE", 2, "U",         1 }, // U+016E -> U       (LATIN CAPITAL LETTER U WITH RING ABOVE)
 442     { "\xC5\xAF", 2, "u",         1 }, // U+016F -> u       (LATIN SMALL LETTER U WITH RING ABOVE)
 443     { "\xC5\xB0", 2, "U",         1 }, // U+0170 -> U       (LATIN CAPITAL LETTER U WITH DOUBLE ACUTE)
 444     { "\xC5\xB1", 2, "u",         1 }, // U+0171 -> u       (LATIN SMALL LETTER U WITH DOUBLE ACUTE)
 445     { "\xC5\xB2", 2, "U",         1 }, // U+0172 -> U       (LATIN CAPITAL LETTER U WITH OGONEK)
 446     { "\xC5\xB3", 2, "u",         1 }, // U+0173 -> u       (LATIN SMALL LETTER U WITH OGONEK)
 447     { "\xC5\xB4", 2, "W",         1 }, // U+0174 -> W       (LATIN CAPITAL LETTER W WITH CIRCUMFLEX)
 448     { "\xC5\xB5", 2, "w",         1 }, // U+0175 -> w       (LATIN SMALL LETTER W WITH CIRCUMFLEX)
 449     { "\xC5\xB6", 2, "Y",         1 }, // U+0176 -> Y       (LATIN CAPITAL LETTER Y WITH CIRCUMFLEX)
 450     { "\xC5\xB7", 2, "y",         1 }, // U+0177 -> y       (LATIN SMALL LETTER Y WITH CIRCUMFLEX)
 451     { "\xC5\xB8", 2, "Y",         1 }, // U+0178 -> Y       (LATIN CAPITAL LETTER Y WITH DIAERESIS)
 452     { "\xC5\xB9", 2, "Z",         1 }, // U+0179 -> Z       (LATIN CAPITAL LETTER Z WITH ACUTE)
 453     { "\xC5\xBA", 2, "z",         1 }, // U+017A -> z       (LATIN SMALL LETTER Z WITH ACUTE)
 454     { "\xC5\xBB", 2, "Z",         1 }, // U+017B -> Z       (LATIN CAPITAL LETTER Z WITH DOT ABOVE)
 455     { "\xC5\xBC", 2, "z",         1 }, // U+017C -> z       (LATIN SMALL LETTER Z WITH DOT ABOVE)
 456     { "\xC5\xBD", 2, "Z",         1 }, // U+017D -> Z       (LATIN CAPITAL LETTER Z WITH CARON)
 457     { "\xC5\xBE", 2, "z",         1 }, // U+017E -> z       (LATIN SMALL LETTER Z WITH CARON)
 458     { "\xC5\xBF", 2, "s",         1 }, // U+017F -> s       (LATIN SMALL LETTER LONG S
 459
 460     // U+2000 ... U+206F (General Punctuation)
 461     { "\xE2\x80\x90", 3, "-",         1 }, // U+2010 -> -       (HYPHEN)
 462     { "\xE2\x80\x91", 3, "-",         1 }, // U+2011 -> -       (NON-BREAKING HYPHEN)
 463     { "\xE2\x80\x92", 3, "--",        2 }, // U+2012 -> --      (FIGURE DASH)
 464     { "\xE2\x80\x93", 3, "--",        2 }, // U+2013 -> --      (EN DASH)
 465     { "\xE2\x80\x94", 3, "---",       3 }, // U+2014 -> ---     (EM DASH)
 466     { "\xE2\x80\x95", 3, "---",       3 }, // U+2015 -> ---     (HORIZONTAL BAR)
 467     { "\xE2\x80\x96", 3, "||",        2 }, // U+2016 -> ||      (DOUBLE VERTICAL LINE)
 468     { "\xE2\x80\x97", 3, "_",         1 }, // U+2017 -> _       (DOUBLE LOW LINE)
 469     { "\xE2\x80\x98", 3, "`",         1 }, // U+2018 -> `       (LEFT SINGLE QUOTATION MARK)
 470     { "\xE2\x80\x99", 3, "'",         1 }, // U+2019 -> '       (RIGHT SINGLE QUOTATION MARK)
 471     { "\xE2\x80\x9A", 3, ",",         1 }, // U+201A -> ,       (SINGLE LOW-9 QUOTATION MARK)
 472     { "\xE2\x80\x9B", 3, "'",         1 }, // U+201B -> '       (SINGLE HIGH-REVERSED-9 QUOTATION MARK)
 473     { "\xE2\x80\x9C", 3, "\"",        1 }, // U+201C -> "       (LEFT DOUBLE QUOTATION MARK)
 474     { "\xE2\x80\x9D", 3, "\"",        1 }, // U+201D -> "       (RIGHT DOUBLE QUOTATION MARK)
 475     { "\xE2\x80\x9E", 3, ",,",        2 }, // U+201E -> ,,      (DOUBLE LOW-9 QUOTATION MARK)
 476     { "\xE2\x80\x9F", 3, "``",        2 }, // U+201F -> ``      (DOUBLE HIGH-REVERSED-9 QUOTATION MARK)
 477     { "\xE2\x80\xA0", 3, "+",         1 }, // U+2020 -> +       (DAGGER)
 478     { "\xE2\x80\xA1", 3, "+",         1 }, // U+2021 -> +       (DOUBLE DAGGER)
 479     { "\xE2\x80\xA2", 3, "\xC2\xB7", -2 }, // U+2022 -> U+00B7  (BULLET) -> (MIDDLE POINT)
 480     { "\xE2\x80\xA3", 3, ".",         1 }, // U+2023 -> .       (TRIANGULAR BULLET)
 481     { "\xE2\x80\xA4", 3, ".",         1 }, // U+2024 -> .       (ONE DOT LEADER)
 482     { "\xE2\x80\xA5", 3, "..",        2 }, // U+2025 -> ..      (TWO DOT LEADER)
 483     { "\xE2\x80\xA6", 3, "...",       3 }, // U+2026 -> ...     (HORIZONTAL ELLIPSIS)
 484     { "\xE2\x80\xA7", 3, "\xC2\xB7", -2 }, // U+2027 -> U+00B7  (HYPHENATION POINT) -> (MIDDLE POINT)
 485     { "\xE2\x80\xB0", 3, "%.",        2 }, // U+2030 -> %.      (PER MILLE SIGN)
 486     { "\xE2\x80\xB1", 3, "%..",       3 }, // U+2031 -> %..     (PER TEN THOUSAND SIGN)
 487     { "\xE2\x80\xB2", 3, "'",         1 }, // U+2032 -> `       (PRIME)
 488     { "\xE2\x80\xB3", 3, "''",        2 }, // U+2033 -> ''      (DOUBLE PRIME)
 489     { "\xE2\x80\xB4", 3, "'''",       3 }, // U+2034 -> '''     (TRIPLE PRIME)
 490     { "\xE2\x80\xB5", 3, "`",         1 }, // U+2035 -> `       (REVERSED PRIME)
 491     { "\xE2\x80\xB6", 3, "``",        2 }, // U+2036 -> ``      (REVERSED DOUBLE PRIME)
 492     { "\xE2\x80\xB7", 3, "```",       3 }, // U+2037 -> ```     (REVERSED TRIPLE PRIME)
 493     { "\xE2\x80\xB8", 3, "^",         1 }, // U+2038 -> ^       (CARET)
 494     { "\xE2\x80\xB9", 3, "<",         1 }, // U+2039 -> <       (SINGLE LEFT-POINTING ANGLE QUOTATION MARK)
 495     { "\xE2\x80\xBA", 3, ">",         1 }, // U+203A -> >       (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK)
 496     { "\xE2\x80\xBB", 3, "\xC3\x97", -2 }, // U+203B -> U+00D7  (REFERENCE MARK) -> (MULTIPLICATION SIGN)
 497     { "\xE2\x80\xBC", 3, "!!",        2 }, // U+203C -> !!      (DOUBLE EXCLAMATION MARK)
 498     { "\xE2\x80\xBD", 3, "?",         1 }, // U+203D -> ?       (INTERROBANG)
 499     { "\xE2\x81\x82", 3, "*",         1 }, // U+2042 -> *       (ASTERISM)
 500     { "\xE2\x81\x83", 3, ".",         1 }, // U+2043 -> .       (HYPHEN BULLET)
 501     { "\xE2\x81\x84", 3, "/",         1 }, // U+2044 -> /       (FRACTION SLASH)
 502     { "\xE2\x81\x87", 3, "??",        2 }, // U+2047 -> ??      (DOUBLE QUESTION MARK)
 503     { "\xE2\x81\x88", 3, "?!",        2 }, // U+2048 -> ?!      (QUESTION EXCLAMATION MARK)
 504     { "\xE2\x81\x89", 3, "!?",        2 }, // U+2049 -> !?      (EXCLAMATION QUESTION MARK)
 505     { "\xE2\x81\x8E", 3, "*",         1 }, // U+204E -> *       (LOW ASTERISK)
 506     { "\xE2\x81\x8F", 3, ";",         1 }, // U+204F -> ;       (REVERSED SEMICOLON)
 507     { "\xE2\x81\x91", 3, "*",         1 }, // U+2051 -> *       (TWO ASTERISKS ALIGNED VERTICALLY)
 508     { "\xE2\x81\x92", 3, "-",         1 }, // U+2052 -> -       (COMMERCIAL MINUS SIGN)
 509     { "\xE2\x81\x93", 3, "~",         1 }, // U+2053 -> ~       (SWUNG DASH)
 510     { "\xE2\x81\x95", 3, "*",         1 }, // U+2055 -> *       (FLOWER PUNCTUATION MARK)
 511     { "\xE2\x81\x97", 3, "''''",      4 }, // U+2057 -> ''''    (QUADRUPLE PRIME)
 512     { "\xE2\x81\x9A", 3, ":",         1 }, // U+205A -> :       (TWO DOT PUNCTUATION)
 513     { "\xE2\x81\x9C", 3, "+",         1 }, // U+205C -> +       (DOTTED CROSS)
 514
 515     // U+20A0 ... U+20CF (Currency Symbols)
 516     { "\xE2\x82\xA0", 3, "ECU",       3 }, // U+20A0 -> ECU     (EURO-CURRENCY SIGN)
 517     { "\xE2\x82\xA1", 3, "CRC",       3 }, // U+20A1 -> CRC     (COLON SIGN)
 518     { "\xE2\x82\xA2", 3, "BRC",       3 }, // U+20A2 -> BRC     (CRUZEIRO SIGN)
 519     { "\xE2\x82\xA3", 3, "BEF",       3 }, // U+20A3 -> BEF     (FRENCH FRANC SIGN)
 520     { "\xE2\x82\xA4", 3, "ITL",       3 }, // U+20A4 -> ITL     (LIRA SIGN)
 521     { "\xE2\x82\xA6", 3, "NGN",       3 }, // U+20A6 -> NGN     (NEIRA SIGN)
 522     { "\xE2\x82\xA7", 3, "ESP",       3 }, // U+20A7 -> ESP     (PESETA SIGN)
 523     { "\xE2\x82\xA8", 3, "MVQ",       3 }, // U+20A8 -> MVQ     (RUPEE SIGN)
 524     { "\xE2\x82\xA9", 3, "KPW",       3 }, // U+20A9 -> KPW     (WON SIGN)
 525     { "\xE2\x82\xAA", 3, "ILS",       3 }, // U+20AA -> ILS     (NEW SHEQEL SIGN)
 526     { "\xE2\x82\xAB", 3, "VNC",       3 }, // U+20AB -> VNC     (DONG SIGN)
 527     { "\xE2\x82\xAC", 3, "EUR",       3 }, // U+20AC -> EUR     (EURO SIGN)
 528     { "\xE2\x82\xAD", 3, "LAK",       3 }, // U+20AD -> LAK     (KIP SIGN)
 529     { "\xE2\x82\xAE", 3, "MNT",       3 }, // U+20AE -> MNT     (TUGRIK SIGN)
 530     { "\xE2\x82\xAF", 3, "GRD",       3 }, // U+20AF -> GRD     (DRACHMA SIGN)
 531     { "\xE2\x82\xB0", 3, "Pf",        2 }, // U+20B0 -> Pf      (GERMAN PENNY SIGN)
 532     { "\xE2\x82\xB1", 3, "P",         1 }, // U+20B1 -> P       (PESO SIGN)
 533     { "\xE2\x82\xB2", 3, "PYG",       3 }, // U+20B2 -> PYG     (GUARANI SIGN)
 534     { "\xE2\x82\xB3", 3, "ARA",       3 }, // U+20B3 -> ARA     (AUSTRAL SIGN)
 535     { "\xE2\x82\xB4", 3, "UAH",       3 }, // U+20B4 -> UAH     (HRYVNIA SIGN)
 536     { "\xE2\x82\xB5", 3, "GHS",       3 }, // U+20B5 -> GHS     (CEDI SIGN)
 537
 538     // U+2190 ... U+21FF (Arrows)
 539     { "\xE2\x86\x90", 3, "<-",        2 }, // U+2190 -> <-      (LEFTWARDS ARROW)
 540     { "\xE2\x86\x92", 3, "->",        2 }, // U+2192 -> ->      (RIGHTWARDS ARROW)
 541   };
 542
 543   ENTER();
 544
 545   // start with no replacement string
 546   *dst = NULL;
 547
 548   // perform a binary search in the lookup table
 549   if((rep = bsearch(&key, utf8map, sizeof(utf8map) / sizeof(utf8map[0]), sizeof(utf8map[0]), compareUTF8Replacements)) != NULL)
 550   {
 551     // if we found something, then copy this over to the result variables
 552     *dst = rep->rep;
 553     len = rep->replen;
 554   }
 555
 556   RETURN(len);
 557   return len;
 558 }
 559
 560 ///
 561 /// matchCodesetAlias()
 562 //
 563 struct CodesetAliases
 564 {
 565   const char *MIMEname;   // The official and correct MIME name for a codeset
 566   const char *Aliases;    // A space separated array with well-known aliases
 567 };
 568
 569 const struct CodesetAliases codesetAliases[] =
 570 {
 571   // MIME name       Aliases
 572   { "Amiga-1251",   "Ami1251 Amiga1251"  },
 573   { "AmigaPL",      "AmiPL Amiga-PL"     },
 574   { "ISO-8859-1",   "ISO8859-1 8859-1" },
 575   { "ISO-8859-2",   "ISO8859-2 8859-2" },
 576   { "ISO-8859-3",   "ISO8859-3 8859-3" },
 577   { "ISO-8859-4",   "ISO8859-4 8859-4" },
 578   { "ISO-8859-5",   "ISO8859-5 8859-5" },
 579   { "ISO-8859-6",   "ISO8859-6 8859-6" },
 580   { "ISO-8859-7",   "ISO8859-7 8859-7" },
 581   { "ISO-8859-8",   "ISO8859-8 8859-8" },
 582   { "ISO-8859-9",   "ISO8859-9 8859-9" },
 583   { "ISO-8859-10",  "ISO8859-10 8859-10" },
 584   { "ISO-8859-11",  "ISO8859-11 8859-11" },
 585   { "ISO-8859-12",  "ISO8859-12 8859-12" },
 586   { "ISO-8859-13",  "ISO8859-13 8859-13" },
 587   { "ISO-8859-14",  "ISO8859-14 8859-14" },
 588   { "ISO-8859-15",  "ISO8859-15 8859-15" },
 589   { "ISO-8859-16",  "ISO8859-16 8859-16" },
 590   { "ISO-8859-10",  "ISO8859-10 8859-10" },
 591   { "KOI8-R",       "KOI8R" },
 592   { "US-ASCII",     "ASCII" },
 593   { "UTF-8",        "UTF8 UTF" },
 594   { "UTF-16",       "UTF16" },
 595   { "UTF-32",       "UTF32" },
 596   { "windows-1250", "cp1250 windows1250" },
 597   { "windows-1251", "cp1251 windows1251" },
 598   { "windows-1252", "cp1252 windows1252" },
 599   { "windows-1253", "cp1253 windows1253" },
 600   { "windows-1254", "cp1254 windows1254" },
 601   { "windows-1255", "cp1255 windows1255" },
 602   { "windows-1256", "cp1256 windows1256" },
 603   { "windows-1257", "cp1257 windows1257" },
 604   { NULL,           NULL,                }
 605 };
 606
 607 static char *matchCodesetAlias(const char *search)
 608 {
 609   char *result = NULL;
 610   size_t len = strlen(search);
 611   int i;
 612
 613   ENTER();
 614
 615   for(i=0; codesetAliases[i].MIMEname != NULL; i++)
 616   {
 617     BOOL found = FALSE;
 618
 619     // search the MIMEname first
 620     if(stricmp(search, codesetAliases[i].MIMEname) == 0)
 621       found = TRUE;
 622     else
 623     {
 624       const char *s = codesetAliases[i].Aliases;
 625
 626       // loop through space separated list of aliases
 627       while(s != NULL && *s != '\0')
 628       {
 629         if(strnicmp(search, s, len) == 0)
 630         {
 631           found = TRUE;
 632           break;
 633         }
 634
 635         if((s = strpbrk(s, " ")) != NULL)
 636           s++;
 637       }
 638     }
 639
 640     if(found == TRUE)
 641     {
 642       result = (char *)codesetAliases[i].MIMEname;
 643
 644       break;
 645     }
 646   }
 647
 648   RETURN(result);
 649   return result;
 650 }
 651
 652 ///
 653
 654 /**************************************************************************/
 655
 656 /// defaultCodeset()
 657 static struct codeset *
 658 defaultCodeset(BOOL useSemaphore)
 659 {
 660   char buf[256];
 661   struct codeset *codeset;
 662
 663   ENTER();
 664
 665   if(useSemaphore == TRUE)
 666     ObtainSemaphoreShared(&CodesetsBase->libSem);
 667
 668   buf[0] = '\0';
 669   GetVar("codeset_default",buf,sizeof(buf),GVF_GLOBAL_ONLY);
 670
 671   if(buf[0] == '\0' || (codeset = codesetsFind(&CodesetsBase->codesets,buf)) == NULL)
 672     codeset = CodesetsBase->systemCodeset;
 673
 674   if(useSemaphore == TRUE)
 675     ReleaseSemaphore(&CodesetsBase->libSem);
 676
 677   RETURN(codeset);
 678   return codeset;
 679 }
 680 ///
 681 /// codesetsCmpUnicode()
 682 // The compare function
 683 static int
 684 codesetsCmpUnicode(struct single_convert *arg1,struct single_convert *arg2)
 685 {
 686   return strcmp((char*)&arg1->utf8[1], (char*)&arg2->utf8[1]);
 687 }
 688 ///
 689 /// codesetsReadTable()
 690
 691 #define ITEM_STANDARD           "Standard"
 692 #define ITEM_ALTSTANDARD        "AltStandard"
 693 #define ITEM_READONLY           "ReadOnly"
 694 #define ITEM_CHARACTERIZATION   "Characterization"
 695
 696 // Reads a coding table and adds it
 697 static BOOL
 698 codesetsReadTable(struct codesetList *csList, STRPTR name)
 699 {
 700   BPTR fh;
 701   BOOL res = FALSE;
 702
 703   ENTER();
 704
 705   D(DBF_STARTUP, "trying to fetch charset file '%s'...", name);
 706
 707   if((fh = Open(name, MODE_OLDFILE)))
 708   {
 709     struct codeset *codeset;
 710
 711     if((codeset = (struct codeset *)allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) != NULL)
 712     {
 713       int i;
 714       char buf[512];
 715
 716       memset(codeset,0,sizeof(struct codeset));
 717
 718       for(i = 0; i<256; i++)
 719         codeset->table[i].code = codeset->table[i].ucs4 = i;
 720
 721       while(readLine(fh, buf, 512*sizeof(char)))
 722       {
 723         const char *result;
 724
 725         if(buf[0]=='#')
 726           continue;
 727
 728         if((result = getConfigItem(buf, ITEM_STANDARD, strlen(ITEM_STANDARD))))
 729           codeset->name = mystrdup(result);
 730         else if(codeset->name == NULL) // a valid file starts with standard and nothing else!!
 731           break;
 732         else if((result = getConfigItem(buf,ITEM_ALTSTANDARD,strlen(ITEM_ALTSTANDARD))))
 733           codeset->alt_name = mystrdup(result);
 734         else if((result = getConfigItem(buf,ITEM_READONLY,strlen(ITEM_READONLY))))
 735           codeset->read_only = !!atoi(result);
 736         else if((result = getConfigItem(buf,ITEM_CHARACTERIZATION,strlen(ITEM_CHARACTERIZATION))))
 737         {
 738           if((result[0]=='_') && (result[1]=='(') && (result[2]=='"'))
 739           {
 740             char *end = strchr(result + 3, '"');
 741
 742             if(end)
 743               codeset->characterization = mystrndup(result+3,end-(result+3));
 744           }
 745           else
 746             codeset->characterization = mystrdup(result);
 747         }
 748         else
 749         {
 750           char *p = buf;
 751           int fmt2 = 0;
 752
 753           if((*p=='=') || (fmt2 = ((*p=='0') || (*(p+1)=='x'))))
 754           {
 755             p++;
 756             p += fmt2;
 757
 758             i = strtol((const char *)p,(char **)&p,16);
 759             if(i>0 && i<256)
 760             {
 761               while(isspace(*p)) p++;
 762
 763               if(!strnicmp(p, "U+", 2))
 764               {
 765                 p += 2;
 766                 codeset->table[i].ucs4 = strtol((const char *)p,(char **)&p,16);
 767               }
 768               else
 769               {
 770                 if(*p!='#')
 771                   codeset->table[i].ucs4 = strtol((const char *)p,(char **)&p,0);
 772               }
 773             }
 774           }
 775         }
 776       }
 777
 778       // check if there is not already codeset with the same name in here
 779       if(codeset->name != NULL && !(codesetsFind(csList, codeset->name)))
 780       {
 781         for(i=0; i<256; i++)
 782         {
 783           UTF32 src = codeset->table[i].ucs4, *src_ptr = &src;
 784           UTF8  *dest_ptr = &codeset->table[i].utf8[1];
 785
 786           CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
 787           *dest_ptr = 0;
 788           codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)(&codeset->table[i].utf8[1]);
 789         }
 790
 791         memcpy(codeset->table_sorted, codeset->table, sizeof(codeset->table));
 792         qsort(codeset->table_sorted, 256, sizeof(codeset->table[0]), (int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
 793         AddTail((struct List *)csList, (struct Node *)&codeset->node);
 794
 795         res = TRUE;
 796       }
 797       else
 798       {
 799         // cleanup
 800         if(codeset->name)             freeArbitrateVecPooled(codeset->name);
 801         if(codeset->alt_name)         freeArbitrateVecPooled(codeset->alt_name);
 802         if(codeset->characterization) freeArbitrateVecPooled(codeset->characterization);
 803         freeArbitrateVecPooled(codeset);
 804       }
 805     }
 806
 807     Close(fh);
 808   }
 809
 810   RETURN(res);
 811   return res;
 812 }
 813 ///
 814 /// codesetsScanDir()
 815 static void
 816 codesetsScanDir(struct codesetList *csList, const char *dirPath)
 817 {
 818   ENTER();
 819
 820   if(dirPath != NULL && dirPath[0] != '\0')
 821   {
 822     #if defined(__amigaos4__)
 823     APTR dirContext;
 824
 825     if((dirContext = ObtainDirContextTags(EX_StringNameInput, dirPath,
 826                                           EX_DataFields,      EXF_NAME|EXF_TYPE,
 827                                           TAG_END)) != NULL)
 828     {
 829       struct ExamineData *exd;
 830
 831       D(DBF_STARTUP, "scanning directory '%s' for codesets tables", dirPath);
 832
 833       while((exd = ExamineDir(dirContext)) != NULL)
 834       {
 835         if(EXD_IS_FILE(exd))
 836         {
 837           char filePath[620];
 838
 839           strlcpy(filePath, dirPath, sizeof(filePath));
 840           AddPart(filePath, exd->Name, sizeof(filePath));
 841
 842           D(DBF_STARTUP, "about to read codeset table '%s'", filePath);
 843
 844           codesetsReadTable(csList, filePath);
 845         }
 846       }
 847
 848       ReleaseDirContext(dirContext);
 849     }
 850     #else
 851     BPTR dirLock;
 852
 853     if((dirLock = Lock(dirPath, ACCESS_READ)))
 854     {
 855       struct ExAllControl *eac;
 856
 857       D(DBF_STARTUP, "scanning directory '%s' for codesets tables", dirPath);
 858
 859       if((eac = AllocDosObject(DOS_EXALLCONTROL, NULL)) != NULL)
 860       {
 861         struct ExAllData *ead;
 862         struct ExAllData *eabuffer;
 863         LONG more;
 864
 865         eac->eac_LastKey = 0;
 866         eac->eac_MatchString = NULL;
 867         eac->eac_MatchFunc = NULL;
 868
 869         if((eabuffer = allocVecPooled(CodesetsBase->pool, 10*sizeof(struct ExAllData))) != NULL)
 870         {
 871           char filePath[620];
 872
 873           do
 874           {
 875             more = ExAll(dirLock, eabuffer, 10*sizeof(struct ExAllData), ED_TYPE, eac);
 876             if(!more && IoErr() != ERROR_NO_MORE_ENTRIES)
 877               break;
 878
 879             if(eac->eac_Entries == 0)
 880               continue;
 881
 882             ead = (struct ExAllData *)eabuffer;
 883             do
 884             {
 885               // we only take that ead if it is a file (ed_Type < 0)
 886               if(ead->ed_Type < 0)
 887               {
 888                 strlcpy(filePath, dirPath, sizeof(filePath));
 889                 AddPart(filePath, (char *)ead->ed_Name, sizeof(filePath));
 890
 891                 D(DBF_STARTUP, "about to read codeset table '%s'", filePath);
 892
 893                 codesetsReadTable(csList, filePath);
 894               }
 895             }
 896             while((ead = ead->ed_Next));
 897           }
 898           while(more);
 899
 900           freeVecPooled(CodesetsBase->pool, eabuffer);
 901         }
 902
 903         FreeDosObject(DOS_EXALLCONTROL, eac);
 904       }
 905
 906       UnLock(dirLock);
 907     }
 908     #endif
 909   }
 910
 911   LEAVE();
 912 }
 913
 914 ///
 915 /// codesetsInit()
 916 // Initialized and loads the codesets
 917 BOOL
 918 codesetsInit(struct codesetList *csList)
 919 {
 920   struct codeset       *codeset = NULL;
 921   UTF32                src;
 922   int                  i;
 923   #if defined(__amigaos4__)
 924   ULONG                nextMIB = 3;
 925   #endif
 926
 927   ENTER();
 928
 929   ObtainSemaphore(&CodesetsBase->poolSem);
 930
 931   NewList((struct List *)&CodesetsBase->codesets);
 932
 933   // to make the list of the supported codesets complete we also add fake
 934   // 'UTF-8' , 'UTF-16' and 'UTF-32' only so that our users can query for those codesets as well.
 935   if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
 936     goto end;
 937
 938   codeset->name             = mystrdup("UTF-8");
 939   codeset->alt_name         = mystrdup("UTF8");
 940   codeset->characterization = mystrdup("Unicode");
 941   codeset->read_only        = 0;
 942   AddTail((struct List *)csList, (struct Node *)&codeset->node);
 943   CodesetsBase->utf8Codeset = codeset;
 944
 945   if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
 946     goto end;
 947
 948   codeset->name             = mystrdup("UTF-16");
 949   codeset->alt_name         = mystrdup("UTF16");
 950   codeset->characterization = mystrdup("16-bit Unicode");
 951   codeset->read_only        = 0;
 952   AddTail((struct List *)csList, (struct Node *)&codeset->node);
 953   CodesetsBase->utf16Codeset = codeset;
 954
 955   if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
 956     goto end;
 957
 958   codeset->name             = mystrdup("UTF-32");
 959   codeset->alt_name         = mystrdup("UTF32");
 960   codeset->characterization = mystrdup("32-bit Unicode");
 961   codeset->read_only        = 0;
 962   AddTail((struct List *)csList, (struct Node *)&codeset->node);
 963   CodesetsBase->utf32Codeset = codeset;
 964
 965   // on AmigaOS4 we can use diskfont.library to inquire charset information as
 966   // it comes with a quite rich implementation of different charsets.
 967   #if defined(__amigaos4__)
 968   do
 969   {
 970     char *mimename;
 971     char *ianaName;
 972     ULONG *mapTable;
 973     ULONG curMIB = nextMIB;
 974
 975     nextMIB = ObtainCharsetInfo(DFCS_NUMBER, curMIB, DFCS_NEXTNUMBER);
 976     if(nextMIB == 0)
 977       break;
 978
 979     mapTable = (ULONG *)ObtainCharsetInfo(DFCS_NUMBER, curMIB, DFCS_MAPTABLE);
 980     mimename = (char *)ObtainCharsetInfo(DFCS_NUMBER, curMIB, DFCS_MIMENAME);
 981     ianaName = (char *)ObtainCharsetInfo(DFCS_NUMBER, curMIB, DFCS_NAME);
 982     if(mapTable != NULL && mimename != NULL && codesetsFind(csList, mimename) == NULL)
 983     {
 984       D(DBF_STARTUP, "loading charset '%s' from diskfont.library...", mimename);
 985
 986       if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
 987         goto end;
 988
 989       codeset->name             = mystrdup(mimename);
 990       codeset->alt_name         = NULL;
 991       codeset->characterization = mystrdup(ianaName);
 992       codeset->read_only        = 0;
 993
 994       for(i=0; i<256; i++)
 995       {
 996         UTF32 *src_ptr = &src;
 997         UTF8  *dest_ptr = &codeset->table[i].utf8[1];
 998
 999         src = mapTable[i];
1000
1001         codeset->table[i].code = i;
1002         codeset->table[i].ucs4 = src;
1003         CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr, src_ptr+1, &dest_ptr, dest_ptr+6, CSF_StrictConversion);
1004         *dest_ptr = 0;
1005         codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1006       }
1007
1008       memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1009       qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1, const void *arg2))codesetsCmpUnicode);
1010
1011       AddTail((struct List *)csList, (struct Node *)&codeset->node);
1012     }
1013   }
1014   while(TRUE);
1015   #endif
1016
1017   #if defined(__MORPHOS__)
1018   {
1019     struct Library *KeymapBase;
1020     struct Library *LocaleBase;
1021
1022     if((KeymapBase = OpenLibrary("keymap.library", 51)) != NULL)
1023     {
1024       if((LocaleBase = OpenLibrary("locale.library", 51)) != NULL)
1025       {
1026         struct KeyMap *keymap = AskKeyMapDefault();
1027         CONST_STRPTR name = GetKeyMapCodepage(keymap);
1028
1029         if(name != NULL && keymap != NULL) // Legacy keymaps dont have codepage or Unicode mappings
1030         {
1031           D(DBF_STARTUP, "loading charset '%s' from keymap.library...", name);
1032
1033           if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) != NULL)
1034           {
1035              codeset->name             = mystrdup(name);
1036              codeset->alt_name         = NULL;
1037              codeset->characterization = mystrdup(name);  // No more information available
1038              codeset->read_only        = 0;
1039
1040              for(i=0; i<256; i++)
1041              {
1042                UTF8  *dest_ptr = &codeset->table[i].utf8[1];
1043                LONG rc;
1044
1045                codeset->table[i].code = i;
1046                codeset->table[i].ucs4 = src = ToUCS4(i, keymap);
1047                rc = ConvertUCS4ToUTF8((CONST_WSTRPTR)&src, dest_ptr, 1);
1048                dest_ptr[rc] = 0;
1049                codeset->table[i].utf8[0] = rc;
1050              }
1051
1052              memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1053             qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1, const void *arg2))codesetsCmpUnicode);
1054
1055              AddTail((struct List *)csList, (struct Node *)&codeset->node);
1056           }
1057           else
1058             goto end;
1059         }
1060
1061         CloseLibrary(LocaleBase);
1062       }
1063
1064       CloseLibrary(KeymapBase);
1065     }
1066   }
1067   #endif
1068
1069   D(DBF_STARTUP, "loading charsets from Libs:Charsets...");
1070
1071   // we try to walk to the LIBS:Charsets directory on our own and readin our
1072   // own charset tables
1073   codesetsScanDir(csList, "LIBS:Charsets");
1074
1075   //
1076   // now we go and initialize our internally supported codesets but only if
1077   // we have not already loaded a charset with the same name
1078   //
1079   D(DBF_STARTUP, "initializing internal charsets...");
1080
1081   // ISO-8859-1 + EURO
1082   if(codesetsFind(csList, "ISO-8859-1 + Euro") == NULL)
1083   {
1084     if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1085       goto end;
1086
1087     codeset->name             = mystrdup("ISO-8859-1 + Euro");
1088     codeset->alt_name         = NULL;
1089     codeset->characterization = mystrdup("West European (with EURO)");
1090     codeset->read_only        = 1;
1091     for(i = 0; i<256; i++)
1092     {
1093       UTF32 *src_ptr = &src;
1094       UTF8  *dest_ptr = &codeset->table[i].utf8[1];
1095
1096       if(i==164)
1097         src = 0x20AC; /* the EURO sign */
1098       else
1099         src = i;
1100
1101       codeset->table[i].code = i;
1102       codeset->table[i].ucs4 = src;
1103       CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr, src_ptr+1, &dest_ptr, dest_ptr+6, CSF_StrictConversion);
1104       *dest_ptr = 0;
1105       codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1106     }
1107     memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1108     qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1, const void *arg2))codesetsCmpUnicode);
1109     AddTail((struct List *)csList, (struct Node *)&codeset->node);
1110   }
1111
1112   // ISO-8859-1
1113   if(codesetsFind(csList, "ISO-8859-1") == NULL)
1114   {
1115     if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1116       goto end;
1117
1118     codeset->name             = mystrdup("ISO-8859-1");
1119     codeset->alt_name         = mystrdup("ISO8859-1");
1120     codeset->characterization = mystrdup("West European");
1121     codeset->read_only        = 0;
1122     for(i = 0; i<256; i++)
1123     {
1124       UTF32 *src_ptr = &src;
1125       UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1126
1127       src = i;
1128
1129       codeset->table[i].code = i;
1130       codeset->table[i].ucs4 = src;
1131       CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr, src_ptr+1, &dest_ptr, dest_ptr+6, CSF_StrictConversion);
1132       *dest_ptr = 0;
1133       codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1134     }
1135     memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1136     qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
1137     AddTail((struct List *)csList, (struct Node *)&codeset->node);
1138   }
1139
1140   // ISO-8859-2
1141   if(codesetsFind(csList, "ISO-8859-2") == NULL)
1142   {
1143     if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1144       goto end;
1145
1146     codeset->name             = mystrdup("ISO-8859-2");
1147     codeset->alt_name         = mystrdup("ISO8859-2");
1148     codeset->characterization = mystrdup("Central/East European");
1149     codeset->read_only        = 0;
1150     for(i = 0; i<256; i++)
1151     {
1152       UTF32 *src_ptr = &src;
1153       UTF8  *dest_ptr = &codeset->table[i].utf8[1];
1154
1155       if(i<0xa0)
1156         src = i;
1157       else
1158         src = iso_8859_2_to_ucs4[i-0xa0];
1159
1160       codeset->table[i].code = i;
1161       codeset->table[i].ucs4 = src;
1162       CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr, src_ptr+1, &dest_ptr,dest_ptr+6, CSF_StrictConversion);
1163       *dest_ptr = 0;
1164       codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1165     }
1166     memcpy(codeset->table_sorted, codeset->table, sizeof(codeset->table));
1167     qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
1168     AddTail((struct List *)csList, (struct Node *)&codeset->node);
1169   }
1170
1171   // ISO-8859-3
1172   if(codesetsFind(csList, "ISO-8859-3") == NULL)
1173   {
1174     if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1175       goto end;
1176
1177     codeset->name             = mystrdup("ISO-8859-3");
1178     codeset->alt_name         = mystrdup("ISO8859-3");
1179     codeset->characterization = mystrdup("South European");
1180     codeset->read_only        = 0;
1181     for(i = 0; i<256; i++)
1182     {
1183       UTF32 *src_ptr = &src;
1184       UTF8  *dest_ptr = &codeset->table[i].utf8[1];
1185
1186       if(i<0xa0)
1187         src = i;
1188       else
1189         src = iso_8859_3_to_ucs4[i-0xa0];
1190
1191       codeset->table[i].code = i;
1192       codeset->table[i].ucs4 = src;
1193       CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1194       *dest_ptr = 0;
1195       codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1196     }
1197     memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1198     qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
1199     AddTail((struct List *)csList, (struct Node *)&codeset->node);
1200   }
1201
1202   // ISO-8859-4
1203   if(codesetsFind(csList, "ISO-8859-4") == NULL)
1204   {
1205     if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1206       goto end;
1207
1208     codeset->name             = mystrdup("ISO-8859-4");
1209     codeset->alt_name         = mystrdup("ISO8859-4");
1210     codeset->characterization = mystrdup("North European");
1211     codeset->read_only        = 0;
1212     for(i = 0; i<256; i++)
1213     {
1214       UTF32 *src_ptr = &src;
1215       UTF8  *dest_ptr = &codeset->table[i].utf8[1];
1216
1217       if(i<0xa0)
1218         src = i;
1219       else
1220         src = iso_8859_4_to_ucs4[i-0xa0];
1221
1222       codeset->table[i].code = i;
1223       codeset->table[i].ucs4 = src;
1224       CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1225       *dest_ptr = 0;
1226       codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1227     }
1228     memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1229     qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1, const void *arg2))codesetsCmpUnicode);
1230     AddTail((struct List *)csList, (struct Node *)&codeset->node);
1231   }
1232
1233   // ISO-8859-5
1234   if(codesetsFind(csList, "ISO-8859-5") == NULL)
1235   {
1236     if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1237       goto end;
1238
1239     codeset->name             = mystrdup("ISO-8859-5");
1240     codeset->alt_name         = mystrdup("ISO8859-5");
1241     codeset->characterization = mystrdup("Slavic languages");
1242     codeset->read_only        = 0;
1243     for(i = 0; i<256; i++)
1244     {
1245       UTF32 *src_ptr = &src;
1246       UTF8  *dest_ptr = &codeset->table[i].utf8[1];
1247
1248       if(i<0xa0)
1249         src = i;
1250       else
1251         src = iso_8859_5_to_ucs4[i-0xa0];
1252
1253       codeset->table[i].code = i;
1254       codeset->table[i].ucs4 = src;
1255       CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1256       *dest_ptr = 0;
1257       codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1258     }
1259     memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1260     qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
1261     AddTail((struct List *)csList, (struct Node *)&codeset->node);
1262   }
1263
1264   // ISO-8859-9
1265   if(codesetsFind(csList, "ISO-8859-9") == NULL)
1266   {
1267     if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1268       goto end;
1269
1270     codeset->name             = mystrdup("ISO-8859-9");
1271     codeset->alt_name         = mystrdup("ISO8859-9");
1272     codeset->characterization = mystrdup("Turkish");
1273     codeset->read_only        = 0;
1274     for(i = 0; i<256; i++)
1275     {
1276       UTF32 *src_ptr = &src;
1277       UTF8  *dest_ptr = &codeset->table[i].utf8[1];
1278
1279       if(i<0xa0)
1280         src = i;
1281       else
1282         src = iso_8859_9_to_ucs4[i-0xa0];
1283
1284       codeset->table[i].code = i;
1285       codeset->table[i].ucs4 = src;
1286       CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1287       *dest_ptr = 0;
1288       codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1289     }
1290     memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1291     qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
1292     AddTail((struct List *)csList, (struct Node *)&codeset->node);
1293   }
1294
1295   // ISO-8859-15
1296   if(codesetsFind(csList, "ISO-8859-15") == NULL)
1297   {
1298     if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1299       goto end;
1300
1301     codeset->name             = mystrdup("ISO-8859-15");
1302     codeset->alt_name         = mystrdup("ISO8859-15");
1303     codeset->characterization = mystrdup("West European II");
1304     codeset->read_only        = 0;
1305     for(i = 0; i<256; i++)
1306     {
1307       UTF32 *src_ptr = &src;
1308       UTF8  *dest_ptr = &codeset->table[i].utf8[1];
1309
1310       if(i<0xa0)
1311         src = i;
1312       else
1313         src = iso_8859_15_to_ucs4[i-0xa0];
1314
1315       codeset->table[i].code = i;
1316       codeset->table[i].ucs4 = src;
1317       CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1318       *dest_ptr = 0;
1319       codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1320     }
1321     memcpy(codeset->table_sorted,codeset->table,sizeof (codeset->table));
1322     qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
1323     AddTail((struct List *)csList, (struct Node *)&codeset->node);
1324   }
1325
1326   // ISO-8859-16
1327   if(codesetsFind(csList, "ISO-8859-16") == NULL)
1328   {
1329     if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1330       goto end;
1331
1332     codeset->name             = mystrdup("ISO-8859-16");
1333     codeset->alt_name         = mystrdup("ISO8869-16");
1334     codeset->characterization = mystrdup("South-Eastern European");
1335     codeset->read_only        = 0;
1336     for(i=0;i<256;i++)
1337     {
1338       UTF32 *src_ptr = &src;
1339       UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1340
1341       if(i < 0xa0)
1342         src = i;
1343       else
1344         src = iso_8859_16_to_ucs4[i-0xa0];
1345
1346       codeset->table[i].code = i;
1347       codeset->table[i].ucs4 = src;
1348       CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr, src_ptr+1, &dest_ptr, dest_ptr+6, CSF_StrictConversion);
1349       *dest_ptr = 0;
1350       codeset->table[i].utf8[0] = (ULONG)dest_ptr - (ULONG)&codeset->table[i].utf8[1];
1351     }
1352     memcpy(codeset->table_sorted, codeset->table, sizeof(codeset->table));
1353     qsort(codeset->table_sorted, 256, sizeof(codeset->table[0]), (int (*)(const void *arg1, const void *arg2))codesetsCmpUnicode);
1354     AddTail((struct List *)csList, (struct Node *)&codeset->node);
1355   }
1356
1357   // KOI8-R
1358   if(codesetsFind(csList, "KOI8-R") == NULL)
1359   {
1360     if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1361       goto end;
1362
1363     codeset->name               = mystrdup("KOI8-R");
1364     codeset->alt_name           = mystrdup("KOI8R");
1365     codeset->characterization   = mystrdup("Russian");
1366     codeset->read_only          = 0;
1367     for(i = 0; i<256; i++)
1368     {
1369       UTF32 *src_ptr = &src;
1370       UTF8  *dest_ptr = &codeset->table[i].utf8[1];
1371
1372       if(i<0x80)
1373         src = i;
1374       else
1375         src = koi8r_to_ucs4[i-0x80];
1376
1377       codeset->table[i].code = i;
1378       codeset->table[i].ucs4 = src;
1379       CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1380       *dest_ptr = 0;
1381       codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1382     }
1383     memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1384     qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
1385     AddTail((struct List *)csList, (struct Node *)&codeset->node);
1386   }
1387
1388   // AmigaPL
1389   if(codesetsFind(csList, "AmigaPL") == NULL)
1390   {
1391     if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1392       goto end;
1393
1394     codeset->name             = mystrdup("AmigaPL");
1395     codeset->alt_name         = mystrdup("AmiPL");
1396     codeset->characterization = mystrdup("Polish (Amiga)");
1397     codeset->read_only        = 1;
1398     for(i=0; i<256; i++)
1399     {
1400       UTF32 *src_ptr = &src;
1401       UTF8  *dest_ptr = &codeset->table[i].utf8[1];
1402
1403       if(i<0xa0)
1404         src = i;
1405       else
1406         src = amigapl_to_ucs4[i-0xa0];
1407
1408       codeset->table[i].code = i;
1409       codeset->table[i].ucs4 = src;
1410       CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1411       *dest_ptr = 0;
1412       codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1413     }
1414     memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1415     qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
1416     AddTail((struct List *)csList, (struct Node *)&codeset->node);
1417   }
1418
1419   // Amiga-1251
1420   if(codesetsFind(csList, "Amiga-1251") == NULL)
1421   {
1422     if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1423       goto end;
1424
1425     codeset->name             = mystrdup("Amiga-1251");
1426     codeset->alt_name         = mystrdup("Ami1251");
1427     codeset->characterization = mystrdup("Cyrillic (Amiga)");
1428     codeset->read_only        = 1;
1429     for(i=0; i<256; i++)
1430     {
1431       UTF32 *src_ptr = &src;
1432       UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1433
1434       if(i < 0xa0)
1435         src = i;
1436       else
1437         src = amiga1251_to_ucs4[i-0xa0];
1438
1439       codeset->table[i].code = i;
1440       codeset->table[i].ucs4 = src;
1441       CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr, src_ptr+1, &dest_ptr, dest_ptr+6, CSF_StrictConversion);
1442       *dest_ptr = 0;
1443       codeset->table[i].utf8[0] = (char*)dest_ptr - (char*)&codeset->table[i].utf8[1];
1444     }
1445     memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1446     qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1, const void *arg2))codesetsCmpUnicode);
1447     AddTail((struct List *)csList, (struct Node *)&codeset->node);
1448   }
1449
1450 end:
1451   ReleaseSemaphore(&CodesetsBase->poolSem);
1452
1453   RETURN(codeset != 0);
1454   return codeset != NULL;
1455 }
1456
1457 ///
1458 /// codesetsCleanup()
1459 // Cleanup the memory for the codeset
1460 void
1461 codesetsCleanup(struct codesetList *csList)
1462 {
1463   struct codeset *code;
1464
1465   ENTER();
1466
1467   while((code = (struct codeset *)RemHead((struct List *)csList)))
1468   {
1469     if(code->name) freeArbitrateVecPooled(code->name);
1470     if(code->alt_name) freeArbitrateVecPooled(code->alt_name);
1471     if(code->characterization) freeArbitrateVecPooled(code->characterization);
1472
1473     freeArbitrateVecPooled(code);
1474   }
1475
1476   LEAVE();
1477 }
1478
1479 ///
1480 /// codesetsFind()
1481 // Returns the given codeset.
1482 struct codeset *
1483 codesetsFind(struct codesetList *csList, const char *name)
1484 {
1485   struct codeset *res = NULL;
1486
1487   ENTER();
1488
1489   if(name && *name)
1490   {
1491     struct codeset *mstate, *succ;
1492     char *matchedName = matchCodesetAlias(name);
1493
1494     if(matchedName != NULL)
1495       name = matchedName;
1496
1497     for(mstate = (struct codeset *)csList->list.mlh_Head; (succ = (struct codeset *)mstate->node.mln_Succ); mstate = succ)
1498     {
1499       if(stricmp(name, mstate->name) == 0 ||
1500         (mstate->alt_name != NULL && stricmp(name, mstate->alt_name) == 0))
1501       {
1502         // break out
1503         break;
1504       }
1505     }
1506
1507     if(succ)
1508       res = mstate;
1509   }
1510
1511   RETURN(res);
1512   return res;
1513 }
1514 ///
1515 /// codesetsFindBest()
1516 // Returns the best codeset for the given text
1517 static struct codeset *
1518 codesetsFindBest(struct TagItem *attrs, ULONG csFamily, STRPTR text, int text_len, int *error_ptr)
1519 {
1520   struct codeset *best_codeset = NULL;
1521   int best_errors = text_len;
1522   BOOL found = FALSE;
1523
1524   ENTER();
1525
1526   // in case the user specified the codeset family as a
1527   // cyrillic one we go and do our cyrillic specific analysis first
1528   if(csFamily == CSV_CodesetFamily_Cyrillic)
1529   {
1530     #define NUM_CYRILLIC 3
1531
1532     struct CodesetSearch
1533     {
1534       const char *name;
1535       const char *data;
1536     };
1537
1538     struct CodesetSearch search[NUM_CYRILLIC];
1539     unsigned char *p;
1540     unsigned char *tp;
1541     int ctr[NUM_CYRILLIC];
1542     int Nmax;
1543     int NGlob = 1;
1544     int max;
1545     int gr = 0;
1546     int lr = 0;
1547
1548     search[0].name = "windows-1251";
1549     search[0].data = cp1251_data;
1550     search[1].name = "IBM866";
1551     search[1].data = cp866_data;
1552     search[2].name = "KOI8-R";
1553     search[2].data = koi8r_data;
1554
1555     memset(&ctr, 0, sizeof(ctr));
1556
1557     tp = (unsigned char *)text;
1558
1559     do
1560     {
1561       int n;
1562       int mid = max = -466725766; // TODO: what's the magic behind this constant?
1563       Nmax = 0;
1564
1565       for(n=0; n < NUM_CYRILLIC; n++)
1566       {
1567         unsigned char la = 0;
1568         unsigned char *tptr = (unsigned char *)search[n].data;
1569
1570         p = tp;
1571
1572         do
1573         {
1574           unsigned char lb = (*p++) ^ 128;
1575
1576           if(!((la | lb) & 128))
1577             ctr[n] += (signed char)tptr[(la << 7) + lb];
1578
1579           la = lb;
1580         }
1581         while(*p);
1582
1583         if(max < ctr[n])
1584         {
1585           mid = max;
1586           max = ctr[n];
1587           Nmax = n+1;
1588         }
1589       }
1590
1591       tp = p;
1592       if((max >= 500) && ((max-mid) >= 1000))
1593       {
1594         lr = gr = 1;
1595         NGlob = Nmax;
1596       }
1597     }
1598     while((*p) && (!gr));
1599
1600     if(gr || ((!(*p)) && lr))
1601       Nmax = NGlob;
1602
1603     // if our analysis found something, we go and try
1604     // to find the corresponding codeset in out codeset list
1605     if(max != 0)
1606     {
1607       struct TagItem *tstate = attrs;
1608       struct TagItem *tag;
1609
1610       D(DBF_STARTUP, "identified text as '%s", search[Nmax-1].name);
1611
1612       // now we walk through our taglist and check if the user
1613       // supplied
1614       while((tag = NextTagItem(&tstate)))
1615       {
1616         if(tag->ti_Tag == CSA_CodesetList && tag->ti_Data != 0)
1617         {
1618           struct codesetList *csList = (struct codesetList *)tag->ti_Data;
1619
1620           if((best_codeset = codesetsFind(csList, search[Nmax-1].name)) != NULL)
1621             break;
1622         }
1623       }
1624
1625       // if we still haven't found the matching codeset
1626       // we search the internal list
1627       if(best_codeset == NULL)
1628         best_codeset = codesetsFind(&CodesetsBase->codesets, search[Nmax-1].name);
1629
1630       best_errors = 0;
1631
1632       found = TRUE;
1633     }
1634   }
1635
1636   // if we haven't found the best codeset (through the cyrillic analysis
1637   // we go and do the dumb latin search in our codesetlist
1638   if(found == FALSE)
1639   {
1640     struct TagItem *tstate = attrs;
1641     struct TagItem *tag;
1642     BOOL lastIteration = FALSE;
1643
1644     while((tag = NextTagItem(&tstate)) || (lastIteration = TRUE))
1645     {
1646       if(lastIteration == TRUE || (tag->ti_Tag == CSA_CodesetList && tag->ti_Data != 0))
1647       {
1648         struct codesetList *csList = (lastIteration ? &CodesetsBase->codesets : (struct codesetList *)tag->ti_Data);
1649         struct codeset *codeset = (struct codeset *)csList->list.mlh_Head;
1650
1651         // the following identification/detection routines is NOT really smart.
1652         // we just see how each UTF8 string is the representation of each char
1653         // in our source text and then check if they are valid or not. As said,
1654         // not very smart, but we don't have anything better right now :(
1655
1656         while(codeset)
1657         {
1658           if(!codeset->read_only && codeset != CodesetsBase->utf8Codeset)
1659           {
1660             char *text_ptr = text;
1661             int i;
1662             int errors = 0;
1663
1664             for(i=0; i < text_len; i++)
1665             {
1666               unsigned char c = *text_ptr++;
1667
1668               if(c)
1669               {
1670                 struct single_convert *f = &codeset->table[c];
1671
1672                 if(f->utf8[0] == 0 || f->utf8[1] == 0x00)
1673                   errors++;
1674               }
1675               else
1676                 break;
1677             }
1678
1679             D(DBF_STARTUP, "tried to identify text as '%s' text with %ld of %ld errors", codeset->name, errors, text_len);
1680
1681             if(errors < best_errors)
1682             {
1683               best_codeset = codeset;
1684               best_errors = errors;
1685             }
1686
1687             if(best_errors == 0)
1688               break;
1689           }
1690
1691           codeset = (struct codeset *)codeset->node.mln_Succ;
1692         }
1693
1694         if(lastIteration)
1695           break;
1696       }
1697     }
1698   }
1699
1700   if(error_ptr)
1701     *error_ptr = best_errors;
1702
1703   RETURN(best_codeset);
1704   return best_codeset;
1705 }
1706 ///
1707
1708 /**************************************************************************/
1709
1710 /// CodesetsSupportedA()
1711 STRPTR *LIBFUNC
1712 CodesetsSupportedA(REG(a0, UNUSED struct TagItem * attrs))
1713 {
1714   STRPTR *array = NULL;
1715   struct TagItem *tstate = attrs;
1716   struct TagItem *tag;
1717   int numCodesets;
1718
1719   ENTER();
1720
1721   // first we need to check how many codesets our supplied
1722   // lists carry.
1723   numCodesets = countCodesets(&CodesetsBase->codesets);
1724   while((tag = NextTagItem(&tstate)))
1725   {
1726     if(tag->ti_Tag == CSA_CodesetList && tag->ti_Data != 0)
1727       numCodesets += countCodesets((struct codesetList *)tag->ti_Data);
1728   }
1729
1730   // now that we know how many codesets we have in our lists we
1731   // can put their names into our string arrays
1732   if(numCodesets > 0)
1733   {
1734     if((array = allocArbitrateVecPooled((numCodesets+1)*sizeof(STRPTR))))
1735     {
1736       struct codeset *code;
1737       struct codeset *succ;
1738       int i=0;
1739
1740       // reset the tstate
1741       tstate = attrs;
1742
1743       ObtainSemaphoreShared(&CodesetsBase->libSem);
1744
1745       // first we walk through the internal codesets list and
1746       // add the names
1747       for(code = (struct codeset *)CodesetsBase->codesets.list.mlh_Head; (succ = (struct codeset *)code->node.mln_Succ); code = succ, i++)
1748         array[i] = code->name;
1749
1750       // then we also iterate through our private codesets list
1751       while((tag = NextTagItem(&tstate)))
1752       {
1753         if(tag->ti_Tag == CSA_CodesetList && tag->ti_Data != 0)
1754         {
1755           struct codesetList *csList = (struct codesetList *)tag->ti_Data;
1756
1757           for(code = (struct codeset *)csList->list.mlh_Head; (succ = (struct codeset *)code->node.mln_Succ); code = succ, i++)
1758             array[i] = code->name;
1759         }
1760       }
1761
1762       array[i] = NULL;
1763
1764       ReleaseSemaphore(&CodesetsBase->libSem);
1765     }
1766   }
1767
1768   RETURN(array);
1769   return array;
1770 }
1771
1772 ///
1773 /// CodesetsFreeA()
1774 void LIBFUNC
1775 CodesetsFreeA(REG(a0, APTR obj),
1776               REG(a1, UNUSED struct TagItem *attrs))
1777 {
1778   ENTER();
1779
1780   if(obj)
1781     freeArbitrateVecPooled(obj);
1782
1783   LEAVE();
1784 }
1785
1786 ///
1787 /// CodesetsSetDefaultA()
1788 struct codeset *LIBFUNC
1789 CodesetsSetDefaultA(REG(a0, STRPTR name),
1790                     REG(a1, struct TagItem *attrs))
1791 {
1792   struct codeset *codeset;
1793
1794   ENTER();
1795
1796   ObtainSemaphoreShared(&CodesetsBase->libSem);
1797
1798   if((codeset = codesetsFind(&CodesetsBase->codesets,name)))
1799   {
1800     ULONG flags;
1801
1802     flags = GVF_SAVE_VAR | (GetTagData(CSA_Save,FALSE,attrs) ? GVF_GLOBAL_ONLY : 0);
1803
1804     SetVar("codeset_default",codeset->name,strlen(codeset->name),flags);
1805   }
1806
1807   ReleaseSemaphore(&CodesetsBase->libSem);
1808
1809   RETURN(codeset);
1810   return codeset;
1811 }
1812
1813 ///
1814 /// CodesetsFindA()
1815 struct codeset *LIBFUNC
1816 CodesetsFindA(REG(a0, STRPTR name), REG(a1, struct TagItem *attrs))
1817 {
1818   struct codeset *codeset = NULL;
1819
1820   ENTER();
1821
1822   ObtainSemaphoreShared(&CodesetsBase->libSem);
1823
1824   // if no name pointer was supplied we have to return
1825   // the default codeset only.
1826   if(name != NULL)
1827   {
1828     // we first walk through our internal list and check if we
1829     // can find the requested codeset
1830     codeset = codesetsFind(&CodesetsBase->codesets, name);
1831
1832     if(codeset == NULL && attrs != NULL)
1833     {
1834       struct TagItem *tstate = attrs;
1835       struct TagItem *tag;
1836
1837       // now we walk through our taglist and check if the user
1838       // supplied
1839       while((tag = NextTagItem(&tstate)))
1840       {
1841         if(tag->ti_Tag == CSA_CodesetList && tag->ti_Data != 0)
1842         {
1843           struct codesetList *csList = (struct codesetList *)tag->ti_Data;
1844
1845           if((codeset = codesetsFind(csList, name)) != NULL)
1846             break;
1847         }
1848       }
1849     }
1850   }
1851
1852   // check if we found something or not.
1853   if(codeset == NULL && (attrs == NULL || GetTagData(CSA_FallbackToDefault, TRUE, attrs)))
1854     codeset = defaultCodeset(FALSE);
1855
1856   ReleaseSemaphore(&CodesetsBase->libSem);
1857
1858   RETURN(codeset);
1859   return codeset;
1860 }
1861
1862 ///
1863 /// CodesetsFindBestA()
1864 struct codeset *LIBFUNC
1865 CodesetsFindBestA(REG(a0, struct TagItem *attrs))
1866 {
1867   struct codeset *codeset = NULL;
1868
1869   ENTER();
1870
1871   ObtainSemaphoreShared(&CodesetsBase->libSem);
1872
1873   if(attrs)
1874   {
1875     char *text = (char *)GetTagData(CSA_Source, 0, attrs);
1876     ULONG text_len = GetTagData(CSA_SourceLen, text != NULL ? strlen(text) : 0, attrs);
1877
1878     if(text != NULL && text_len > 0)
1879     {
1880       int numErrors = 0;
1881       ULONG csFamily = GetTagData(CSA_CodesetFamily, CSV_CodesetFamily_Latin, attrs);
1882       int *error_ptr = (int *)GetTagData(CSA_ErrPtr, 0, attrs);
1883       BOOL defaultFallBack = GetTagData(CSA_FallbackToDefault, FALSE, attrs);
1884
1885       codeset = codesetsFindBest(attrs, csFamily, text, text_len, &numErrors);
1886
1887       if(error_ptr != NULL)
1888         *error_ptr = numErrors;
1889
1890       // if we still haven't got the codeset we fallback to the default
1891       if(codeset == NULL && defaultFallBack == TRUE)
1892         codeset = defaultCodeset(FALSE);
1893     }
1894   }
1895
1896   ReleaseSemaphore(&CodesetsBase->libSem);
1897
1898   RETURN(codeset);
1899   return codeset;
1900 }
1901
1902 ///
1903 /// CodesetsUTF8Len()
1904 // Returns the number of characters a utf8 string has. This is not
1905 // identically with the size of memory is required to hold the string.
1906 ULONG LIBFUNC
1907 CodesetsUTF8Len(REG(a0, UTF8 *str))
1908 {
1909   int           len;
1910   unsigned char c;
1911
1912   ENTER();
1913
1914   if(!str)
1915     return 0;
1916
1917   len = 0;
1918
1919   while((c = *str++))
1920   {
1921     len++;
1922     str += trailingBytesForUTF8[c];
1923   }
1924
1925   RETURN((ULONG)len);
1926   return (ULONG)len;
1927 }
1928
1929 ///
1930 /// CodesetsStrLenA()
1931 ULONG LIBFUNC
1932 CodesetsStrLenA(REG(a0, STRPTR str),
1933                 REG(a1, struct TagItem *attrs))
1934 {
1935   ULONG res = 0;
1936
1937   ENTER();
1938
1939   if(str != NULL)
1940   {
1941     struct codeset *codeset;
1942     int            len;
1943     STRPTR         src;
1944     int            utf;
1945
1946     if((codeset = (struct codeset *)GetTagData(CSA_SourceCodeset, 0, attrs)) == NULL)
1947       codeset = defaultCodeset(TRUE);
1948     if(codeset == CodesetsBase->utf32Codeset)
1949     {
1950       utf = 32;
1951       len = utf32_strlen((UTF32 *)str);
1952     }
1953     else if(codeset == CodesetsBase->utf16Codeset)
1954     {
1955       utf = 16;
1956       len = utf16_strlen((UTF16 *)str);
1957     }
1958     else
1959     {
1960       utf = 0;
1961       len = strlen(str);
1962     }
1963
1964     len = GetTagData(CSA_SourceLen, len, attrs);
1965
1966     src = str;
1967
1968     if(utf != 0)
1969     {
1970       void *srcend = src + len;
1971       UTF8 *dstlen = NULL;
1972
1973       switch(utf)
1974       {
1975         case 32:
1976           CodesetsConvertUTF32toUTF8((const UTF32 **)&src, srcend, &dstlen, NULL, 0);
1977           break;
1978         case 16:
1979           CodesetsConvertUTF16toUTF8((const UTF16 **)&src, srcend, &dstlen, NULL, 0);
1980           break;
1981       }
1982       res       = (ULONG)dstlen;
1983     }
1984     else
1985     {
1986       UBYTE c;
1987
1988       res = 0;
1989
1990       while((c = *src++) != '\0' && len != 0)
1991       {
1992         res += codeset->table[c].utf8[0];
1993         len--;
1994       }
1995     }
1996   }
1997
1998   RETURN(res);
1999   return res;
2000 }
2001
2002 ///
2003 /// CodesetsUTF8ToStrA()
2004 // Converts an UTF8 string to a given charset. Return the number of bytes
2005 // written to dest excluding the NULL byte (which is always ensured by this
2006 // function; it means a NULL str will produce "" as dest; anyway you should
2007 // check NULL str to not waste your time!).
2008 STRPTR LIBFUNC
2009 CodesetsUTF8ToStrA(REG(a0, struct TagItem *attrs))
2010 {
2011   UTF8 *src;
2012   ULONG srcLen;
2013   ULONG *destLenPtr;
2014   ULONG n = 0;
2015   STRPTR dest = NULL;
2016
2017   ENTER();
2018
2019   if((src = (UTF8 *)GetTagData(CSA_Source, (ULONG)NULL, attrs)) != NULL &&
2020      (srcLen = GetTagData(CSA_SourceLen, src != NULL ? strlen((char *)src) : 0, attrs)) > 0)
2021   {
2022     struct convertMsg msg;
2023     struct codeset *codeset;
2024     struct Hook *destHook;
2025     struct Hook *mapForeignCharsHook;
2026     char buf[256];
2027     STRPTR destIter = NULL;
2028     char *b = NULL;
2029     ULONG destLen = 0;
2030     int i = 0;
2031     unsigned char *s = src;
2032     unsigned char *e = (src+srcLen);
2033     int numConvErrors = 0;
2034     int *numConvErrorsPtr;
2035     BOOL mapForeignChars;
2036     APTR pool = NULL;
2037     struct SignalSemaphore *sem = NULL;
2038     int utf;
2039     ULONG char_size;
2040
2041     // get some more optional attributes
2042     destHook = (struct Hook *)GetTagData(CSA_DestHook, (ULONG)NULL, attrs);
2043     destLen = GetTagData(CSA_DestLen, 0, attrs);
2044     numConvErrorsPtr = (int *)GetTagData(CSA_ErrPtr, (ULONG)NULL, attrs);
2045     mapForeignChars = (BOOL)GetTagData(CSA_MapForeignChars, FALSE, attrs);
2046     mapForeignCharsHook = (struct Hook *)GetTagData(CSA_MapForeignCharsHook, (ULONG)NULL, attrs);
2047
2048     // get the destination codeset pointer
2049     if((codeset = (struct codeset *)GetTagData(CSA_DestCodeset, (ULONG)NULL, attrs)) == NULL)
2050       codeset = defaultCodeset(TRUE);
2051     if(codeset == CodesetsBase->utf32Codeset)
2052     {
2053       utf = 32;
2054       char_size = 4;
2055     }
2056     else if(codeset == CodesetsBase->utf16Codeset)
2057     {
2058       utf = 16;
2059       char_size = 2;
2060     }
2061     else
2062     {
2063       utf = 0;
2064       char_size = 1;
2065     }
2066
2067     // first we make sure we allocate enough memory
2068     // for our destination buffer
2069     if(destHook != NULL)
2070     {
2071       if(destLen < 16 || destLen > sizeof(buf))
2072         destLen = sizeof(buf);
2073
2074       msg.state = CSV_Translating;
2075       b = buf;
2076       i = 0;
2077     }
2078     else
2079     {
2080       // in case the user wants us to dynamically generate the
2081       // destination buffer we do it right now
2082       if((dest = (STRPTR)GetTagData(CSA_Dest, (ULONG)NULL, attrs)) == NULL ||
2083          GetTagData(CSA_AllocIfNeeded, TRUE, attrs) != FALSE)
2084       {
2085         ULONG len = 0;
2086
2087         // calculate the destLen
2088         if(utf)
2089         {
2090           void *dstlen = NULL;
2091
2092           switch(utf)
2093           {
2094             case 32:
2095               CodesetsConvertUTF8toUTF32((const UTF8 **)&s, e, (UTF32 **)&dstlen, NULL, 0);
2096               break;
2097             case 16:
2098               CodesetsConvertUTF8toUTF16((const UTF8 **)&s, e, (UTF16 **)&dstlen, NULL, 0);
2099               break;
2100           }
2101           len = (ULONG)dstlen;
2102         }
2103         else
2104         {
2105           while(s < e)
2106           {
2107             unsigned char c = *s++;
2108
2109             len++;
2110             s += trailingBytesForUTF8[c];
2111           }
2112         }
2113
2114         if(dest == NULL || (destLen < len+1))
2115         {
2116           if((pool = (APTR)GetTagData(CSA_Pool, (ULONG)NULL, attrs)) != NULL)
2117           {
2118             if((sem = (struct SignalSemaphore *)GetTagData(CSA_PoolSem, (ULONG)NULL, attrs)) != NULL)
2119               ObtainSemaphore(sem);
2120
2121             // allocate the destination buffer
2122             dest = allocVecPooled(pool, len+char_size);
2123
2124             if(sem != NULL)
2125               ReleaseSemaphore(sem);
2126           }
2127           else
2128             dest = allocArbitrateVecPooled(len+char_size);
2129
2130           destLen = len+char_size;
2131         }
2132
2133         if(dest == NULL)
2134         {
2135           RETURN(NULL);
2136           return NULL;
2137         }
2138       }
2139
2140       destIter = dest;
2141     }
2142
2143     // now we convert the src string to the
2144     // destination buffer.
2145     s = src;
2146     if (utf)
2147     {
2148       void *dstend;
2149
2150       if(destHook != NULL)
2151       {
2152         ULONG r;
2153
2154         dstend = b + destLen - char_size;
2155         do
2156         {
2157           switch(utf)
2158           {
2159             case 32:
2160               r = CodesetsConvertUTF8toUTF32((const UTF8 **)&s, e, (UTF32 **)&b, dstend, 0);
2161               break;
2162             case 16:
2163               r = CodesetsConvertUTF8toUTF16((const UTF8 **)&s, e, (UTF16 **)&b, dstend, 0);
2164               break;
2165           }
2166           b[0] = 0;
2167           if(char_size > 1)
2168             b[1] = 0;
2169           if(r != CSR_TargetExhausted)
2170             msg.state = CSV_End;
2171           msg.len = b-buf;
2172           CallHookPkt(destHook,&msg,buf);
2173
2174           b  = buf;
2175           n += msg.len;
2176         }
2177         while(r == CSR_TargetExhausted);
2178       }
2179       else
2180       {
2181         dstend = destIter + destLen - char_size;
2182         switch(utf)
2183         {
2184           case 32:
2185             CodesetsConvertUTF8toUTF32((const UTF8 **)&s, e, (UTF32 **)&destIter, dstend, 0);
2186             break;
2187           case 16:
2188             CodesetsConvertUTF8toUTF16((const UTF8 **)&s, e, (UTF16 **)&destIter, dstend, 0);
2189             break;
2190         }
2191         n = destIter-dest;
2192       }
2193     }
2194     else
2195     {
2196       for(;;n++)
2197       {
2198         if(destHook == NULL && n >= destLen-1)
2199           break;
2200
2201         // convert until we reach the end of the
2202         // source buffer.
2203         if(s < e)
2204         {
2205           unsigned char c = *s;
2206           unsigned char d = '?';
2207           const char *repstr = NULL;
2208           int replen = 0;
2209
2210           // check if the char is a >7bit char
2211           if(c > 127)
2212           {
2213             struct single_convert *f;
2214             int lenAdd = trailingBytesForUTF8[c];
2215             int lenStr = lenAdd+1;
2216             unsigned char *src = s;
2217
2218             do
2219             {
2220               // start each iteration with "no replacement found yet"
2221               repstr = NULL;
2222               replen = 0;
2223
2224               // search in the UTF8 conversion table of the current charset if
2225               // we have a replacement character for the char sequence starting at s
2226               BIN_SEARCH(codeset->table_sorted, 0, 255, strncmp((char *)src, (char *)codeset->table_sorted[m].utf8+1, lenStr), f);
2227
2228               if(f != NULL)
2229               {
2230                 d = f->code;
2231                 replen = -1;
2232
2233                 break;
2234               }
2235               else
2236               {
2237                 // the analysed char sequence (s) is not convertable to a
2238                 // single visible char replacement, so we normally have to put
2239                 // a ? sign as a "unknown char" sign at the very position.
2240                 //
2241                 // For convienence we, however, allow users to replace these
2242                 // UTF8 characters with char sequences that "looklike" the
2243                 // original char.
2244                 if(mapForeignChars == TRUE)
2245                   replen = mapUTF8toASCII(&repstr, src, lenStr);
2246
2247                 // call the hook only, if the internal table yielded no suitable
2248                 // replacement
2249                 if(replen == 0 && mapForeignCharsHook != NULL)
2250                 {
2251                   struct replaceMsg rmsg;
2252
2253                   rmsg.dst = (char **)&repstr;
2254                   rmsg.src = src;
2255                   rmsg.srclen = lenStr;
2256                   replen = CallHookPkt(mapForeignCharsHook, &rmsg, NULL);
2257                 }
2258
2259                 if(replen < 0)
2260                 {
2261                   D(DBF_UTF, "got UTF8 replacement (%ld)", replen);
2262
2263                   // stay in the loop as long as one replacement function delivers
2264                   // further UTF8 replacement sequences
2265                   src = (unsigned char *)repstr;
2266                 }
2267                 else if(replen == 0)
2268                 {
2269                   D(DBF_UTF, "found no ASCII replacement for UTF8 string (%ld)", replen);
2270                   repstr = NULL;
2271                 }
2272                 else
2273                   D(DBF_UTF, "got replacement string '%s' (%ld)", repstr ? repstr : "<null>", replen);
2274               }
2275             }
2276             while(replen < 0);
2277
2278             if(repstr == NULL || replen == 0)
2279             {
2280               if(replen >= 0)
2281               {
2282                 d = '?';
2283                 numConvErrors++;
2284               }
2285             }
2286
2287             s += lenAdd;
2288           }
2289           else
2290             d = c;
2291
2292           if(destHook != NULL)
2293           {
2294             if(replen > 1)
2295             {
2296               while(replen > 0)
2297               {
2298                 *b++ = *repstr;
2299                 repstr++;
2300                 i++;
2301                 replen--;
2302
2303                 if(i%(destLen-1)==0)
2304                 {
2305                   *b = '\0';
2306                   msg.len = i;
2307                   CallHookPkt(destHook, &msg, buf);
2308
2309                   b  = buf;
2310                   *b = '\0';
2311                   i  = 0;
2312                 }
2313               }
2314             }
2315             else
2316             {
2317               *b++ = replen > 0 ? *repstr : d;
2318               i++;
2319             }
2320
2321             if(i%(destLen-1)==0)
2322             {
2323               *b = '\0';
2324               msg.len = i;
2325               CallHookPkt(destHook, &msg, buf);
2326
2327               b  = buf;
2328               *b = '\0';
2329               i  = 0;
2330             }
2331           }
2332           else
2333           {
2334             if(replen > 1)
2335             {
2336               ULONG destPos = destIter-dest;
2337
2338               if(pool != NULL)
2339               {
2340                 if(sem != NULL)
2341                   ObtainSemaphore(sem);
2342
2343                 // allocate the destination buffer
2344                 dest = reallocVecPooled(pool, dest, destLen, destLen+replen-1);
2345
2346                 if(sem != NULL)
2347                   ReleaseSemaphore(sem);
2348               }
2349               else
2350                 dest = reallocArbitrateVecPooled(dest, destLen, destLen+replen-1);
2351
2352               if(dest == NULL)
2353               {
2354                 RETURN(NULL);
2355                 return NULL;
2356               }
2357
2358               destIter = dest+destPos;
2359               memcpy(destIter, repstr, replen);
2360
2361               // adjust our loop pointer and destination length
2362               destIter += replen;
2363               destLen += replen-1;
2364             }
2365             else if(replen == 1)
2366               *destIter++ = *repstr;
2367             else
2368               *destIter++ = d;
2369           }
2370
2371           s++;
2372         }
2373         else
2374           break;
2375       }
2376
2377       if(destHook != NULL)
2378       {
2379         msg.state = CSV_End;
2380         msg.len   = i;
2381         *b        = '\0';
2382         CallHookPkt(destHook,&msg,buf);
2383       }
2384       else
2385         *destIter = '\0';
2386     }
2387
2388     // let us write the number of conversion errors
2389     // to the proper variable pointer, if wanted
2390     if(numConvErrorsPtr != NULL)
2391       *numConvErrorsPtr = numConvErrors;
2392   }
2393
2394   // put the final length of our destination buffer
2395   // into the destLenPtr
2396   if((destLenPtr = (ULONG *)GetTagData(CSA_DestLenPtr, (ULONG)NULL, attrs)) != NULL)
2397     *destLenPtr = n;
2398
2399   RETURN(dest);
2400   return dest;
2401 }
2402
2403 ///
2404 /// CodesetsUTF8CreateA()
2405 // Converts a string and a charset to an UTF8. Returns the UTF8.
2406 // If a destination hook is supplied always return 0.
2407 // If from is NULL, it returns NULL and doesn't call the hook.
2408 UTF8 *LIBFUNC
2409 CodesetsUTF8CreateA(REG(a0, struct TagItem *attrs))
2410 {
2411   UTF8   *from;
2412   UTF8   *dest;
2413   struct codeset *codeset;
2414   ULONG  fromLen, *destLenPtr;
2415   ULONG  n;
2416   int    utf;
2417
2418   ENTER();
2419
2420   dest = NULL;
2421   n    = 0;
2422
2423   if((codeset = (struct codeset *)GetTagData(CSA_SourceCodeset, 0, attrs)) == NULL)
2424     codeset = defaultCodeset(TRUE);
2425   if(codeset == CodesetsBase->utf32Codeset)
2426     utf = 32;
2427   else if(codeset == CodesetsBase->utf16Codeset)
2428     utf = 16;
2429   else
2430     utf = 0;
2431
2432   from = (UTF8*)GetTagData(CSA_Source, 0, attrs);
2433   if(from)
2434   {
2435     switch(utf)
2436     {
2437       case 32:
2438         fromLen = utf32_strlen((UTF32 *)from);
2439         break;
2440
2441       case 16:
2442         fromLen = utf16_strlen((UTF16 *)from);
2443         break;
2444
2445       default:
2446         fromLen = strlen((char *)from);
2447         break;
2448     }
2449   }
2450   else
2451     fromLen = 0;
2452   fromLen = GetTagData(CSA_SourceLen, fromLen, attrs);
2453
2454   if(from != NULL && fromLen != 0)
2455   {
2456     struct convertMsg       msg;
2457     struct Hook    *hook;
2458     ULONG          destLen;
2459     int            i = 0;
2460     UBYTE          buf[256];
2461     UBYTE          *src, *destPtr = NULL, *b = NULL, c;
2462
2463     hook    = (struct Hook *)GetTagData(CSA_DestHook, 0, attrs);
2464     destLen = GetTagData(CSA_DestLen,0,attrs);
2465
2466     if(hook != NULL)
2467     {
2468       if(destLen<16 || destLen>sizeof(buf))
2469         destLen = sizeof(buf);
2470
2471       msg.state = CSV_Translating;
2472       b = buf;
2473       i = 0;
2474     }
2475     else
2476     {
2477       if((dest = (UTF8*)GetTagData(CSA_Dest, 0, attrs)) != NULL ||
2478         GetTagData(CSA_AllocIfNeeded,TRUE,attrs))
2479       {
2480         ULONG len;
2481
2482         src  = from;
2483
2484         if(utf != 0)
2485         {
2486           void *srcend = src + fromLen;
2487           UTF8 *dstlen = NULL;
2488
2489           switch(utf)
2490           {
2491             case 32:
2492               CodesetsConvertUTF32toUTF8((const UTF32 **)&src, srcend, &dstlen, NULL, 0);
2493               break;
2494             case 16:
2495               CodesetsConvertUTF16toUTF8((const UTF16 **)&src, srcend, &dstlen, NULL, 0);
2496               break;
2497           }
2498           len = (ULONG)dstlen;
2499         }
2500         else
2501         {
2502           ULONG flen = fromLen;
2503
2504           len = 0;
2505           while((c = *src++) != '\0' && flen != 0)
2506           {
2507             len += codeset->table[c].utf8[0];
2508             flen--;
2509           }
2510         }
2511         D(DBF_UTF, "Calculated output UTF-8 buffer length: %lu\n", len);
2512
2513         if(dest == NULL || (destLen<len+1))
2514         {
2515           APTR                   pool;
2516           struct SignalSemaphore *sem;
2517
2518           if((pool = (APTR)GetTagData(CSA_Pool, 0, attrs)) != NULL)
2519           {
2520             if((sem = (struct SignalSemaphore *)GetTagData(CSA_PoolSem, 0, attrs)) != NULL)
2521               ObtainSemaphore(sem);
2522
2523             // allocate the destination buffer
2524             dest = allocVecPooled(pool,len+1);
2525
2526             if(sem != NULL)
2527               ReleaseSemaphore(sem);
2528           }
2529           else
2530             dest = allocArbitrateVecPooled(len+1);
2531
2532           destLen  = len;
2533         }
2534
2535         if(dest == NULL)
2536         {
2537           RETURN(NULL);
2538           return NULL;
2539         }
2540       }
2541
2542       destPtr = (UBYTE*)dest;
2543     }
2544
2545     src = from;
2546     if(utf)
2547     {
2548       void *srcend = src + fromLen;
2549       UTF8 *dstend;
2550
2551       if(hook != NULL)
2552       {
2553         ULONG r;
2554
2555         dstend = b + destLen - 1;
2556         do
2557         {
2558           switch(utf)
2559           {
2560             case 32:
2561               r = CodesetsConvertUTF32toUTF8((const UTF32 **)&src, srcend, &b, dstend, 0);
2562               break;
2563             case 16:
2564               r = CodesetsConvertUTF16toUTF8((const UTF16 **)&src, srcend, &b, dstend, 0);
2565               break;
2566           }
2567           *b = 0;
2568           if(r != CSR_TargetExhausted)
2569             msg.state = CSV_End;
2570           msg.len = b-buf;
2571           CallHookPkt(hook,&msg,buf);
2572
2573           b  = buf;
2574           n += msg.len;
2575         }
2576         while(r == CSR_TargetExhausted);
2577       }
2578       else
2579       {
2580         dstend = destPtr + destLen;
2581         switch(utf)
2582         {
2583           case 32:
2584             CodesetsConvertUTF32toUTF8((const UTF32 **)&src, srcend, &destPtr, dstend, 0);
2585             break;
2586           case 16:
2587             CodesetsConvertUTF16toUTF8((const UTF16 **)&src, srcend, &destPtr, dstend, 0);
2588             break;
2589         }
2590         n = destPtr-dest;
2591       }
2592     }
2593     else
2594     {
2595       for(; fromLen && (c = *src); src++, fromLen--)
2596       {
2597         UTF8* utf8_seq;
2598
2599         for(utf8_seq = &codeset->table[c].utf8[1]; (c = *utf8_seq); utf8_seq++)
2600         {
2601           if(hook != NULL)
2602           {
2603             *b++ = c;
2604             i++;
2605
2606             if(i%(destLen-1)==0)
2607             {
2608               *b = 0;
2609               msg.len = i;
2610               CallHookPkt(hook,&msg,buf);
2611
2612               b  = buf;
2613               *b = 0;
2614               i  = 0;
2615             }
2616           }
2617           else
2618           {
2619             if(n>=destLen)
2620               break;
2621
2622             *destPtr++ = c;
2623           }
2624
2625           n++;
2626         }
2627       }
2628
2629       if(hook != NULL)
2630       {
2631         msg.state = CSV_End;
2632         msg.len   = i;
2633         *b = 0;
2634         CallHookPkt(hook,&msg,buf);
2635       }
2636       else
2637       {
2638         *destPtr = 0;
2639       }
2640     }
2641   }
2642
2643   if((destLenPtr = (ULONG *)GetTagData(CSA_DestLenPtr, 0, attrs)))
2644     *destLenPtr = n;
2645
2646   RETURN(dest);
2647   return dest;
2648 }
2649
2650 ///
2651 /// CodesetsIsValidUTF8()
2652 #define GOOD_UCS(c) \
2653      ((c) >= 160 && ((c) & ~0x3ff) != 0xd800 && \
2654       (c) != 0xfeff && (c) != 0xfffe && (c) != 0xffff)
2655
2656 BOOL LIBFUNC
2657 CodesetsIsValidUTF8(REG(a0, STRPTR s))
2658 {
2659   STRPTR t = s;
2660   int n;
2661
2662   ENTER();
2663
2664   while((n = parseUtf8(&t)))
2665   {
2666     if(!GOOD_UCS(n))
2667     {
2668       RETURN(FALSE);
2669       return FALSE;
2670     }
2671   }
2672
2673   RETURN(TRUE);
2674   return TRUE;
2675 }
2676
2677 ///
2678 /// CodesetsConvertStrA()
2679 // Converts a given string from one source Codeset to a given destination
2680 // codeset and returns the convert string
2681 STRPTR LIBFUNC
2682 CodesetsConvertStrA(REG(a0, struct TagItem *attrs))
2683 {
2684   struct codeset *srcCodeset;
2685   STRPTR srcStr = NULL;
2686   STRPTR dstStr = NULL;
2687   ULONG srcLen = 0;
2688   ULONG dstLen = 0;
2689
2690   ENTER();
2691
2692   // get the ptr to the src string we want to convert
2693   // from the source codeset to the dest codeset.
2694   srcStr = (STRPTR)GetTagData(CSA_Source, (ULONG)NULL, attrs);
2695
2696   // get the pointer to the codeset in which the src string is encoded
2697   if((srcCodeset = (struct codeset *)GetTagData(CSA_SourceCodeset, (ULONG)NULL, attrs)) == NULL)
2698     srcCodeset = defaultCodeset(TRUE);
2699
2700   if (srcStr != NULL)
2701   {
2702     if (srcCodeset == CodesetsBase->utf32Codeset)
2703       srcLen = utf32_strlen((UTF32 *)srcStr);
2704     else if (srcCodeset == CodesetsBase->utf16Codeset)
2705       srcLen = utf16_strlen((UTF16 *)srcStr);
2706     else
2707       srcLen = strlen(srcStr);
2708   }
2709   else
2710     srcLen = 0;
2711   srcLen = GetTagData(CSA_SourceLen, srcLen, attrs);
2712
2713   if(srcStr != NULL && srcLen > 0)
2714   {
2715     struct codeset *dstCodeset;
2716
2717     // get the pointer to the codeset in which the dst string should be encoded
2718     if((dstCodeset = (struct codeset *)GetTagData(CSA_DestCodeset, (ULONG)NULL, attrs)) == NULL)
2719       dstCodeset = defaultCodeset(TRUE);
2720
2721     D(DBF_UTF, "srcCodeset: '%s' dstCodeset: '%s'", srcCodeset->name, dstCodeset->name);
2722
2723     // check that the user didn't supplied the very same codeset
2724     // or otherwise a conversion is not required.
2725     if(srcCodeset != NULL && dstCodeset != NULL && srcCodeset != dstCodeset)
2726     {
2727       BOOL utf8Create = FALSE;
2728       BOOL strCreate = FALSE;
2729       UTF8 *utf8str;
2730       ULONG utf8strLen = 0;
2731       ULONG *destLenPtr = NULL;
2732       BOOL mapForeignChars;
2733       struct Hook *mapForeignCharsHook;
2734
2735       mapForeignChars = (BOOL)GetTagData(CSA_MapForeignChars, FALSE, attrs);
2736       mapForeignCharsHook = (struct Hook *)GetTagData(CSA_MapForeignCharsHook, (ULONG)NULL, attrs);
2737
2738       // if the source codeset is UTF-8 we don't have to use the UTF8Create()
2739       // function and can directly call the UTF8ToStr() function
2740       if(srcCodeset != CodesetsBase->utf8Codeset)
2741       {
2742         struct TagItem tags[] = { { CSA_SourceCodeset,  (ULONG)srcCodeset  },
2743                                   { CSA_Source,         (ULONG)srcStr      },
2744                                   { CSA_SourceLen,      srcLen             },
2745                                   { CSA_DestLenPtr,     (ULONG)&utf8strLen },
2746                                   { TAG_DONE,           0                  } };
2747
2748         utf8str = CodesetsUTF8CreateA((struct TagItem *)&tags[0]);
2749
2750         utf8Create = TRUE;
2751       }
2752       else
2753       {
2754         utf8str = (UTF8 *)srcStr;
2755         utf8strLen = srcLen;
2756       }
2757
2758       // in case the destination codeset is UTF-8 we don't have to actually
2759       // use the UTF8ToStr() function and can immediately return our
2760       // UTF8 string
2761       if(utf8str != NULL && utf8strLen > 0 && dstCodeset != CodesetsBase->utf8Codeset)
2762       {
2763         struct TagItem tags[] = { { CSA_DestCodeset,          (ULONG)dstCodeset          },
2764                                   { CSA_Source,               (ULONG)utf8str             },
2765                                   { CSA_SourceLen,            utf8strLen                 },
2766                                   { CSA_DestLenPtr,           (ULONG)&dstLen             },
2767                                   { CSA_MapForeignChars,      mapForeignChars            },
2768                                   { CSA_MapForeignCharsHook,  (ULONG)mapForeignCharsHook },
2769                                   { TAG_DONE,                 0                          } };
2770
2771         dstStr = CodesetsUTF8ToStrA((struct TagItem *)&tags[0]);
2772
2773         strCreate = TRUE;
2774       }
2775       else
2776       {
2777         dstStr = (STRPTR)utf8str;
2778         dstLen = utf8strLen;
2779       }
2780
2781       D(DBF_UTF, "srcStr: %lx srcLen: %ld dstStr: %lx dstLen: %ld utf8create: %ld strCreate: %ld", srcStr, srcLen,
2782                                                                                                    dstStr, dstLen,
2783                                                                                                    utf8Create,
2784                                                                                                    strCreate);
2785
2786       // if everything was successfull we can go and finalize everything
2787       if(dstStr != NULL && utf8str != NULL)
2788       {
2789         // as the conversion was a two way pass we have to either free the
2790         // memory of the utf8 string or not
2791         if(utf8Create == TRUE && strCreate == TRUE)
2792           CodesetsFreeA(utf8str, NULL);
2793
2794         // if the user wants to be informed abour the length
2795         // of our destination string we store the length now in the supplied ptr.
2796         if((destLenPtr = (ULONG *)GetTagData(CSA_DestLenPtr, (ULONG)NULL, attrs)) != NULL)
2797           *destLenPtr = dstLen;
2798
2799         D(DBF_UTF, "successfully converted string with len %ld", dstLen);
2800       }
2801       else
2802       {
2803         W(DBF_ALWAYS, "an error occurred while trying to convert a string");
2804
2805         // free all memory in case the conversion didn't work out
2806         if(utf8Create == TRUE && utf8str != NULL)
2807           CodesetsFreeA(utf8str, NULL);
2808
2809         if(strCreate == TRUE && dstStr != NULL)
2810           CodesetsFreeA(dstStr, NULL);
2811
2812         dstStr = NULL;
2813       }
2814     }
2815   }
2816
2817   RETURN(dstStr);
2818   return dstStr;
2819 }
2820
2821 ///
2822 /// CodesetsFreeVecPooledA()
2823 void LIBFUNC
2824 CodesetsFreeVecPooledA(REG(a0, APTR pool),
2825                        REG(a1, APTR mem),
2826                        REG(a2, struct TagItem *attrs))
2827 {
2828   ENTER();
2829
2830   if(pool && mem)
2831   {
2832     struct SignalSemaphore *sem;
2833
2834     if((sem = (struct SignalSemaphore *)GetTagData(CSA_PoolSem, 0, attrs)))
2835       ObtainSemaphore(sem);
2836
2837     freeVecPooled(pool,mem);
2838
2839     if(sem)
2840       ReleaseSemaphore(sem);
2841   }
2842
2843   LEAVE();
2844 }
2845
2846 ///
2847 /// CodesetsListCreateA()
2848 struct codesetList *LIBFUNC
2849 CodesetsListCreateA(REG(a0, struct TagItem *attrs))
2850 {
2851   struct codesetList *csList = NULL;
2852
2853   ENTER();
2854
2855   ObtainSemaphore(&CodesetsBase->poolSem);
2856
2857   // no matter what, we create a codesets list we will return to the user
2858   if((csList = allocVecPooled(CodesetsBase->pool, sizeof(struct codesetList))))
2859   {
2860     BOOL scanProgDir = TRUE;
2861     struct TagItem *tstate = attrs;
2862     struct TagItem *tag;
2863
2864     // initialize the new private codeset list and put it into a separate list
2865     NewList((struct List *)csList);
2866
2867     // first we get the path of the directory from which we go
2868     // and scan for charset tables from
2869     while((tag = NextTagItem(&tstate)))
2870     {
2871       switch(tag->ti_Tag)
2872       {
2873         case CSA_CodesetDir:
2874         {
2875           codesetsScanDir(csList, (STRPTR)tag->ti_Data);
2876
2877           scanProgDir = FALSE;
2878         }
2879         break;
2880
2881         case CSA_CodesetFile:
2882         {
2883           codesetsReadTable(csList, (STRPTR)tag->ti_Data);
2884
2885           scanProgDir = FALSE;
2886         }
2887         break;
2888
2889         case CSA_SourceCodeset:
2890         {
2891           struct codeset *cs = (struct codeset *)tag->ti_Data;
2892
2893           AddTail((struct List *)csList, (struct Node *)&cs->node);
2894
2895           scanProgDir = FALSE;
2896         }
2897         break;
2898       }
2899     }
2900
2901     // in case the user also wants us to scan PROGDIR:
2902     // we do so
2903     if(scanProgDir == TRUE)
2904       codesetsScanDir(csList, "PROGDIR:Charsets");
2905   }
2906
2907   ReleaseSemaphore(&CodesetsBase->poolSem);
2908
2909   RETURN(csList);
2910   return csList;
2911 }
2912
2913 ///
2914 /// CodesetsListDeleteA()
2915 BOOL LIBFUNC
2916 CodesetsListDeleteA(REG(a0, struct TagItem *attrs))
2917 {
2918   BOOL result = FALSE;
2919   ENTER();
2920
2921   ObtainSemaphore(&CodesetsBase->poolSem);
2922
2923   if(attrs != NULL)
2924   {
2925     BOOL freeCodesets;
2926     struct TagItem *tstate = attrs;
2927     struct TagItem *tag;
2928
2929     // check if the caller wants us also to free the codesets
2930     freeCodesets = (BOOL)GetTagData(CSA_FreeCodesets, TRUE, attrs);
2931
2932     // now we iterate through or tagItems and see what the
2933     // user wants to remove from the list
2934     while((tag = NextTagItem(&tstate)))
2935     {
2936       switch(tag->ti_Tag)
2937       {
2938         case CSA_CodesetList:
2939         {
2940           struct codesetList *csList = (struct codesetList *)tag->ti_Data;
2941
2942           if(csList)
2943           {
2944             // cleanup the codesets within the list
2945             if(freeCodesets)
2946               codesetsCleanup(csList);
2947
2948             // then free the list itself
2949             freeArbitrateVecPooled(csList);
2950
2951             result = TRUE;
2952           }
2953         }
2954       }
2955     }
2956   }
2957
2958   ReleaseSemaphore(&CodesetsBase->poolSem);
2959
2960   RETURN(result);
2961   return result;
2962 }
2963
2964 ///
2965 /// CodesetsListAddA()
2966 BOOL LIBFUNC
2967 CodesetsListAddA(REG(a0, struct codesetList *csList),
2968                  REG(a1, struct TagItem *attrs))
2969 {
2970   BOOL result = FALSE;
2971   ENTER();
2972
2973   ObtainSemaphore(&CodesetsBase->poolSem);
2974
2975   if(csList != NULL && attrs != NULL)
2976   {
2977     struct TagItem *tstate = attrs;
2978     struct TagItem *tag;
2979
2980     // now we iterate through or tagItems and see if the user
2981     // wants to scan a whole directory or just adds a file.
2982     while((tag = NextTagItem(&tstate)))
2983     {
2984       switch(tag->ti_Tag)
2985       {
2986         case CSA_CodesetDir:
2987         {
2988           codesetsScanDir(csList, (STRPTR)tag->ti_Data);
2989           result = TRUE;
2990         }
2991         break;
2992
2993         case CSA_CodesetFile:
2994         {
2995           codesetsReadTable(csList, (STRPTR)tag->ti_Data);
2996           result = TRUE;
2997         }
2998         break;
2999
3000         case CSA_SourceCodeset:
3001         {
3002           struct codeset *cs = (struct codeset *)tag->ti_Data;
3003
3004           AddTail((struct List *)csList, (struct Node *)&cs->node);
3005           result = TRUE;
3006         }
3007         break;
3008       }
3009     }
3010   }
3011
3012   ReleaseSemaphore(&CodesetsBase->poolSem);
3013
3014   RETURN(result);
3015   return result;
3016 }
3017
3018 ///
3019 /// CodesetsListRemoveA()
3020 BOOL LIBFUNC
3021 CodesetsListRemoveA(REG(a0, struct TagItem *attrs))
3022 {
3023   BOOL result = FALSE;
3024   ENTER();
3025
3026   ObtainSemaphore(&CodesetsBase->poolSem);
3027
3028   if(attrs != NULL)
3029   {
3030     BOOL freeCodesets;
3031     struct TagItem *tstate = attrs;
3032     struct TagItem *tag;
3033
3034     // check if the caller wants us also to free the codesets
3035     freeCodesets = (BOOL)GetTagData(CSA_FreeCodesets, TRUE, attrs);
3036
3037     // now we iterate through or tagItems and see what the
3038     // user wants to remove from the list
3039     while((tag = NextTagItem(&tstate)))
3040     {
3041       switch(tag->ti_Tag)
3042       {
3043         case CSA_SourceCodeset:
3044         {
3045           struct codeset *cs = (struct codeset *)tag->ti_Data;
3046
3047           if(cs)
3048           {
3049             struct MinNode *mstate = &cs->node;
3050
3051             // before we actually remove the node from its list, we
3052             // have to make sure it isn't part of our internal codesets list
3053             while(mstate->mln_Succ)
3054               mstate = mstate->mln_Succ;
3055
3056             if(mstate != CodesetsBase->codesets.list.mlh_Tail)
3057             {
3058               Remove((struct Node *)&cs->node);
3059
3060               // free all codesets data if requested.
3061               if(freeCodesets == TRUE)
3062               {
3063                 if(cs->name)             freeArbitrateVecPooled(cs->name);
3064                 if(cs->alt_name)         freeArbitrateVecPooled(cs->alt_name);
3065                 if(cs->characterization) freeArbitrateVecPooled(cs->characterization);
3066
3067                 freeArbitrateVecPooled(cs);
3068               }
3069
3070               result = TRUE;
3071             }
3072             else
3073               W(DBF_ALWAYS, "user tried to remove an internal codesets!");
3074           }
3075         }
3076         break;
3077       }
3078     }
3079   }
3080
3081   ReleaseSemaphore(&CodesetsBase->poolSem);
3082
3083   RETURN(result);
3084   return result;
3085 }
3086
3087 ///
3088
3089 /**************************************************************************/