localcharset.c

   1 /* Determine a canonical name for the current locale's character encoding.
   2
   3    Copyright (C) 2000-2006, 2008-2022 Free Software Foundation, Inc.
   4
   5    This file is free software: you can redistribute it and/or modify
   6    it under the terms of the GNU Lesser General Public License as
   7    published by the Free Software Foundation; either version 2.1 of the
   8    License, or (at your option) any later version.
   9
  10    This file is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU Lesser General Public License for more details.
  14
  15    You should have received a copy of the GNU Lesser General Public License
  16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  17
  18 /* Written by Bruno Haible <bruno@clisp.org>.  */
  19
  20 /* Specification.  */
  21 #include "localcharset.h"
  22
  23 #include <stddef.h>
  24 #include <stdio.h>
  25 #include <string.h>
  26 #include <stdlib.h>
  27
  28 #if _POSIX_C_SOURCE >= 200112L /* POSIX.1-2001 or newer; an undefined _POSIX_C_SOURCE evaluates to 0 here */
  29 # define HAVE_LANGINFO_CODESET 1 /* locale_charset () may call nl_langinfo (CODESET) */
  30 #else
  31 # define HAVE_LANGINFO_CODESET 0 /* use the Windows / OS/2 code paths or the locale-name table instead */
  32 #endif
  33
  34 #if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET
  35 # define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */
  36 #endif
  37
  38 #if defined _WIN32 && !defined __CYGWIN__
  39 # define WINDOWS_NATIVE
  40 # include <locale.h>
  41 #endif
  42
  43 #if defined __EMX__
  44 /* Assume EMX program runs on OS/2, even if compiled under DOS.  */
  45 # ifndef OS2
  46 #  define OS2
  47 # endif
  48 #endif
  49
  50 #if !defined WINDOWS_NATIVE
  51 # if HAVE_LANGINFO_CODESET
  52 #  include <langinfo.h>
  53 # else
  54 #  if 0 /* see comment regarding use of setlocale(), below */
  55 #   include <locale.h>
  56 #  endif
  57 # endif
  58 # ifdef __CYGWIN__
  59 #  define WIN32_LEAN_AND_MEAN
  60 #  include <windows.h>
  61 # endif
  62 #elif defined WINDOWS_NATIVE
  63 # define WIN32_LEAN_AND_MEAN
  64 # include <windows.h>
  65   /* For the use of setlocale() below, the Gnulib override in setlocale.c is
  66      not needed; see the platform lists in setlocale_null.m4.  */
  67 # undef setlocale
  68 #endif
  69 #if defined OS2
  70 # define INCL_DOS
  71 # include <os2.h>
  72 #endif
  73
  74 /* For MB_CUR_MAX_L */
  75 #if defined DARWIN7
  76 # include <xlocale.h>
  77 #endif
  78
  79 #if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2
  80 /* On these platforms, we use a mapping from non-canonical encoding name
  81    to GNU canonical encoding name.  */
  82
  83 /* With glibc-2.1 or newer, we don't need any canonicalization,
  84    because glibc has iconv and both glibc and libiconv support all
  85    GNU canonical names directly.  */
  86 # if !((defined __GNU_LIBRARY__ && __GLIBC__ >= 2) || defined __UCLIBC__)
  87 struct table_entry  /* one row of alias_table: non-canonical name -> GNU canonical name */
  88 {
  89   const char alias[11+1];      /* platform-specific encoding name; at most 11 characters + NUL */
  90   const char canonical[11+1];  /* GNU canonical encoding name; at most 11 characters + NUL */
  91 };
  92
  93 /* Table of platform-dependent mappings, sorted in ascending order of the
        alias field (byte-wise order, so uppercase names precede lowercase
        ones).  At most one platform section below is expected to be active:
        every section ends without a trailing comma, so two active sections
        would not compile.  */
  94 static const struct table_entry alias_table[] =
  95   {
  96 #  if defined __FreeBSD__                                   /* FreeBSD */
  97   /*{ "ARMSCII-8",  "ARMSCII-8" },*/
  98     { "Big5",       "BIG5" },
  99     { "C",          "ASCII" },
 100   /*{ "CP1131",     "CP1131" },*/
 101   /*{ "CP1251",     "CP1251" },*/
 102   /*{ "CP866",      "CP866" },*/
 103   /*{ "GB18030",    "GB18030" },*/
 104   /*{ "GB2312",     "GB2312" },*/
 105   /*{ "GBK",        "GBK" },*/
 106   /*{ "ISCII-DEV",  "?" },*/
 107     { "ISO8859-1",  "ISO-8859-1" },
 108     { "ISO8859-13", "ISO-8859-13" },
 109     { "ISO8859-15", "ISO-8859-15" },
 110     { "ISO8859-2",  "ISO-8859-2" },
 111     { "ISO8859-5",  "ISO-8859-5" },
 112     { "ISO8859-7",  "ISO-8859-7" },
 113     { "ISO8859-9",  "ISO-8859-9" },
 114   /*{ "KOI8-R",     "KOI8-R" },*/
 115   /*{ "KOI8-U",     "KOI8-U" },*/
 116     { "SJIS",       "SHIFT_JIS" },
 117     { "US-ASCII",   "ASCII" },
 118     { "eucCN",      "GB2312" },
 119     { "eucJP",      "EUC-JP" },
 120     { "eucKR",      "EUC-KR" }
 121 #   define alias_table_defined /* entries were supplied; suppresses the dummy entry at the end */
 122 #  endif
 123 #  if defined __NetBSD__                                    /* NetBSD */
 124     { "646",        "ASCII" },
 125   /*{ "ARMSCII-8",  "ARMSCII-8" },*/
 126   /*{ "BIG5",       "BIG5" },*/
 127     { "Big5-HKSCS", "BIG5-HKSCS" },
 128   /*{ "CP1251",     "CP1251" },*/
 129   /*{ "CP866",      "CP866" },*/
 130   /*{ "GB18030",    "GB18030" },*/
 131   /*{ "GB2312",     "GB2312" },*/
 132     { "ISO8859-1",  "ISO-8859-1" },
 133     { "ISO8859-13", "ISO-8859-13" },
 134     { "ISO8859-15", "ISO-8859-15" },
 135     { "ISO8859-2",  "ISO-8859-2" },
 136     { "ISO8859-4",  "ISO-8859-4" },
 137     { "ISO8859-5",  "ISO-8859-5" },
 138     { "ISO8859-7",  "ISO-8859-7" },
 139   /*{ "KOI8-R",     "KOI8-R" },*/
 140   /*{ "KOI8-U",     "KOI8-U" },*/
 141   /*{ "PT154",      "PT154" },*/
 142     { "SJIS",       "SHIFT_JIS" },
 143     { "eucCN",      "GB2312" },
 144     { "eucJP",      "EUC-JP" },
 145     { "eucKR",      "EUC-KR" },
 146     { "eucTW",      "EUC-TW" }
 147 #   define alias_table_defined
 148 #  endif
 149 #  if defined __OpenBSD__                                   /* OpenBSD */
 150     { "646",        "ASCII" },
 151     { "ISO8859-1",  "ISO-8859-1" },
 152     { "ISO8859-13", "ISO-8859-13" },
 153     { "ISO8859-15", "ISO-8859-15" },
 154     { "ISO8859-2",  "ISO-8859-2" },
 155     { "ISO8859-4",  "ISO-8859-4" },
 156     { "ISO8859-5",  "ISO-8859-5" },
 157     { "ISO8859-7",  "ISO-8859-7" },
 158     { "US-ASCII",   "ASCII" }
 159 #   define alias_table_defined
 160 #  endif
 161 #  if defined __APPLE__ && defined __MACH__                 /* Mac OS X */
 162     /* Darwin 7.5 has nl_langinfo(CODESET), but sometimes its value is
 163        useless:
 164        - It returns the empty string when LANG is set to a locale of the
 165          form ll_CC, although ll_CC/LC_CTYPE is a symlink to a UTF-8
 166          LC_CTYPE file.
 167        - The environment variables LANG, LC_CTYPE, LC_ALL are not set by
 168          the system; nl_langinfo(CODESET) returns "US-ASCII" in this case.
 169        - The documentation says:
 170            "... all code that calls BSD system routines should ensure
 171             that the const *char parameters of these routines are in UTF-8
 172             encoding. All BSD system functions expect their string
 173             parameters to be in UTF-8 encoding and nothing else."
 174          It also says
 175            "An additional caveat is that string parameters for files,
 176             paths, and other file-system entities must be in canonical
 177             UTF-8. In a canonical UTF-8 Unicode string, all decomposable
 178             characters are decomposed ..."
 179          but this is not true: You can pass non-decomposed UTF-8 strings
 180          to file system functions, and it is the OS which will convert
 181          them to decomposed UTF-8 before accessing the file system.
 182        - The Apple Terminal application displays UTF-8 by default.
 183        - However, other applications are free to use different encodings:
 184          - xterm uses ISO-8859-1 by default.
 185          - TextEdit uses MacRoman by default.
 186        We prefer UTF-8 over decomposed UTF-8-MAC because one should
 187        minimize the use of decomposed Unicode. Unfortunately, through the
 188        Darwin file system, decomposed UTF-8 strings are leaked into user
 189        space nevertheless.
 190        Then there are also the locales with encodings other than US-ASCII
 191        and UTF-8. These locales can be occasionally useful to users (e.g.
 192        when grepping through ISO-8859-1 encoded text files), when all their
 193        file names are in US-ASCII.
 194      */
 195     { "ARMSCII-8",  "ARMSCII-8" },
 196     { "Big5",       "BIG5" },
 197     { "Big5HKSCS",  "BIG5-HKSCS" },
 198     { "CP1131",     "CP1131" },
 199     { "CP1251",     "CP1251" },
 200     { "CP866",      "CP866" },
 201     { "CP949",      "CP949" },
 202     { "GB18030",    "GB18030" },
 203     { "GB2312",     "GB2312" },
 204     { "GBK",        "GBK" },
 205   /*{ "ISCII-DEV",  "?" },*/
 206     { "ISO8859-1",  "ISO-8859-1" },
 207     { "ISO8859-13", "ISO-8859-13" },
 208     { "ISO8859-15", "ISO-8859-15" },
 209     { "ISO8859-2",  "ISO-8859-2" },
 210     { "ISO8859-4",  "ISO-8859-4" },
 211     { "ISO8859-5",  "ISO-8859-5" },
 212     { "ISO8859-7",  "ISO-8859-7" },
 213     { "ISO8859-9",  "ISO-8859-9" },
 214     { "KOI8-R",     "KOI8-R" },
 215     { "KOI8-U",     "KOI8-U" },
 216     { "PT154",      "PT154" },
 217     { "SJIS",       "SHIFT_JIS" },
 218     { "eucCN",      "GB2312" },
 219     { "eucJP",      "EUC-JP" },
 220     { "eucKR",      "EUC-KR" }
 221 #   define alias_table_defined
 222 #  endif
 223 #  if defined _AIX                                          /* AIX */
 224   /*{ "GBK",        "GBK" },*/
 225     { "IBM-1046",   "CP1046" },
 226     { "IBM-1124",   "CP1124" },
 227     { "IBM-1129",   "CP1129" },
 228     { "IBM-1252",   "CP1252" },
 229     { "IBM-850",    "CP850" },
 230     { "IBM-856",    "CP856" },
 231     { "IBM-921",    "ISO-8859-13" },
 232     { "IBM-922",    "CP922" },
 233     { "IBM-932",    "CP932" },
 234     { "IBM-943",    "CP943" },
 235     { "IBM-eucCN",  "GB2312" },
 236     { "IBM-eucJP",  "EUC-JP" },
 237     { "IBM-eucKR",  "EUC-KR" },
 238     { "IBM-eucTW",  "EUC-TW" },
 239     { "ISO8859-1",  "ISO-8859-1" },
 240     { "ISO8859-15", "ISO-8859-15" },
 241     { "ISO8859-2",  "ISO-8859-2" },
 242     { "ISO8859-5",  "ISO-8859-5" },
 243     { "ISO8859-6",  "ISO-8859-6" },
 244     { "ISO8859-7",  "ISO-8859-7" },
 245     { "ISO8859-8",  "ISO-8859-8" },
 246     { "ISO8859-9",  "ISO-8859-9" },
 247     { "TIS-620",    "TIS-620" },
 248   /*{ "UTF-8",      "UTF-8" },*/
 249     { "big5",       "BIG5" }
 250 #   define alias_table_defined
 251 #  endif
 252 #  if defined __hpux                                        /* HP-UX */
 253     { "SJIS",      "SHIFT_JIS" },
 254     { "arabic8",   "HP-ARABIC8" },
 255     { "big5",      "BIG5" },
 256     { "cp1251",    "CP1251" },
 257     { "eucJP",     "EUC-JP" },
 258     { "eucKR",     "EUC-KR" },
 259     { "eucTW",     "EUC-TW" },
 260     { "gb18030",   "GB18030" },
 261     { "greek8",    "HP-GREEK8" },
 262     { "hebrew8",   "HP-HEBREW8" },
 263     { "hkbig5",    "BIG5-HKSCS" },
 264     { "hp15CN",    "GB2312" },
 265     { "iso88591",  "ISO-8859-1" },
 266     { "iso885913", "ISO-8859-13" },
 267     { "iso885915", "ISO-8859-15" },
 268     { "iso88592",  "ISO-8859-2" },
 269     { "iso88594",  "ISO-8859-4" },
 270     { "iso88595",  "ISO-8859-5" },
 271     { "iso88596",  "ISO-8859-6" },
 272     { "iso88597",  "ISO-8859-7" },
 273     { "iso88598",  "ISO-8859-8" },
 274     { "iso88599",  "ISO-8859-9" },
 275     { "kana8",     "HP-KANA8" },
 276     { "koi8r",     "KOI8-R" },
 277     { "roman8",    "HP-ROMAN8" },
 278     { "tis620",    "TIS-620" },
 279     { "turkish8",  "HP-TURKISH8" },
 280     { "utf8",      "UTF-8" }
 281 #   define alias_table_defined
 282 #  endif
 283 #  if defined __sgi                                         /* IRIX */
 284     { "ISO8859-1",  "ISO-8859-1" },
 285     { "ISO8859-15", "ISO-8859-15" },
 286     { "ISO8859-2",  "ISO-8859-2" },
 287     { "ISO8859-5",  "ISO-8859-5" },
 288     { "ISO8859-7",  "ISO-8859-7" },
 289     { "ISO8859-9",  "ISO-8859-9" },
 290     { "eucCN",      "GB2312" },
 291     { "eucJP",      "EUC-JP" },
 292     { "eucKR",      "EUC-KR" },
 293     { "eucTW",      "EUC-TW" }
 294 #   define alias_table_defined
 295 #  endif
 296 #  if defined __osf__                                       /* OSF/1 */
 297   /*{ "GBK",        "GBK" },*/
 298     { "ISO8859-1",  "ISO-8859-1" },
 299     { "ISO8859-15", "ISO-8859-15" },
 300     { "ISO8859-2",  "ISO-8859-2" },
 301     { "ISO8859-4",  "ISO-8859-4" },
 302     { "ISO8859-5",  "ISO-8859-5" },
 303     { "ISO8859-7",  "ISO-8859-7" },
 304     { "ISO8859-8",  "ISO-8859-8" },
 305     { "ISO8859-9",  "ISO-8859-9" },
 306     { "KSC5601",    "CP949" },
 307     { "SJIS",       "SHIFT_JIS" },
 308     { "TACTIS",     "TIS-620" },
 309   /*{ "UTF-8",      "UTF-8" },*/
 310     { "big5",       "BIG5" },
 311     { "cp850",      "CP850" },
 312     { "dechanyu",   "DEC-HANYU" },
 313     { "dechanzi",   "GB2312" },
 314     { "deckanji",   "DEC-KANJI" },
 315     { "deckorean",  "EUC-KR" },
 316     { "eucJP",      "EUC-JP" },
 317     { "eucKR",      "EUC-KR" },
 318     { "eucTW",      "EUC-TW" },
 319     { "sdeckanji",  "EUC-JP" }
 320 #   define alias_table_defined
 321 #  endif
 322 #  if defined __sun                                         /* Solaris */
 323     { "5601",        "EUC-KR" },
 324     { "646",         "ASCII" },
 325   /*{ "BIG5",        "BIG5" },*/
 326     { "Big5-HKSCS",  "BIG5-HKSCS" },
 327     { "GB18030",     "GB18030" },
 328   /*{ "GBK",         "GBK" },*/
 329     { "ISO8859-1",   "ISO-8859-1" },
 330     { "ISO8859-11",  "TIS-620" },
 331     { "ISO8859-13",  "ISO-8859-13" },
 332     { "ISO8859-15",  "ISO-8859-15" },
 333     { "ISO8859-2",   "ISO-8859-2" },
 334     { "ISO8859-3",   "ISO-8859-3" },
 335     { "ISO8859-4",   "ISO-8859-4" },
 336     { "ISO8859-5",   "ISO-8859-5" },
 337     { "ISO8859-6",   "ISO-8859-6" },
 338     { "ISO8859-7",   "ISO-8859-7" },
 339     { "ISO8859-8",   "ISO-8859-8" },
 340     { "ISO8859-9",   "ISO-8859-9" },
 341     { "PCK",         "SHIFT_JIS" },
 342     { "TIS620.2533", "TIS-620" },
 343   /*{ "UTF-8",       "UTF-8" },*/
 344     { "ansi-1251",   "CP1251" },
 345     { "cns11643",    "EUC-TW" },
 346     { "eucJP",       "EUC-JP" },
 347     { "gb2312",      "GB2312" },
 348     { "koi8-r",      "KOI8-R" }
 349 #   define alias_table_defined
 350 #  endif
 351 #  if defined __minix                                       /* Minix */
 352     { "646", "ASCII" }
 353 #   define alias_table_defined
 354 #  endif
 355 #  if defined WINDOWS_NATIVE || defined __CYGWIN__          /* Windows */
 356     { "CP1361",  "JOHAB" },
 357     { "CP20127", "ASCII" },
 358     { "CP20866", "KOI8-R" },
 359     { "CP20936", "GB2312" },
 360     { "CP21866", "KOI8-RU" },
 361     { "CP28591", "ISO-8859-1" },
 362     { "CP28592", "ISO-8859-2" },
 363     { "CP28593", "ISO-8859-3" },
 364     { "CP28594", "ISO-8859-4" },
 365     { "CP28595", "ISO-8859-5" },
 366     { "CP28596", "ISO-8859-6" },
 367     { "CP28597", "ISO-8859-7" },
 368     { "CP28598", "ISO-8859-8" },
 369     { "CP28599", "ISO-8859-9" },
 370     { "CP28605", "ISO-8859-15" },
 371     { "CP38598", "ISO-8859-8" },
 372     { "CP51932", "EUC-JP" },
 373     { "CP51936", "GB2312" },
 374     { "CP51949", "EUC-KR" },
 375     { "CP51950", "EUC-TW" },
 376     { "CP54936", "GB18030" },
 377     { "CP65001", "UTF-8" },
 378     { "CP936",   "GBK" }
 379 #   define alias_table_defined
 380 #  endif
 381 #  if defined OS2                                           /* OS/2 */
 382     /* The list of encodings is taken from "List of OS/2 Codepages"
 383        by Alex Taylor:
 384        <http://altsan.org/os2/toolkits/uls/index.html#codepages>.
 385        See also "__convcp() of kLIBC":
 386        <https://github.com/bitwiseworks/libc/blob/master/src/emx/src/lib/locale/__convcp.c>.  */
 387     { "CP1004",        "CP1252" },
 388   /*{ "CP1041",        "CP943" },*/
 389   /*{ "CP1088",        "CP949" },*/
 390     { "CP1089",        "ISO-8859-6" },
 391   /*{ "CP1114",        "CP950" },*/
 392   /*{ "CP1115",        "GB2312" },*/
 393     { "CP1208",        "UTF-8" },
 394   /*{ "CP1380",        "GB2312" },*/
 395     { "CP1381",        "GB2312" },
 396     { "CP1383",        "GB2312" },
 397     { "CP1386",        "GBK" },
 398   /*{ "CP301",         "CP943" },*/
 399     { "CP3372",        "EUC-JP" },
 400     { "CP4946",        "CP850" },
 401   /*{ "CP5048",        "JIS_X0208-1990" },*/
 402   /*{ "CP5049",        "JIS_X0212-1990" },*/
 403   /*{ "CP5067",        "KS_C_5601-1987" },*/
 404     { "CP813",         "ISO-8859-7" },
 405     { "CP819",         "ISO-8859-1" },
 406     { "CP878",         "KOI8-R" },
 407   /*{ "CP897",         "CP943" },*/
 408     { "CP912",         "ISO-8859-2" },
 409     { "CP913",         "ISO-8859-3" },
 410     { "CP914",         "ISO-8859-4" },
 411     { "CP915",         "ISO-8859-5" },
 412     { "CP916",         "ISO-8859-8" },
 413     { "CP920",         "ISO-8859-9" },
 414     { "CP921",         "ISO-8859-13" },
 415     { "CP923",         "ISO-8859-15" },
 416   /*{ "CP941",         "CP943" },*/
 417   /*{ "CP947",         "CP950" },*/
 418   /*{ "CP951",         "CP949" },*/
 419   /*{ "CP952",         "JIS_X0208-1990" },*/
 420   /*{ "CP953",         "JIS_X0212-1990" },*/
 421     { "CP954",         "EUC-JP" },
 422     { "CP964",         "EUC-TW" },
 423     { "CP970",         "EUC-KR" },
 424   /*{ "CP971",         "KS_C_5601-1987" },*/
 425     { "IBM-1004",      "CP1252" },
 426   /*{ "IBM-1006",      "?" },*/
 427   /*{ "IBM-1008",      "?" },*/
 428   /*{ "IBM-1041",      "CP943" },*/
 429   /*{ "IBM-1051",      "?" },*/
 430   /*{ "IBM-1088",      "CP949" },*/
 431     { "IBM-1089",      "ISO-8859-6" },
 432   /*{ "IBM-1098",      "?" },*/
 433   /*{ "IBM-1114",      "CP950" },*/
 434   /*{ "IBM-1115",      "GB2312" },*/
 435   /*{ "IBM-1116",      "?" },*/
 436   /*{ "IBM-1117",      "?" },*/
 437   /*{ "IBM-1118",      "?" },*/
 438   /*{ "IBM-1119",      "?" },*/
 439     { "IBM-1124",      "CP1124" },
 440     { "IBM-1125",      "CP1125" },
 441     { "IBM-1131",      "CP1131" },
 442     { "IBM-1208",      "UTF-8" },
 443     { "IBM-1250",      "CP1250" },
 444     { "IBM-1251",      "CP1251" },
 445     { "IBM-1252",      "CP1252" },
 446     { "IBM-1253",      "CP1253" },
 447     { "IBM-1254",      "CP1254" },
 448     { "IBM-1255",      "CP1255" },
 449     { "IBM-1256",      "CP1256" },
 450     { "IBM-1257",      "CP1257" },
 451   /*{ "IBM-1275",      "?" },*/
 452   /*{ "IBM-1276",      "?" },*/
 453   /*{ "IBM-1277",      "?" },*/
 454   /*{ "IBM-1280",      "?" },*/
 455   /*{ "IBM-1281",      "?" },*/
 456   /*{ "IBM-1282",      "?" },*/
 457   /*{ "IBM-1283",      "?" },*/
 458   /*{ "IBM-1380",      "GB2312" },*/
 459     { "IBM-1381",      "GB2312" },
 460     { "IBM-1383",      "GB2312" },
 461     { "IBM-1386",      "GBK" },
 462   /*{ "IBM-301",       "CP943" },*/
 463     { "IBM-3372",      "EUC-JP" },
 464     { "IBM-367",       "ASCII" },
 465     { "IBM-437",       "CP437" },
 466     { "IBM-4946",      "CP850" },
 467   /*{ "IBM-5048",      "JIS_X0208-1990" },*/
 468   /*{ "IBM-5049",      "JIS_X0212-1990" },*/
 469   /*{ "IBM-5067",      "KS_C_5601-1987" },*/
 470     { "IBM-813",       "ISO-8859-7" },
 471     { "IBM-819",       "ISO-8859-1" },
 472     { "IBM-850",       "CP850" },
 473   /*{ "IBM-851",       "?" },*/
 474     { "IBM-852",       "CP852" },
 475     { "IBM-855",       "CP855" },
 476     { "IBM-856",       "CP856" },
 477     { "IBM-857",       "CP857" },
 478   /*{ "IBM-859",       "?" },*/
 479     { "IBM-860",       "CP860" },
 480     { "IBM-861",       "CP861" },
 481     { "IBM-862",       "CP862" },
 482     { "IBM-863",       "CP863" },
 483     { "IBM-864",       "CP864" },
 484     { "IBM-865",       "CP865" },
 485     { "IBM-866",       "CP866" },
 486   /*{ "IBM-868",       "?" },*/
 487     { "IBM-869",       "CP869" },
 488     { "IBM-874",       "CP874" },
 489     { "IBM-878",       "KOI8-R" },
 490   /*{ "IBM-895",       "?" },*/
 491   /*{ "IBM-897",       "CP943" },*/
 492   /*{ "IBM-907",       "?" },*/
 493   /*{ "IBM-909",       "?" },*/
 494     { "IBM-912",       "ISO-8859-2" },
 495     { "IBM-913",       "ISO-8859-3" },
 496     { "IBM-914",       "ISO-8859-4" },
 497     { "IBM-915",       "ISO-8859-5" },
 498     { "IBM-916",       "ISO-8859-8" },
 499     { "IBM-920",       "ISO-8859-9" },
 500     { "IBM-921",       "ISO-8859-13" },
 501     { "IBM-922",       "CP922" },
 502     { "IBM-923",       "ISO-8859-15" },
 503     { "IBM-932",       "CP932" },
 504   /*{ "IBM-941",       "CP943" },*/
 505   /*{ "IBM-942",       "?" },*/
 506     { "IBM-943",       "CP943" },
 507   /*{ "IBM-947",       "CP950" },*/
 508     { "IBM-949",       "CP949" },
 509     { "IBM-950",       "CP950" },
 510   /*{ "IBM-951",       "CP949" },*/
 511   /*{ "IBM-952",       "JIS_X0208-1990" },*/
 512   /*{ "IBM-953",       "JIS_X0212-1990" },*/
 513     { "IBM-954",       "EUC-JP" },
 514   /*{ "IBM-955",       "?" },*/
 515     { "IBM-964",       "EUC-TW" },
 516     { "IBM-970",       "EUC-KR" },
 517   /*{ "IBM-971",       "KS_C_5601-1987" },*/
 518     { "IBM-eucCN",     "GB2312" },
 519     { "IBM-eucJP",     "EUC-JP" },
 520     { "IBM-eucKR",     "EUC-KR" },
 521     { "IBM-eucTW",     "EUC-TW" },
 522     { "IBM33722",      "EUC-JP" },
 523     { "ISO8859-1",     "ISO-8859-1" },
 524     { "ISO8859-2",     "ISO-8859-2" },
 525     { "ISO8859-3",     "ISO-8859-3" },
 526     { "ISO8859-4",     "ISO-8859-4" },
 527     { "ISO8859-5",     "ISO-8859-5" },
 528     { "ISO8859-6",     "ISO-8859-6" },
 529     { "ISO8859-7",     "ISO-8859-7" },
 530     { "ISO8859-8",     "ISO-8859-8" },
 531     { "ISO8859-9",     "ISO-8859-9" },
 532   /*{ "JISX0201-1976", "JISX0201-1976" },*/
 533   /*{ "JISX0208-1978", "?" },*/
 534   /*{ "JISX0208-1983", "JIS_X0208-1983" },*/
 535   /*{ "JISX0208-1990", "JIS_X0208-1990" },*/
 536   /*{ "JISX0212-1990", "JIS_X0212-1990" },*/
 537   /*{ "KSC5601-1987",  "KS_C_5601-1987" },*/
 538     { "SJIS-1",        "CP943" },
 539     { "SJIS-2",        "CP943" },
 540     { "eucJP",         "EUC-JP" },
 541     { "eucKR",         "EUC-KR" },
 542     { "eucTW-1993",    "EUC-TW" }
 543 #   define alias_table_defined
 544 #  endif
 545 #  if defined VMS                                           /* OpenVMS */
 546     /* The list of encodings is taken from the OpenVMS 7.3-1 documentation
 547        "Compaq C Run-Time Library Reference Manual for OpenVMS systems"
 548        section 10.7 "Handling Different Character Sets".  */
 549     { "DECHANYU",  "DEC-HANYU" },
 550     { "DECHANZI",  "GB2312" },
 551     { "DECKANJI",  "DEC-KANJI" },
 552     { "DECKOREAN", "EUC-KR" },
 553     { "ISO8859-1", "ISO-8859-1" },
 554     { "ISO8859-2", "ISO-8859-2" },
 555     { "ISO8859-5", "ISO-8859-5" },
 556     { "ISO8859-7", "ISO-8859-7" },
 557     { "ISO8859-8", "ISO-8859-8" },
 558     { "ISO8859-9", "ISO-8859-9" },
 559     { "SDECKANJI", "EUC-JP" },
 560     { "SJIS",      "SHIFT_JIS" },
 561     { "eucJP",     "EUC-JP" },
 562     { "eucTW",     "EUC-TW" }
 563 #   define alias_table_defined
 564 #  endif
 565 #  ifndef alias_table_defined
 566     /* No platform section above supplied any entries.  Add just a dummy
            entry, to avoid a C syntax error (an empty initializer list).  */
 567     { "", "" }
 568 #  endif
 569   };
 570 # endif
 571 #else
 572 /* On these platforms, we use a mapping from locale name to GNU canonical
 573    encoding name.  */
 574 struct table_entry  /* one row of locale_table: locale name -> GNU canonical name */
 575 {
 576   const char locale[17+1];     /* locale name; at most 17 characters + NUL */
 577   const char canonical[11+1];  /* GNU canonical encoding name; at most 11 characters + NUL */
 578 };
 579
 580 /* Table of platform-dependent mappings, sorted in ascending order of the
        locale field (byte-wise order, so uppercase names precede lowercase
        ones).  At most one platform section below is expected to be active:
        every section ends without a trailing comma, so two active sections
        would not compile.  */
 581 static const struct table_entry locale_table[] =
 582   {
 583 # if defined __FreeBSD__                                    /* FreeBSD 4.2 */
 584     { "cs_CZ.ISO_8859-2",  "ISO-8859-2" },
 585     { "da_DK.DIS_8859-15", "ISO-8859-15" },
 586     { "da_DK.ISO_8859-1",  "ISO-8859-1" },
 587     { "de_AT.DIS_8859-15", "ISO-8859-15" },
 588     { "de_AT.ISO_8859-1",  "ISO-8859-1" },
 589     { "de_CH.DIS_8859-15", "ISO-8859-15" },
 590     { "de_CH.ISO_8859-1",  "ISO-8859-1" },
 591     { "de_DE.DIS_8859-15", "ISO-8859-15" },
 592     { "de_DE.ISO_8859-1",  "ISO-8859-1" },
 593     { "en_AU.DIS_8859-15", "ISO-8859-15" },
 594     { "en_AU.ISO_8859-1",  "ISO-8859-1" },
 595     { "en_CA.DIS_8859-15", "ISO-8859-15" },
 596     { "en_CA.ISO_8859-1",  "ISO-8859-1" },
 597     { "en_GB.DIS_8859-15", "ISO-8859-15" },
 598     { "en_GB.ISO_8859-1",  "ISO-8859-1" },
 599     { "en_US.DIS_8859-15", "ISO-8859-15" },
 600     { "en_US.ISO_8859-1",  "ISO-8859-1" },
 601     { "es_ES.DIS_8859-15", "ISO-8859-15" },
 602     { "es_ES.ISO_8859-1",  "ISO-8859-1" },
 603     { "fi_FI.DIS_8859-15", "ISO-8859-15" },
 604     { "fi_FI.ISO_8859-1",  "ISO-8859-1" },
 605     { "fr_BE.DIS_8859-15", "ISO-8859-15" },
 606     { "fr_BE.ISO_8859-1",  "ISO-8859-1" },
 607     { "fr_CA.DIS_8859-15", "ISO-8859-15" },
 608     { "fr_CA.ISO_8859-1",  "ISO-8859-1" },
 609     { "fr_CH.DIS_8859-15", "ISO-8859-15" },
 610     { "fr_CH.ISO_8859-1",  "ISO-8859-1" },
 611     { "fr_FR.DIS_8859-15", "ISO-8859-15" },
 612     { "fr_FR.ISO_8859-1",  "ISO-8859-1" },
 613     { "hr_HR.ISO_8859-2",  "ISO-8859-2" },
 614     { "hu_HU.ISO_8859-2",  "ISO-8859-2" },
 615     { "is_IS.DIS_8859-15", "ISO-8859-15" },
 616     { "is_IS.ISO_8859-1",  "ISO-8859-1" },
 617     { "it_CH.DIS_8859-15", "ISO-8859-15" },
 618     { "it_CH.ISO_8859-1",  "ISO-8859-1" },
 619     { "it_IT.DIS_8859-15", "ISO-8859-15" },
 620     { "it_IT.ISO_8859-1",  "ISO-8859-1" },
 621     { "ja_JP.EUC",         "EUC-JP" },
 622     { "ja_JP.SJIS",        "SHIFT_JIS" },
 623     { "ja_JP.Shift_JIS",   "SHIFT_JIS" },
 624     { "ko_KR.EUC",         "EUC-KR" },
 625     { "la_LN.ASCII",       "ASCII" },
 626     { "la_LN.DIS_8859-15", "ISO-8859-15" },
 627     { "la_LN.ISO_8859-1",  "ISO-8859-1" },
 628     { "la_LN.ISO_8859-2",  "ISO-8859-2" },
 629     { "la_LN.ISO_8859-4",  "ISO-8859-4" },
 630     { "lt_LN.ASCII",       "ASCII" },
 631     { "lt_LN.DIS_8859-15", "ISO-8859-15" },
 632     { "lt_LN.ISO_8859-1",  "ISO-8859-1" },
 633     { "lt_LN.ISO_8859-2",  "ISO-8859-2" },
 634     { "lt_LT.ISO_8859-4",  "ISO-8859-4" },
 635     { "nl_BE.DIS_8859-15", "ISO-8859-15" },
 636     { "nl_BE.ISO_8859-1",  "ISO-8859-1" },
 637     { "nl_NL.DIS_8859-15", "ISO-8859-15" },
 638     { "nl_NL.ISO_8859-1",  "ISO-8859-1" },
 639     { "no_NO.DIS_8859-15", "ISO-8859-15" },
 640     { "no_NO.ISO_8859-1",  "ISO-8859-1" },
 641     { "pl_PL.ISO_8859-2",  "ISO-8859-2" },
 642     { "pt_PT.DIS_8859-15", "ISO-8859-15" },
 643     { "pt_PT.ISO_8859-1",  "ISO-8859-1" },
 644     { "ru_RU.CP866",       "CP866" },
 645     { "ru_RU.ISO_8859-5",  "ISO-8859-5" },
 646     { "ru_RU.KOI8-R",      "KOI8-R" },
 647     { "ru_SU.CP866",       "CP866" },
 648     { "ru_SU.ISO_8859-5",  "ISO-8859-5" },
 649     { "ru_SU.KOI8-R",      "KOI8-R" },
 650     { "sl_SI.ISO_8859-2",  "ISO-8859-2" },
 651     { "sv_SE.DIS_8859-15", "ISO-8859-15" },
 652     { "sv_SE.ISO_8859-1",  "ISO-8859-1" },
 653     { "uk_UA.KOI8-U",      "KOI8-U" },
 654     { "zh_CN.EUC",         "GB2312" },
 655     { "zh_TW.BIG5",        "BIG5" },
 656     { "zh_TW.Big5",        "BIG5" }
 657 #  define locale_table_defined /* entries were supplied; suppresses the dummy entry at the end */
 658 # endif
 659 # if defined __DJGPP__                                      /* DOS / DJGPP 2.03 */
 660     /* The encodings given here may not all be correct.
 661        If you find that the encoding given for your language and
 662        country is not the one your DOS machine actually uses, just
 663        correct it in this file, and send a mail to
 664        Juan Manuel Guerrero <juan.guerrero@gmx.de>
 665        and <bug-gnulib@gnu.org>.  */
 666     { "C",     "ASCII" },
 667     { "ar",    "CP864" },
 668     { "ar_AE", "CP864" },
 669     { "ar_DZ", "CP864" },
 670     { "ar_EG", "CP864" },
 671     { "ar_IQ", "CP864" },
 672     { "ar_IR", "CP864" },
 673     { "ar_JO", "CP864" },
 674     { "ar_KW", "CP864" },
 675     { "ar_MA", "CP864" },
 676     { "ar_OM", "CP864" },
 677     { "ar_QA", "CP864" },
 678     { "ar_SA", "CP864" },
 679     { "ar_SY", "CP864" },
 680     { "be",    "CP866" },
 681     { "be_BE", "CP866" },
 682     { "bg",    "CP866" }, /* not CP855 ?? */
 683     { "bg_BG", "CP866" }, /* not CP855 ?? */
 684     { "ca",    "CP850" },
 685     { "ca_ES", "CP850" },
 686     { "cs",    "CP852" },
 687     { "cs_CZ", "CP852" },
 688     { "da",    "CP865" }, /* not CP850 ?? */
 689     { "da_DK", "CP865" }, /* not CP850 ?? */
 690     { "de",    "CP850" },
 691     { "de_AT", "CP850" },
 692     { "de_CH", "CP850" },
 693     { "de_DE", "CP850" },
 694     { "el",    "CP869" },
 695     { "el_GR", "CP869" },
 696     { "en",    "CP850" },
 697     { "en_AU", "CP850" }, /* not CP437 ?? */
 698     { "en_CA", "CP850" },
 699     { "en_GB", "CP850" },
 700     { "en_NZ", "CP437" },
 701     { "en_US", "CP437" },
 702     { "en_ZA", "CP850" }, /* not CP437 ?? */
 703     { "eo",    "CP850" },
 704     { "eo_EO", "CP850" },
 705     { "es",    "CP850" },
 706     { "es_AR", "CP850" },
 707     { "es_BO", "CP850" },
 708     { "es_CL", "CP850" },
 709     { "es_CO", "CP850" },
 710     { "es_CR", "CP850" },
 711     { "es_CU", "CP850" },
 712     { "es_DO", "CP850" },
 713     { "es_EC", "CP850" },
 714     { "es_ES", "CP850" },
 715     { "es_GT", "CP850" },
 716     { "es_HN", "CP850" },
 717     { "es_MX", "CP850" },
 718     { "es_NI", "CP850" },
 719     { "es_PA", "CP850" },
 720     { "es_PE", "CP850" },
 721     { "es_PY", "CP850" },
 722     { "es_SV", "CP850" },
 723     { "es_UY", "CP850" },
 724     { "es_VE", "CP850" },
 725     { "et",    "CP850" },
 726     { "et_EE", "CP850" },
 727     { "eu",    "CP850" },
 728     { "eu_ES", "CP850" },
 729     { "fi",    "CP850" },
 730     { "fi_FI", "CP850" },
 731     { "fr",    "CP850" },
 732     { "fr_BE", "CP850" },
 733     { "fr_CA", "CP850" },
 734     { "fr_CH", "CP850" },
 735     { "fr_FR", "CP850" },
 736     { "ga",    "CP850" },
 737     { "ga_IE", "CP850" },
 738     { "gd",    "CP850" },
 739     { "gd_GB", "CP850" },
 740     { "gl",    "CP850" },
 741     { "gl_ES", "CP850" },
 742     { "he",    "CP862" },
 743     { "he_IL", "CP862" },
 744     { "hr",    "CP852" },
 745     { "hr_HR", "CP852" },
 746     { "hu",    "CP852" },
 747     { "hu_HU", "CP852" },
 748     { "id",    "CP850" }, /* not CP437 ?? */
 749     { "id_ID", "CP850" }, /* not CP437 ?? */
 750     { "is",    "CP861" }, /* not CP850 ?? */
 751     { "is_IS", "CP861" }, /* not CP850 ?? */
 752     { "it",    "CP850" },
 753     { "it_CH", "CP850" },
 754     { "it_IT", "CP850" },
 755     { "ja",    "CP932" },
 756     { "ja_JP", "CP932" },
 757     { "kr",    "CP949" }, /* not CP934 ?? */
 758     { "kr_KR", "CP949" }, /* not CP934 ?? */
 759     { "lt",    "CP775" },
 760     { "lt_LT", "CP775" },
 761     { "lv",    "CP775" },
 762     { "lv_LV", "CP775" },
 763     { "mk",    "CP866" }, /* not CP855 ?? */
 764     { "mk_MK", "CP866" }, /* not CP855 ?? */
 765     { "mt",    "CP850" },
 766     { "mt_MT", "CP850" },
 767     { "nb",    "CP865" }, /* not CP850 ?? */
 768     { "nb_NO", "CP865" }, /* not CP850 ?? */
 769     { "nl",    "CP850" },
 770     { "nl_BE", "CP850" },
 771     { "nl_NL", "CP850" },
 772     { "nn",    "CP865" }, /* not CP850 ?? */
 773     { "nn_NO", "CP865" }, /* not CP850 ?? */
 774     { "no",    "CP865" }, /* not CP850 ?? */
 775     { "no_NO", "CP865" }, /* not CP850 ?? */
 776     { "pl",    "CP852" },
 777     { "pl_PL", "CP852" },
 778     { "pt",    "CP850" },
 779     { "pt_BR", "CP850" },
 780     { "pt_PT", "CP850" },
 781     { "ro",    "CP852" },
 782     { "ro_RO", "CP852" },
 783     { "ru",    "CP866" },
 784     { "ru_RU", "CP866" },
 785     { "sk",    "CP852" },
 786     { "sk_SK", "CP852" },
 787     { "sl",    "CP852" },
 788     { "sl_SI", "CP852" },
 789     { "sq",    "CP852" },
 790     { "sq_AL", "CP852" },
 791     { "sr",    "CP852" }, /* CP852 or CP866 or CP855 ?? */
 792     { "sr_CS", "CP852" }, /* CP852 or CP866 or CP855 ?? */
 793     { "sr_YU", "CP852" }, /* CP852 or CP866 or CP855 ?? */
 794     { "sv",    "CP850" },
 795     { "sv_SE", "CP850" },
 796     { "th",    "CP874" },
 797     { "th_TH", "CP874" },
 798     { "tr",    "CP857" },
 799     { "tr_TR", "CP857" },
 800     { "uk",    "CP1125" },
 801     { "uk_UA", "CP1125" },
 802     { "zh_CN", "GBK" },
 803     { "zh_TW", "CP950" } /* not CP938 ?? */
 804 #  define locale_table_defined
 805 # endif
 806 # ifndef locale_table_defined
 807     /* No platform section above supplied any entries.  Add just a dummy
            entry, to avoid a C syntax error (an empty initializer list).  */
 808     { "", "" }
 809 # endif
 810   };
 811 #endif
 812
 813 /* Determine the current locale's character encoding, and canonicalize it
 814    into one of the canonical names listed below.
 815    The result must not be freed; it is statically allocated.  The result
 816    becomes invalid when setlocale() is used to change the global locale, or
 817    when the value of one of the environment variables LC_ALL, LC_CTYPE, LANG
 818    is changed; threads in multithreaded programs should not do this.
 819    If the canonical name cannot be determined, the result is a non-canonical
 820    name.  */
 821 #ifdef STATIC
 822 STATIC
 823 #endif
 824 const char *
 825 locale_charset (void)
 826 {
 827   const char *codeset;
 828
 829   /* This function must be multithread-safe.  To achieve this without using
 830      thread-local storage, we use a simple strcpy or memcpy to fill this static
 831      buffer.  Filling it through, for example, strcpy + strcat would not be
 832      guaranteed to leave the buffer's contents intact if another thread is
 833      currently accessing it.  If necessary, the contents is first assembled in
 834      a stack-allocated buffer.  */
 835 #if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2
 836 # if HAVE_LANGINFO_CODESET
 837   /* Most systems support nl_langinfo (CODESET) nowadays.  */
 838   codeset = nl_langinfo (CODESET);
 839 #  ifdef __CYGWIN__
 840   /* Cygwin < 1.7 does not have locales.  nl_langinfo (CODESET) always
 841      returns "US-ASCII".  Return the suffix of the locale name from the
 842      environment variables (if present) or the codepage as a number.  */
 843   if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
 844     {
 845       const char *locale;
 846       static char resultbuf[2 + 10 + 1];
 847
 848       locale = getenv ("LC_ALL");
 849
 850       if (locale == NULL || locale[0] == '\0')
 851         {
 852           locale = getenv ("LC_CTYPE");
 853
 854           if (locale == NULL || locale[0] == '\0')
 855             locale = getenv ("LANG");
 856         }
 857
 858       if (locale != NULL && locale[0] != '\0')
 859         {
 860           /* If the locale name contains an encoding after the dot, return
 861              it.  */
 862           const char *dot = strchr (locale, '.');
 863
 864           if (dot != NULL)
 865             {
 866               const char *modifier;
 867
 868               dot++;
 869               /* Look for the possible @... trailer and remove it, if any.  */
 870               modifier = strchr (dot, '@');
 871
 872               if (modifier == NULL)
 873                 return dot;
 874
 875               if (modifier - dot < sizeof (resultbuf))
 876                 {
 877                   /* This way of filling resultbuf is multithread-safe.  */
 878                   memcpy (resultbuf, dot, modifier - dot);
 879                   resultbuf [modifier - dot] = '\0';
 880                   return resultbuf;
 881                 }
 882             }
 883         }
 884
 885       /* The Windows API has a function returning the locale's codepage as a
 886          number: GetACP().  This encoding is used by Cygwin, unless the user
 887          has set the environment variable CYGWIN=codepage:oem (which very few
 888          people do).
 889          Output directed to console windows needs to be converted (to
 890          GetOEMCP() if the console is using a raster font, or to
 891          GetConsoleOutputCP() if it is using a TrueType font).  Cygwin does
 892          this conversion transparently (see winsup/cygwin/fhandler_console.cc),
 893          converting to GetConsoleOutputCP().  This leads to correct results,
 894          except when SetConsoleOutputCP has been called and a raster font is
 895          in use.  */
 896       {
 897         char buf[2 + 10 + 1];
 898
 899         sprintf (buf, "CP%u", GetACP ());
 900         strcpy (resultbuf, buf);
 901         codeset = resultbuf;
 902       }
 903     }
 904 #  endif
 905
 906   if (codeset == NULL)
 907     /* The canonical name cannot be determined.  */
 908     codeset = "";
 909 # elif defined WINDOWS_NATIVE
 910   char buf[2 + 10 + 1];
 911   static char resultbuf[2 + 10 + 1];
 912
 913   /* The Windows API has a function returning the locale's codepage as
 914      a number, but the value doesn't change according to what the
 915      'setlocale' call specified.  So we use it as a last resort, in
 916      case the string returned by 'setlocale' doesn't specify the
 917      codepage.  */
 918   char *current_locale = setlocale (LC_CTYPE, NULL);
 919   char *pdot = strrchr (current_locale, '.');
 920
 921   if (pdot && 2 + strlen (pdot + 1) + 1 <= sizeof (buf))
 922     sprintf (buf, "CP%s", pdot + 1);
 923   else
 924     {
 925       /* The Windows API has a function returning the locale's codepage as a
 926          number: GetACP().
 927          When the output goes to a console window, it needs to be provided in
 928          GetOEMCP() encoding if the console is using a raster font, or in
 929          GetConsoleOutputCP() encoding if it is using a TrueType font.
 930          But in GUI programs and for output sent to files and pipes, GetACP()
 931          encoding is the best bet.  */
 932       sprintf (buf, "CP%u", GetACP ());
 933     }
 934
 935   /* For a locale name such as "French_France.65001", in Windows 10,
 936      setlocale now returns "French_France.utf8" instead.  */
 937   if (strcmp (buf + 2, "65001") == 0 || strcmp (buf + 2, "utf8") == 0)
 938     codeset = "UTF-8";
 939   else
 940     {
 941       strcpy (resultbuf, buf);
 942       codeset = resultbuf;
 943     }
 944 # elif defined OS2
 945   const char *locale;
 946   static char resultbuf[2 + 10 + 1];
 947   ULONG cp[3];
 948   ULONG cplen;
 949
 950   codeset = NULL;
 951
 952   /* Allow user to override the codeset, as set in the operating system,
 953      with standard language environment variables.  */
 954   locale = getenv ("LC_ALL");
 955
 956   if (locale == NULL || locale[0] == '\0')
 957     {
 958       locale = getenv ("LC_CTYPE");
 959
 960       if (locale == NULL || locale[0] == '\0')
 961         locale = getenv ("LANG");
 962     }
 963
 964   if (locale != NULL && locale[0] != '\0')
 965     {
 966       /* If the locale name contains an encoding after the dot, return it.  */
 967       const char *dot = strchr (locale, '.');
 968
 969       if (dot != NULL)
 970         {
 971           const char *modifier;
 972
 973           dot++;
 974           /* Look for the possible @... trailer and remove it, if any.  */
 975           modifier = strchr (dot, '@');
 976
 977           if (modifier == NULL)
 978             return dot;
 979
 980           if (modifier - dot < sizeof (resultbuf))
 981             {
 982               /* This way of filling resultbuf is multithread-safe.  */
 983               memcpy (resultbuf, dot, modifier - dot);
 984               resultbuf [modifier - dot] = '\0';
 985               return resultbuf;
 986             }
 987         }
 988
 989       /* For the POSIX locale, don't use the system's codepage.  */
 990       if (strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0)
 991         codeset = "";
 992     }
 993
 994   if (codeset == NULL)
 995     {
 996       /* OS/2 has a function returning the locale's codepage as a number.  */
 997       if (DosQueryCp (sizeof (cp), cp, &cplen))
 998         codeset = "";
 999       else
1000         {
1001           char buf[2 + 10 + 1];
1002
1003           sprintf (buf, "CP%u", cp[0]);
1004           strcpy (resultbuf, buf);
1005           codeset = resultbuf;
1006         }
1007     }
1008 # else
1009 #  error "Add code for other platforms here."
1010 # endif
1011
1012   /* Resolve alias.  */
1013   {
1014 # ifdef alias_table_defined
1015     /* On some platforms, UTF-8 locales are the most frequently used ones.
1016        Speed up the common case and slow down the less common cases by
1017        testing for this case first.  */
1018 #  if defined __OpenBSD__ || (defined __APPLE__ && defined __MACH__) || defined __sun || defined __CYGWIN__
1019     if (strcmp (codeset, "UTF-8") == 0)
1020       goto done_table_lookup;
1021     else
1022 #  endif
1023       {
1024         const struct table_entry * const table = alias_table;
1025         size_t const table_size = sizeof (alias_table) / sizeof (struct table_entry);
1026         /* The table is sorted.  Perform a binary search.  */
1027         size_t hi = table_size;
1028         size_t lo = 0;
1029
1030         while (lo < hi)
1031           {
1032             /* Invariant:
1033                for i < lo, strcmp (table[i].alias, codeset) < 0,
1034                for i >= hi, strcmp (table[i].alias, codeset) > 0.  */
1035             size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
1036             int cmp = strcmp (table[mid].alias, codeset);
1037
1038             if (cmp < 0)
1039               lo = mid + 1;
1040             else if (cmp > 0)
1041               hi = mid;
1042             else
1043               {
1044                 /* Found an i with
1045                      strcmp (table[i].alias, codeset) == 0.  */
1046                 codeset = table[mid].canonical;
1047                 goto done_table_lookup;
1048               }
1049           }
1050       }
1051
1052     if (0)
1053       done_table_lookup: ;
1054     else
1055 # endif
1056       {
1057         /* Did not find it in the table.  */
1058         /* On Mac OS X, all modern locales use the UTF-8 encoding.
1059            BeOS and Haiku have a single locale, and it has UTF-8 encoding.  */
1060 # if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
1061         codeset = "UTF-8";
1062 # else
1063         /* Don't return an empty string.  GNU libc and GNU libiconv interpret
1064            the empty string as denoting "the locale's character encoding",
1065            thus GNU libiconv would call this function a second time.  */
1066         if (codeset[0] == '\0')
1067           codeset = "ASCII";
1068 # endif
1069       }
1070   }
1071 #else
1072   /* On old systems which lack it, use setlocale or getenv.  */
1073   const char *locale = NULL;
1074
1075   /* But most old systems don't have a complete set of locales.  Some
1076      (like DJGPP) have only the C locale.  Therefore we don't use setlocale
1077      here; it would return "C" when it doesn't support the locale name the
1078      user has set.  */
1079 # if 0
1080   locale = setlocale (LC_CTYPE, NULL);
1081 # endif
1082   if (locale == NULL || locale[0] == '\0')
1083     {
1084       locale = getenv ("LC_ALL");
1085
1086       if (locale == NULL || locale[0] == '\0')
1087         {
1088           locale = getenv ("LC_CTYPE");
1089
1090           if (locale == NULL || locale[0] == '\0')
1091             {
1092               locale = getenv ("LANG");
1093
1094               if (locale == NULL)
1095                 locale = "";
1096             }
1097         }
1098     }
1099
1100   /* Map locale name to canonical encoding name.  */
1101   {
1102 # ifdef locale_table_defined
1103     const struct table_entry * const table = locale_table;
1104     size_t const table_size =
1105       sizeof (locale_table) / sizeof (struct table_entry);
1106     /* The table is sorted.  Perform a binary search.  */
1107     size_t hi = table_size;
1108     size_t lo = 0;
1109
1110     while (lo < hi)
1111       {
1112         /* Invariant:
1113            for i < lo, strcmp (table[i].locale, locale) < 0,
1114            for i >= hi, strcmp (table[i].locale, locale) > 0.  */
1115         size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
1116         int cmp = strcmp (table[mid].locale, locale);
1117
1118         if (cmp < 0)
1119           lo = mid + 1;
1120         else if (cmp > 0)
1121           hi = mid;
1122         else
1123           {
1124             /* Found an i with
1125                  strcmp (table[i].locale, locale) == 0.  */
1126             codeset = table[mid].canonical;
1127             goto done_table_lookup;
1128           }
1129       }
1130
1131     if (0)
1132       done_table_lookup: ;
1133     else
1134 # endif
1135       {
1136         /* Did not find it in the table.  */
1137         /* On Mac OS X, all modern locales use the UTF-8 encoding.
1138            BeOS and Haiku have a single locale, and it has UTF-8 encoding.  */
1139 # if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
1140         codeset = "UTF-8";
1141 # else
1142         /* The canonical name cannot be determined.  */
1143         /* Do not return an empty string.  GNU libc and GNU libiconv interpret
1144            the empty string as denoting "the locale's character encoding",
1145            thus GNU libiconv would call this function a second time.  */
1146         codeset = "ASCII";
1147 # endif
1148       }
1149   }
1150 #endif
1151
1152 #ifdef DARWIN7
1153   /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8"
1154      (the default codeset) does not work when MB_CUR_MAX is 1.  */
1155   if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1)
1156     codeset = "ASCII";
1157 #endif
1158
1159   return codeset;
1160 }