Add new files from gnulib: sh-quote and system-quote.
[libquote.git] / localcharset.c
blob89788dddbdf6cc0b559eb32085c572c13943d13d
1 /* Determine a canonical name for the current locale's character encoding.
3 Copyright (C) 2000-2006, 2008-2022 Free Software Foundation, Inc.
5 This file is free software: you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as
7 published by the Free Software Foundation; either version 2.1 of the
8 License, or (at your option) any later version.
10 This file is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18 /* Written by Bruno Haible <bruno@clisp.org>. */
20 /* Specification. */
21 #include "localcharset.h"
23 #include <stddef.h>
24 #include <stdio.h>
25 #include <string.h>
26 #include <stdlib.h>
/* Decide whether nl_langinfo (CODESET) may be used.  No configure-time
   check is made here; the feature-test macro _POSIX_C_SOURCE is consulted
   instead.  An undefined macro evaluates to 0 inside #if, in which case
   HAVE_LANGINFO_CODESET becomes 0 and the code paths that do not rely on
   <langinfo.h> are selected.
   NOTE(review): this relies on _POSIX_C_SOURCE being set by the build or
   by the system headers included above -- confirm for each target.  */
28 #if _POSIX_C_SOURCE >= 200112L
29 # define HAVE_LANGINFO_CODESET 1
30 #else
31 # define HAVE_LANGINFO_CODESET 0
32 #endif
/* DARWIN7 is defined only when nl_langinfo (CODESET) is usable as well;
   further down it gates the inclusion of <xlocale.h>.  */
34 #if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET
35 # define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */
36 #endif
/* Native Windows, i.e. the Windows API without Cygwin.  <locale.h> is
   included for setlocale (), which locale_charset calls on this platform
   to read the current LC_CTYPE locale name.  */
38 #if defined _WIN32 && !defined __CYGWIN__
39 # define WINDOWS_NATIVE
40 # include <locale.h>
41 #endif
/* Normalize the OS/2 platform macro: the rest of the file tests OS2 only.  */
43 #if defined __EMX__
44 /* Assume EMX program runs on OS/2, even if compiled under DOS. */
45 # ifndef OS2
46 # define OS2
47 # endif
48 #endif
50 #if !defined WINDOWS_NATIVE
51 # if HAVE_LANGINFO_CODESET
52 # include <langinfo.h>
53 # else
54 # if 0 /* see comment regarding use of setlocale(), below */
55 # include <locale.h>
56 # endif
57 # endif
58 # ifdef __CYGWIN__
59 # define WIN32_LEAN_AND_MEAN
60 # include <windows.h>
61 # endif
62 #elif defined WINDOWS_NATIVE
63 # define WIN32_LEAN_AND_MEAN
64 # include <windows.h>
65 /* For the use of setlocale() below, the Gnulib override in setlocale.c is
66 not needed; see the platform lists in setlocale_null.m4. */
67 # undef setlocale
68 #endif
69 #if defined OS2
70 # define INCL_DOS
71 # include <os2.h>
72 #endif
74 /* For MB_CUR_MAX_L */
75 #if defined DARWIN7
76 # include <xlocale.h>
77 #endif
79 #if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2
80 /* On these platforms, we use a mapping from non-canonical encoding name
81 to GNU canonical encoding name. */
83 /* With glibc-2.1 or newer, we don't need any canonicalization,
84 because glibc has iconv and both glibc and libiconv support all
85 GNU canonical names directly. */
86 # if !((defined __GNU_LIBRARY__ && __GLIBC__ >= 2) || defined __UCLIBC__)
/* One row of alias_table: pairs an encoding name as reported by the
   platform with the corresponding GNU canonical encoding name.  Both
   strings are stored inline as fixed-size arrays; 11 is the length of the
   longest name used in the table (e.g. "TIS620.2533", "ISO-8859-15"), and
   the extra byte holds the terminating NUL.  */
87 struct table_entry
89 const char alias[11+1]; /* platform-specific name; the search key */
90 const char canonical[11+1]; /* GNU canonical name returned on a match */
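93 /* Table of platform-dependent mappings from alias to canonical name, sorted in ascending strcmp() order of the alias field, as required by the binary search in locale_charset. */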
94 static const struct table_entry alias_table[] =
96 # if defined __FreeBSD__ /* FreeBSD */
97 /*{ "ARMSCII-8", "ARMSCII-8" },*/
98 { "Big5", "BIG5" },
99 { "C", "ASCII" },
100 /*{ "CP1131", "CP1131" },*/
101 /*{ "CP1251", "CP1251" },*/
102 /*{ "CP866", "CP866" },*/
103 /*{ "GB18030", "GB18030" },*/
104 /*{ "GB2312", "GB2312" },*/
105 /*{ "GBK", "GBK" },*/
106 /*{ "ISCII-DEV", "?" },*/
107 { "ISO8859-1", "ISO-8859-1" },
108 { "ISO8859-13", "ISO-8859-13" },
109 { "ISO8859-15", "ISO-8859-15" },
110 { "ISO8859-2", "ISO-8859-2" },
111 { "ISO8859-5", "ISO-8859-5" },
112 { "ISO8859-7", "ISO-8859-7" },
113 { "ISO8859-9", "ISO-8859-9" },
114 /*{ "KOI8-R", "KOI8-R" },*/
115 /*{ "KOI8-U", "KOI8-U" },*/
116 { "SJIS", "SHIFT_JIS" },
117 { "US-ASCII", "ASCII" },
118 { "eucCN", "GB2312" },
119 { "eucJP", "EUC-JP" },
120 { "eucKR", "EUC-KR" }
121 # define alias_table_defined
122 # endif
123 # if defined __NetBSD__ /* NetBSD */
124 { "646", "ASCII" },
125 /*{ "ARMSCII-8", "ARMSCII-8" },*/
126 /*{ "BIG5", "BIG5" },*/
127 { "Big5-HKSCS", "BIG5-HKSCS" },
128 /*{ "CP1251", "CP1251" },*/
129 /*{ "CP866", "CP866" },*/
130 /*{ "GB18030", "GB18030" },*/
131 /*{ "GB2312", "GB2312" },*/
132 { "ISO8859-1", "ISO-8859-1" },
133 { "ISO8859-13", "ISO-8859-13" },
134 { "ISO8859-15", "ISO-8859-15" },
135 { "ISO8859-2", "ISO-8859-2" },
136 { "ISO8859-4", "ISO-8859-4" },
137 { "ISO8859-5", "ISO-8859-5" },
138 { "ISO8859-7", "ISO-8859-7" },
139 /*{ "KOI8-R", "KOI8-R" },*/
140 /*{ "KOI8-U", "KOI8-U" },*/
141 /*{ "PT154", "PT154" },*/
142 { "SJIS", "SHIFT_JIS" },
143 { "eucCN", "GB2312" },
144 { "eucJP", "EUC-JP" },
145 { "eucKR", "EUC-KR" },
146 { "eucTW", "EUC-TW" }
147 # define alias_table_defined
148 # endif
149 # if defined __OpenBSD__ /* OpenBSD */
150 { "646", "ASCII" },
151 { "ISO8859-1", "ISO-8859-1" },
152 { "ISO8859-13", "ISO-8859-13" },
153 { "ISO8859-15", "ISO-8859-15" },
154 { "ISO8859-2", "ISO-8859-2" },
155 { "ISO8859-4", "ISO-8859-4" },
156 { "ISO8859-5", "ISO-8859-5" },
157 { "ISO8859-7", "ISO-8859-7" },
158 { "US-ASCII", "ASCII" }
159 # define alias_table_defined
160 # endif
161 # if defined __APPLE__ && defined __MACH__ /* Mac OS X */
162 /* Darwin 7.5 has nl_langinfo(CODESET), but sometimes its value is
163 useless:
164 - It returns the empty string when LANG is set to a locale of the
165 form ll_CC, although ll_CC/LC_CTYPE is a symlink to an UTF-8
166 LC_CTYPE file.
167 - The environment variables LANG, LC_CTYPE, LC_ALL are not set by
168 the system; nl_langinfo(CODESET) returns "US-ASCII" in this case.
169 - The documentation says:
170 "... all code that calls BSD system routines should ensure
171 that the const *char parameters of these routines are in UTF-8
172 encoding. All BSD system functions expect their string
173 parameters to be in UTF-8 encoding and nothing else."
174 It also says
175 "An additional caveat is that string parameters for files,
176 paths, and other file-system entities must be in canonical
177 UTF-8. In a canonical UTF-8 Unicode string, all decomposable
178 characters are decomposed ..."
179 but this is not true: You can pass non-decomposed UTF-8 strings
180 to file system functions, and it is the OS which will convert
181 them to decomposed UTF-8 before accessing the file system.
182 - The Apple Terminal application displays UTF-8 by default.
183 - However, other applications are free to use different encodings:
184 - xterm uses ISO-8859-1 by default.
185 - TextEdit uses MacRoman by default.
186 We prefer UTF-8 over decomposed UTF-8-MAC because one should
187 minimize the use of decomposed Unicode. Unfortunately, through the
188 Darwin file system, decomposed UTF-8 strings are leaked into user
189 space nevertheless.
190 Then there are also the locales with encodings other than US-ASCII
191 and UTF-8. These locales can be occasionally useful to users (e.g.
192 when grepping through ISO-8859-1 encoded text files), when all their
193 file names are in US-ASCII. */
195 { "ARMSCII-8", "ARMSCII-8" },
196 { "Big5", "BIG5" },
197 { "Big5HKSCS", "BIG5-HKSCS" },
198 { "CP1131", "CP1131" },
199 { "CP1251", "CP1251" },
200 { "CP866", "CP866" },
201 { "CP949", "CP949" },
202 { "GB18030", "GB18030" },
203 { "GB2312", "GB2312" },
204 { "GBK", "GBK" },
205 /*{ "ISCII-DEV", "?" },*/
206 { "ISO8859-1", "ISO-8859-1" },
207 { "ISO8859-13", "ISO-8859-13" },
208 { "ISO8859-15", "ISO-8859-15" },
209 { "ISO8859-2", "ISO-8859-2" },
210 { "ISO8859-4", "ISO-8859-4" },
211 { "ISO8859-5", "ISO-8859-5" },
212 { "ISO8859-7", "ISO-8859-7" },
213 { "ISO8859-9", "ISO-8859-9" },
214 { "KOI8-R", "KOI8-R" },
215 { "KOI8-U", "KOI8-U" },
216 { "PT154", "PT154" },
217 { "SJIS", "SHIFT_JIS" },
218 { "eucCN", "GB2312" },
219 { "eucJP", "EUC-JP" },
220 { "eucKR", "EUC-KR" }
221 # define alias_table_defined
222 # endif
223 # if defined _AIX /* AIX */
224 /*{ "GBK", "GBK" },*/
225 { "IBM-1046", "CP1046" },
226 { "IBM-1124", "CP1124" },
227 { "IBM-1129", "CP1129" },
228 { "IBM-1252", "CP1252" },
229 { "IBM-850", "CP850" },
230 { "IBM-856", "CP856" },
231 { "IBM-921", "ISO-8859-13" },
232 { "IBM-922", "CP922" },
233 { "IBM-932", "CP932" },
234 { "IBM-943", "CP943" },
235 { "IBM-eucCN", "GB2312" },
236 { "IBM-eucJP", "EUC-JP" },
237 { "IBM-eucKR", "EUC-KR" },
238 { "IBM-eucTW", "EUC-TW" },
239 { "ISO8859-1", "ISO-8859-1" },
240 { "ISO8859-15", "ISO-8859-15" },
241 { "ISO8859-2", "ISO-8859-2" },
242 { "ISO8859-5", "ISO-8859-5" },
243 { "ISO8859-6", "ISO-8859-6" },
244 { "ISO8859-7", "ISO-8859-7" },
245 { "ISO8859-8", "ISO-8859-8" },
246 { "ISO8859-9", "ISO-8859-9" },
247 { "TIS-620", "TIS-620" },
248 /*{ "UTF-8", "UTF-8" },*/
249 { "big5", "BIG5" }
250 # define alias_table_defined
251 # endif
252 # if defined __hpux /* HP-UX */
253 { "SJIS", "SHIFT_JIS" },
254 { "arabic8", "HP-ARABIC8" },
255 { "big5", "BIG5" },
256 { "cp1251", "CP1251" },
257 { "eucJP", "EUC-JP" },
258 { "eucKR", "EUC-KR" },
259 { "eucTW", "EUC-TW" },
260 { "gb18030", "GB18030" },
261 { "greek8", "HP-GREEK8" },
262 { "hebrew8", "HP-HEBREW8" },
263 { "hkbig5", "BIG5-HKSCS" },
264 { "hp15CN", "GB2312" },
265 { "iso88591", "ISO-8859-1" },
266 { "iso885913", "ISO-8859-13" },
267 { "iso885915", "ISO-8859-15" },
268 { "iso88592", "ISO-8859-2" },
269 { "iso88594", "ISO-8859-4" },
270 { "iso88595", "ISO-8859-5" },
271 { "iso88596", "ISO-8859-6" },
272 { "iso88597", "ISO-8859-7" },
273 { "iso88598", "ISO-8859-8" },
274 { "iso88599", "ISO-8859-9" },
275 { "kana8", "HP-KANA8" },
276 { "koi8r", "KOI8-R" },
277 { "roman8", "HP-ROMAN8" },
278 { "tis620", "TIS-620" },
279 { "turkish8", "HP-TURKISH8" },
280 { "utf8", "UTF-8" }
281 # define alias_table_defined
282 # endif
283 # if defined __sgi /* IRIX */
284 { "ISO8859-1", "ISO-8859-1" },
285 { "ISO8859-15", "ISO-8859-15" },
286 { "ISO8859-2", "ISO-8859-2" },
287 { "ISO8859-5", "ISO-8859-5" },
288 { "ISO8859-7", "ISO-8859-7" },
289 { "ISO8859-9", "ISO-8859-9" },
290 { "eucCN", "GB2312" },
291 { "eucJP", "EUC-JP" },
292 { "eucKR", "EUC-KR" },
293 { "eucTW", "EUC-TW" }
294 # define alias_table_defined
295 # endif
296 # if defined __osf__ /* OSF/1 */
297 /*{ "GBK", "GBK" },*/
298 { "ISO8859-1", "ISO-8859-1" },
299 { "ISO8859-15", "ISO-8859-15" },
300 { "ISO8859-2", "ISO-8859-2" },
301 { "ISO8859-4", "ISO-8859-4" },
302 { "ISO8859-5", "ISO-8859-5" },
303 { "ISO8859-7", "ISO-8859-7" },
304 { "ISO8859-8", "ISO-8859-8" },
305 { "ISO8859-9", "ISO-8859-9" },
306 { "KSC5601", "CP949" },
307 { "SJIS", "SHIFT_JIS" },
308 { "TACTIS", "TIS-620" },
309 /*{ "UTF-8", "UTF-8" },*/
310 { "big5", "BIG5" },
311 { "cp850", "CP850" },
312 { "dechanyu", "DEC-HANYU" },
313 { "dechanzi", "GB2312" },
314 { "deckanji", "DEC-KANJI" },
315 { "deckorean", "EUC-KR" },
316 { "eucJP", "EUC-JP" },
317 { "eucKR", "EUC-KR" },
318 { "eucTW", "EUC-TW" },
319 { "sdeckanji", "EUC-JP" }
320 # define alias_table_defined
321 # endif
322 # if defined __sun /* Solaris */
323 { "5601", "EUC-KR" },
324 { "646", "ASCII" },
325 /*{ "BIG5", "BIG5" },*/
326 { "Big5-HKSCS", "BIG5-HKSCS" },
327 { "GB18030", "GB18030" },
328 /*{ "GBK", "GBK" },*/
329 { "ISO8859-1", "ISO-8859-1" },
330 { "ISO8859-11", "TIS-620" },
331 { "ISO8859-13", "ISO-8859-13" },
332 { "ISO8859-15", "ISO-8859-15" },
333 { "ISO8859-2", "ISO-8859-2" },
334 { "ISO8859-3", "ISO-8859-3" },
335 { "ISO8859-4", "ISO-8859-4" },
336 { "ISO8859-5", "ISO-8859-5" },
337 { "ISO8859-6", "ISO-8859-6" },
338 { "ISO8859-7", "ISO-8859-7" },
339 { "ISO8859-8", "ISO-8859-8" },
340 { "ISO8859-9", "ISO-8859-9" },
341 { "PCK", "SHIFT_JIS" },
342 { "TIS620.2533", "TIS-620" },
343 /*{ "UTF-8", "UTF-8" },*/
344 { "ansi-1251", "CP1251" },
345 { "cns11643", "EUC-TW" },
346 { "eucJP", "EUC-JP" },
347 { "gb2312", "GB2312" },
348 { "koi8-r", "KOI8-R" }
349 # define alias_table_defined
350 # endif
351 # if defined __minix /* Minix */
352 { "646", "ASCII" }
353 # define alias_table_defined
354 # endif
355 # if defined WINDOWS_NATIVE || defined __CYGWIN__ /* Windows */
356 { "CP1361", "JOHAB" },
357 { "CP20127", "ASCII" },
358 { "CP20866", "KOI8-R" },
359 { "CP20936", "GB2312" },
360 { "CP21866", "KOI8-RU" },
361 { "CP28591", "ISO-8859-1" },
362 { "CP28592", "ISO-8859-2" },
363 { "CP28593", "ISO-8859-3" },
364 { "CP28594", "ISO-8859-4" },
365 { "CP28595", "ISO-8859-5" },
366 { "CP28596", "ISO-8859-6" },
367 { "CP28597", "ISO-8859-7" },
368 { "CP28598", "ISO-8859-8" },
369 { "CP28599", "ISO-8859-9" },
370 { "CP28605", "ISO-8859-15" },
371 { "CP38598", "ISO-8859-8" },
372 { "CP51932", "EUC-JP" },
373 { "CP51936", "GB2312" },
374 { "CP51949", "EUC-KR" },
375 { "CP51950", "EUC-TW" },
376 { "CP54936", "GB18030" },
377 { "CP65001", "UTF-8" },
378 { "CP936", "GBK" }
379 # define alias_table_defined
380 # endif
381 # if defined OS2 /* OS/2 */
382 /* The list of encodings is taken from "List of OS/2 Codepages"
383 by Alex Taylor:
384 <http://altsan.org/os2/toolkits/uls/index.html#codepages>.
385 See also "__convcp() of kLIBC":
386 <https://github.com/bitwiseworks/libc/blob/master/src/emx/src/lib/locale/__convcp.c>. */
387 { "CP1004", "CP1252" },
388 /*{ "CP1041", "CP943" },*/
389 /*{ "CP1088", "CP949" },*/
390 { "CP1089", "ISO-8859-6" },
391 /*{ "CP1114", "CP950" },*/
392 /*{ "CP1115", "GB2312" },*/
393 { "CP1208", "UTF-8" },
394 /*{ "CP1380", "GB2312" },*/
395 { "CP1381", "GB2312" },
396 { "CP1383", "GB2312" },
397 { "CP1386", "GBK" },
398 /*{ "CP301", "CP943" },*/
399 { "CP3372", "EUC-JP" },
400 { "CP4946", "CP850" },
401 /*{ "CP5048", "JIS_X0208-1990" },*/
402 /*{ "CP5049", "JIS_X0212-1990" },*/
403 /*{ "CP5067", "KS_C_5601-1987" },*/
404 { "CP813", "ISO-8859-7" },
405 { "CP819", "ISO-8859-1" },
406 { "CP878", "KOI8-R" },
407 /*{ "CP897", "CP943" },*/
408 { "CP912", "ISO-8859-2" },
409 { "CP913", "ISO-8859-3" },
410 { "CP914", "ISO-8859-4" },
411 { "CP915", "ISO-8859-5" },
412 { "CP916", "ISO-8859-8" },
413 { "CP920", "ISO-8859-9" },
414 { "CP921", "ISO-8859-13" },
415 { "CP923", "ISO-8859-15" },
416 /*{ "CP941", "CP943" },*/
417 /*{ "CP947", "CP950" },*/
418 /*{ "CP951", "CP949" },*/
419 /*{ "CP952", "JIS_X0208-1990" },*/
420 /*{ "CP953", "JIS_X0212-1990" },*/
421 { "CP954", "EUC-JP" },
422 { "CP964", "EUC-TW" },
423 { "CP970", "EUC-KR" },
424 /*{ "CP971", "KS_C_5601-1987" },*/
425 { "IBM-1004", "CP1252" },
426 /*{ "IBM-1006", "?" },*/
427 /*{ "IBM-1008", "?" },*/
428 /*{ "IBM-1041", "CP943" },*/
429 /*{ "IBM-1051", "?" },*/
430 /*{ "IBM-1088", "CP949" },*/
431 { "IBM-1089", "ISO-8859-6" },
432 /*{ "IBM-1098", "?" },*/
433 /*{ "IBM-1114", "CP950" },*/
434 /*{ "IBM-1115", "GB2312" },*/
435 /*{ "IBM-1116", "?" },*/
436 /*{ "IBM-1117", "?" },*/
437 /*{ "IBM-1118", "?" },*/
438 /*{ "IBM-1119", "?" },*/
439 { "IBM-1124", "CP1124" },
440 { "IBM-1125", "CP1125" },
441 { "IBM-1131", "CP1131" },
442 { "IBM-1208", "UTF-8" },
443 { "IBM-1250", "CP1250" },
444 { "IBM-1251", "CP1251" },
445 { "IBM-1252", "CP1252" },
446 { "IBM-1253", "CP1253" },
447 { "IBM-1254", "CP1254" },
448 { "IBM-1255", "CP1255" },
449 { "IBM-1256", "CP1256" },
450 { "IBM-1257", "CP1257" },
451 /*{ "IBM-1275", "?" },*/
452 /*{ "IBM-1276", "?" },*/
453 /*{ "IBM-1277", "?" },*/
454 /*{ "IBM-1280", "?" },*/
455 /*{ "IBM-1281", "?" },*/
456 /*{ "IBM-1282", "?" },*/
457 /*{ "IBM-1283", "?" },*/
458 /*{ "IBM-1380", "GB2312" },*/
459 { "IBM-1381", "GB2312" },
460 { "IBM-1383", "GB2312" },
461 { "IBM-1386", "GBK" },
462 /*{ "IBM-301", "CP943" },*/
463 { "IBM-3372", "EUC-JP" },
464 { "IBM-367", "ASCII" },
465 { "IBM-437", "CP437" },
466 { "IBM-4946", "CP850" },
467 /*{ "IBM-5048", "JIS_X0208-1990" },*/
468 /*{ "IBM-5049", "JIS_X0212-1990" },*/
469 /*{ "IBM-5067", "KS_C_5601-1987" },*/
470 { "IBM-813", "ISO-8859-7" },
471 { "IBM-819", "ISO-8859-1" },
472 { "IBM-850", "CP850" },
473 /*{ "IBM-851", "?" },*/
474 { "IBM-852", "CP852" },
475 { "IBM-855", "CP855" },
476 { "IBM-856", "CP856" },
477 { "IBM-857", "CP857" },
478 /*{ "IBM-859", "?" },*/
479 { "IBM-860", "CP860" },
480 { "IBM-861", "CP861" },
481 { "IBM-862", "CP862" },
482 { "IBM-863", "CP863" },
483 { "IBM-864", "CP864" },
484 { "IBM-865", "CP865" },
485 { "IBM-866", "CP866" },
486 /*{ "IBM-868", "?" },*/
487 { "IBM-869", "CP869" },
488 { "IBM-874", "CP874" },
489 { "IBM-878", "KOI8-R" },
490 /*{ "IBM-895", "?" },*/
491 /*{ "IBM-897", "CP943" },*/
492 /*{ "IBM-907", "?" },*/
493 /*{ "IBM-909", "?" },*/
494 { "IBM-912", "ISO-8859-2" },
495 { "IBM-913", "ISO-8859-3" },
496 { "IBM-914", "ISO-8859-4" },
497 { "IBM-915", "ISO-8859-5" },
498 { "IBM-916", "ISO-8859-8" },
499 { "IBM-920", "ISO-8859-9" },
500 { "IBM-921", "ISO-8859-13" },
501 { "IBM-922", "CP922" },
502 { "IBM-923", "ISO-8859-15" },
503 { "IBM-932", "CP932" },
504 /*{ "IBM-941", "CP943" },*/
505 /*{ "IBM-942", "?" },*/
506 { "IBM-943", "CP943" },
507 /*{ "IBM-947", "CP950" },*/
508 { "IBM-949", "CP949" },
509 { "IBM-950", "CP950" },
510 /*{ "IBM-951", "CP949" },*/
511 /*{ "IBM-952", "JIS_X0208-1990" },*/
512 /*{ "IBM-953", "JIS_X0212-1990" },*/
513 { "IBM-954", "EUC-JP" },
514 /*{ "IBM-955", "?" },*/
515 { "IBM-964", "EUC-TW" },
516 { "IBM-970", "EUC-KR" },
517 /*{ "IBM-971", "KS_C_5601-1987" },*/
518 { "IBM-eucCN", "GB2312" },
519 { "IBM-eucJP", "EUC-JP" },
520 { "IBM-eucKR", "EUC-KR" },
521 { "IBM-eucTW", "EUC-TW" },
522 { "IBM33722", "EUC-JP" },
523 { "ISO8859-1", "ISO-8859-1" },
524 { "ISO8859-2", "ISO-8859-2" },
525 { "ISO8859-3", "ISO-8859-3" },
526 { "ISO8859-4", "ISO-8859-4" },
527 { "ISO8859-5", "ISO-8859-5" },
528 { "ISO8859-6", "ISO-8859-6" },
529 { "ISO8859-7", "ISO-8859-7" },
530 { "ISO8859-8", "ISO-8859-8" },
531 { "ISO8859-9", "ISO-8859-9" },
532 /*{ "JISX0201-1976", "JISX0201-1976" },*/
533 /*{ "JISX0208-1978", "?" },*/
534 /*{ "JISX0208-1983", "JIS_X0208-1983" },*/
535 /*{ "JISX0208-1990", "JIS_X0208-1990" },*/
536 /*{ "JISX0212-1990", "JIS_X0212-1990" },*/
537 /*{ "KSC5601-1987", "KS_C_5601-1987" },*/
538 { "SJIS-1", "CP943" },
539 { "SJIS-2", "CP943" },
540 { "eucJP", "EUC-JP" },
541 { "eucKR", "EUC-KR" },
542 { "eucTW-1993", "EUC-TW" }
543 # define alias_table_defined
544 # endif
545 # if defined VMS /* OpenVMS */
546 /* The list of encodings is taken from the OpenVMS 7.3-1 documentation
547 "Compaq C Run-Time Library Reference Manual for OpenVMS systems"
548 section 10.7 "Handling Different Character Sets". */
549 { "DECHANYU", "DEC-HANYU" },
550 { "DECHANZI", "GB2312" },
551 { "DECKANJI", "DEC-KANJI" },
552 { "DECKOREAN", "EUC-KR" },
553 { "ISO8859-1", "ISO-8859-1" },
554 { "ISO8859-2", "ISO-8859-2" },
555 { "ISO8859-5", "ISO-8859-5" },
556 { "ISO8859-7", "ISO-8859-7" },
557 { "ISO8859-8", "ISO-8859-8" },
558 { "ISO8859-9", "ISO-8859-9" },
559 { "SDECKANJI", "EUC-JP" },
560 { "SJIS", "SHIFT_JIS" },
561 { "eucJP", "EUC-JP" },
562 { "eucTW", "EUC-TW" }
563 # define alias_table_defined
564 # endif
565 # ifndef alias_table_defined
566 /* Just a dummy entry, to avoid a C syntax error. */
567 { "", "" }
568 # endif
570 # endif
571 #else
572 /* On these platforms, we use a mapping from locale name to GNU canonical
573 encoding name. */
/* One row of locale_table: pairs a locale name with the GNU canonical name
   of the encoding that locale uses.  Both strings are stored inline as
   fixed-size arrays; 17 is the length of the longest locale name in the
   table (e.g. "da_DK.DIS_8859-15"), 11 that of the longest canonical name
   (e.g. "ISO-8859-15"), and the extra byte holds the terminating NUL.  */
574 struct table_entry
576 const char locale[17+1]; /* locale name, e.g. "de_DE.ISO_8859-1" */
577 const char canonical[11+1]; /* GNU canonical encoding name */
580 /* Table of platform-dependent mappings from locale name to canonical encoding name, sorted in ascending strcmp() order of the locale field. */
581 static const struct table_entry locale_table[] =
583 # if defined __FreeBSD__ /* FreeBSD 4.2 */
584 { "cs_CZ.ISO_8859-2", "ISO-8859-2" },
585 { "da_DK.DIS_8859-15", "ISO-8859-15" },
586 { "da_DK.ISO_8859-1", "ISO-8859-1" },
587 { "de_AT.DIS_8859-15", "ISO-8859-15" },
588 { "de_AT.ISO_8859-1", "ISO-8859-1" },
589 { "de_CH.DIS_8859-15", "ISO-8859-15" },
590 { "de_CH.ISO_8859-1", "ISO-8859-1" },
591 { "de_DE.DIS_8859-15", "ISO-8859-15" },
592 { "de_DE.ISO_8859-1", "ISO-8859-1" },
593 { "en_AU.DIS_8859-15", "ISO-8859-15" },
594 { "en_AU.ISO_8859-1", "ISO-8859-1" },
595 { "en_CA.DIS_8859-15", "ISO-8859-15" },
596 { "en_CA.ISO_8859-1", "ISO-8859-1" },
597 { "en_GB.DIS_8859-15", "ISO-8859-15" },
598 { "en_GB.ISO_8859-1", "ISO-8859-1" },
599 { "en_US.DIS_8859-15", "ISO-8859-15" },
600 { "en_US.ISO_8859-1", "ISO-8859-1" },
601 { "es_ES.DIS_8859-15", "ISO-8859-15" },
602 { "es_ES.ISO_8859-1", "ISO-8859-1" },
603 { "fi_FI.DIS_8859-15", "ISO-8859-15" },
604 { "fi_FI.ISO_8859-1", "ISO-8859-1" },
605 { "fr_BE.DIS_8859-15", "ISO-8859-15" },
606 { "fr_BE.ISO_8859-1", "ISO-8859-1" },
607 { "fr_CA.DIS_8859-15", "ISO-8859-15" },
608 { "fr_CA.ISO_8859-1", "ISO-8859-1" },
609 { "fr_CH.DIS_8859-15", "ISO-8859-15" },
610 { "fr_CH.ISO_8859-1", "ISO-8859-1" },
611 { "fr_FR.DIS_8859-15", "ISO-8859-15" },
612 { "fr_FR.ISO_8859-1", "ISO-8859-1" },
613 { "hr_HR.ISO_8859-2", "ISO-8859-2" },
614 { "hu_HU.ISO_8859-2", "ISO-8859-2" },
615 { "is_IS.DIS_8859-15", "ISO-8859-15" },
616 { "is_IS.ISO_8859-1", "ISO-8859-1" },
617 { "it_CH.DIS_8859-15", "ISO-8859-15" },
618 { "it_CH.ISO_8859-1", "ISO-8859-1" },
619 { "it_IT.DIS_8859-15", "ISO-8859-15" },
620 { "it_IT.ISO_8859-1", "ISO-8859-1" },
621 { "ja_JP.EUC", "EUC-JP" },
622 { "ja_JP.SJIS", "SHIFT_JIS" },
623 { "ja_JP.Shift_JIS", "SHIFT_JIS" },
624 { "ko_KR.EUC", "EUC-KR" },
625 { "la_LN.ASCII", "ASCII" },
626 { "la_LN.DIS_8859-15", "ISO-8859-15" },
627 { "la_LN.ISO_8859-1", "ISO-8859-1" },
628 { "la_LN.ISO_8859-2", "ISO-8859-2" },
629 { "la_LN.ISO_8859-4", "ISO-8859-4" },
630 { "lt_LN.ASCII", "ASCII" },
631 { "lt_LN.DIS_8859-15", "ISO-8859-15" },
632 { "lt_LN.ISO_8859-1", "ISO-8859-1" },
633 { "lt_LN.ISO_8859-2", "ISO-8859-2" },
634 { "lt_LT.ISO_8859-4", "ISO-8859-4" },
635 { "nl_BE.DIS_8859-15", "ISO-8859-15" },
636 { "nl_BE.ISO_8859-1", "ISO-8859-1" },
637 { "nl_NL.DIS_8859-15", "ISO-8859-15" },
638 { "nl_NL.ISO_8859-1", "ISO-8859-1" },
639 { "no_NO.DIS_8859-15", "ISO-8859-15" },
640 { "no_NO.ISO_8859-1", "ISO-8859-1" },
641 { "pl_PL.ISO_8859-2", "ISO-8859-2" },
642 { "pt_PT.DIS_8859-15", "ISO-8859-15" },
643 { "pt_PT.ISO_8859-1", "ISO-8859-1" },
644 { "ru_RU.CP866", "CP866" },
645 { "ru_RU.ISO_8859-5", "ISO-8859-5" },
646 { "ru_RU.KOI8-R", "KOI8-R" },
647 { "ru_SU.CP866", "CP866" },
648 { "ru_SU.ISO_8859-5", "ISO-8859-5" },
649 { "ru_SU.KOI8-R", "KOI8-R" },
650 { "sl_SI.ISO_8859-2", "ISO-8859-2" },
651 { "sv_SE.DIS_8859-15", "ISO-8859-15" },
652 { "sv_SE.ISO_8859-1", "ISO-8859-1" },
653 { "uk_UA.KOI8-U", "KOI8-U" },
654 { "zh_CN.EUC", "GB2312" },
655 { "zh_TW.BIG5", "BIG5" },
656 { "zh_TW.Big5", "BIG5" }
657 # define locale_table_defined
658 # endif
659 # if defined __DJGPP__ /* DOS / DJGPP 2.03 */
660 /* The encodings given here may not all be correct.
661 If you find that the encoding given for your language and
662 country is not the one your DOS machine actually uses, just
663 correct it in this file, and send a mail to
664 Juan Manuel Guerrero <juan.guerrero@gmx.de>
665 and <bug-gnulib@gnu.org>. */
666 { "C", "ASCII" },
667 { "ar", "CP864" },
668 { "ar_AE", "CP864" },
669 { "ar_DZ", "CP864" },
670 { "ar_EG", "CP864" },
671 { "ar_IQ", "CP864" },
672 { "ar_IR", "CP864" },
673 { "ar_JO", "CP864" },
674 { "ar_KW", "CP864" },
675 { "ar_MA", "CP864" },
676 { "ar_OM", "CP864" },
677 { "ar_QA", "CP864" },
678 { "ar_SA", "CP864" },
679 { "ar_SY", "CP864" },
680 { "be", "CP866" },
681 { "be_BE", "CP866" },
682 { "bg", "CP866" }, /* not CP855 ?? */
683 { "bg_BG", "CP866" }, /* not CP855 ?? */
684 { "ca", "CP850" },
685 { "ca_ES", "CP850" },
686 { "cs", "CP852" },
687 { "cs_CZ", "CP852" },
688 { "da", "CP865" }, /* not CP850 ?? */
689 { "da_DK", "CP865" }, /* not CP850 ?? */
690 { "de", "CP850" },
691 { "de_AT", "CP850" },
692 { "de_CH", "CP850" },
693 { "de_DE", "CP850" },
694 { "el", "CP869" },
695 { "el_GR", "CP869" },
696 { "en", "CP850" },
697 { "en_AU", "CP850" }, /* not CP437 ?? */
698 { "en_CA", "CP850" },
699 { "en_GB", "CP850" },
700 { "en_NZ", "CP437" },
701 { "en_US", "CP437" },
702 { "en_ZA", "CP850" }, /* not CP437 ?? */
703 { "eo", "CP850" },
704 { "eo_EO", "CP850" },
705 { "es", "CP850" },
706 { "es_AR", "CP850" },
707 { "es_BO", "CP850" },
708 { "es_CL", "CP850" },
709 { "es_CO", "CP850" },
710 { "es_CR", "CP850" },
711 { "es_CU", "CP850" },
712 { "es_DO", "CP850" },
713 { "es_EC", "CP850" },
714 { "es_ES", "CP850" },
715 { "es_GT", "CP850" },
716 { "es_HN", "CP850" },
717 { "es_MX", "CP850" },
718 { "es_NI", "CP850" },
719 { "es_PA", "CP850" },
720 { "es_PE", "CP850" },
721 { "es_PY", "CP850" },
722 { "es_SV", "CP850" },
723 { "es_UY", "CP850" },
724 { "es_VE", "CP850" },
725 { "et", "CP850" },
726 { "et_EE", "CP850" },
727 { "eu", "CP850" },
728 { "eu_ES", "CP850" },
729 { "fi", "CP850" },
730 { "fi_FI", "CP850" },
731 { "fr", "CP850" },
732 { "fr_BE", "CP850" },
733 { "fr_CA", "CP850" },
734 { "fr_CH", "CP850" },
735 { "fr_FR", "CP850" },
736 { "ga", "CP850" },
737 { "ga_IE", "CP850" },
738 { "gd", "CP850" },
739 { "gd_GB", "CP850" },
740 { "gl", "CP850" },
741 { "gl_ES", "CP850" },
742 { "he", "CP862" },
743 { "he_IL", "CP862" },
744 { "hr", "CP852" },
745 { "hr_HR", "CP852" },
746 { "hu", "CP852" },
747 { "hu_HU", "CP852" },
748 { "id", "CP850" }, /* not CP437 ?? */
749 { "id_ID", "CP850" }, /* not CP437 ?? */
750 { "is", "CP861" }, /* not CP850 ?? */
751 { "is_IS", "CP861" }, /* not CP850 ?? */
752 { "it", "CP850" },
753 { "it_CH", "CP850" },
754 { "it_IT", "CP850" },
755 { "ja", "CP932" },
756 { "ja_JP", "CP932" },
757 { "kr", "CP949" }, /* not CP934 ?? */
758 { "kr_KR", "CP949" }, /* not CP934 ?? */
759 { "lt", "CP775" },
760 { "lt_LT", "CP775" },
761 { "lv", "CP775" },
762 { "lv_LV", "CP775" },
763 { "mk", "CP866" }, /* not CP855 ?? */
764 { "mk_MK", "CP866" }, /* not CP855 ?? */
765 { "mt", "CP850" },
766 { "mt_MT", "CP850" },
767 { "nb", "CP865" }, /* not CP850 ?? */
768 { "nb_NO", "CP865" }, /* not CP850 ?? */
769 { "nl", "CP850" },
770 { "nl_BE", "CP850" },
771 { "nl_NL", "CP850" },
772 { "nn", "CP865" }, /* not CP850 ?? */
773 { "nn_NO", "CP865" }, /* not CP850 ?? */
774 { "no", "CP865" }, /* not CP850 ?? */
775 { "no_NO", "CP865" }, /* not CP850 ?? */
776 { "pl", "CP852" },
777 { "pl_PL", "CP852" },
778 { "pt", "CP850" },
779 { "pt_BR", "CP850" },
780 { "pt_PT", "CP850" },
781 { "ro", "CP852" },
782 { "ro_RO", "CP852" },
783 { "ru", "CP866" },
784 { "ru_RU", "CP866" },
785 { "sk", "CP852" },
786 { "sk_SK", "CP852" },
787 { "sl", "CP852" },
788 { "sl_SI", "CP852" },
789 { "sq", "CP852" },
790 { "sq_AL", "CP852" },
791 { "sr", "CP852" }, /* CP852 or CP866 or CP855 ?? */
792 { "sr_CS", "CP852" }, /* CP852 or CP866 or CP855 ?? */
793 { "sr_YU", "CP852" }, /* CP852 or CP866 or CP855 ?? */
794 { "sv", "CP850" },
795 { "sv_SE", "CP850" },
796 { "th", "CP874" },
797 { "th_TH", "CP874" },
798 { "tr", "CP857" },
799 { "tr_TR", "CP857" },
800 { "uk", "CP1125" },
801 { "uk_UA", "CP1125" },
802 { "zh_CN", "GBK" },
803 { "zh_TW", "CP950" } /* not CP938 ?? */
804 # define locale_table_defined
805 # endif
806 # ifndef locale_table_defined
807 /* Just a dummy entry, to avoid a C syntax error. */
808 { "", "" }
809 # endif
811 #endif
813 /* Determine the current locale's character encoding, and canonicalize it
814 into one of the canonical names listed below.
815 The result must not be freed; it is statically allocated. The result
816 becomes invalid when setlocale() is used to change the global locale, or
817 when the value of one of the environment variables LC_ALL, LC_CTYPE, LANG
818 is changed; threads in multithreaded programs should not do this.
819 If the canonical name cannot be determined, the result is a non-canonical
820 name. */
821 #ifdef STATIC
822 STATIC
823 #endif
824 const char *
825 locale_charset (void)
827 const char *codeset;
829 /* This function must be multithread-safe. To achieve this without using
830 thread-local storage, we use a simple strcpy or memcpy to fill this static
831 buffer. Filling it through, for example, strcpy + strcat would not be
832 guaranteed to leave the buffer's contents intact if another thread is
833 currently accessing it. If necessary, the contents is first assembled in
834 a stack-allocated buffer. */
835 #if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2
836 # if HAVE_LANGINFO_CODESET
837 /* Most systems support nl_langinfo (CODESET) nowadays. */
838 codeset = nl_langinfo (CODESET);
839 # ifdef __CYGWIN__
840 /* Cygwin < 1.7 does not have locales. nl_langinfo (CODESET) always
841 returns "US-ASCII". Return the suffix of the locale name from the
842 environment variables (if present) or the codepage as a number. */
843 if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
845 const char *locale;
846 static char resultbuf[2 + 10 + 1];
848 locale = getenv ("LC_ALL");
850 if (locale == NULL || locale[0] == '\0')
852 locale = getenv ("LC_CTYPE");
854 if (locale == NULL || locale[0] == '\0')
855 locale = getenv ("LANG");
858 if (locale != NULL && locale[0] != '\0')
860 /* If the locale name contains an encoding after the dot, return
861 it. */
862 const char *dot = strchr (locale, '.');
864 if (dot != NULL)
866 const char *modifier;
868 dot++;
869 /* Look for the possible @... trailer and remove it, if any. */
870 modifier = strchr (dot, '@');
872 if (modifier == NULL)
873 return dot;
875 if (modifier - dot < sizeof (resultbuf))
877 /* This way of filling resultbuf is multithread-safe. */
878 memcpy (resultbuf, dot, modifier - dot);
879 resultbuf [modifier - dot] = '\0';
880 return resultbuf;
885 /* The Windows API has a function returning the locale's codepage as a
886 number: GetACP(). This encoding is used by Cygwin, unless the user
887 has set the environment variable CYGWIN=codepage:oem (which very few
888 people do).
889 Output directed to console windows needs to be converted (to
890 GetOEMCP() if the console is using a raster font, or to
891 GetConsoleOutputCP() if it is using a TrueType font). Cygwin does
892 this conversion transparently (see winsup/cygwin/fhandler_console.cc),
893 converting to GetConsoleOutputCP(). This leads to correct results,
894 except when SetConsoleOutputCP has been called and a raster font is
895 in use. */
897 char buf[2 + 10 + 1];
899 sprintf (buf, "CP%u", GetACP ());
900 strcpy (resultbuf, buf);
901 codeset = resultbuf;
904 # endif
906 if (codeset == NULL)
907 /* The canonical name cannot be determined. */
908 codeset = "";
909 # elif defined WINDOWS_NATIVE
910 char buf[2 + 10 + 1];
911 static char resultbuf[2 + 10 + 1];
913 /* The Windows API has a function returning the locale's codepage as
914 a number, but the value doesn't change according to what the
915 'setlocale' call specified. So we use it as a last resort, in
916 case the string returned by 'setlocale' doesn't specify the
917 codepage. */
918 char *current_locale = setlocale (LC_CTYPE, NULL);
919 char *pdot = strrchr (current_locale, '.');
921 if (pdot && 2 + strlen (pdot + 1) + 1 <= sizeof (buf))
922 sprintf (buf, "CP%s", pdot + 1);
923 else
925 /* The Windows API has a function returning the locale's codepage as a
926 number: GetACP().
927 When the output goes to a console window, it needs to be provided in
928 GetOEMCP() encoding if the console is using a raster font, or in
929 GetConsoleOutputCP() encoding if it is using a TrueType font.
930 But in GUI programs and for output sent to files and pipes, GetACP()
931 encoding is the best bet. */
932 sprintf (buf, "CP%u", GetACP ());
935 /* For a locale name such as "French_France.65001", in Windows 10,
936 setlocale now returns "French_France.utf8" instead. */
937 if (strcmp (buf + 2, "65001") == 0 || strcmp (buf + 2, "utf8") == 0)
938 codeset = "UTF-8";
939 else
941 strcpy (resultbuf, buf);
942 codeset = resultbuf;
944 # elif defined OS2
945 const char *locale;
946 static char resultbuf[2 + 10 + 1];
947 ULONG cp[3];
948 ULONG cplen;
950 codeset = NULL;
952 /* Allow user to override the codeset, as set in the operating system,
953 with standard language environment variables. */
954 locale = getenv ("LC_ALL");
956 if (locale == NULL || locale[0] == '\0')
958 locale = getenv ("LC_CTYPE");
960 if (locale == NULL || locale[0] == '\0')
961 locale = getenv ("LANG");
964 if (locale != NULL && locale[0] != '\0')
966 /* If the locale name contains an encoding after the dot, return it. */
967 const char *dot = strchr (locale, '.');
969 if (dot != NULL)
971 const char *modifier;
973 dot++;
974 /* Look for the possible @... trailer and remove it, if any. */
975 modifier = strchr (dot, '@');
977 if (modifier == NULL)
978 return dot;
980 if (modifier - dot < sizeof (resultbuf))
982 /* This way of filling resultbuf is multithread-safe. */
983 memcpy (resultbuf, dot, modifier - dot);
984 resultbuf [modifier - dot] = '\0';
985 return resultbuf;
989 /* For the POSIX locale, don't use the system's codepage. */
990 if (strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0)
991 codeset = "";
994 if (codeset == NULL)
996 /* OS/2 has a function returning the locale's codepage as a number. */
997 if (DosQueryCp (sizeof (cp), cp, &cplen))
998 codeset = "";
999 else
1001 char buf[2 + 10 + 1];
1003 sprintf (buf, "CP%u", cp[0]);
1004 strcpy (resultbuf, buf);
1005 codeset = resultbuf;
1008 # else
1009 # error "Add code for other platforms here."
1010 # endif
1012 /* Resolve alias. */
1014 # ifdef alias_table_defined
1015 /* On some platforms, UTF-8 locales are the most frequently used ones.
1016 Speed up the common case and slow down the less common cases by
1017 testing for this case first. */
1018 # if defined __OpenBSD__ || (defined __APPLE__ && defined __MACH__) || defined __sun || defined __CYGWIN__
1019 if (strcmp (codeset, "UTF-8") == 0)
1020 goto done_table_lookup;
1021 else
1022 # endif
1024 const struct table_entry * const table = alias_table;
1025 size_t const table_size = sizeof (alias_table) / sizeof (struct table_entry);
1026 /* The table is sorted. Perform a binary search. */
1027 size_t hi = table_size;
1028 size_t lo = 0;
1030 while (lo < hi)
1032 /* Invariant:
1033 for i < lo, strcmp (table[i].alias, codeset) < 0,
1034 for i >= hi, strcmp (table[i].alias, codeset) > 0. */
1035 size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
1036 int cmp = strcmp (table[mid].alias, codeset);
1038 if (cmp < 0)
1039 lo = mid + 1;
1040 else if (cmp > 0)
1041 hi = mid;
1042 else
1044 /* Found an i with
1045 strcmp (table[i].alias, codeset) == 0. */
1046 codeset = table[mid].canonical;
1047 goto done_table_lookup;
1052 if (0)
1053 done_table_lookup: ;
1054 else
1055 # endif
1057 /* Did not find it in the table. */
1058 /* On Mac OS X, all modern locales use the UTF-8 encoding.
1059 BeOS and Haiku have a single locale, and it has UTF-8 encoding. */
1060 # if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
1061 codeset = "UTF-8";
1062 # else
1063 /* Don't return an empty string. GNU libc and GNU libiconv interpret
1064 the empty string as denoting "the locale's character encoding",
1065 thus GNU libiconv would call this function a second time. */
1066 if (codeset[0] == '\0')
1067 codeset = "ASCII";
1068 # endif
1071 #else
1072 /* On old systems which lack it, use setlocale or getenv. */
1073 const char *locale = NULL;
1075 /* But most old systems don't have a complete set of locales. Some
1076 (like DJGPP) have only the C locale. Therefore we don't use setlocale
1077 here; it would return "C" when it doesn't support the locale name the
1078 user has set. */
1079 # if 0
1080 locale = setlocale (LC_CTYPE, NULL);
1081 # endif
1082 if (locale == NULL || locale[0] == '\0')
1084 locale = getenv ("LC_ALL");
1086 if (locale == NULL || locale[0] == '\0')
1088 locale = getenv ("LC_CTYPE");
1090 if (locale == NULL || locale[0] == '\0')
1092 locale = getenv ("LANG");
1094 if (locale == NULL)
1095 locale = "";
1100 /* Map locale name to canonical encoding name. */
1102 # ifdef locale_table_defined
1103 const struct table_entry * const table = locale_table;
1104 size_t const table_size =
1105 sizeof (locale_table) / sizeof (struct table_entry);
1106 /* The table is sorted. Perform a binary search. */
1107 size_t hi = table_size;
1108 size_t lo = 0;
1110 while (lo < hi)
1112 /* Invariant:
1113 for i < lo, strcmp (table[i].locale, locale) < 0,
1114 for i >= hi, strcmp (table[i].locale, locale) > 0. */
1115 size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
1116 int cmp = strcmp (table[mid].locale, locale);
1118 if (cmp < 0)
1119 lo = mid + 1;
1120 else if (cmp > 0)
1121 hi = mid;
1122 else
1124 /* Found an i with
1125 strcmp (table[i].locale, locale) == 0. */
1126 codeset = table[mid].canonical;
1127 goto done_table_lookup;
1131 if (0)
1132 done_table_lookup: ;
1133 else
1134 # endif
1136 /* Did not find it in the table. */
1137 /* On Mac OS X, all modern locales use the UTF-8 encoding.
1138 BeOS and Haiku have a single locale, and it has UTF-8 encoding. */
1139 # if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
1140 codeset = "UTF-8";
1141 # else
1142 /* The canonical name cannot be determined. */
1143 /* Do not return an empty string. GNU libc and GNU libiconv interpret
1144 the empty string as denoting "the locale's character encoding",
1145 thus GNU libiconv would call this function a second time. */
1146 codeset = "ASCII";
1147 # endif
1150 #endif
1152 #ifdef DARWIN7
1153 /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8"
1154 (the default codeset) does not work when MB_CUR_MAX is 1. */
1155 if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1)
1156 codeset = "ASCII";
1157 #endif
1159 return codeset;