git-notify: Make the state file group writable
[monitoring-plugins.git] / gl / localcharset.c
bloba7ca94c1bbd4b174b27a8941e00eec9a53509c5c
/* Determine a canonical name for the current locale's character encoding.

   Copyright (C) 2000-2006, 2008-2009 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License along
   with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */

/* Written by Bruno Haible <bruno@clisp.org>.  */
21 #include <config.h>
23 /* Specification. */
24 #include "localcharset.h"
26 #include <stddef.h>
27 #include <stdio.h>
28 #include <string.h>
29 #include <stdlib.h>
31 #if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET
32 # define DARWIN7 /* Darwin 7 or newer, i.e. MacOS X 10.3 or newer */
33 #endif
35 #if defined _WIN32 || defined __WIN32__
36 # define WIN32_NATIVE
37 #endif
39 #if defined __EMX__
40 /* Assume EMX program runs on OS/2, even if compiled under DOS. */
41 # ifndef OS2
42 # define OS2
43 # endif
44 #endif
46 #if !defined WIN32_NATIVE
47 # if HAVE_LANGINFO_CODESET
48 # include <langinfo.h>
49 # else
50 # if 0 /* see comment below */
51 # include <locale.h>
52 # endif
53 # endif
54 # ifdef __CYGWIN__
55 # define WIN32_LEAN_AND_MEAN
56 # include <windows.h>
57 # endif
58 #elif defined WIN32_NATIVE
59 # define WIN32_LEAN_AND_MEAN
60 # include <windows.h>
61 #endif
62 #if defined OS2
63 # define INCL_DOS
64 # include <os2.h>
65 #endif
67 #if ENABLE_RELOCATABLE
68 # include "relocatable.h"
69 #else
70 # define relocate(pathname) (pathname)
71 #endif
73 /* Get LIBDIR. */
74 #ifndef LIBDIR
75 # include "configmake.h"
76 #endif
78 #if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__
79 /* Win32, Cygwin, OS/2, DOS */
80 # define ISSLASH(C) ((C) == '/' || (C) == '\\')
81 #endif
83 #ifndef DIRECTORY_SEPARATOR
84 # define DIRECTORY_SEPARATOR '/'
85 #endif
87 #ifndef ISSLASH
88 # define ISSLASH(C) ((C) == DIRECTORY_SEPARATOR)
89 #endif
91 #if HAVE_DECL_GETC_UNLOCKED
92 # undef getc
93 # define getc getc_unlocked
94 #endif
/* The following static variable is declared 'volatile' to avoid a
   possible multithread problem in the function get_charset_aliases.  If we
   are running in a threaded environment, and if two threads initialize
   'charset_aliases' simultaneously, both will produce the same value,
   and everything will be ok if the two assignments to 'charset_aliases'
   are atomic.  But I don't know what will happen if the two assignments
   mix.  */
#if __STDC__ != 1
/* Compilers that do not claim ISO C conformance may lack 'volatile'.  */
# define volatile /* empty */
#endif
/* Pointer to the contents of the charset.alias file, if it has already been
   read, else NULL.  Its format is:
   ALIAS_1 '\0' CANONICAL_1 '\0' ... ALIAS_n '\0' CANONICAL_n '\0' '\0'  */
static const char * volatile charset_aliases;
110 /* Return a pointer to the contents of the charset.alias file. */
111 static const char *
112 get_charset_aliases (void)
114 const char *cp;
116 cp = charset_aliases;
117 if (cp == NULL)
119 #if !(defined DARWIN7 || defined VMS || defined WIN32_NATIVE || defined __CYGWIN__)
120 FILE *fp;
121 const char *dir;
122 const char *base = "charset.alias";
123 char *file_name;
125 /* Make it possible to override the charset.alias location. This is
126 necessary for running the testsuite before "make install". */
127 dir = getenv ("CHARSETALIASDIR");
128 if (dir == NULL || dir[0] == '\0')
129 dir = relocate (LIBDIR);
131 /* Concatenate dir and base into freshly allocated file_name. */
133 size_t dir_len = strlen (dir);
134 size_t base_len = strlen (base);
135 int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1]));
136 file_name = (char *) malloc (dir_len + add_slash + base_len + 1);
137 if (file_name != NULL)
139 memcpy (file_name, dir, dir_len);
140 if (add_slash)
141 file_name[dir_len] = DIRECTORY_SEPARATOR;
142 memcpy (file_name + dir_len + add_slash, base, base_len + 1);
146 if (file_name == NULL || (fp = fopen (file_name, "r")) == NULL)
147 /* Out of memory or file not found, treat it as empty. */
148 cp = "";
149 else
151 /* Parse the file's contents. */
152 char *res_ptr = NULL;
153 size_t res_size = 0;
155 for (;;)
157 int c;
158 char buf1[50+1];
159 char buf2[50+1];
160 size_t l1, l2;
161 char *old_res_ptr;
163 c = getc (fp);
164 if (c == EOF)
165 break;
166 if (c == '\n' || c == ' ' || c == '\t')
167 continue;
168 if (c == '#')
170 /* Skip comment, to end of line. */
172 c = getc (fp);
173 while (!(c == EOF || c == '\n'));
174 if (c == EOF)
175 break;
176 continue;
178 ungetc (c, fp);
179 if (fscanf (fp, "%50s %50s", buf1, buf2) < 2)
180 break;
181 l1 = strlen (buf1);
182 l2 = strlen (buf2);
183 old_res_ptr = res_ptr;
184 if (res_size == 0)
186 res_size = l1 + 1 + l2 + 1;
187 res_ptr = (char *) malloc (res_size + 1);
189 else
191 res_size += l1 + 1 + l2 + 1;
192 res_ptr = (char *) realloc (res_ptr, res_size + 1);
194 if (res_ptr == NULL)
196 /* Out of memory. */
197 res_size = 0;
198 if (old_res_ptr != NULL)
199 free (old_res_ptr);
200 break;
202 strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1);
203 strcpy (res_ptr + res_size - (l2 + 1), buf2);
205 fclose (fp);
206 if (res_size == 0)
207 cp = "";
208 else
210 *(res_ptr + res_size) = '\0';
211 cp = res_ptr;
215 if (file_name != NULL)
216 free (file_name);
218 #else
220 # if defined DARWIN7
221 /* To avoid the trouble of installing a file that is shared by many
222 GNU packages -- many packaging systems have problems with this --,
223 simply inline the aliases here. */
224 cp = "ISO8859-1" "\0" "ISO-8859-1" "\0"
225 "ISO8859-2" "\0" "ISO-8859-2" "\0"
226 "ISO8859-4" "\0" "ISO-8859-4" "\0"
227 "ISO8859-5" "\0" "ISO-8859-5" "\0"
228 "ISO8859-7" "\0" "ISO-8859-7" "\0"
229 "ISO8859-9" "\0" "ISO-8859-9" "\0"
230 "ISO8859-13" "\0" "ISO-8859-13" "\0"
231 "ISO8859-15" "\0" "ISO-8859-15" "\0"
232 "KOI8-R" "\0" "KOI8-R" "\0"
233 "KOI8-U" "\0" "KOI8-U" "\0"
234 "CP866" "\0" "CP866" "\0"
235 "CP949" "\0" "CP949" "\0"
236 "CP1131" "\0" "CP1131" "\0"
237 "CP1251" "\0" "CP1251" "\0"
238 "eucCN" "\0" "GB2312" "\0"
239 "GB2312" "\0" "GB2312" "\0"
240 "eucJP" "\0" "EUC-JP" "\0"
241 "eucKR" "\0" "EUC-KR" "\0"
242 "Big5" "\0" "BIG5" "\0"
243 "Big5HKSCS" "\0" "BIG5-HKSCS" "\0"
244 "GBK" "\0" "GBK" "\0"
245 "GB18030" "\0" "GB18030" "\0"
246 "SJIS" "\0" "SHIFT_JIS" "\0"
247 "ARMSCII-8" "\0" "ARMSCII-8" "\0"
248 "PT154" "\0" "PT154" "\0"
249 /*"ISCII-DEV" "\0" "?" "\0"*/
250 "*" "\0" "UTF-8" "\0";
251 # endif
253 # if defined VMS
254 /* To avoid the troubles of an extra file charset.alias_vms in the
255 sources of many GNU packages, simply inline the aliases here. */
256 /* The list of encodings is taken from the OpenVMS 7.3-1 documentation
257 "Compaq C Run-Time Library Reference Manual for OpenVMS systems"
258 section 10.7 "Handling Different Character Sets". */
259 cp = "ISO8859-1" "\0" "ISO-8859-1" "\0"
260 "ISO8859-2" "\0" "ISO-8859-2" "\0"
261 "ISO8859-5" "\0" "ISO-8859-5" "\0"
262 "ISO8859-7" "\0" "ISO-8859-7" "\0"
263 "ISO8859-8" "\0" "ISO-8859-8" "\0"
264 "ISO8859-9" "\0" "ISO-8859-9" "\0"
265 /* Japanese */
266 "eucJP" "\0" "EUC-JP" "\0"
267 "SJIS" "\0" "SHIFT_JIS" "\0"
268 "DECKANJI" "\0" "DEC-KANJI" "\0"
269 "SDECKANJI" "\0" "EUC-JP" "\0"
270 /* Chinese */
271 "eucTW" "\0" "EUC-TW" "\0"
272 "DECHANYU" "\0" "DEC-HANYU" "\0"
273 "DECHANZI" "\0" "GB2312" "\0"
274 /* Korean */
275 "DECKOREAN" "\0" "EUC-KR" "\0";
276 # endif
278 # if defined WIN32_NATIVE || defined __CYGWIN__
279 /* To avoid the troubles of installing a separate file in the same
280 directory as the DLL and of retrieving the DLL's directory at
281 runtime, simply inline the aliases here. */
283 cp = "CP936" "\0" "GBK" "\0"
284 "CP1361" "\0" "JOHAB" "\0"
285 "CP20127" "\0" "ASCII" "\0"
286 "CP20866" "\0" "KOI8-R" "\0"
287 "CP20936" "\0" "GB2312" "\0"
288 "CP21866" "\0" "KOI8-RU" "\0"
289 "CP28591" "\0" "ISO-8859-1" "\0"
290 "CP28592" "\0" "ISO-8859-2" "\0"
291 "CP28593" "\0" "ISO-8859-3" "\0"
292 "CP28594" "\0" "ISO-8859-4" "\0"
293 "CP28595" "\0" "ISO-8859-5" "\0"
294 "CP28596" "\0" "ISO-8859-6" "\0"
295 "CP28597" "\0" "ISO-8859-7" "\0"
296 "CP28598" "\0" "ISO-8859-8" "\0"
297 "CP28599" "\0" "ISO-8859-9" "\0"
298 "CP28605" "\0" "ISO-8859-15" "\0"
299 "CP38598" "\0" "ISO-8859-8" "\0"
300 "CP51932" "\0" "EUC-JP" "\0"
301 "CP51936" "\0" "GB2312" "\0"
302 "CP51949" "\0" "EUC-KR" "\0"
303 "CP51950" "\0" "EUC-TW" "\0"
304 "CP54936" "\0" "GB18030" "\0"
305 "CP65001" "\0" "UTF-8" "\0";
306 # endif
307 #endif
309 charset_aliases = cp;
312 return cp;
/* Determine the current locale's character encoding, and canonicalize it
   into one of the canonical names listed in config.charset.
   The result must not be freed; it is statically allocated.
   If the canonical name cannot be determined, the result is a non-canonical
   name.  The result is never NULL and never the empty string: when nothing
   can be determined, "ASCII" is returned.  */

#ifdef STATIC
STATIC
#endif
const char *
locale_charset (void)
{
  const char *codeset;
  const char *aliases;

#if !(defined WIN32_NATIVE || defined OS2)

# if HAVE_LANGINFO_CODESET

  /* Most systems support nl_langinfo (CODESET) nowadays.  */
  codeset = nl_langinfo (CODESET);

#  ifdef __CYGWIN__
  /* Cygwin 2006 does not have locales.  nl_langinfo (CODESET) always
     returns "US-ASCII".  As long as this is not fixed, return the suffix
     of the locale name from the environment variables (if present) or
     the codepage as a number.  */
  if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
    {
      const char *locale;
      static char buf[2 + 10 + 1];

      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
      if (locale != NULL && locale[0] != '\0')
        {
          /* If the locale name contains an encoding after the dot, return
             it.  */
          const char *dot = strchr (locale, '.');

          if (dot != NULL)
            {
              const char *modifier;

              dot++;
              /* Look for the possible @... trailer and remove it, if any.  */
              modifier = strchr (dot, '@');
              if (modifier == NULL)
                return dot;
              /* modifier >= dot here, so the difference is non-negative.  */
              if ((size_t) (modifier - dot) < sizeof (buf))
                {
                  memcpy (buf, dot, modifier - dot);
                  buf [modifier - dot] = '\0';
                  return buf;
                }
            }
        }

      /* Woe32 has a function returning the locale's codepage as a number.  */
      sprintf (buf, "CP%u", GetACP ());
      codeset = buf;
    }
#  endif

# else

  /* On old systems which lack it, use setlocale or getenv.  */
  const char *locale = NULL;

  /* But most old systems don't have a complete set of locales.  Some
     (like SunOS 4 or DJGPP) have only the C locale.  Therefore we don't
     use setlocale here; it would return "C" when it doesn't support the
     locale name the user has set.  */
#  if 0
  locale = setlocale (LC_CTYPE, NULL);
#  endif
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
    }

  /* On some old systems, one used to set locale = "iso8859_1".  On others,
     you set it to "language_COUNTRY.charset".  In any case, we resolve it
     through the charset.alias file.  */
  codeset = locale;

# endif

#elif defined WIN32_NATIVE

  static char buf[2 + 10 + 1];

  /* Woe32 has a function returning the locale's codepage as a number.  */
  sprintf (buf, "CP%u", GetACP ());
  codeset = buf;

#elif defined OS2

  const char *locale;
  static char buf[2 + 10 + 1];
  ULONG cp[3];
  ULONG cplen;

  /* Allow user to override the codeset, as set in the operating system,
     with standard language environment variables.  */
  locale = getenv ("LC_ALL");
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_CTYPE");
      if (locale == NULL || locale[0] == '\0')
        locale = getenv ("LANG");
    }
  if (locale != NULL && locale[0] != '\0')
    {
      /* If the locale name contains an encoding after the dot, return it.  */
      const char *dot = strchr (locale, '.');

      if (dot != NULL)
        {
          const char *modifier;

          dot++;
          /* Look for the possible @... trailer and remove it, if any.  */
          modifier = strchr (dot, '@');
          if (modifier == NULL)
            return dot;
          /* modifier >= dot here, so the difference is non-negative.  */
          if ((size_t) (modifier - dot) < sizeof (buf))
            {
              memcpy (buf, dot, modifier - dot);
              buf [modifier - dot] = '\0';
              return buf;
            }
        }

      /* Resolve through the charset.alias file.  */
      codeset = locale;
    }
  else
    {
      /* OS/2 has a function returning the locale's codepage as a number.  */
      if (DosQueryCp (sizeof (cp), cp, &cplen))
        codeset = "";
      else
        {
          /* "%u" requires an unsigned int argument; the cast makes the
             argument type match the conversion regardless of how ULONG is
             defined, and keeps the output within buf.  */
          sprintf (buf, "CP%u", (unsigned int) cp[0]);
          codeset = buf;
        }
    }

#endif

  if (codeset == NULL)
    /* The canonical name cannot be determined.  */
    codeset = "";

  /* Resolve alias.  The table is a list of (alias, canonical) string pairs,
     so each step skips two NUL-terminated strings.  An alias of "*" matches
     any codeset.  */
  for (aliases = get_charset_aliases ();
       *aliases != '\0';
       aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
    if (strcmp (codeset, aliases) == 0
        || (aliases[0] == '*' && aliases[1] == '\0'))
      {
        codeset = aliases + strlen (aliases) + 1;
        break;
      }

  /* Don't return an empty string.  GNU libc and GNU libiconv interpret
     the empty string as denoting "the locale's character encoding",
     thus GNU libiconv would call this function a second time.  */
  if (codeset[0] == '\0')
    codeset = "ASCII";

  return codeset;
}