iconv: Bail out of the loop when an illegal sequence of bytes occurs.
[elinks/elinks-j605.git] / src / intl / gettext / loadmsgcat.c
blob0eac283169d638f48d2a1d0b1acbee355b9c868d
1 /* Load needed message catalogs.
2 Copyright (C) 1995-1999, 2000, 2001 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Modified on 2007-07-02 by Kalle Olavi Niemitalo. */
20 /* Tell glibc's <string.h> to provide a prototype for mempcpy().
21 This must come before <config.h> because <config.h> may include
22 <features.h>, and once <features.h> has been included, it's too late. */
23 #ifndef _GNU_SOURCE
24 #define _GNU_SOURCE 1
25 #endif
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif
31 #include <ctype.h>
32 #include <errno.h>
33 #include <fcntl.h>
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <stdlib.h>
37 #include <string.h>
39 #if defined HAVE_UNISTD_H
40 #include <unistd.h>
41 #endif
44 #if (defined HAVE_MMAP && defined HAVE_MUNMAP && !defined DISALLOW_MMAP)
45 #include <sys/mman.h>
46 /* Use a custom macro instead of overloading HAVE_MMAP, because the
47 * following #include directives may cause "config.h" to be included
48 * again (bug 960). It might be good to remove #include "config.h"
49 * directives from header files and keep them in *.c files only, but
50 * that seems too risky for the stable elinks-0.11 branch. */
51 #undef LOADMSGCAT_USE_MMAP
52 #define LOADMSGCAT_USE_MMAP 1
53 #else
54 #undef LOADMSGCAT_USE_MMAP
55 #endif
57 #include "elinks.h"
59 #include "intl/gettext/gettext.h"
60 #include "intl/gettext/gettextP.h"
61 #include "main/main.h"
62 #include "util/memory.h"
63 #include "util/string.h"
66 /* For systems that distinguish between text and binary I/O.
67 O_BINARY is usually declared in <fcntl.h>. */
68 #if !defined O_BINARY && defined _O_BINARY
69 /* For MSC-compatible compilers. */
70 #define O_BINARY _O_BINARY
71 #define O_TEXT _O_TEXT
72 #endif
73 #ifdef __BEOS__
74 /* BeOS 5 has O_BINARY and O_TEXT, but they have no effect. */
75 #undef O_BINARY
76 #undef O_TEXT
77 #endif
78 /* On reasonable systems, binary I/O is the default. */
79 #ifndef O_BINARY
80 #define O_BINARY 0
81 #endif
83 /* We need a sign, whether a new catalog was loaded, which can be associated
84 with all translations. This is important if the translations are
85 cached by one of GCC's features. */
86 int _nl_msg_cat_cntr;
89 /* For compilers without support for ISO C 99 struct/union initializers:
90 Initialization at run-time. */
92 static struct expression plvar;
93 static struct expression plone;
94 static struct expression germanic_plural;
96 static void
97 init_germanic_plural(void)
99 if (plone.val.num == 0) {
100 plvar.nargs = 0;
101 plvar.operation = var;
103 plone.nargs = 0;
104 plone.operation = num;
105 plone.val.num = 1;
107 germanic_plural.nargs = 2;
108 germanic_plural.operation = not_equal;
109 germanic_plural.val.args[0] = &plvar;
110 germanic_plural.val.args[1] = &plone;
114 #define INIT_GERMANIC_PLURAL() init_germanic_plural ()
116 /* Initialize the codeset dependent parts of an opened message catalog.
117 Return the header entry. */
118 const unsigned char *
119 _nl_init_domain_conv(struct loaded_l10nfile *domain_file,
120 struct loaded_domain *domain,
121 struct binding *domainbinding)
123 /* Find out about the character set the file is encoded with.
124 This can be found (in textual form) in the entry "". If this
125 entry does not exist or if this does not contain the `charset='
126 information, we will assume the charset matches the one the
127 current locale and we don't have to perform any conversion. */
128 unsigned char *nullentry;
129 size_t nullentrylen;
131 /* Preinitialize fields, to avoid recursion during _nl_find_msg. */
132 domain->codeset_cntr =
133 (domainbinding != NULL ? domainbinding->codeset_cntr : 0);
134 #if HAVE_ICONV
135 domain->conv = (iconv_t) - 1;
136 #endif
137 domain->conv_tab = NULL;
139 /* Get the header entry. */
140 nullentry = _nl_find_msg(domain_file, domainbinding, "", &nullentrylen);
142 if (nullentry != NULL) {
143 #if HAVE_ICONV
144 const unsigned char *charsetstr;
146 charsetstr = strstr(nullentry, "charset=");
147 if (charsetstr != NULL) {
148 size_t len;
149 unsigned char *charset;
150 const unsigned char *outcharset;
152 charsetstr += strlen("charset=");
153 len = strcspn(charsetstr, " \t\n");
155 charset = (unsigned char *) fmem_alloc(len + 1);
156 *((unsigned char *) mempcpy(charset, charsetstr, len)) = '\0';
158 /* The output charset should normally be determined by the
159 locale. But sometimes the locale is not used or not correctly
160 set up, so we provide a possibility for the user to override
161 this. Moreover, the value specified through
162 bind_textdomain_codeset overrides both. */
163 if (domainbinding != NULL
164 && domainbinding->codeset != NULL)
165 outcharset = domainbinding->codeset;
166 else {
167 outcharset = getenv("OUTPUT_CHARSET");
168 if (outcharset == NULL || outcharset[0] == '\0') {
169 extern const unsigned char *elinks_locale_charset(void);
171 outcharset = elinks_locale_charset();
175 /* When using GNU libiconv, we want to use transliteration. */
176 #if _LIBICONV_VERSION >= 0x0105
177 len = strlen(outcharset);
179 unsigned char *tmp = (unsigned char *) fmem_alloc(len + 10 + 1);
181 memcpy(tmp, outcharset, len);
182 memcpy(tmp + len, "//TRANSLIT", 10 + 1);
183 outcharset = tmp;
185 #endif
186 domain->conv = iconv_open(outcharset, charset);
187 #if _LIBICONV_VERSION >= 0x0105
188 fmem_free((void *) outcharset);
189 #endif
191 fmem_free(charset);
193 #endif /* HAVE_ICONV */
196 return nullentry;
199 /* Frees the codeset dependent parts of an opened message catalog. */
200 void
201 _nl_free_domain_conv(struct loaded_domain *domain)
203 if (domain->conv_tab != NULL && domain->conv_tab != (unsigned char **) -1)
204 free(domain->conv_tab);
205 #if HAVE_ICONV
206 if (domain->conv != (iconv_t) - 1)
207 iconv_close(domain->conv);
208 #endif
211 /* We cannot use our memory functions here because of circular library
212 * dependencies. */
214 /* This is hacked for ELinks - we want to look up for the translations at the
215 * correct place even if we are being ran from the source/build tree. */
216 static struct string *
217 add_filename_to_string(struct string *str, struct loaded_l10nfile *domain_file)
219 unsigned char *slash = strrchr(program.path, '/');
220 size_t dirnamelen = (slash ? slash - program.path + 1 : 0);
222 /* Check if elinks is being run from the source tree. */
223 if (dirnamelen < 4
224 || strncmp(program.path + dirnamelen - 4, "src", 3))
225 return NULL;
227 if ((dirnamelen && !add_bytes_to_string(str, program.path, dirnamelen))
228 || !add_to_string(str, "../po/")
229 || !add_bytes_to_string(str,
230 (unsigned char *) domain_file->langdirname,
231 domain_file->langdirnamelen)
232 || !add_to_string(str, ".gmo"))
233 return NULL;
235 return str;
238 /* Load the message catalogs specified by FILENAME. If it is no valid
239 message catalog do nothing. */
240 void
241 _nl_load_domain(struct loaded_l10nfile *domain_file,
242 struct binding *domainbinding)
244 int fd = -1;
245 size_t size;
246 struct stat st;
247 struct mo_file_header *data = (struct mo_file_header *) -1;
248 int use_mmap = 0;
249 struct loaded_domain *domain;
250 const unsigned char *nullentry;
252 domain_file->decided = 1;
253 domain_file->data = NULL;
255 /* Note that it would be useless to store domainbinding in domain_file
256 because domainbinding might be == NULL now but != NULL later (after
257 a call to bind_textdomain_codeset). */
260 struct string filename;
262 if (init_string(&filename)
263 && add_filename_to_string(&filename, domain_file)) {
264 fd = open(filename.source, O_RDONLY | O_BINARY);
267 done_string(&filename);
269 if (fd != -1)
270 goto source_success;
273 /* If the record does not represent a valid locale the FILENAME
274 might be NULL. This can happen when according to the given
275 specification the locale file name is different for XPG and CEN
276 syntax. */
277 if (domain_file->filename == NULL)
278 return;
280 /* Try to open the addressed file. */
281 fd = open(domain_file->filename, O_RDONLY | O_BINARY);
282 if (fd == -1)
283 return;
285 source_success:
287 /* We must know about the size of the file. */
288 if (fstat(fd, &st) != 0
289 || (size = (size_t) st.st_size) != st.st_size
290 || (size < sizeof(struct mo_file_header))) {
291 /* Something went wrong. */
292 close(fd);
293 return;
295 #ifdef LOADMSGCAT_USE_MMAP
296 /* Now we are ready to load the file. If mmap() is available we try
297 this first. If not available or it failed we try to load it. */
298 data = (struct mo_file_header *) mmap(NULL, size, PROT_READ,
299 MAP_PRIVATE, fd, 0);
301 if (data != (struct mo_file_header *) -1) {
302 /* mmap() call was successful. */
303 close(fd);
304 use_mmap = 1;
306 #endif
308 /* If the data is not yet available (i.e. mmap'ed) we try to load
309 it manually. */
310 if (data == (struct mo_file_header *) -1) {
311 size_t to_read;
312 unsigned char *read_ptr;
314 data = (struct mo_file_header *) malloc(size);
315 if (data == NULL)
316 return;
318 to_read = size;
319 read_ptr = (unsigned char *) data;
320 do {
321 ssize_t nb = safe_read(fd, read_ptr, to_read);
323 if (nb <= 0) {
324 close(fd);
325 return;
327 read_ptr += nb;
328 to_read -= nb;
329 } while (to_read > 0);
331 close(fd);
334 /* Using the magic number we can test whether it really is a message
335 catalog file. */
336 if (data->magic != _MAGIC && data->magic != _MAGIC_SWAPPED) {
337 /* The magic number is wrong: not a message catalog file. */
338 #ifdef LOADMSGCAT_USE_MMAP
339 if (use_mmap)
340 munmap((void *) data, size);
341 else
342 #endif
343 free(data);
344 return;
347 domain = (struct loaded_domain *) malloc(sizeof(struct loaded_domain));
348 if (domain == NULL)
349 return;
350 domain_file->data = domain;
352 domain->data = (unsigned char *) data;
353 domain->use_mmap = use_mmap;
354 domain->mmap_size = size;
355 domain->must_swap = data->magic != _MAGIC;
357 /* Fill in the information about the available tables. */
358 switch (W(domain->must_swap, data->revision)) {
359 case 0:
360 domain->nstrings = W(domain->must_swap, data->nstrings);
361 domain->orig_tab = (struct string_desc *)
362 ((unsigned char *) data +
363 W(domain->must_swap, data->orig_tab_offset));
364 domain->trans_tab = (struct string_desc *)
365 ((unsigned char *) data +
366 W(domain->must_swap, data->trans_tab_offset));
367 domain->hash_size =
368 W(domain->must_swap, data->hash_tab_size);
369 domain->hash_tab = (nls_uint32 *)
370 ((unsigned char *) data +
371 W(domain->must_swap, data->hash_tab_offset));
372 break;
373 default:
374 /* This is an invalid revision. */
375 #ifdef LOADMSGCAT_USE_MMAP
376 if (use_mmap)
377 munmap((void *) data, size);
378 else
379 #endif
380 free(data);
381 free(domain);
382 domain_file->data = NULL;
383 return;
386 /* Now initialize the character set converter from the character set
387 the file is encoded with (found in the header entry) to the domain's
388 specified character set or the locale's character set. */
389 nullentry = _nl_init_domain_conv(domain_file, domain, domainbinding);
391 /* Also look for a plural specification. */
392 if (nullentry != NULL) {
393 const unsigned char *plural;
394 const unsigned char *nplurals;
396 plural = strstr(nullentry, "plural=");
397 nplurals = strstr(nullentry, "nplurals=");
398 if (plural == NULL || nplurals == NULL)
399 goto no_plural;
400 else {
401 /* First get the number. */
402 unsigned char *endp;
403 unsigned long int n;
404 struct parse_args args;
406 nplurals += 9;
407 skip_space(nplurals);
409 for (endp = (unsigned char *) nplurals, n = 0;
410 isdigit(*endp); endp++)
411 n = n * 10 + (*endp - '0');
413 domain->nplurals = n;
414 if (nplurals == endp)
415 goto no_plural;
417 /* Due to the restrictions bison imposes onto the interface of the
418 scanner function we have to put the input string and the result
419 passed up from the parser into the same structure which address
420 is passed down to the parser. */
421 plural += 7;
422 args.cp = plural;
423 if (gettext__parse(&args) != 0)
424 goto no_plural;
425 domain->plural = args.res;
427 } else {
428 /* By default we are using the Germanic form: singular form only
429 for `one', the plural form otherwise. Yes, this is also what
430 English is using since English is a Germanic language. */
431 no_plural:
432 INIT_GERMANIC_PLURAL();
433 domain->plural = &germanic_plural;
434 domain->nplurals = 2;
#if 0
/* Currently compiled out.  Undo _nl_load_domain() for DOMAIN: release
   the parsed plural expression (unless it is the shared germanic_plural
   default), the codeset conversion state, the catalog contents and
   finally DOMAIN itself. */
void
_nl_unload_domain(struct loaded_domain *domain)
{
	if (domain->plural != &germanic_plural)
		__gettext_free_exp(domain->plural);

	_nl_free_domain_conv(domain);

	/* Use the same macro that _nl_load_domain() uses to decide whether
	   mmap() is attempted, so that mapped contents are never handed to
	   free(). */
#ifdef LOADMSGCAT_USE_MMAP
	if (domain->use_mmap)
		munmap((void *) domain->data, domain->mmap_size);
	else
#endif /* LOADMSGCAT_USE_MMAP */
		free((void *) domain->data);

	free(domain);
}
#endif