iconv: Bail out of the loop when an illegal sequence of bytes occurs.
[elinks/elinks-j605.git] / src / intl / gettext / l10nflist.c
blob4d12aa95e80c8cdd65a5c3b42f9c728ac2b76658
1 /* Copyright (C) 1995-1999, 2000, 2001 Free Software Foundation, Inc.
2 Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Tell glibc's <string.h> to provide a prototype for mempcpy().
19 This must come before <config.h> because <config.h> may include
20 <features.h>, and once <features.h> has been included, it's too late. */
21 #ifndef _GNU_SOURCE
22 #define _GNU_SOURCE 1
23 #endif
25 #ifdef HAVE_CONFIG_H
26 #include "config.h"
27 #endif
29 #include <string.h>
31 #include <ctype.h>
32 #include <sys/types.h>
33 #include <stdlib.h>
35 #include "elinks.h"
37 #include "intl/gettext/loadinfo.h"
38 #include "util/conv.h"
39 #include "util/string.h"
41 /* Awful hack to permit compilation under cygwin and its broken configure.
42 * Configure script detects these functions, but compilation clashes on
43 * implicit declarations of them... So we force use of internal ones.
44 * Cygwin argz.h do not contain any declaration for these, nor any other
45 * header while they are available in some linked libs.
46 * Feel free to provide a better fix if any. --Zas */
47 #ifdef HAVE_SYS_CYGWIN_H
48 #undef HAVE___ARGZ_STRINGIFY
49 #undef HAVE___ARGZ_COUNT
50 #undef HAVE___ARGZ_NEXT
51 #else
52 #if defined HAVE_ARGZ_H
53 #include <argz.h>
54 #endif
55 #endif
57 /* On some strange systems still no definition of NULL is found. Sigh! */
58 #ifndef NULL
59 #if defined __STDC__ && __STDC__
60 #define NULL ((void *) 0)
61 #else
62 #define NULL 0
63 #endif
64 #endif
/* Define functions which are usually not available. */
#if !defined HAVE___ARGZ_COUNT
/* Returns the number of strings in ARGZ.
 *
 * ARGZ is a vector of LEN bytes made of consecutive NUL-terminated strings.
 * Each string is consumed together with its terminator until all LEN bytes
 * have been accounted for, so LEN must cover whole strings only. */
static size_t argz_count__(const unsigned char *argz, size_t len);

static size_t
argz_count__(const unsigned char *argz, size_t len)
{
	size_t count = 0;

	while (len > 0) {
		/* strlen() wants a plain char pointer. */
		size_t part_len = strlen((const char *) argz);

		/* Skip the string and its terminating NUL. */
		argz += part_len + 1;
		len -= part_len + 1;
		count++;
	}

	return count;
}

#undef __argz_count
#define __argz_count(argz, len) argz_count__ (argz, len)
#endif /* !HAVE___ARGZ_COUNT */
#if !defined HAVE___ARGZ_STRINGIFY
/* Make '\0' separated arg vector ARGZ printable by converting all the '\0's
   except the last into the character SEP.
 *
 * ARGZ is modified in place; LEN is its total size in bytes including the
 * final terminator. */
static void argz_stringify__(unsigned char *argz, size_t len, int sep);

static void
argz_stringify__(unsigned char *argz, size_t len, int sep)
{
	while (len > 0) {
		/* strlen() wants a plain char pointer. */
		size_t part_len = strlen((const char *) argz);

		argz += part_len;
		len -= part_len + 1;
		/* Only inner terminators are replaced; the last one stays
		 * so that the result is a regular C string. */
		if (len > 0)
			*argz++ = sep;
	}
}

#undef __argz_stringify
#define __argz_stringify(argz, len, sep) argz_stringify__ (argz, len, sep)
#endif /* !HAVE___ARGZ_STRINGIFY */
#if !defined HAVE___ARGZ_NEXT
/* Iterate over the strings of the '\0' separated vector ARGZ of ARGZ_LEN
 * bytes.
 *
 * With ENTRY == NULL the first string is returned (or NULL when ARGZ_LEN is
 * zero).  Otherwise ENTRY must point at a string inside ARGZ and the string
 * following it is returned, or NULL when ENTRY was the last one. */
static unsigned char *argz_next__(unsigned char *argz, size_t argz_len,
				  const unsigned char *entry);

static unsigned char *
argz_next__(unsigned char *argz, size_t argz_len, const unsigned char *entry)
{
	if (entry) {
		/* Step over the current string and its terminator. */
		if (entry < argz + argz_len)
			entry = (const unsigned char *)
				strchr((const char *) entry, '\0') + 1;

		return entry >= argz + argz_len ? NULL : (unsigned char *) entry;
	} else if (argz_len > 0)
		return argz;
	else
		return NULL;
}

#undef __argz_next
#define __argz_next(argz, len, entry) argz_next__ (argz, len, entry)
#endif /* !HAVE___ARGZ_NEXT */
/* Return number of bits set in X.
 *
 * The bits are summed pairwise: first within 2-bit groups, then 4-bit
 * groups, and finally the partial sums of the nibbles and bytes are
 * added together. */
static inline int
pop(int x)
{
	/* We assume that no more than 16 bits are used. */
	x = ((x & ~0x5555) >> 1) + (x & 0x5555);
	x = ((x & ~0x3333) >> 2) + (x & 0x3333);
	x = ((x >> 4) + x) & 0x0f0f;
	x = ((x >> 8) + x) & 0xff;

	return x;
}
148 struct loaded_l10nfile *
149 _nl_make_l10nflist(struct loaded_l10nfile **l10nfile_list,
150 const unsigned char *dirlist,
151 size_t dirlist_len,
152 int mask,
153 const unsigned char *language,
154 const unsigned char *territory,
155 const unsigned char *codeset,
156 const unsigned char *normalized_codeset,
157 const unsigned char *modifier,
158 const unsigned char *special,
159 const unsigned char *sponsor,
160 const unsigned char *revision,
161 const unsigned char *filename,
162 int do_allocate)
164 unsigned char *abs_filename, *abs_langdirname;
165 int abs_langdirnamelen;
166 struct loaded_l10nfile *last = NULL;
167 struct loaded_l10nfile *retval;
168 unsigned char *cp;
169 size_t entries;
170 int cnt;
172 /* Allocate room for the full file name. */
173 abs_filename = (unsigned char *) malloc(dirlist_len + strlen(language)
174 + ((mask & TERRITORY) != 0
175 ? strlen(territory) + 1 : 0)
176 + ((mask & XPG_CODESET) != 0
177 ? strlen(codeset) + 1 : 0)
178 + ((mask & XPG_NORM_CODESET) != 0
179 ? strlen(normalized_codeset) + 1 : 0)
180 + (((mask & XPG_MODIFIER) != 0
181 || (mask & CEN_AUDIENCE) != 0)
182 ? strlen(modifier) + 1 : 0)
183 + ((mask & CEN_SPECIAL) != 0
184 ? strlen(special) + 1 : 0)
185 + (((mask & CEN_SPONSOR) != 0
186 || (mask & CEN_REVISION) != 0)
187 ? (1 + ((mask & CEN_SPONSOR) != 0
188 ? strlen(sponsor) + 1 : 0)
189 + ((mask & CEN_REVISION) != 0
190 ? strlen(revision) +
191 1 : 0)) : 0)
192 + 1 + strlen(filename) + 1);
194 if (abs_filename == NULL)
195 return NULL;
197 retval = NULL;
198 last = NULL;
200 /* Construct file name. */
201 memcpy(abs_filename, dirlist, dirlist_len);
202 __argz_stringify(abs_filename, dirlist_len, PATH_SEPARATOR);
203 cp = abs_filename + (dirlist_len - 1);
204 *cp++ = '/';
205 abs_langdirname = cp;
206 cp = stpcpy(cp, language);
208 if ((mask & TERRITORY) != 0) {
209 *cp++ = '_';
210 cp = stpcpy(cp, territory);
212 if ((mask & XPG_CODESET) != 0) {
213 *cp++ = '.';
214 cp = stpcpy(cp, codeset);
216 if ((mask & XPG_NORM_CODESET) != 0) {
217 *cp++ = '.';
218 cp = stpcpy(cp, normalized_codeset);
220 if ((mask & (XPG_MODIFIER | CEN_AUDIENCE)) != 0) {
221 /* This component can be part of both syntaces but has different
222 leading characters. For CEN we use `+', else `@'. */
223 *cp++ = (mask & CEN_AUDIENCE) != 0 ? '+' : '@';
224 cp = stpcpy(cp, modifier);
226 if ((mask & CEN_SPECIAL) != 0) {
227 *cp++ = '+';
228 cp = stpcpy(cp, special);
230 if ((mask & (CEN_SPONSOR | CEN_REVISION)) != 0) {
231 *cp++ = ',';
232 if ((mask & CEN_SPONSOR) != 0)
233 cp = stpcpy(cp, sponsor);
234 if ((mask & CEN_REVISION) != 0) {
235 *cp++ = '_';
236 cp = stpcpy(cp, revision);
239 abs_langdirnamelen = cp - abs_langdirname;
241 *cp++ = '/';
242 stpcpy(cp, filename);
244 /* Look in list of already loaded domains whether it is already
245 available. */
246 last = NULL;
247 for (retval = *l10nfile_list; retval != NULL; retval = retval->next)
248 if (retval->filename != NULL) {
249 int compare = strcmp(retval->filename, abs_filename);
251 if (compare == 0)
252 /* We found it! */
253 break;
254 if (compare < 0) {
255 /* It's not in the list. */
256 retval = NULL;
257 break;
260 last = retval;
263 if (retval != NULL || do_allocate == 0) {
264 free(abs_filename);
265 return retval;
268 retval = (struct loaded_l10nfile *)
269 malloc(sizeof(*retval) + (__argz_count(dirlist, dirlist_len)
270 * (1 << pop(mask))
271 * sizeof(struct loaded_l10nfile *)));
272 if (retval == NULL)
273 return NULL;
275 retval->filename = abs_filename;
276 retval->langdirname = abs_langdirname;
277 retval->langdirnamelen = abs_langdirnamelen;
278 retval->decided = (__argz_count(dirlist, dirlist_len) != 1
279 || ((mask & XPG_CODESET) != 0
280 && (mask & XPG_NORM_CODESET) != 0));
281 retval->data = NULL;
283 if (last == NULL) {
284 retval->next = *l10nfile_list;
285 *l10nfile_list = retval;
286 } else {
287 retval->next = last->next;
288 last->next = retval;
291 entries = 0;
292 /* If the DIRLIST is a real list the RETVAL entry corresponds not to
293 a real file. So we have to use the DIRLIST separation mechanism
294 of the inner loop. */
295 cnt = __argz_count(dirlist, dirlist_len) == 1 ? mask - 1 : mask;
296 for (; cnt >= 0; --cnt)
297 if ((cnt & ~mask) == 0
298 && ((cnt & CEN_SPECIFIC) == 0 || (cnt & XPG_SPECIFIC) == 0)
299 && ((cnt & XPG_CODESET) == 0
300 || (cnt & XPG_NORM_CODESET) == 0)) {
301 /* Iterate over all elements of the DIRLIST. */
302 unsigned char *dir = NULL;
304 while ((dir =
305 __argz_next((unsigned char *) dirlist, dirlist_len, dir))
306 != NULL)
307 retval->successor[entries++]
308 = _nl_make_l10nflist(l10nfile_list, dir,
309 strlen(dir) + 1,
310 cnt, language,
311 territory, codeset,
312 normalized_codeset,
313 modifier, special,
314 sponsor, revision,
315 filename, 1);
317 retval->successor[entries] = NULL;
319 return retval;
/* Normalize codeset name.  There is no standard for the codeset
   names.  Normalization allows the user to use any of the common
   names.  The return value is dynamically allocated and has to be
   freed by the caller.
 *
 * Only the first NAME_LEN bytes of CODESET are examined.  Letters are
 * copied lower-cased, digits are copied unchanged and every other
 * character is dropped.  If the name contains no letter at all, the
 * result is prefixed with "iso".  Returns NULL if the allocation fails. */
const unsigned char *
_nl_normalize_codeset(const unsigned char *codeset, size_t name_len)
{
	size_t len = 0;
	int only_digit = 1;
	unsigned char *retval;
	unsigned char *wp;
	size_t cnt;

	/* First pass: count the characters that are kept and find out
	 * whether any of them is a letter. */
	for (cnt = 0; cnt < name_len; ++cnt)
		if (isalnum(codeset[cnt])) {
			++len;

			if (isalpha(codeset[cnt]))
				only_digit = 0;
		}

	/* Room for the optional "iso" prefix, the name and the final NUL. */
	retval = (unsigned char *) malloc((only_digit ? 3 : 0) + len + 1);

	if (retval != NULL) {
		if (only_digit)
			wp = stpcpy(retval, "iso");
		else
			wp = retval;

		/* Second pass: copy the kept characters. */
		for (cnt = 0; cnt < name_len; ++cnt)
			if (isalpha(codeset[cnt]))
				*wp++ = c_tolower(codeset[cnt]);
			else if (isdigit(codeset[cnt]))
				*wp++ = codeset[cnt];

		*wp = '\0';
	}

	return (const unsigned char *) retval;
}