Fix mdoc(7)/man(7) mix up.
[netbsd-mini2440.git] / lib / libc / locale / rune.c
blobe30b50daff4d78b2dc8c76c5f6db90f699252e90
1 /* $NetBSD: rune.c,v 1.32 2009/01/11 02:46:29 christos Exp $ */
3 /*-
4 * Copyright (c)1999 Citrus Project,
5 * All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
29 /*-
30 * Copyright (c) 1993
31 * The Regents of the University of California. All rights reserved.
33 * This code is derived from software contributed to Berkeley by
34 * Paul Borman at Krystal Technologies.
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
61 #include <sys/cdefs.h>
62 #if defined(LIBC_SCCS) && !defined(lint)
63 #if 0
64 static char sccsid[] = "@(#)rune.c 8.1 (Berkeley) 6/4/93";
65 #else
66 __RCSID("$NetBSD: rune.c,v 1.32 2009/01/11 02:46:29 christos Exp $");
67 #endif
68 #endif /* LIBC_SCCS and not lint */
70 #include "namespace.h"
71 #include <assert.h>
72 #include <stdio.h>
73 #include <string.h>
74 #include <stdlib.h>
75 #include <errno.h>
76 #include <wchar.h>
77 #include <sys/types.h>
78 #include <sys/stat.h>
80 #include "citrus_module.h"
81 #include "citrus_ctype.h"
83 #include "bsdctype.h"
84 #include "rune.h"
85 #include "rune_local.h"
87 static int readrange __P((_RuneLocale *, _RuneRange *, _FileRuneRange *, void *, FILE *));
88 static void _freeentry __P((_RuneRange *));
89 static void _wctype_init __P((_RuneLocale *rl));
91 static int
92 readrange(_RuneLocale *rl, _RuneRange *rr, _FileRuneRange *frr, void *lastp,
93 FILE *fp)
95 uint32_t i;
96 _RuneEntry *re;
97 _FileRuneEntry fre;
99 _DIAGASSERT(rl != NULL);
100 _DIAGASSERT(rr != NULL);
101 _DIAGASSERT(frr != NULL);
102 _DIAGASSERT(lastp != NULL);
103 _DIAGASSERT(fp != NULL);
105 re = (_RuneEntry *)rl->rl_variable;
107 rr->rr_nranges = ntohl(frr->frr_nranges);
108 if (rr->rr_nranges == 0) {
109 rr->rr_rune_ranges = NULL;
110 return 0;
113 rr->rr_rune_ranges = re;
114 for (i = 0; i < rr->rr_nranges; i++) {
115 if (fread(&fre, sizeof(fre), 1, fp) != 1)
116 return -1;
118 re->re_min = ntohl((u_int32_t)fre.fre_min);
119 re->re_max = ntohl((u_int32_t)fre.fre_max);
120 re->re_map = ntohl((u_int32_t)fre.fre_map);
121 re++;
123 if ((void *)re > lastp)
124 return -1;
126 rl->rl_variable = re;
127 return 0;
130 static int
131 readentry(_RuneRange *rr, FILE *fp)
133 _RuneEntry *re;
134 size_t l, i, j;
135 int error;
137 _DIAGASSERT(rr != NULL);
138 _DIAGASSERT(fp != NULL);
140 re = rr->rr_rune_ranges;
141 for (i = 0; i < rr->rr_nranges; i++) {
142 if (re[i].re_map != 0) {
143 re[i].re_rune_types = NULL;
144 continue;
147 l = re[i].re_max - re[i].re_min + 1;
148 re[i].re_rune_types = malloc(l * sizeof(_RuneType));
149 if (!re[i].re_rune_types) {
150 error = ENOMEM;
151 goto fail;
153 memset(re[i].re_rune_types, 0, l * sizeof(_RuneType));
155 if (fread(re[i].re_rune_types, sizeof(_RuneType), l, fp) != l)
156 goto fail2;
158 for (j = 0; j < l; j++)
159 re[i].re_rune_types[j] = ntohl(re[i].re_rune_types[j]);
161 return 0;
163 fail:
164 for (j = 0; j < i; j++) {
165 free(re[j].re_rune_types);
166 re[j].re_rune_types = NULL;
168 return error;
169 fail2:
170 for (j = 0; j <= i; j++) {
171 free(re[j].re_rune_types);
172 re[j].re_rune_types = NULL;
174 return errno;
177 /* XXX: temporary implementation */
178 static void
179 find_codeset(_RuneLocale *rl)
181 char *top, *codeset, *tail, *ep;
183 /* end of rl_variable region */
184 ep = (char *)rl->rl_variable;
185 ep += rl->rl_variable_len;
186 rl->rl_codeset = NULL;
187 if (!(top = strstr(rl->rl_variable, _RUNE_CODESET)))
188 return;
189 tail = strpbrk(top, " \t");
190 codeset = top + sizeof(_RUNE_CODESET) - 1;
191 if (tail) {
192 *top = *tail;
193 *tail = '\0';
194 rl->rl_codeset = strdup(codeset);
195 strlcpy(top + 1, tail + 1, (unsigned)(ep - (top + 1)));
196 } else {
197 *top = '\0';
198 rl->rl_codeset = strdup(codeset);
202 void
203 _freeentry(_RuneRange *rr)
205 _RuneEntry *re;
206 uint32_t i;
208 _DIAGASSERT(rr != NULL);
210 re = rr->rr_rune_ranges;
211 for (i = 0; i < rr->rr_nranges; i++) {
212 if (re[i].re_rune_types)
213 free(re[i].re_rune_types);
214 re[i].re_rune_types = NULL;
218 void
219 _wctype_init(_RuneLocale *rl)
221 memcpy(&rl->rl_wctype, &_DefaultRuneLocale.rl_wctype,
222 sizeof(rl->rl_wctype));
226 _RuneLocale *
227 _Read_RuneMagi(fp)
228 FILE *fp;
230 /* file */
231 _FileRuneLocale frl;
232 /* host data */
233 char *hostdata;
234 size_t hostdatalen;
235 void *lastp;
236 _RuneLocale *rl;
237 struct stat sb;
238 int x;
240 _DIAGASSERT(fp != NULL);
242 if (fstat(fileno(fp), &sb) < 0)
243 return NULL;
245 if (sb.st_size < (off_t)sizeof(_FileRuneLocale))
246 return NULL;
247 /* XXX more validation? */
249 /* Someone might have read the magic number once already */
250 rewind(fp);
252 if (fread(&frl, sizeof(frl), 1, fp) != 1)
253 return NULL;
254 if (memcmp(frl.frl_magic, _RUNE_MAGIC_1, sizeof(frl.frl_magic)))
255 return NULL;
257 hostdatalen = sizeof(*rl) + ntohl((u_int32_t)frl.frl_variable_len) +
258 ntohl(frl.frl_runetype_ext.frr_nranges) * sizeof(_RuneEntry) +
259 ntohl(frl.frl_maplower_ext.frr_nranges) * sizeof(_RuneEntry) +
260 ntohl(frl.frl_mapupper_ext.frr_nranges) * sizeof(_RuneEntry);
262 if ((hostdata = malloc(hostdatalen)) == NULL)
263 return NULL;
264 memset(hostdata, 0, hostdatalen);
265 lastp = hostdata + hostdatalen;
267 rl = (_RuneLocale *)(void *)hostdata;
268 rl->rl_variable = rl + 1;
270 memcpy(rl->rl_magic, frl.frl_magic, sizeof(rl->rl_magic));
271 memcpy(rl->rl_encoding, frl.frl_encoding, sizeof(rl->rl_encoding));
273 rl->rl_invalid_rune = ntohl((u_int32_t)frl.frl_invalid_rune);
274 rl->rl_variable_len = ntohl((u_int32_t)frl.frl_variable_len);
276 for (x = 0; x < _CACHED_RUNES; ++x) {
277 rl->rl_runetype[x] = ntohl(frl.frl_runetype[x]);
279 /* XXX assumes rune_t = u_int32_t */
280 rl->rl_maplower[x] = ntohl((u_int32_t)frl.frl_maplower[x]);
281 rl->rl_mapupper[x] = ntohl((u_int32_t)frl.frl_mapupper[x]);
284 if (readrange(rl, &rl->rl_runetype_ext, &frl.frl_runetype_ext, lastp, fp))
286 free(hostdata);
287 return NULL;
289 if (readrange(rl, &rl->rl_maplower_ext, &frl.frl_maplower_ext, lastp, fp))
291 free(hostdata);
292 return NULL;
294 if (readrange(rl, &rl->rl_mapupper_ext, &frl.frl_mapupper_ext, lastp, fp))
296 free(hostdata);
297 return NULL;
300 if (readentry(&rl->rl_runetype_ext, fp) != 0) {
301 free(hostdata);
302 return NULL;
305 if ((u_int8_t *)rl->rl_variable + rl->rl_variable_len >
306 (u_int8_t *)lastp) {
307 _freeentry(&rl->rl_runetype_ext);
308 free(hostdata);
309 return NULL;
311 if (rl->rl_variable_len == 0)
312 rl->rl_variable = NULL;
313 if (rl->rl_variable == NULL ||
314 fread(rl->rl_variable, rl->rl_variable_len, 1, fp) != 1) {
315 _freeentry(&rl->rl_runetype_ext);
316 free(hostdata);
317 return NULL;
319 find_codeset(rl);
320 _wctype_init(rl);
322 /* error if we have junk at the tail */
323 if (ftell(fp) != sb.st_size) {
324 _freeentry(&rl->rl_runetype_ext);
325 free(hostdata);
326 return NULL;
329 return(rl);
332 void
333 _NukeRune(rl)
334 _RuneLocale *rl;
337 _DIAGASSERT(rl != NULL);
339 if (rl != &_DefaultRuneLocale) {
340 _freeentry(&rl->rl_runetype_ext);
341 if (rl->rl_codeset)
342 free(__UNCONST(rl->rl_codeset));
343 if (rl->rl_citrus_ctype)
344 _citrus_ctype_close(rl->rl_citrus_ctype);
345 free(__UNCONST(rl->rl_ctype_tab));
346 free(__UNCONST(rl->rl_tolower_tab));
347 free(__UNCONST(rl->rl_toupper_tab));
348 free(rl);
353 * read in old LC_CTYPE declaration file, convert into runelocale info
355 #define _CTYPE_PRIVATE
356 #include <limits.h>
357 #include <ctype.h>
359 _RuneLocale *
360 _Read_CTypeAsRune(fp)
361 FILE *fp;
363 char id[sizeof(_CTYPE_ID) - 1];
364 u_int32_t i, len;
365 u_int8_t *new_ctype = NULL;
366 int16_t *new_toupper = NULL, *new_tolower = NULL;
367 /* host data */
368 char *hostdata = NULL;
369 size_t hostdatalen;
370 _RuneLocale *rl;
371 struct stat sb;
372 int x;
374 _DIAGASSERT(fp != NULL);
376 if (fstat(fileno(fp), &sb) < 0)
377 return NULL;
379 if (sb.st_size < (off_t)sizeof(id))
380 return NULL;
381 /* XXX more validation? */
383 /* Someone might have read the magic number once already */
384 rewind(fp);
386 if (fread(id, sizeof(id), 1, fp) != 1)
387 goto bad;
388 if (memcmp(id, _CTYPE_ID, sizeof(id)) != 0)
389 goto bad;
391 if (fread(&i, sizeof(u_int32_t), 1, fp) != 1)
392 goto bad;
393 if ((i = ntohl(i)) != _CTYPE_REV)
394 goto bad;
396 if (fread(&len, sizeof(u_int32_t), 1, fp) != 1)
397 goto bad;
398 if ((len = ntohl(len)) != _CTYPE_NUM_CHARS)
399 goto bad;
401 if ((new_ctype = malloc(sizeof(u_int8_t) * (1 + len))) == NULL ||
402 (new_toupper = malloc(sizeof(int16_t) * (1 + len))) == NULL ||
403 (new_tolower = malloc(sizeof(int16_t) * (1 + len))) == NULL)
404 goto bad;
405 new_ctype[0] = 0;
406 if (fread(&new_ctype[1], sizeof(u_int8_t), len, fp) != len)
407 goto bad;
408 new_toupper[0] = EOF;
409 if (fread(&new_toupper[1], sizeof(int16_t), len, fp) != len)
410 goto bad;
411 new_tolower[0] = EOF;
412 if (fread(&new_tolower[1], sizeof(int16_t), len, fp) != len)
413 goto bad;
415 hostdatalen = sizeof(*rl);
417 if ((hostdata = malloc(hostdatalen)) == NULL)
418 goto bad;
419 memset(hostdata, 0, hostdatalen);
420 rl = (_RuneLocale *)(void *)hostdata;
421 rl->rl_variable = NULL;
423 memcpy(rl->rl_magic, _RUNE_MAGIC_1, sizeof(rl->rl_magic));
424 memcpy(rl->rl_encoding, "NONE", 4);
426 rl->rl_invalid_rune = _DefaultRuneLocale.rl_invalid_rune; /*XXX*/
427 rl->rl_variable_len = 0;
429 for (x = 0; x < _CACHED_RUNES; ++x) {
430 if ((uint32_t) x > len)
431 continue;
434 * TWEAKS!
435 * - old locale file declarations do not have proper _B
436 * in many cases.
437 * - isprint() declaration in ctype.h incorrectly uses _B.
438 * _B means "isprint but !isgraph", not "isblank" with the
439 * declaration.
440 * - _X and _CTYPE_X have negligible difference in meaning.
441 * - we don't set digit value, fearing that it would be
442 * too much of hardcoding. we may need to revisit it.
445 if (new_ctype[1 + x] & _U)
446 rl->rl_runetype[x] |= _CTYPE_U;
447 if (new_ctype[1 + x] & _L)
448 rl->rl_runetype[x] |= _CTYPE_L;
449 if (new_ctype[1 + x] & _N)
450 rl->rl_runetype[x] |= _CTYPE_D;
451 if (new_ctype[1 + x] & _S)
452 rl->rl_runetype[x] |= _CTYPE_S;
453 if (new_ctype[1 + x] & _P)
454 rl->rl_runetype[x] |= _CTYPE_P;
455 if (new_ctype[1 + x] & _C)
456 rl->rl_runetype[x] |= _CTYPE_C;
457 /* derived flag bits, duplicate of ctype.h */
458 if (new_ctype[1 + x] & (_U | _L))
459 rl->rl_runetype[x] |= _CTYPE_A;
460 if (new_ctype[1 + x] & (_N | _X))
461 rl->rl_runetype[x] |= _CTYPE_X;
462 if (new_ctype[1 + x] & (_P|_U|_L|_N))
463 rl->rl_runetype[x] |= _CTYPE_G;
464 /* we don't really trust _B in the file. see above. */
465 if (new_ctype[1 + x] & _B)
466 rl->rl_runetype[x] |= _CTYPE_B;
467 if ((new_ctype[1 + x] & (_P|_U|_L|_N|_B)) || x == ' ')
468 rl->rl_runetype[x] |= (_CTYPE_R | _CTYPE_SW1);
469 if (x == ' ' || x == '\t')
470 rl->rl_runetype[x] |= _CTYPE_B;
472 /* XXX may fail on non-8bit encoding only */
473 rl->rl_mapupper[x] = ntohs(new_toupper[1 + x]);
474 rl->rl_maplower[x] = ntohs(new_tolower[1 + x]);
477 _wctype_init(rl);
480 * __runetable_to_netbsd_ctype() will be called from
481 * setrunelocale.c:_newrunelocale(), and fill old ctype table.
484 free(new_ctype);
485 free(new_toupper);
486 free(new_tolower);
487 return(rl);
489 bad:
490 if (new_ctype)
491 free(new_ctype);
492 if (new_toupper)
493 free(new_toupper);
494 if (new_tolower)
495 free(new_tolower);
496 return NULL;