Fix mdoc(7)/man(7) mix up.
[netbsd-mini2440.git] / lib / libc / citrus / modules / citrus_iso2022.c
blob70cbcf525ea77e4ba077ad983b7bec43179f39e4
1 /* $NetBSD: citrus_iso2022.c,v 1.18 2007/11/21 14:19:32 tnozaki Exp $ */
3 /*-
4 * Copyright (c)1999, 2002 Citrus Project,
5 * All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
28 * $Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $
31 #include <sys/cdefs.h>
32 #if defined(LIBC_SCCS) && !defined(lint)
33 __RCSID("$NetBSD: citrus_iso2022.c,v 1.18 2007/11/21 14:19:32 tnozaki Exp $");
34 #endif /* LIBC_SCCS and not lint */
36 #include <assert.h>
37 #include <errno.h>
38 #include <string.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <stddef.h>
42 #include <wchar.h>
43 #include <sys/types.h>
44 #include <limits.h>
46 #include "citrus_namespace.h"
47 #include "citrus_types.h"
48 #include "citrus_module.h"
49 #include "citrus_ctype.h"
50 #include "citrus_stdenc.h"
51 #include "citrus_iso2022.h"
/* ----------------------------------------------------------------------
 * private stuffs used by templates
 */

/*
 * wchar_t mappings:
 * ASCII (ESC ( B)		00000000 00000000 00000000 0xxxxxxx
 * iso-8859-1 (ESC , A)		00000000 00000000 00000000 1xxxxxxx
 * 94 charset (ESC ( F)		0fffffff 00000000 00000000 0xxxxxxx
 * 94 charset (ESC ( M F)	0fffffff 1mmmmmmm 00000000 0xxxxxxx
 * 96 charset (ESC , F)		0fffffff 00000000 00000000 1xxxxxxx
 * 96 charset (ESC , M F)	0fffffff 1mmmmmmm 00000000 1xxxxxxx
 * 94x94 charset (ESC $ ( F)	0fffffff 00000000 0xxxxxxx 0xxxxxxx
 * 96x96 charset (ESC $ , F)	0fffffff 00000000 0xxxxxxx 1xxxxxxx
 * 94x94 charset (ESC & V ESC $ ( F)
 *				0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx
 * 94x94x94 charset (ESC $ ( F)	0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx
 * 96x96x96 charset (ESC $ , F)	0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx
 * reserved for UCS4 co-existence (UCS4 is 31bit encoding thanks to mohta bit)
 *				1xxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
 */
/* Character set classes stored in _ISO2022Charset.type. */
#define CS94		(0U)
#define CS96		(1U)
#define CS94MULTI	(2U)
#define CS96MULTI	(3U)

/*
 * Identification of one designated character set: its class plus the
 * final, intermediate and version bytes recorded from the designating
 * escape sequence ('\0' when a byte is absent).
 */
typedef struct {
	u_char	type;		/* CS94, CS96, CS94MULTI or CS96MULTI */
	u_char	final;
	u_char	interm;
	u_char	vers;
} _ISO2022Charset;

/* Built-in fallbacks: ASCII (94-set, final 'B') and ISO-8859-1 (96-set, final 'A'). */
static const _ISO2022Charset ascii    = { CS94, 'B', '\0', '\0' };
static const _ISO2022Charset iso88591 = { CS96, 'A', '\0', '\0' };
92 typedef struct {
93 _ISO2022Charset g[4];
94 /* need 3 bits to hold -1, 0, ..., 3 */
95 int gl:3,
96 gr:3,
97 singlegl:3,
98 singlegr:3;
99 char ch[7]; /* longest escape sequence (ESC & V ESC $ ( F) */
100 int chlen;
101 int flags;
102 #define _ISO2022STATE_FLAG_INITIALIZED 1
103 } _ISO2022State;
105 typedef struct {
106 _ISO2022Charset *recommend[4];
107 size_t recommendsize[4];
108 _ISO2022Charset initg[4];
109 int maxcharset;
110 int flags;
111 #define F_8BIT 0x0001
112 #define F_NOOLD 0x0002
113 #define F_SI 0x0010 /*0F*/
114 #define F_SO 0x0020 /*0E*/
115 #define F_LS0 0x0010 /*0F*/
116 #define F_LS1 0x0020 /*0E*/
117 #define F_LS2 0x0040 /*ESC n*/
118 #define F_LS3 0x0080 /*ESC o*/
119 #define F_LS1R 0x0100 /*ESC ~*/
120 #define F_LS2R 0x0200 /*ESC }*/
121 #define F_LS3R 0x0400 /*ESC |*/
122 #define F_SS2 0x0800 /*ESC N*/
123 #define F_SS3 0x1000 /*ESC O*/
124 #define F_SS2R 0x2000 /*8E*/
125 #define F_SS3R 0x4000 /*8F*/
126 } _ISO2022EncodingInfo;
127 typedef struct {
128 _ISO2022EncodingInfo ei;
129 struct {
130 /* for future multi-locale facility */
131 _ISO2022State s_mblen;
132 _ISO2022State s_mbrlen;
133 _ISO2022State s_mbrtowc;
134 _ISO2022State s_mbtowc;
135 _ISO2022State s_mbsrtowcs;
136 _ISO2022State s_wcrtomb;
137 _ISO2022State s_wcsrtombs;
138 _ISO2022State s_wctomb;
139 } states;
140 } _ISO2022CTypeInfo;
/* Accessors and names handed to the shared citrus templates. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

#define _FUNCNAME(m)			_citrus_ISO2022_##m
#define _ENCODING_INFO			_ISO2022EncodingInfo
#define _CTYPE_INFO			_ISO2022CTypeInfo
#define _ENCODING_STATE			_ISO2022State
#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
#define _ENCODING_IS_STATE_DEPENDENT	1
/* A state needs initialization until its INITIALIZED flag is set. */
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	\
	(!((_ps_)->flags & _ISO2022STATE_FLAG_INITIALIZED))

/*
 * Sentinel returned by the decoder when no character could be produced.
 * Fully parenthesized so the cast binds as one expression wherever the
 * macro is expanded (e.g. as an operand of sizeof or a binary operator).
 */
#define _ISO2022INVALID			((wchar_t)-1)
/*
 * Byte classification helpers.  Each takes one octet and returns
 * non-zero when it lies in the named range.
 */

/* C0 control: 0x00..0x1f */
static __inline int
isc0(__uint8_t x)
{
	return (x <= 0x1f);
}

/* C1 control: 0x80..0x9f */
static __inline int
isc1(__uint8_t x)
{
	return ((x & 0xe0) == 0x80);
}

/* any control octet: C0, C1 or DEL (0x7f) */
static __inline int
iscntl(__uint8_t x)
{
	if (x == 0x7f)
		return 1;
	return (isc0(x) || isc1(x));
}

/* 94-set position: 0x21..0x7e */
static __inline int
is94(__uint8_t x)
{
	return (x > 0x20 && x < 0x7f);
}

/* 96-set position: 0x20..0x7f */
static __inline int
is96(__uint8_t x)
{
	return !(x < 0x20 || x > 0x7f);
}

/* range accepted for an ECMA final byte here: 0x30..0x7f */
static __inline int
isecma(__uint8_t x)
{
	return !(x < 0x30 || x > 0x7f);
}

/* intermediate byte: 0x20..0x2f */
static __inline int
isinterm(__uint8_t x)
{
	return ((x & 0xf0) == 0x20);
}

/* final byte in 0x60..0x6f (selects the three-byte form in this file) */
static __inline int
isthree(__uint8_t x)
{
	return ((x & 0xf0) == 0x60);
}
166 static __inline int
167 getcs(const char * __restrict p, _ISO2022Charset * __restrict cs)
170 _DIAGASSERT(p != NULL);
171 _DIAGASSERT(cs != NULL);
173 if (!strncmp(p, "94$", 3) && p[3] && !p[4]) {
174 cs->final = (u_char)(p[3] & 0xff);
175 cs->interm = '\0';
176 cs->vers = '\0';
177 cs->type = CS94MULTI;
178 } else if (!strncmp(p, "96$", 3) && p[3] && !p[4]) {
179 cs->final = (u_char)(p[3] & 0xff);
180 cs->interm = '\0';
181 cs->vers = '\0';
182 cs->type = CS96MULTI;
183 } else if (!strncmp(p, "94", 2) && p[2] && !p[3]) {
184 cs->final = (u_char)(p[2] & 0xff);
185 cs->interm = '\0';
186 cs->vers = '\0';
187 cs->type = CS94;
188 } else if (!strncmp(p, "96", 2) && p[2] && !p[3]) {
189 cs->final = (u_char )(p[2] & 0xff);
190 cs->interm = '\0';
191 cs->vers = '\0';
192 cs->type = CS96;
193 } else {
194 return 1;
197 return 0;
201 #define _NOTMATCH 0
202 #define _MATCH 1
203 #define _PARSEFAIL 2
205 static __inline int
206 get_recommend(_ISO2022EncodingInfo * __restrict ei,
207 const char * __restrict token)
209 int i;
210 _ISO2022Charset cs, *p;
212 if (!strchr("0123", token[0]) || token[1] != '=')
213 return (_NOTMATCH);
215 if (getcs(&token[2], &cs) == 0)
217 else if (!strcmp(&token[2], "94")) {
218 cs.final = (u_char)(token[4]);
219 cs.interm = '\0';
220 cs.vers = '\0';
221 cs.type = CS94;
222 } else if (!strcmp(&token[2], "96")) {
223 cs.final = (u_char)(token[4]);
224 cs.interm = '\0';
225 cs.vers = '\0';
226 cs.type = CS96;
227 } else if (!strcmp(&token[2], "94$")) {
228 cs.final = (u_char)(token[5]);
229 cs.interm = '\0';
230 cs.vers = '\0';
231 cs.type = CS94MULTI;
232 } else if (!strcmp(&token[2], "96$")) {
233 cs.final = (u_char)(token[5]);
234 cs.interm = '\0';
235 cs.vers = '\0';
236 cs.type = CS96MULTI;
237 } else {
238 return (_PARSEFAIL);
241 i = token[0] - '0';
242 if (!ei->recommend[i]) {
243 ei->recommend[i] = malloc(sizeof(_ISO2022Charset));
244 } else {
245 p = realloc(ei->recommend[i],
246 sizeof(_ISO2022Charset) * (ei->recommendsize[i] + 1));
247 if (!p)
248 return (_PARSEFAIL);
249 ei->recommend[i] = p;
251 if (!ei->recommend[i])
252 return (_PARSEFAIL);
253 ei->recommendsize[i]++;
255 (ei->recommend[i] + (ei->recommendsize[i] - 1))->final = cs.final;
256 (ei->recommend[i] + (ei->recommendsize[i] - 1))->interm = cs.interm;
257 (ei->recommend[i] + (ei->recommendsize[i] - 1))->vers = cs.vers;
258 (ei->recommend[i] + (ei->recommendsize[i] - 1))->type = cs.type;
260 return (_MATCH);
263 static __inline int
264 get_initg(_ISO2022EncodingInfo * __restrict ei,
265 const char * __restrict token)
267 _ISO2022Charset cs;
269 if (strncmp("INIT", &token[0], 4) ||
270 !strchr("0123", token[4]) ||
271 token[5] != '=')
272 return (_NOTMATCH);
274 if (getcs(&token[6], &cs) != 0)
275 return (_PARSEFAIL);
277 ei->initg[token[4] - '0'].type = cs.type;
278 ei->initg[token[4] - '0'].final = cs.final;
279 ei->initg[token[4] - '0'].interm = cs.interm;
280 ei->initg[token[4] - '0'].vers = cs.vers;
282 return (_MATCH);
285 static __inline int
286 get_max(_ISO2022EncodingInfo * __restrict ei,
287 const char * __restrict token)
289 if (!strcmp(token, "MAX1")) {
290 ei->maxcharset = 1;
291 } else if (!strcmp(token, "MAX2")) {
292 ei->maxcharset = 2;
293 } else if (!strcmp(token, "MAX3")) {
294 ei->maxcharset = 3;
295 } else
296 return (_NOTMATCH);
298 return (_MATCH);
302 static __inline int
303 get_flags(_ISO2022EncodingInfo * __restrict ei,
304 const char * __restrict token)
306 int i;
307 static struct {
308 const char *tag;
309 int flag;
310 } const tags[] = {
311 { "DUMMY", 0 },
312 { "8BIT", F_8BIT },
313 { "NOOLD", F_NOOLD },
314 { "SI", F_SI },
315 { "SO", F_SO },
316 { "LS0", F_LS0 },
317 { "LS1", F_LS1 },
318 { "LS2", F_LS2 },
319 { "LS3", F_LS3 },
320 { "LS1R", F_LS1R },
321 { "LS2R", F_LS2R },
322 { "LS3R", F_LS3R },
323 { "SS2", F_SS2 },
324 { "SS3", F_SS3 },
325 { "SS2R", F_SS2R },
326 { "SS3R", F_SS3R },
327 { NULL, 0 }
330 for (i = 0; tags[i].tag; i++) {
331 if (!strcmp(token, tags[i].tag)) {
332 ei->flags |= tags[i].flag;
333 return (_MATCH);
337 return (_NOTMATCH);
341 static __inline int
342 _citrus_ISO2022_parse_variable(_ISO2022EncodingInfo * __restrict ei,
343 const void * __restrict var, size_t lenvar)
345 char const *v, *e;
346 char buf[20];
347 int i, len, ret;
349 _DIAGASSERT(ei != NULL);
353 * parse VARIABLE section.
356 if (!var)
357 return (EFTYPE);
359 v = (const char *) var;
361 /* initialize structure */
362 ei->maxcharset = 0;
363 for (i = 0; i < 4; i++) {
364 ei->recommend[i] = NULL;
365 ei->recommendsize[i] = 0;
367 ei->flags = 0;
369 while (*v) {
370 while (*v == ' ' || *v == '\t')
371 ++v;
373 /* find the token */
374 e = v;
375 while (*e && *e != ' ' && *e != '\t')
376 ++e;
378 len = e-v;
379 if (len == 0)
380 break;
381 if (len>=sizeof(buf))
382 goto parsefail;
383 snprintf(buf, sizeof(buf), "%.*s", len, v);
385 if ((ret = get_recommend(ei, buf)) != _NOTMATCH)
387 else if ((ret = get_initg(ei, buf)) != _NOTMATCH)
389 else if ((ret = get_max(ei, buf)) != _NOTMATCH)
391 else if ((ret = get_flags(ei, buf)) != _NOTMATCH)
393 else
394 ret = _PARSEFAIL;
395 if (ret==_PARSEFAIL)
396 goto parsefail;
397 v = e;
401 return (0);
403 parsefail:
404 free(ei->recommend[0]);
405 free(ei->recommend[1]);
406 free(ei->recommend[2]);
407 free(ei->recommend[3]);
409 return (EFTYPE);
412 static __inline void
413 /*ARGSUSED*/
414 _citrus_ISO2022_init_state(_ISO2022EncodingInfo * __restrict ei,
415 _ISO2022State * __restrict s)
417 int i;
419 memset(s, 0, sizeof(*s));
420 s->gl = 0;
421 s->gr = (ei->flags & F_8BIT) ? 1 : -1;
423 for (i = 0; i < 4; i++) {
424 if (ei->initg[i].final) {
425 s->g[i].type = ei->initg[i].type;
426 s->g[i].final = ei->initg[i].final;
427 s->g[i].interm = ei->initg[i].interm;
430 s->singlegl = s->singlegr = -1;
431 s->flags |= _ISO2022STATE_FLAG_INITIALIZED;
434 static __inline void
435 /*ARGSUSED*/
436 _citrus_ISO2022_pack_state(_ISO2022EncodingInfo * __restrict ei,
437 void * __restrict pspriv,
438 const _ISO2022State * __restrict s)
440 memcpy(pspriv, (const void *)s, sizeof(*s));
443 static __inline void
444 /*ARGSUSED*/
445 _citrus_ISO2022_unpack_state(_ISO2022EncodingInfo * __restrict ei,
446 _ISO2022State * __restrict s,
447 const void * __restrict pspriv)
449 memcpy((void *)s, pspriv, sizeof(*s));
452 static int
453 /*ARGSUSED*/
454 _citrus_ISO2022_encoding_module_init(_ISO2022EncodingInfo * __restrict ei,
455 const void * __restrict var,
456 size_t lenvar)
459 _DIAGASSERT(ei != NULL);
461 return _citrus_ISO2022_parse_variable(ei, var, lenvar);
464 static void
465 /*ARGSUSED*/
466 _citrus_ISO2022_encoding_module_uninit(_ISO2022EncodingInfo *ei)
470 #define ESC '\033'
471 #define ECMA -1
472 #define INTERM -2
473 #define OECMA -3
474 static const struct seqtable {
475 int type;
476 int csoff;
477 int finaloff;
478 int intermoff;
479 int versoff;
480 int len;
481 int chars[10];
482 } seqtable[] = {
483 /* G0 94MULTI special */
484 { CS94MULTI, -1, 2, -1, -1, 3, { ESC, '$', OECMA }, },
485 /* G0 94MULTI special with version identification */
486 { CS94MULTI, -1, 5, -1, 2, 6, { ESC, '&', ECMA, ESC, '$', OECMA }, },
487 /* G? 94 */
488 { CS94, 1, 2, -1, -1, 3, { ESC, CS94, ECMA, }, },
489 /* G? 94 with 2nd intermediate char */
490 { CS94, 1, 3, 2, -1, 4, { ESC, CS94, INTERM, ECMA, }, },
491 /* G? 96 */
492 { CS96, 1, 2, -1, -1, 3, { ESC, CS96, ECMA, }, },
493 /* G? 96 with 2nd intermediate char */
494 { CS96, 1, 3, 2, -1, 4, { ESC, CS96, INTERM, ECMA, }, },
495 /* G? 94MULTI */
496 { CS94MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS94, ECMA, }, },
497 /* G? 96MULTI */
498 { CS96MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS96, ECMA, }, },
499 /* G? 94MULTI with version specification */
500 { CS94MULTI, 5, 6, -1, 2, 7, { ESC, '&', ECMA, ESC, '$', CS94, ECMA, }, },
501 /* LS2/3 */
502 { -1, -1, -1, -1, -1, 2, { ESC, 'n', }, },
503 { -1, -1, -1, -1, -1, 2, { ESC, 'o', }, },
504 /* LS1/2/3R */
505 { -1, -1, -1, -1, -1, 2, { ESC, '~', }, },
506 { -1, -1, -1, -1, -1, 2, { ESC, /*{*/ '}', }, },
507 { -1, -1, -1, -1, -1, 2, { ESC, '|', }, },
508 /* SS2/3 */
509 { -1, -1, -1, -1, -1, 2, { ESC, 'N', }, },
510 { -1, -1, -1, -1, -1, 2, { ESC, 'O', }, },
511 /* end of records */
512 { 0, }
515 static int
516 seqmatch(const char * __restrict s, size_t n,
517 const struct seqtable * __restrict sp)
519 const int *p;
521 _DIAGASSERT(s != NULL);
522 _DIAGASSERT(sp != NULL);
524 p = sp->chars;
525 while (p - sp->chars < n && p - sp->chars < sp->len) {
526 switch (*p) {
527 case ECMA:
528 if (!isecma(*s))
529 goto terminate;
530 break;
531 case OECMA:
532 if (*s && strchr("@AB", *s))
533 break;
534 else
535 goto terminate;
536 case INTERM:
537 if (!isinterm(*s))
538 goto terminate;
539 break;
540 case CS94:
541 if (*s && strchr("()*+", *s))
542 break;
543 else
544 goto terminate;
545 case CS96:
546 if (*s && strchr(",-./", *s))
547 break;
548 else
549 goto terminate;
550 default:
551 if (*s != *p)
552 goto terminate;
553 break;
556 p++;
557 s++;
560 terminate:
561 return p - sp->chars;
/*
 * Decode one wide character from the byte string `string' (n bytes
 * available), applying any leading shift / designation sequences to
 * *psenc on the way.
 *
 * Returns the decoded character, or _ISO2022INVALID when no complete
 * character is available.  When `result' is non-NULL it receives the
 * position just past the bytes that were consumed (on the
 * _ISO2022INVALID paths that set it: the position up to which shift
 * and designation sequences have already been applied).
 * `ei' is only checked by the assertion below.
 */
564 static wchar_t
565 _ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei,
566 const char * __restrict string, size_t n,
567 const char ** __restrict result,
568 _ISO2022State * __restrict psenc)
570 wchar_t wchar = 0;
571 int cur;
572 const struct seqtable *sp;
573 int nmatch;
574 int i;
576 _DIAGASSERT(ei != NULL);
577 _DIAGASSERT(psenc != NULL);
578 _DIAGASSERT(string != NULL);
579 /* result may be NULL */
/* Phase 1: consume shift functions and designations in front of the character. */
581 while (1) {
582 /* SI/SO */
/* 0x0f invokes G0 into GL, 0x0e invokes G1 into GL */
583 if (1 <= n && string[0] == '\017') {
584 psenc->gl = 0;
585 string++;
586 n--;
587 continue;
589 if (1 <= n && string[0] == '\016') {
590 psenc->gl = 1;
591 string++;
592 n--;
593 continue;
596 /* SS2/3R */
/* single byte 0x8e / 0x8f: single-shift G2 / G3 for both GL and GR */
597 if (1 <= n && string[0] && strchr("\217\216", string[0])) {
598 psenc->singlegl = psenc->singlegr =
599 (string[0] - '\216') + 2;
600 string++;
601 n--;
602 continue;
605 /* eat the letter if this is not ESC */
606 if (1 <= n && string[0] != '\033')
607 break;
609 /* look for a perfect match from escape sequences */
610 for (sp = &seqtable[0]; sp->len; sp++) {
611 nmatch = seqmatch(string, n, sp);
612 if (sp->len == nmatch && n >= sp->len)
613 break;
616 if (!sp->len)
617 goto notseq;
/* type != -1: a designation; pick the target plane G0..G3 from the selector byte */
619 if (sp->type != -1) {
620 if (sp->csoff == -1)
621 i = 0;
622 else {
623 switch (sp->type) {
624 case CS94:
625 case CS94MULTI:
626 i = string[sp->csoff] - '(';
627 break;
628 case CS96:
629 case CS96MULTI:
630 i = string[sp->csoff] - ',';
631 break;
632 default:
/* *result is not written on this path; every seqtable entry has type -1 or one of the four classes */
633 return (_ISO2022INVALID);
636 psenc->g[i].type = sp->type;
637 psenc->g[i].final = '\0';
638 psenc->g[i].interm = '\0';
639 psenc->g[i].vers = '\0';
640 /* sp->finaloff must not be -1 */
641 if (sp->finaloff != -1)
642 psenc->g[i].final = string[sp->finaloff];
643 if (sp->intermoff != -1)
644 psenc->g[i].interm = string[sp->intermoff];
645 if (sp->versoff != -1)
646 psenc->g[i].vers = string[sp->versoff];
648 string += sp->len;
649 n -= sp->len;
650 continue;
653 /* LS2/3 */
/* ESC n / ESC o: invoke G2 / G3 into GL */
654 if (2 <= n && string[0] == '\033'
655 && string[1] && strchr("no", string[1])) {
656 psenc->gl = string[1] - 'n' + 2;
657 string += 2;
658 n -= 2;
659 continue;
662 /* LS1/2/3R */
663 /* XXX: { for vi showmatch */
/* ESC ~ / ESC } / ESC |: invoke G1 / G2 / G3 into GR */
664 if (2 <= n && string[0] == '\033'
665 && string[1] && strchr("~}|", string[1])) {
666 psenc->gr = 3 - (string[1] - '|');
667 string += 2;
668 n -= 2;
669 continue;
672 /* SS2/3 */
/* ESC N / ESC O: single-shift G2 / G3 for GL only */
673 if (2 <= n && string[0] == '\033'
674 && string[1] && strchr("NO", string[1])) {
675 psenc->singlegl = (string[1] - 'N') + 2;
676 string += 2;
677 n -= 2;
678 continue;
681 notseq:
683 * if we've got an unknown escape sequence, eat the ESC at the
684 * head. otherwise, wait till full escape sequence comes.
686 for (sp = &seqtable[0]; sp->len; sp++) {
687 nmatch = seqmatch(string, n, sp);
688 if (!nmatch)
689 continue;
692 * if we are in the middle of escape sequence,
693 * we still need to wait for more characters to come
695 if (n < sp->len) {
696 if (nmatch == n) {
697 if (result)
698 *result = string;
699 return (_ISO2022INVALID);
701 } else {
702 if (nmatch == sp->len) {
703 /* this case should not happen */
704 goto eat;
709 break;
/* Phase 2: decode the character itself. */
712 eat:
713 /* no letter to eat */
714 if (n < 1) {
715 if (result)
716 *result = string;
717 return (_ISO2022INVALID);
720 /* normal chars. always eat C0/C1 as is. */
/* cur = plane (0..3) the byte is read from, or -1 to pass it through unchanged */
721 if (iscntl(*string & 0xff))
722 cur = -1;
723 else if (*string & 0x80) {
724 cur = (psenc->singlegr == -1)
725 ? psenc->gr : psenc->singlegr;
726 } else {
727 cur = (psenc->singlegl == -1)
728 ? psenc->gl : psenc->singlegl;
731 if (cur == -1) {
732 asis:
733 wchar = *string++ & 0xff;
734 if (result)
735 *result = string;
736 /* reset single shift state */
737 psenc->singlegr = psenc->singlegl = -1;
738 return wchar;
741 /* length error check */
/* multi-byte sets need 2 bytes (3 when isthree(final)), all with the same 8th bit */
742 switch (psenc->g[cur].type) {
743 case CS94MULTI:
744 case CS96MULTI:
745 if (!isthree(psenc->g[cur].final)) {
746 if (2 <= n
747 && (string[0] & 0x80) == (string[1] & 0x80))
748 break;
749 } else {
750 if (3 <= n
751 && (string[0] & 0x80) == (string[1] & 0x80)
752 && (string[0] & 0x80) == (string[2] & 0x80))
753 break;
756 /* we still need to wait for more characters to come */
757 if (result)
758 *result = string;
759 return (_ISO2022INVALID);
761 case CS94:
762 case CS96:
763 if (1 <= n)
764 break;
766 /* we still need to wait for more characters to come */
767 if (result)
768 *result = string;
769 return (_ISO2022INVALID);
772 /* range check */
773 switch (psenc->g[cur].type) {
774 case CS94:
775 if (!(is94(string[0] & 0x7f)))
776 goto asis;
/* NOTE(review): no break after the CS94 test; it falls into the CS96 test, which every is94() byte also passes */
777 case CS96:
778 if (!(is96(string[0] & 0x7f)))
779 goto asis;
780 break;
781 case CS94MULTI:
782 if (!(is94(string[0] & 0x7f) && is94(string[1] & 0x7f)))
783 goto asis;
784 break;
785 case CS96MULTI:
786 if (!(is96(string[0] & 0x7f) && is96(string[1] & 0x7f)))
787 goto asis;
788 break;
791 /* extract the character. */
/* pack as: final byte in bits 24..30, optional 0x80|interm in bits 16..23, character bytes below */
792 switch (psenc->g[cur].type) {
793 case CS94:
794 /* special case for ASCII. */
795 if (psenc->g[cur].final == 'B' && !psenc->g[cur].interm) {
796 wchar = *string++;
797 wchar &= 0x7f;
798 break;
800 wchar = psenc->g[cur].final;
801 wchar = (wchar << 8);
802 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
803 wchar = (wchar << 8);
804 wchar = (wchar << 8) | (*string++ & 0x7f);
805 break;
806 case CS96:
807 /* special case for ISO-8859-1. */
808 if (psenc->g[cur].final == 'A' && !psenc->g[cur].interm) {
809 wchar = *string++;
810 wchar &= 0x7f;
811 wchar |= 0x80;
812 break;
814 wchar = psenc->g[cur].final;
815 wchar = (wchar << 8);
816 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
817 wchar = (wchar << 8);
818 wchar = (wchar << 8) | (*string++ & 0x7f);
819 wchar |= 0x80;
820 break;
821 case CS94MULTI:
822 case CS96MULTI:
823 wchar = psenc->g[cur].final;
824 wchar = (wchar << 8);
825 if (isthree(psenc->g[cur].final))
826 wchar |= (*string++ & 0x7f);
827 wchar = (wchar << 8) | (*string++ & 0x7f);
828 wchar = (wchar << 8) | (*string++ & 0x7f);
/* 96-sets are marked by bit 7 of the lowest byte */
829 if (psenc->g[cur].type == CS96MULTI)
830 wchar |= 0x80;
831 break;
834 if (result)
835 *result = string;
836 /* reset single shift state */
837 psenc->singlegr = psenc->singlegl = -1;
838 return wchar;
/*
 * Restartable multibyte-to-wide conversion step.
 *
 * If *s is NULL the state is re-initialized and *nresult reports
 * _ENCODING_IS_STATE_DEPENDENT.  Otherwise up to n bytes at *s are
 * decoded, continuing from any bytes left in psenc->ch by an earlier
 * call.
 *
 * Returns 0 with *nresult set to 0 (a NUL character was decoded), to
 * the value c - chlenbak computed below, or to (size_t)-2 when the
 * input ended before a character was complete; in these cases *s is
 * advanced.  Returns EILSEQ with *nresult == (size_t)-1 on a corrupt
 * state or an over-long pending sequence.
 * The decoded character is stored through pwc when pwc is non-NULL.
 */
843 static int
844 _citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo * __restrict ei,
845 wchar_t * __restrict pwc,
846 const char ** __restrict s,
847 size_t n, _ISO2022State * __restrict psenc,
848 size_t * __restrict nresult)
850 wchar_t wchar;
851 const char *s0, *p, *result;
852 int c;
853 int chlenbak;
855 _DIAGASSERT(nresult != 0);
856 _DIAGASSERT(ei != NULL);
857 _DIAGASSERT(psenc != NULL);
858 _DIAGASSERT(s != NULL);
860 if (*s == NULL) {
861 _citrus_ISO2022_init_state(ei, psenc);
862 *nresult = _ENCODING_IS_STATE_DEPENDENT;
863 return 0;
865 s0 = *s;
866 c = 0;
/* remember how many bytes were already buffered before this call */
867 chlenbak = psenc->chlen;
870 * if we have something in buffer, use that.
871 * otherwise, skip here
873 if (psenc->chlen < 0 || psenc->chlen > sizeof(psenc->ch)) {
874 /* illgeal state */
875 _citrus_ISO2022_init_state(ei, psenc);
876 goto encoding_error;
878 if (psenc->chlen == 0)
879 goto emptybuf;
881 /* buffer is not empty */
/* append input to psenc->ch one byte per iteration and retry the decode from the buffer */
882 p = psenc->ch;
/* n is a size_t, so the `n >= 0' term below is always true */
883 while (psenc->chlen < sizeof(psenc->ch) && n >= 0) {
884 if (n > 0) {
885 psenc->ch[psenc->chlen++] = *s0++;
886 n--;
889 wchar = _ISO2022_sgetwchar(ei, p, psenc->chlen - (p-psenc->ch),
890 &result, psenc);
891 c += result - p;
892 if (wchar != _ISO2022INVALID) {
/* keep any buffered bytes that follow the decoded character */
893 if (psenc->chlen > c)
894 memmove(psenc->ch, result, psenc->chlen - c);
895 if (psenc->chlen < c)
896 psenc->chlen = 0;
897 else
898 psenc->chlen -= c;
899 goto output;
902 if (n == 0) {
903 if ((result - p) == psenc->chlen)
904 /* complete shift sequence. */
905 psenc->chlen = 0;
906 goto restart;
909 p = result;
912 /* escape sequence too long? */
913 goto encoding_error;
/* nothing buffered: decode straight from the caller's input */
915 emptybuf:
916 wchar = _ISO2022_sgetwchar(ei, s0, n, &result, psenc);
917 if (wchar != _ISO2022INVALID) {
918 c += result - s0;
919 psenc->chlen = 0;
920 s0 = result;
921 goto output;
/* some bytes were consumed but no character produced yet */
923 if (result > s0) {
924 c += (result - s0);
925 n -= (result - s0);
926 s0 = result;
927 if (n>0)
928 goto emptybuf;
929 /* complete shift sequence. */
930 goto restart;
/* incomplete input: save it in psenc->ch for the next call if it fits */
932 n += c;
933 if (n < sizeof(psenc->ch)) {
934 memcpy(psenc->ch, s0 - c, n);
935 psenc->chlen = n;
936 s0 = result;
937 goto restart;
940 /* escape sequence too long? */
942 encoding_error:
943 psenc->chlen = 0;
944 *nresult = (size_t)-1;
945 return (EILSEQ);
947 output:
948 *s = s0;
949 if (pwc)
950 *pwc = wchar;
952 if (!wchar)
953 *nresult = 0;
954 else
955 *nresult = c - chlenbak;
957 return (0);
/* more input is needed */
959 restart:
960 *s = s0;
961 *nresult = (size_t)-2;
963 return (0);
966 static int
967 recommendation(_ISO2022EncodingInfo * __restrict ei,
968 _ISO2022Charset * __restrict cs)
970 int i, j;
971 _ISO2022Charset *recommend;
973 _DIAGASSERT(ei != NULL);
974 _DIAGASSERT(cs != NULL);
976 /* first, try a exact match. */
977 for (i = 0; i < 4; i++) {
978 recommend = ei->recommend[i];
979 for (j = 0; j < ei->recommendsize[i]; j++) {
980 if (cs->type != recommend[j].type)
981 continue;
982 if (cs->final != recommend[j].final)
983 continue;
984 if (cs->interm != recommend[j].interm)
985 continue;
987 return i;
991 /* then, try a wildcard match over final char. */
992 for (i = 0; i < 4; i++) {
993 recommend = ei->recommend[i];
994 for (j = 0; j < ei->recommendsize[i]; j++) {
995 if (cs->type != recommend[j].type)
996 continue;
997 if (cs->final && (cs->final != recommend[j].final))
998 continue;
999 if (cs->interm && (cs->interm != recommend[j].interm))
1000 continue;
1002 return i;
1006 /* there's no recommendation. make a guess. */
1007 if (ei->maxcharset == 0) {
1008 return 0;
1009 } else {
1010 switch (cs->type) {
1011 case CS94:
1012 case CS94MULTI:
1013 return 0;
1014 case CS96:
1015 case CS96MULTI:
1016 return 1;
1019 return 0;
1022 static int
1023 _ISO2022_sputwchar(_ISO2022EncodingInfo * __restrict ei, wchar_t wc,
1024 char * __restrict string, size_t n,
1025 char ** __restrict result,
1026 _ISO2022State * __restrict psenc,
1027 size_t * __restrict nresult)
1029 int i = 0;
1030 size_t len;
1031 _ISO2022Charset cs;
1032 char *p;
1033 char tmp[MB_LEN_MAX];
1034 int target;
1035 u_char mask;
1036 int bit8;
1038 _DIAGASSERT(ei != NULL);
1039 _DIAGASSERT(string != NULL);
1040 /* result may be NULL */
1041 _DIAGASSERT(psenc != NULL);
1042 _DIAGASSERT(nresult != NULL);
1044 if (isc0(wc & 0xff)) {
1045 /* go back to INIT0 or ASCII on control chars */
1046 cs = ei->initg[0].final ? ei->initg[0] : ascii;
1047 } else if (isc1(wc & 0xff)) {
1048 /* go back to INIT1 or ISO-8859-1 on control chars */
1049 cs = ei->initg[1].final ? ei->initg[1] : iso88591;
1050 } else if (!(wc & ~0xff)) {
1051 if (wc & 0x80) {
1052 /* special treatment for ISO-8859-1 */
1053 cs = iso88591;
1054 } else {
1055 /* special treatment for ASCII */
1056 cs = ascii;
1058 } else {
1059 cs.final = (wc >> 24) & 0x7f;
1060 if ((wc >> 16) & 0x80)
1061 cs.interm = (wc >> 16) & 0x7f;
1062 else
1063 cs.interm = '\0';
1064 if (wc & 0x80)
1065 cs.type = (wc & 0x00007f00) ? CS96MULTI : CS96;
1066 else
1067 cs.type = (wc & 0x00007f00) ? CS94MULTI : CS94;
1069 target = recommendation(ei, &cs);
1070 p = tmp;
1071 bit8 = ei->flags & F_8BIT;
1073 /* designate the charset onto the target plane(G0/1/2/3). */
1074 if (psenc->g[target].type == cs.type
1075 && psenc->g[target].final == cs.final
1076 && psenc->g[target].interm == cs.interm)
1077 goto planeok;
1079 *p++ = '\033';
1080 if (cs.type == CS94MULTI || cs.type == CS96MULTI)
1081 *p++ = '$';
1082 if (target == 0 && cs.type == CS94MULTI && strchr("@AB", cs.final)
1083 && !cs.interm && !(ei->flags & F_NOOLD))
1085 else if (cs.type == CS94 || cs.type == CS94MULTI)
1086 *p++ = "()*+"[target];
1087 else
1088 *p++ = ",-./"[target];
1089 if (cs.interm)
1090 *p++ = cs.interm;
1091 *p++ = cs.final;
1093 psenc->g[target].type = cs.type;
1094 psenc->g[target].final = cs.final;
1095 psenc->g[target].interm = cs.interm;
1097 planeok:
1098 /* invoke the plane onto GL or GR. */
1099 if (psenc->gl == target)
1100 goto sideok;
1101 if (bit8 && psenc->gr == target)
1102 goto sideok;
1104 if (target == 0 && (ei->flags & F_LS0)) {
1105 *p++ = '\017';
1106 psenc->gl = 0;
1107 } else if (target == 1 && (ei->flags & F_LS1)) {
1108 *p++ = '\016';
1109 psenc->gl = 1;
1110 } else if (target == 2 && (ei->flags & F_LS2)) {
1111 *p++ = '\033';
1112 *p++ = 'n';
1113 psenc->gl = 2;
1114 } else if (target == 3 && (ei->flags & F_LS3)) {
1115 *p++ = '\033';
1116 *p++ = 'o';
1117 psenc->gl = 3;
1118 } else if (bit8 && target == 1 && (ei->flags & F_LS1R)) {
1119 *p++ = '\033';
1120 *p++ = '~';
1121 psenc->gr = 1;
1122 } else if (bit8 && target == 2 && (ei->flags & F_LS2R)) {
1123 *p++ = '\033';
1124 /*{*/
1125 *p++ = '}';
1126 psenc->gr = 2;
1127 } else if (bit8 && target == 3 && (ei->flags & F_LS3R)) {
1128 *p++ = '\033';
1129 *p++ = '|';
1130 psenc->gr = 3;
1131 } else if (target == 2 && (ei->flags & F_SS2)) {
1132 *p++ = '\033';
1133 *p++ = 'N';
1134 psenc->singlegl = 2;
1135 } else if (target == 3 && (ei->flags & F_SS3)) {
1136 *p++ = '\033';
1137 *p++ = 'O';
1138 psenc->singlegl = 3;
1139 } else if (bit8 && target == 2 && (ei->flags & F_SS2R)) {
1140 *p++ = '\216';
1141 *p++ = 'N';
1142 psenc->singlegl = psenc->singlegr = 2;
1143 } else if (bit8 && target == 3 && (ei->flags & F_SS3R)) {
1144 *p++ = '\217';
1145 *p++ = 'O';
1146 psenc->singlegl = psenc->singlegr = 3;
1147 } else
1148 goto ilseq;
1150 sideok:
1151 if (psenc->singlegl == target)
1152 mask = 0x00;
1153 else if (psenc->singlegr == target)
1154 mask = 0x80;
1155 else if (psenc->gl == target)
1156 mask = 0x00;
1157 else if ((ei->flags & F_8BIT) && psenc->gr == target)
1158 mask = 0x80;
1159 else
1160 goto ilseq;
1162 switch (cs.type) {
1163 case CS94:
1164 case CS96:
1165 i = 1;
1166 break;
1167 case CS94MULTI:
1168 case CS96MULTI:
1169 i = !iscntl(wc & 0xff) ?
1170 (isthree(cs.final) ? 3 : 2) : 1;
1171 break;
1173 while (i-- > 0)
1174 *p++ = ((wc >> (i << 3)) & 0x7f) | mask;
1176 /* reset single shift state */
1177 psenc->singlegl = psenc->singlegr = -1;
1179 len = (size_t)(p - tmp);
1180 if (n < len) {
1181 if (result)
1182 *result = (char *)0;
1183 *nresult = (size_t)-1;
1184 return E2BIG;
1186 if (result)
1187 *result = string + len;
1188 memcpy(string, tmp, len);
1189 *nresult = len;
1191 return 0;
1193 ilseq:
1194 *nresult = (size_t)-1;
1195 return EILSEQ;
1198 static int
1199 _citrus_ISO2022_put_state_reset(_ISO2022EncodingInfo * __restrict ei,
1200 char * __restrict s, size_t n,
1201 _ISO2022State * __restrict psenc,
1202 size_t * __restrict nresult)
1204 char buf[MB_LEN_MAX];
1205 char *result;
1206 int ret;
1207 size_t len;
1209 _DIAGASSERT(ei != NULL);
1210 _DIAGASSERT(nresult != 0);
1211 _DIAGASSERT(s != NULL);
1213 /* XXX state will be modified after this operation... */
1214 ret = _ISO2022_sputwchar(ei, L'\0', buf, sizeof(buf), &result, psenc,
1215 &len);
1216 if (ret) {
1217 *nresult = len;
1218 return ret;
1221 if (sizeof(buf) < len || n < len-1) {
1222 /* XXX should recover state? */
1223 *nresult = (size_t)-1;
1224 return E2BIG;
1227 memcpy(s, buf, len-1);
1228 *nresult = len-1;
1229 return (0);
1232 static int
1233 _citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo * __restrict ei,
1234 char * __restrict s, size_t n, wchar_t wc,
1235 _ISO2022State * __restrict psenc,
1236 size_t * __restrict nresult)
1238 char buf[MB_LEN_MAX];
1239 char *result;
1240 int ret;
1241 size_t len;
1243 _DIAGASSERT(ei != NULL);
1244 _DIAGASSERT(s != NULL);
1245 _DIAGASSERT(psenc != NULL);
1246 _DIAGASSERT(nresult != 0);
1248 /* XXX state will be modified after this operation... */
1249 ret = _ISO2022_sputwchar(ei, wc, buf, sizeof(buf), &result, psenc,
1250 &len);
1251 if (ret) {
1252 *nresult = len;
1253 return ret;
1256 if (sizeof(buf) < len || n < len) {
1257 /* XXX should recover state? */
1258 *nresult = (size_t)-1;
1259 return E2BIG;
1262 memcpy(s, buf, len);
1263 *nresult = len;
1264 return (0);
1267 static __inline int
1268 /*ARGSUSED*/
1269 _citrus_ISO2022_stdenc_wctocs(_ISO2022EncodingInfo * __restrict ei,
1270 _csid_t * __restrict csid,
1271 _index_t * __restrict idx, wchar_t wc)
1273 wchar_t m, nm;
1275 _DIAGASSERT(csid != NULL && idx != NULL);
1277 m = wc & 0x7FFF8080;
1278 nm = wc & 0x007F7F7F;
1279 if (m & 0x00800000) {
1280 nm &= 0x00007F7F;
1281 } else {
1282 m &= 0x7F008080;
1284 if (nm & 0x007F0000) {
1285 /* ^3 mark */
1286 m |= 0x007F0000;
1287 } else if (nm & 0x00007F00) {
1288 /* ^2 mark */
1289 m |= 0x00007F00;
1291 *csid = (_csid_t)m;
1292 *idx = (_index_t)nm;
1294 return (0);
1297 static __inline int
1298 /*ARGSUSED*/
1299 _citrus_ISO2022_stdenc_cstowc(_ISO2022EncodingInfo * __restrict ei,
1300 wchar_t * __restrict wc,
1301 _csid_t csid, _index_t idx)
1304 _DIAGASSERT(ei != NULL && wc != NULL);
1306 *wc = (wchar_t)(csid & 0x7F808080) | (wchar_t)idx;
1308 return (0);
1311 static __inline int
1312 /*ARGSUSED*/
1313 _citrus_ISO2022_stdenc_get_state_desc_generic(_ISO2022EncodingInfo * __restrict ei,
1314 _ISO2022State * __restrict psenc,
1315 int * __restrict rstate)
1318 if (psenc->chlen == 0) {
1319 /* XXX: it should distinguish initial and stable. */
1320 *rstate = _STDENC_SDGEN_STABLE;
1321 } else {
1322 if (psenc->ch[0] == '\033')
1323 *rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT;
1324 else
1325 *rstate = _STDENC_SDGEN_INCOMPLETE_CHAR;
1328 return 0;
/* ----------------------------------------------------------------------
 * public interface for ctype
 */
1335 _CITRUS_CTYPE_DECLS(ISO2022);
1336 _CITRUS_CTYPE_DEF_OPS(ISO2022);
1338 #include "citrus_ctype_template.h"
/* ----------------------------------------------------------------------
 * public interface for stdenc
 */
1344 _CITRUS_STDENC_DECLS(ISO2022);
1345 _CITRUS_STDENC_DEF_OPS(ISO2022);
1347 #include "citrus_stdenc_template.h"