Remove building with NOCRYPTO option
[minix.git] / lib / libc / citrus / modules / citrus_iso2022.c
blob0b3a0a8012b0892a36acbcc9fde0748c4ea8136d
1 /* $NetBSD: citrus_iso2022.c,v 1.23 2013/05/28 16:57:56 joerg Exp $ */
3 /*-
4 * Copyright (c)1999, 2002 Citrus Project,
5 * All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
28 * $Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $
31 #include <sys/cdefs.h>
32 #if defined(LIBC_SCCS) && !defined(lint)
33 __RCSID("$NetBSD: citrus_iso2022.c,v 1.23 2013/05/28 16:57:56 joerg Exp $");
34 #endif /* LIBC_SCCS and not lint */
36 #include <assert.h>
37 #include <errno.h>
38 #include <string.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <stddef.h>
42 #include <wchar.h>
43 #include <sys/types.h>
44 #include <limits.h>
46 #include "citrus_namespace.h"
47 #include "citrus_types.h"
48 #include "citrus_module.h"
49 #include "citrus_ctype.h"
50 #include "citrus_stdenc.h"
51 #include "citrus_iso2022.h"
54 /* ----------------------------------------------------------------------
55 * private stuffs used by templates
60 * wchar_t mappings:
61 * ASCII (ESC ( B) 00000000 00000000 00000000 0xxxxxxx
62 * iso-8859-1 (ESC , A) 00000000 00000000 00000000 1xxxxxxx
63 * 94 charset (ESC ( F) 0fffffff 00000000 00000000 0xxxxxxx
64 * 94 charset (ESC ( M F) 0fffffff 1mmmmmmm 00000000 0xxxxxxx
65 * 96 charset (ESC , F) 0fffffff 00000000 00000000 1xxxxxxx
66 * 96 charset (ESC , M F) 0fffffff 1mmmmmmm 00000000 1xxxxxxx
67 * 94x94 charset (ESC $ ( F) 0fffffff 00000000 0xxxxxxx 0xxxxxxx
68 * 96x96 charset (ESC $ , F) 0fffffff 00000000 0xxxxxxx 1xxxxxxx
69 * 94x94 charset (ESC & V ESC $ ( F)
70 * 0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx
71 * 94x94x94 charset (ESC $ ( F) 0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx
72 * 96x96x96 charset (ESC $ , F) 0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx
73 * reserved for UCS4 co-existence (UCS4 is 31bit encoding thanks to mohta bit)
74 * 1xxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
77 typedef struct {
78 u_char type;
79 #define CS94 (0U)
80 #define CS96 (1U)
81 #define CS94MULTI (2U)
82 #define CS96MULTI (3U)
84 u_char final;
85 u_char interm;
86 u_char vers;
87 } _ISO2022Charset;
89 static const _ISO2022Charset ascii = { CS94, 'B', '\0', '\0' };
90 static const _ISO2022Charset iso88591 = { CS96, 'A', '\0', '\0' };
92 typedef struct {
93 _ISO2022Charset g[4];
94 /* need 3 bits to hold -1, 0, ..., 3 */
95 int gl:3,
96 gr:3,
97 singlegl:3,
98 singlegr:3;
99 char ch[7]; /* longest escape sequence (ESC & V ESC $ ( F) */
100 int chlen;
101 int flags;
102 #define _ISO2022STATE_FLAG_INITIALIZED 1
103 } _ISO2022State;
105 typedef struct {
106 _ISO2022Charset *recommend[4];
107 size_t recommendsize[4];
108 _ISO2022Charset initg[4];
109 int maxcharset;
110 int flags;
111 #define F_8BIT 0x0001
112 #define F_NOOLD 0x0002
113 #define F_SI 0x0010 /*0F*/
114 #define F_SO 0x0020 /*0E*/
115 #define F_LS0 0x0010 /*0F*/
116 #define F_LS1 0x0020 /*0E*/
117 #define F_LS2 0x0040 /*ESC n*/
118 #define F_LS3 0x0080 /*ESC o*/
119 #define F_LS1R 0x0100 /*ESC ~*/
120 #define F_LS2R 0x0200 /*ESC }*/
121 #define F_LS3R 0x0400 /*ESC |*/
122 #define F_SS2 0x0800 /*ESC N*/
123 #define F_SS3 0x1000 /*ESC O*/
124 #define F_SS2R 0x2000 /*8E*/
125 #define F_SS3R 0x4000 /*8F*/
126 } _ISO2022EncodingInfo;
127 typedef struct {
128 _ISO2022EncodingInfo ei;
129 struct {
130 /* for future multi-locale facility */
131 _ISO2022State s_mblen;
132 _ISO2022State s_mbrlen;
133 _ISO2022State s_mbrtowc;
134 _ISO2022State s_mbtowc;
135 _ISO2022State s_mbsrtowcs;
136 _ISO2022State s_mbsnrtowcs;
137 _ISO2022State s_wcrtomb;
138 _ISO2022State s_wcsrtombs;
139 _ISO2022State s_wcsnrtombs;
140 _ISO2022State s_wctomb;
141 } states;
142 } _ISO2022CTypeInfo;
/* Accessors for the pieces of a _ISO2022CTypeInfo. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/* Names and properties of this encoding, expressed as macros. */
#define _FUNCNAME(m)			_citrus_ISO2022_##m
#define _ENCODING_INFO			_ISO2022EncodingInfo
#define _CTYPE_INFO			_ISO2022CTypeInfo
#define _ENCODING_STATE			_ISO2022State
#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
#define _ENCODING_IS_STATE_DEPENDENT	1
/* A state needs initializing until its INITIALIZED flag has been set. */
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	\
	(!((_ps_)->flags & _ISO2022STATE_FLAG_INITIALIZED))

/*
 * Value returned by the decoder when no character could be produced.
 * The expansion is fully parenthesized so that it stays a single operand
 * wherever it is used (e.g. "sizeof _ISO2022INVALID").
 */
#define _ISO2022INVALID			((wchar_t)-1)
/*
 * Byte classification helpers.  Each takes one byte and returns non-zero
 * when it lies in the stated range.
 */

/* C0 control: 0x00..0x1f */
static __inline int
isc0(__uint8_t x)
{
	return (x <= 0x1f);
}

/* C1 control: 0x80..0x9f */
static __inline int
isc1(__uint8_t x)
{
	return ((x & 0xe0) == 0x80);
}

/* any control byte: C0, C1 or DEL (0x7f) */
static __inline int
iscntl(__uint8_t x)
{
	if (x == 0x7f)
		return 1;
	return (isc0(x) || isc1(x));
}

/* member of a 94-character set: 0x21..0x7e */
static __inline int
is94(__uint8_t x)
{
	return (x > 0x20 && x < 0x7f);
}

/* member of a 96-character set: 0x20..0x7f */
static __inline int
is96(__uint8_t x)
{
	return (x >= 0x20 && x < 0x80);
}

/* 0x30..0x7f */
static __inline int
isecma(__uint8_t x)
{
	return (x >= 0x30 && x < 0x80);
}

/* intermediate byte: 0x20..0x2f */
static __inline int
isinterm(__uint8_t x)
{
	return ((x & 0xf0) == 0x20);
}

/* 0x60..0x6f */
static __inline int
isthree(__uint8_t x)
{
	return ((x & 0xf0) == 0x60);
}
168 static __inline int
169 getcs(const char * __restrict p, _ISO2022Charset * __restrict cs)
172 _DIAGASSERT(p != NULL);
173 _DIAGASSERT(cs != NULL);
175 if (!strncmp(p, "94$", 3) && p[3] && !p[4]) {
176 cs->final = (u_char)(p[3] & 0xff);
177 cs->interm = '\0';
178 cs->vers = '\0';
179 cs->type = CS94MULTI;
180 } else if (!strncmp(p, "96$", 3) && p[3] && !p[4]) {
181 cs->final = (u_char)(p[3] & 0xff);
182 cs->interm = '\0';
183 cs->vers = '\0';
184 cs->type = CS96MULTI;
185 } else if (!strncmp(p, "94", 2) && p[2] && !p[3]) {
186 cs->final = (u_char)(p[2] & 0xff);
187 cs->interm = '\0';
188 cs->vers = '\0';
189 cs->type = CS94;
190 } else if (!strncmp(p, "96", 2) && p[2] && !p[3]) {
191 cs->final = (u_char )(p[2] & 0xff);
192 cs->interm = '\0';
193 cs->vers = '\0';
194 cs->type = CS96;
195 } else {
196 return 1;
199 return 0;
203 #define _NOTMATCH 0
204 #define _MATCH 1
205 #define _PARSEFAIL 2
207 static __inline int
208 get_recommend(_ISO2022EncodingInfo * __restrict ei,
209 const char * __restrict token)
211 int i;
212 _ISO2022Charset cs, *p;
214 if (!strchr("0123", token[0]) || token[1] != '=')
215 return (_NOTMATCH);
217 if (getcs(&token[2], &cs) == 0)
219 else if (!strcmp(&token[2], "94")) {
220 cs.final = (u_char)(token[4]);
221 cs.interm = '\0';
222 cs.vers = '\0';
223 cs.type = CS94;
224 } else if (!strcmp(&token[2], "96")) {
225 cs.final = (u_char)(token[4]);
226 cs.interm = '\0';
227 cs.vers = '\0';
228 cs.type = CS96;
229 } else if (!strcmp(&token[2], "94$")) {
230 cs.final = (u_char)(token[5]);
231 cs.interm = '\0';
232 cs.vers = '\0';
233 cs.type = CS94MULTI;
234 } else if (!strcmp(&token[2], "96$")) {
235 cs.final = (u_char)(token[5]);
236 cs.interm = '\0';
237 cs.vers = '\0';
238 cs.type = CS96MULTI;
239 } else {
240 return (_PARSEFAIL);
243 i = token[0] - '0';
244 if (!ei->recommend[i]) {
245 ei->recommend[i] = malloc(sizeof(_ISO2022Charset));
246 } else {
247 p = realloc(ei->recommend[i],
248 sizeof(_ISO2022Charset) * (ei->recommendsize[i] + 1));
249 if (!p)
250 return (_PARSEFAIL);
251 ei->recommend[i] = p;
253 if (!ei->recommend[i])
254 return (_PARSEFAIL);
255 ei->recommendsize[i]++;
257 (ei->recommend[i] + (ei->recommendsize[i] - 1))->final = cs.final;
258 (ei->recommend[i] + (ei->recommendsize[i] - 1))->interm = cs.interm;
259 (ei->recommend[i] + (ei->recommendsize[i] - 1))->vers = cs.vers;
260 (ei->recommend[i] + (ei->recommendsize[i] - 1))->type = cs.type;
262 return (_MATCH);
265 static __inline int
266 get_initg(_ISO2022EncodingInfo * __restrict ei,
267 const char * __restrict token)
269 _ISO2022Charset cs;
271 if (strncmp("INIT", &token[0], 4) ||
272 !strchr("0123", token[4]) ||
273 token[5] != '=')
274 return (_NOTMATCH);
276 if (getcs(&token[6], &cs) != 0)
277 return (_PARSEFAIL);
279 ei->initg[token[4] - '0'].type = cs.type;
280 ei->initg[token[4] - '0'].final = cs.final;
281 ei->initg[token[4] - '0'].interm = cs.interm;
282 ei->initg[token[4] - '0'].vers = cs.vers;
284 return (_MATCH);
287 static __inline int
288 get_max(_ISO2022EncodingInfo * __restrict ei,
289 const char * __restrict token)
291 if (!strcmp(token, "MAX1")) {
292 ei->maxcharset = 1;
293 } else if (!strcmp(token, "MAX2")) {
294 ei->maxcharset = 2;
295 } else if (!strcmp(token, "MAX3")) {
296 ei->maxcharset = 3;
297 } else
298 return (_NOTMATCH);
300 return (_MATCH);
304 static __inline int
305 get_flags(_ISO2022EncodingInfo * __restrict ei,
306 const char * __restrict token)
308 int i;
309 static struct {
310 const char *tag;
311 int flag;
312 } const tags[] = {
313 { "DUMMY", 0 },
314 { "8BIT", F_8BIT },
315 { "NOOLD", F_NOOLD },
316 { "SI", F_SI },
317 { "SO", F_SO },
318 { "LS0", F_LS0 },
319 { "LS1", F_LS1 },
320 { "LS2", F_LS2 },
321 { "LS3", F_LS3 },
322 { "LS1R", F_LS1R },
323 { "LS2R", F_LS2R },
324 { "LS3R", F_LS3R },
325 { "SS2", F_SS2 },
326 { "SS3", F_SS3 },
327 { "SS2R", F_SS2R },
328 { "SS3R", F_SS3R },
329 { NULL, 0 }
332 for (i = 0; tags[i].tag; i++) {
333 if (!strcmp(token, tags[i].tag)) {
334 ei->flags |= tags[i].flag;
335 return (_MATCH);
339 return (_NOTMATCH);
343 static __inline int
344 _citrus_ISO2022_parse_variable(_ISO2022EncodingInfo * __restrict ei,
345 const void * __restrict var, size_t lenvar)
347 char const *v, *e;
348 char buf[20];
349 int i, len, ret;
351 _DIAGASSERT(ei != NULL);
355 * parse VARIABLE section.
358 if (!var)
359 return (EFTYPE);
361 v = (const char *) var;
363 /* initialize structure */
364 ei->maxcharset = 0;
365 for (i = 0; i < 4; i++) {
366 ei->recommend[i] = NULL;
367 ei->recommendsize[i] = 0;
369 ei->flags = 0;
371 while (*v) {
372 while (*v == ' ' || *v == '\t')
373 ++v;
375 /* find the token */
376 e = v;
377 while (*e && *e != ' ' && *e != '\t')
378 ++e;
380 len = e-v;
381 if (len == 0)
382 break;
383 if (len>=sizeof(buf))
384 goto parsefail;
385 snprintf(buf, sizeof(buf), "%.*s", len, v);
387 if ((ret = get_recommend(ei, buf)) != _NOTMATCH)
389 else if ((ret = get_initg(ei, buf)) != _NOTMATCH)
391 else if ((ret = get_max(ei, buf)) != _NOTMATCH)
393 else if ((ret = get_flags(ei, buf)) != _NOTMATCH)
395 else
396 ret = _PARSEFAIL;
397 if (ret==_PARSEFAIL)
398 goto parsefail;
399 v = e;
403 return (0);
405 parsefail:
406 free(ei->recommend[0]);
407 free(ei->recommend[1]);
408 free(ei->recommend[2]);
409 free(ei->recommend[3]);
411 return (EFTYPE);
414 static __inline void
415 /*ARGSUSED*/
416 _citrus_ISO2022_init_state(_ISO2022EncodingInfo * __restrict ei,
417 _ISO2022State * __restrict s)
419 int i;
421 memset(s, 0, sizeof(*s));
422 s->gl = 0;
423 s->gr = (ei->flags & F_8BIT) ? 1 : -1;
425 for (i = 0; i < 4; i++) {
426 if (ei->initg[i].final) {
427 s->g[i].type = ei->initg[i].type;
428 s->g[i].final = ei->initg[i].final;
429 s->g[i].interm = ei->initg[i].interm;
432 s->singlegl = s->singlegr = -1;
433 s->flags |= _ISO2022STATE_FLAG_INITIALIZED;
436 static __inline void
437 /*ARGSUSED*/
438 _citrus_ISO2022_pack_state(_ISO2022EncodingInfo * __restrict ei,
439 void * __restrict pspriv,
440 const _ISO2022State * __restrict s)
442 memcpy(pspriv, (const void *)s, sizeof(*s));
445 static __inline void
446 /*ARGSUSED*/
447 _citrus_ISO2022_unpack_state(_ISO2022EncodingInfo * __restrict ei,
448 _ISO2022State * __restrict s,
449 const void * __restrict pspriv)
451 memcpy((void *)s, pspriv, sizeof(*s));
454 static int
455 /*ARGSUSED*/
456 _citrus_ISO2022_encoding_module_init(_ISO2022EncodingInfo * __restrict ei,
457 const void * __restrict var,
458 size_t lenvar)
461 _DIAGASSERT(ei != NULL);
463 return _citrus_ISO2022_parse_variable(ei, var, lenvar);
466 static void
467 /*ARGSUSED*/
468 _citrus_ISO2022_encoding_module_uninit(_ISO2022EncodingInfo *ei)
472 #define ESC '\033'
473 #define ECMA -1
474 #define INTERM -2
475 #define OECMA -3
476 static const struct seqtable {
477 int type;
478 int csoff;
479 int finaloff;
480 int intermoff;
481 int versoff;
482 int len;
483 int chars[10];
484 } seqtable[] = {
485 /* G0 94MULTI special */
486 { CS94MULTI, -1, 2, -1, -1, 3, { ESC, '$', OECMA }, },
487 /* G0 94MULTI special with version identification */
488 { CS94MULTI, -1, 5, -1, 2, 6, { ESC, '&', ECMA, ESC, '$', OECMA }, },
489 /* G? 94 */
490 { CS94, 1, 2, -1, -1, 3, { ESC, CS94, ECMA, }, },
491 /* G? 94 with 2nd intermediate char */
492 { CS94, 1, 3, 2, -1, 4, { ESC, CS94, INTERM, ECMA, }, },
493 /* G? 96 */
494 { CS96, 1, 2, -1, -1, 3, { ESC, CS96, ECMA, }, },
495 /* G? 96 with 2nd intermediate char */
496 { CS96, 1, 3, 2, -1, 4, { ESC, CS96, INTERM, ECMA, }, },
497 /* G? 94MULTI */
498 { CS94MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS94, ECMA, }, },
499 /* G? 96MULTI */
500 { CS96MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS96, ECMA, }, },
501 /* G? 94MULTI with version specification */
502 { CS94MULTI, 5, 6, -1, 2, 7, { ESC, '&', ECMA, ESC, '$', CS94, ECMA, }, },
503 /* LS2/3 */
504 { -1, -1, -1, -1, -1, 2, { ESC, 'n', }, },
505 { -1, -1, -1, -1, -1, 2, { ESC, 'o', }, },
506 /* LS1/2/3R */
507 { -1, -1, -1, -1, -1, 2, { ESC, '~', }, },
508 { -1, -1, -1, -1, -1, 2, { ESC, /*{*/ '}', }, },
509 { -1, -1, -1, -1, -1, 2, { ESC, '|', }, },
510 /* SS2/3 */
511 { -1, -1, -1, -1, -1, 2, { ESC, 'N', }, },
512 { -1, -1, -1, -1, -1, 2, { ESC, 'O', }, },
513 /* end of records */
514 { 0, }
517 static int
518 seqmatch(const char * __restrict s, size_t n,
519 const struct seqtable * __restrict sp)
521 const int *p;
523 _DIAGASSERT(s != NULL);
524 _DIAGASSERT(sp != NULL);
526 p = sp->chars;
527 while (p - sp->chars < n && p - sp->chars < sp->len) {
528 switch (*p) {
529 case ECMA:
530 if (!isecma(*s))
531 goto terminate;
532 break;
533 case OECMA:
534 if (*s && strchr("@AB", *s))
535 break;
536 else
537 goto terminate;
538 case INTERM:
539 if (!isinterm(*s))
540 goto terminate;
541 break;
542 case CS94:
543 if (*s && strchr("()*+", *s))
544 break;
545 else
546 goto terminate;
547 case CS96:
548 if (*s && strchr(",-./", *s))
549 break;
550 else
551 goto terminate;
552 default:
553 if (*s != *p)
554 goto terminate;
555 break;
558 p++;
559 s++;
562 terminate:
563 return p - sp->chars;
/*
 * Decode one wide character from the n bytes at string.
 *
 * Shift and designation sequences at the head of the input are consumed
 * first and recorded in *psenc; then one character is extracted using the
 * charset currently selected for the GL or GR half (or the pending single
 * shift) and packed into a wchar_t together with the charset's final and
 * intermediate bytes.
 *
 * On success the character is returned, *result (if non-NULL) is set just
 * past the bytes used and any pending single shift is cleared.  When the
 * input ends before a character is complete, _ISO2022INVALID is returned
 * and *result (if non-NULL) is set to the current position, i.e. past any
 * complete shift/designation sequences already consumed.
 */
566 static wchar_t
567 _ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei,
568 const char * __restrict string, size_t n,
569 const char ** __restrict result,
570 _ISO2022State * __restrict psenc)
572 wchar_t wchar = 0;
573 int cur;
574 const struct seqtable *sp;
575 int nmatch;
576 int i;
578 _DIAGASSERT(ei != NULL);
579 _DIAGASSERT(psenc != NULL);
580 _DIAGASSERT(string != NULL);
581 /* result may be NULL */
/* Loop: strip every leading shift/designation sequence, updating *psenc. */
583 while (1) {
584 /* SI/SO */
585 if (1 <= n && string[0] == '\017') {
586 psenc->gl = 0;
587 string++;
588 n--;
589 continue;
591 if (1 <= n && string[0] == '\016') {
592 psenc->gl = 1;
593 string++;
594 n--;
595 continue;
598 /* SS2/3R */
/* a single-byte single shift arms both halves: 0x8e selects G2, 0x8f G3 */
599 if (1 <= n && string[0] && strchr("\217\216", string[0])) {
600 psenc->singlegl = psenc->singlegr =
601 (string[0] - '\216') + 2;
602 string++;
603 n--;
604 continue;
607 /* eat the letter if this is not ESC */
608 if (1 <= n && string[0] != '\033')
609 break;
611 /* look for a perfect match from escape sequences */
612 for (sp = &seqtable[0]; sp->len; sp++) {
613 nmatch = seqmatch(string, n, sp);
614 if (sp->len == nmatch && n >= sp->len)
615 break;
618 if (!sp->len)
619 goto notseq;
/* type != -1: a designation; record the charset in the addressed G-set */
621 if (sp->type != -1) {
622 if (sp->csoff == -1)
623 i = 0;
624 else {
625 switch (sp->type) {
626 case CS94:
627 case CS94MULTI:
628 i = string[sp->csoff] - '(';
629 break;
630 case CS96:
631 case CS96MULTI:
632 i = string[sp->csoff] - ',';
633 break;
634 default:
635 return (_ISO2022INVALID);
638 psenc->g[i].type = sp->type;
639 psenc->g[i].final = '\0';
640 psenc->g[i].interm = '\0';
641 psenc->g[i].vers = '\0';
642 /* sp->finaloff must not be -1 */
643 if (sp->finaloff != -1)
644 psenc->g[i].final = string[sp->finaloff];
645 if (sp->intermoff != -1)
646 psenc->g[i].interm = string[sp->intermoff];
647 if (sp->versoff != -1)
648 psenc->g[i].vers = string[sp->versoff];
650 string += sp->len;
651 n -= sp->len;
652 continue;
/* type == -1: one of the two-byte shift sequences handled below */
655 /* LS2/3 */
656 if (2 <= n && string[0] == '\033'
657 && string[1] && strchr("no", string[1])) {
658 psenc->gl = string[1] - 'n' + 2;
659 string += 2;
660 n -= 2;
661 continue;
664 /* LS1/2/3R */
665 /* XXX: { for vi showmatch */
/* '~' -> G1, '}' -> G2, '|' -> G3 */
666 if (2 <= n && string[0] == '\033'
667 && string[1] && strchr("~}|", string[1])) {
668 psenc->gr = 3 - (string[1] - '|');
669 string += 2;
670 n -= 2;
671 continue;
674 /* SS2/3 */
675 if (2 <= n && string[0] == '\033'
676 && string[1] && strchr("NO", string[1])) {
677 psenc->singlegl = (string[1] - 'N') + 2;
678 string += 2;
679 n -= 2;
680 continue;
683 notseq:
685 * if we've got an unknown escape sequence, eat the ESC at the
686 * head. otherwise, wait till full escape sequence comes.
688 for (sp = &seqtable[0]; sp->len; sp++) {
689 nmatch = seqmatch(string, n, sp);
690 if (!nmatch)
691 continue;
694 * if we are in the middle of escape sequence,
695 * we still need to wait for more characters to come
697 if (n < sp->len) {
698 if (nmatch == n) {
699 if (result)
700 *result = string;
701 return (_ISO2022INVALID);
703 } else {
704 if (nmatch == sp->len) {
705 /* this case should not happen */
706 goto eat;
711 break;
714 eat:
715 /* no letter to eat */
716 if (n < 1) {
717 if (result)
718 *result = string;
719 return (_ISO2022INVALID);
/* cur = G-set used for this byte, or -1 to pass the byte through as is */
722 /* normal chars. always eat C0/C1 as is. */
723 if (iscntl(*string & 0xff))
724 cur = -1;
725 else if (*string & 0x80) {
726 cur = (psenc->singlegr == -1)
727 ? psenc->gr : psenc->singlegr;
728 } else {
729 cur = (psenc->singlegl == -1)
730 ? psenc->gl : psenc->singlegl;
733 if (cur == -1) {
734 asis:
735 wchar = *string++ & 0xff;
736 if (result)
737 *result = string;
738 /* reset single shift state */
739 psenc->singlegr = psenc->singlegl = -1;
740 return wchar;
/* multibyte sets need 2 bytes, or 3 when isthree(final), all in one half */
743 /* length error check */
744 switch (psenc->g[cur].type) {
745 case CS94MULTI:
746 case CS96MULTI:
747 if (!isthree(psenc->g[cur].final)) {
748 if (2 <= n
749 && (string[0] & 0x80) == (string[1] & 0x80))
750 break;
751 } else {
752 if (3 <= n
753 && (string[0] & 0x80) == (string[1] & 0x80)
754 && (string[0] & 0x80) == (string[2] & 0x80))
755 break;
758 /* we still need to wait for more characters to come */
759 if (result)
760 *result = string;
761 return (_ISO2022INVALID);
763 case CS94:
764 case CS96:
765 if (1 <= n)
766 break;
768 /* we still need to wait for more characters to come */
769 if (result)
770 *result = string;
771 return (_ISO2022INVALID);
/*
 * NOTE(review): the CS94 case has no break and runs on into the CS96
 * test, and only the first two bytes of a three-byte set are range
 * checked - confirm both are intended.
 */
774 /* range check */
775 switch (psenc->g[cur].type) {
776 case CS94:
777 if (!(is94(string[0] & 0x7f)))
778 goto asis;
779 case CS96:
780 if (!(is96(string[0] & 0x7f)))
781 goto asis;
782 break;
783 case CS94MULTI:
784 if (!(is94(string[0] & 0x7f) && is94(string[1] & 0x7f)))
785 goto asis;
786 break;
787 case CS96MULTI:
788 if (!(is96(string[0] & 0x7f) && is96(string[1] & 0x7f)))
789 goto asis;
790 break;
793 /* extract the character. */
794 switch (psenc->g[cur].type) {
795 case CS94:
796 /* special case for ASCII. */
797 if (psenc->g[cur].final == 'B' && !psenc->g[cur].interm) {
798 wchar = *string++;
799 wchar &= 0x7f;
800 break;
/* final byte in bits 24-30, 0x80|interm in bits 16-23, character in bits 0-6 */
802 wchar = psenc->g[cur].final;
803 wchar = (wchar << 8);
804 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
805 wchar = (wchar << 8);
806 wchar = (wchar << 8) | (*string++ & 0x7f);
807 break;
808 case CS96:
809 /* special case for ISO-8859-1. */
810 if (psenc->g[cur].final == 'A' && !psenc->g[cur].interm) {
811 wchar = *string++;
812 wchar &= 0x7f;
813 wchar |= 0x80;
814 break;
816 wchar = psenc->g[cur].final;
817 wchar = (wchar << 8);
818 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
819 wchar = (wchar << 8);
820 wchar = (wchar << 8) | (*string++ & 0x7f);
821 wchar |= 0x80;
822 break;
823 case CS94MULTI:
824 case CS96MULTI:
825 wchar = psenc->g[cur].final;
826 wchar = (wchar << 8);
827 if (isthree(psenc->g[cur].final))
828 wchar |= (*string++ & 0x7f);
829 wchar = (wchar << 8) | (*string++ & 0x7f);
830 wchar = (wchar << 8) | (*string++ & 0x7f);
831 if (psenc->g[cur].type == CS96MULTI)
832 wchar |= 0x80;
833 break;
836 if (result)
837 *result = string;
838 /* reset single shift state */
839 psenc->singlegr = psenc->singlegl = -1;
840 return wchar;
/*
 * Restartable multibyte-to-wide conversion built on _ISO2022_sgetwchar().
 *
 * If *s is NULL the state is re-initialised and *nresult is set to
 * _ENCODING_IS_STATE_DEPENDENT.  Otherwise up to n bytes at *s are
 * examined, together with any bytes of an unfinished sequence saved in
 * psenc->ch by an earlier call.
 *
 * Outcomes:
 *  - a character was decoded: returns 0, stores it in *pwc (if non-NULL),
 *    advances *s, and sets *nresult to 0 for the null character or else to
 *    the consumed byte count minus the bytes that were already buffered;
 *  - input ended inside a sequence: returns 0 with *nresult == (size_t)-2,
 *    keeping the unfinished bytes in psenc->ch;
 *  - invalid state or a sequence too long for psenc->ch: clears the
 *    buffer, sets *nresult to (size_t)-1 and returns EILSEQ.
 */
845 static int
846 _citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo * __restrict ei,
847 wchar_t * __restrict pwc,
848 const char ** __restrict s,
849 size_t n, _ISO2022State * __restrict psenc,
850 size_t * __restrict nresult)
852 wchar_t wchar;
853 const char *s0, *p, *result;
854 int c;
855 int chlenbak;
857 _DIAGASSERT(nresult != 0);
858 _DIAGASSERT(ei != NULL);
859 _DIAGASSERT(psenc != NULL);
860 _DIAGASSERT(s != NULL);
862 if (*s == NULL) {
863 _citrus_ISO2022_init_state(ei, psenc);
864 *nresult = _ENCODING_IS_STATE_DEPENDENT;
865 return 0;
/* c counts consumed bytes; chlenbak is how many were buffered on entry */
867 s0 = *s;
868 c = 0;
869 chlenbak = psenc->chlen;
872 * if we have something in buffer, use that.
873 * otherwise, skip here
875 if (psenc->chlen < 0 || psenc->chlen > sizeof(psenc->ch)) {
876 /* illegal state */
877 _citrus_ISO2022_init_state(ei, psenc);
878 goto encoding_error;
880 if (psenc->chlen == 0)
881 goto emptybuf;
/* feed the input into ch[] one byte at a time and retry the decoder */
883 /* buffer is not empty */
884 p = psenc->ch;
885 while (psenc->chlen < sizeof(psenc->ch)) {
886 if (n > 0) {
887 psenc->ch[psenc->chlen++] = *s0++;
888 n--;
891 wchar = _ISO2022_sgetwchar(ei, p, psenc->chlen - (p-psenc->ch),
892 &result, psenc);
893 c += result - p;
894 if (wchar != _ISO2022INVALID) {
895 if (psenc->chlen > c)
896 memmove(psenc->ch, result, psenc->chlen - c);
897 if (psenc->chlen < c)
898 psenc->chlen = 0;
899 else
900 psenc->chlen -= c;
901 goto output;
904 if (n == 0) {
905 if ((result - p) == psenc->chlen)
906 /* complete shift sequence. */
907 psenc->chlen = 0;
908 goto restart;
911 p = result;
914 /* escape sequence too long? */
915 goto encoding_error;
/* nothing buffered: decode straight from the caller's input */
917 emptybuf:
918 wchar = _ISO2022_sgetwchar(ei, s0, n, &result, psenc);
919 if (wchar != _ISO2022INVALID) {
920 c += result - s0;
921 psenc->chlen = 0;
922 s0 = result;
923 goto output;
925 if (result > s0) {
926 c += (result - s0);
927 n -= (result - s0);
928 s0 = result;
929 if (n>0)
930 goto emptybuf;
931 /* complete shift sequence. */
932 goto restart;
/* no progress: save the unfinished bytes in ch[] if they fit */
934 n += c;
935 if (n < sizeof(psenc->ch)) {
936 memcpy(psenc->ch, s0 - c, n);
937 psenc->chlen = n;
938 s0 = result;
939 goto restart;
942 /* escape sequence too long? */
944 encoding_error:
945 psenc->chlen = 0;
946 *nresult = (size_t)-1;
947 return (EILSEQ);
949 output:
950 *s = s0;
951 if (pwc)
952 *pwc = wchar;
954 if (!wchar)
955 *nresult = 0;
956 else
957 *nresult = c - chlenbak;
959 return (0);
961 restart:
962 *s = s0;
963 *nresult = (size_t)-2;
965 return (0);
968 static int
969 recommendation(_ISO2022EncodingInfo * __restrict ei,
970 _ISO2022Charset * __restrict cs)
972 int i, j;
973 _ISO2022Charset *recommend;
975 _DIAGASSERT(ei != NULL);
976 _DIAGASSERT(cs != NULL);
978 /* first, try a exact match. */
979 for (i = 0; i < 4; i++) {
980 recommend = ei->recommend[i];
981 for (j = 0; j < ei->recommendsize[i]; j++) {
982 if (cs->type != recommend[j].type)
983 continue;
984 if (cs->final != recommend[j].final)
985 continue;
986 if (cs->interm != recommend[j].interm)
987 continue;
989 return i;
993 /* then, try a wildcard match over final char. */
994 for (i = 0; i < 4; i++) {
995 recommend = ei->recommend[i];
996 for (j = 0; j < ei->recommendsize[i]; j++) {
997 if (cs->type != recommend[j].type)
998 continue;
999 if (cs->final && (cs->final != recommend[j].final))
1000 continue;
1001 if (cs->interm && (cs->interm != recommend[j].interm))
1002 continue;
1004 return i;
1008 /* there's no recommendation. make a guess. */
1009 if (ei->maxcharset == 0) {
1010 return 0;
1011 } else {
1012 switch (cs->type) {
1013 case CS94:
1014 case CS94MULTI:
1015 return 0;
1016 case CS96:
1017 case CS96MULTI:
1018 return 1;
1021 return 0;
1024 static int
1025 _ISO2022_sputwchar(_ISO2022EncodingInfo * __restrict ei, wchar_t wc,
1026 char * __restrict string, size_t n,
1027 char ** __restrict result,
1028 _ISO2022State * __restrict psenc,
1029 size_t * __restrict nresult)
1031 int i = 0;
1032 size_t len;
1033 _ISO2022Charset cs;
1034 char *p;
1035 char tmp[MB_LEN_MAX];
1036 int target;
1037 u_char mask;
1038 int bit8;
1040 _DIAGASSERT(ei != NULL);
1041 _DIAGASSERT(string != NULL);
1042 /* result may be NULL */
1043 _DIAGASSERT(psenc != NULL);
1044 _DIAGASSERT(nresult != NULL);
1046 if (isc0(wc & 0xff)) {
1047 /* go back to INIT0 or ASCII on control chars */
1048 cs = ei->initg[0].final ? ei->initg[0] : ascii;
1049 } else if (isc1(wc & 0xff)) {
1050 /* go back to INIT1 or ISO-8859-1 on control chars */
1051 cs = ei->initg[1].final ? ei->initg[1] : iso88591;
1052 } else if (!(wc & ~0xff)) {
1053 if (wc & 0x80) {
1054 /* special treatment for ISO-8859-1 */
1055 cs = iso88591;
1056 } else {
1057 /* special treatment for ASCII */
1058 cs = ascii;
1060 } else {
1061 cs.final = (wc >> 24) & 0x7f;
1062 if ((wc >> 16) & 0x80)
1063 cs.interm = (wc >> 16) & 0x7f;
1064 else
1065 cs.interm = '\0';
1066 if (wc & 0x80)
1067 cs.type = (wc & 0x00007f00) ? CS96MULTI : CS96;
1068 else
1069 cs.type = (wc & 0x00007f00) ? CS94MULTI : CS94;
1071 target = recommendation(ei, &cs);
1072 p = tmp;
1073 bit8 = ei->flags & F_8BIT;
1075 /* designate the charset onto the target plane(G0/1/2/3). */
1076 if (psenc->g[target].type == cs.type
1077 && psenc->g[target].final == cs.final
1078 && psenc->g[target].interm == cs.interm)
1079 goto planeok;
1081 *p++ = '\033';
1082 if (cs.type == CS94MULTI || cs.type == CS96MULTI)
1083 *p++ = '$';
1084 if (target == 0 && cs.type == CS94MULTI && strchr("@AB", cs.final)
1085 && !cs.interm && !(ei->flags & F_NOOLD))
1087 else if (cs.type == CS94 || cs.type == CS94MULTI)
1088 *p++ = "()*+"[target];
1089 else
1090 *p++ = ",-./"[target];
1091 if (cs.interm)
1092 *p++ = cs.interm;
1093 *p++ = cs.final;
1095 psenc->g[target].type = cs.type;
1096 psenc->g[target].final = cs.final;
1097 psenc->g[target].interm = cs.interm;
1099 planeok:
1100 /* invoke the plane onto GL or GR. */
1101 if (psenc->gl == target)
1102 goto sideok;
1103 if (bit8 && psenc->gr == target)
1104 goto sideok;
1106 if (target == 0 && (ei->flags & F_LS0)) {
1107 *p++ = '\017';
1108 psenc->gl = 0;
1109 } else if (target == 1 && (ei->flags & F_LS1)) {
1110 *p++ = '\016';
1111 psenc->gl = 1;
1112 } else if (target == 2 && (ei->flags & F_LS2)) {
1113 *p++ = '\033';
1114 *p++ = 'n';
1115 psenc->gl = 2;
1116 } else if (target == 3 && (ei->flags & F_LS3)) {
1117 *p++ = '\033';
1118 *p++ = 'o';
1119 psenc->gl = 3;
1120 } else if (bit8 && target == 1 && (ei->flags & F_LS1R)) {
1121 *p++ = '\033';
1122 *p++ = '~';
1123 psenc->gr = 1;
1124 } else if (bit8 && target == 2 && (ei->flags & F_LS2R)) {
1125 *p++ = '\033';
1126 /*{*/
1127 *p++ = '}';
1128 psenc->gr = 2;
1129 } else if (bit8 && target == 3 && (ei->flags & F_LS3R)) {
1130 *p++ = '\033';
1131 *p++ = '|';
1132 psenc->gr = 3;
1133 } else if (target == 2 && (ei->flags & F_SS2)) {
1134 *p++ = '\033';
1135 *p++ = 'N';
1136 psenc->singlegl = 2;
1137 } else if (target == 3 && (ei->flags & F_SS3)) {
1138 *p++ = '\033';
1139 *p++ = 'O';
1140 psenc->singlegl = 3;
1141 } else if (bit8 && target == 2 && (ei->flags & F_SS2R)) {
1142 *p++ = '\216';
1143 *p++ = 'N';
1144 psenc->singlegl = psenc->singlegr = 2;
1145 } else if (bit8 && target == 3 && (ei->flags & F_SS3R)) {
1146 *p++ = '\217';
1147 *p++ = 'O';
1148 psenc->singlegl = psenc->singlegr = 3;
1149 } else
1150 goto ilseq;
1152 sideok:
1153 if (psenc->singlegl == target)
1154 mask = 0x00;
1155 else if (psenc->singlegr == target)
1156 mask = 0x80;
1157 else if (psenc->gl == target)
1158 mask = 0x00;
1159 else if ((ei->flags & F_8BIT) && psenc->gr == target)
1160 mask = 0x80;
1161 else
1162 goto ilseq;
1164 switch (cs.type) {
1165 case CS94:
1166 case CS96:
1167 i = 1;
1168 break;
1169 case CS94MULTI:
1170 case CS96MULTI:
1171 i = !iscntl(wc & 0xff) ?
1172 (isthree(cs.final) ? 3 : 2) : 1;
1173 break;
1175 while (i-- > 0)
1176 *p++ = ((wc >> (i << 3)) & 0x7f) | mask;
1178 /* reset single shift state */
1179 psenc->singlegl = psenc->singlegr = -1;
1181 len = (size_t)(p - tmp);
1182 if (n < len) {
1183 if (result)
1184 *result = (char *)0;
1185 *nresult = (size_t)-1;
1186 return E2BIG;
1188 if (result)
1189 *result = string + len;
1190 memcpy(string, tmp, len);
1191 *nresult = len;
1193 return 0;
1195 ilseq:
1196 *nresult = (size_t)-1;
1197 return EILSEQ;
1200 static int
1201 _citrus_ISO2022_put_state_reset(_ISO2022EncodingInfo * __restrict ei,
1202 char * __restrict s, size_t n,
1203 _ISO2022State * __restrict psenc,
1204 size_t * __restrict nresult)
1206 char buf[MB_LEN_MAX];
1207 char *result;
1208 int ret;
1209 size_t len;
1211 _DIAGASSERT(ei != NULL);
1212 _DIAGASSERT(nresult != 0);
1213 _DIAGASSERT(s != NULL);
1215 /* XXX state will be modified after this operation... */
1216 ret = _ISO2022_sputwchar(ei, L'\0', buf, sizeof(buf), &result, psenc,
1217 &len);
1218 if (ret) {
1219 *nresult = len;
1220 return ret;
1223 if (sizeof(buf) < len || n < len-1) {
1224 /* XXX should recover state? */
1225 *nresult = (size_t)-1;
1226 return E2BIG;
1229 memcpy(s, buf, len-1);
1230 *nresult = len-1;
1231 return (0);
1234 static int
1235 _citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo * __restrict ei,
1236 char * __restrict s, size_t n, wchar_t wc,
1237 _ISO2022State * __restrict psenc,
1238 size_t * __restrict nresult)
1240 char buf[MB_LEN_MAX];
1241 char *result;
1242 int ret;
1243 size_t len;
1245 _DIAGASSERT(ei != NULL);
1246 _DIAGASSERT(s != NULL);
1247 _DIAGASSERT(psenc != NULL);
1248 _DIAGASSERT(nresult != 0);
1250 /* XXX state will be modified after this operation... */
1251 ret = _ISO2022_sputwchar(ei, wc, buf, sizeof(buf), &result, psenc,
1252 &len);
1253 if (ret) {
1254 *nresult = len;
1255 return ret;
1258 if (sizeof(buf) < len || n < len) {
1259 /* XXX should recover state? */
1260 *nresult = (size_t)-1;
1261 return E2BIG;
1264 memcpy(s, buf, len);
1265 *nresult = len;
1266 return (0);
1269 static __inline int
1270 /*ARGSUSED*/
1271 _citrus_ISO2022_stdenc_wctocs(_ISO2022EncodingInfo * __restrict ei,
1272 _csid_t * __restrict csid,
1273 _index_t * __restrict idx, wchar_t wc)
1275 wchar_t m, nm;
1277 _DIAGASSERT(csid != NULL && idx != NULL);
1279 m = wc & 0x7FFF8080;
1280 nm = wc & 0x007F7F7F;
1281 if (m & 0x00800000) {
1282 nm &= 0x00007F7F;
1283 } else {
1284 m &= 0x7F008080;
1286 if (nm & 0x007F0000) {
1287 /* ^3 mark */
1288 m |= 0x007F0000;
1289 } else if (nm & 0x00007F00) {
1290 /* ^2 mark */
1291 m |= 0x00007F00;
1293 *csid = (_csid_t)m;
1294 *idx = (_index_t)nm;
1296 return (0);
1299 static __inline int
1300 /*ARGSUSED*/
1301 _citrus_ISO2022_stdenc_cstowc(_ISO2022EncodingInfo * __restrict ei,
1302 wchar_t * __restrict wc,
1303 _csid_t csid, _index_t idx)
1306 _DIAGASSERT(ei != NULL && wc != NULL);
1308 *wc = (wchar_t)(csid & 0x7F808080) | (wchar_t)idx;
1310 return (0);
1313 static __inline int
1314 /*ARGSUSED*/
1315 _citrus_ISO2022_stdenc_get_state_desc_generic(_ISO2022EncodingInfo * __restrict ei,
1316 _ISO2022State * __restrict psenc,
1317 int * __restrict rstate)
1320 if (psenc->chlen == 0) {
1321 /* XXX: it should distinguish initial and stable. */
1322 *rstate = _STDENC_SDGEN_STABLE;
1323 } else {
1324 if (psenc->ch[0] == '\033')
1325 *rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT;
1326 else
1327 *rstate = _STDENC_SDGEN_INCOMPLETE_CHAR;
1330 return 0;
1333 /* ----------------------------------------------------------------------
1334 * public interface for ctype
/*
 * NOTE(review): these macros come from a project header not visible here
 * (presumably citrus_ctype.h); they appear to declare and define the ctype
 * operations for ISO2022 - confirm.  The template below is presumably
 * specialised through the _FUNCNAME/_ENCODING_* macros defined in this
 * file - confirm.
 */
1337 _CITRUS_CTYPE_DECLS(ISO2022);
1338 _CITRUS_CTYPE_DEF_OPS(ISO2022);
1340 #include "citrus_ctype_template.h"
1342 /* ----------------------------------------------------------------------
1343 * public interface for stdenc
/*
 * NOTE(review): these macros come from a project header not visible here
 * (presumably citrus_stdenc.h); they appear to declare and define the
 * stdenc operations for ISO2022 - confirm.  The template below is
 * presumably specialised through the _FUNCNAME/_ENCODING_* macros defined
 * in this file - confirm.
 */
1346 _CITRUS_STDENC_DECLS(ISO2022);
1347 _CITRUS_STDENC_DEF_OPS(ISO2022);
1349 #include "citrus_stdenc_template.h"