1 /* $NetBSD: citrus_iso2022.c,v 1.23 2013/05/28 16:57:56 joerg Exp $ */
4 * Copyright (c)1999, 2002 Citrus Project,
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * $Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $
31 #include <sys/cdefs.h>
32 #if defined(LIBC_SCCS) && !defined(lint)
33 __RCSID("$NetBSD: citrus_iso2022.c,v 1.23 2013/05/28 16:57:56 joerg Exp $");
34 #endif /* LIBC_SCCS and not lint */
43 #include <sys/types.h>
46 #include "citrus_namespace.h"
47 #include "citrus_types.h"
48 #include "citrus_module.h"
49 #include "citrus_ctype.h"
50 #include "citrus_stdenc.h"
51 #include "citrus_iso2022.h"
54 /* ----------------------------------------------------------------------
55 * private stuff used by templates
61 * ASCII (ESC ( B) 00000000 00000000 00000000 0xxxxxxx
62 * iso-8859-1 (ESC , A) 00000000 00000000 00000000 1xxxxxxx
63 * 94 charset (ESC ( F) 0fffffff 00000000 00000000 0xxxxxxx
64 * 94 charset (ESC ( M F) 0fffffff 1mmmmmmm 00000000 0xxxxxxx
65 * 96 charset (ESC , F) 0fffffff 00000000 00000000 1xxxxxxx
66 * 96 charset (ESC , M F) 0fffffff 1mmmmmmm 00000000 1xxxxxxx
67 * 94x94 charset (ESC $ ( F) 0fffffff 00000000 0xxxxxxx 0xxxxxxx
68 * 96x96 charset (ESC $ , F) 0fffffff 00000000 0xxxxxxx 1xxxxxxx
69 * 94x94 charset (ESC & V ESC $ ( F)
70 * 0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx
71 * 94x94x94 charset (ESC $ ( F) 0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx
72 * 96x96x96 charset (ESC $ , F) 0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx
73 * reserved for UCS4 co-existence (UCS4 is 31bit encoding thanks to mohta bit)
74 * 1xxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
81 #define CS94MULTI (2U)
82 #define CS96MULTI (3U)
/*
 * Built-in charset descriptor with type CS94 and final byte 'B'; used as
 * the fallback for C0 control characters when no INIT0 charset is
 * configured (see _ISO2022_sputwchar).
 */
89 static const _ISO2022Charset ascii
= { CS94
, 'B', '\0', '\0' };
/*
 * Built-in charset descriptor with type CS96 and final byte 'A'; used as
 * the fallback for C1 control characters when no INIT1 charset is
 * configured (see _ISO2022_sputwchar).
 */
90 static const _ISO2022Charset iso88591
= { CS96
, 'A', '\0', '\0' };
94 /* need 3 bits to hold -1, 0, ..., 3 */
99 char ch
[7]; /* longest escape sequence (ESC & V ESC $ ( F) */
102 #define _ISO2022STATE_FLAG_INITIALIZED 1
/*
 * Tail of the _ISO2022EncodingInfo structure (the opening of the
 * definition is not visible in this listing).
 *
 *   recommend[i] / recommendsize[i]  array of charsets recommended for
 *                                    plane Gi and its element count
 *   initg[i]                         charset initially designated to Gi
 *
 * The F_* macros are bits tested against the flags member.  Note that
 * F_SI/F_LS0 share the value 0x0010 and F_SO/F_LS1 share 0x0020.
 */
106 _ISO2022Charset
*recommend
[4];
107 size_t recommendsize
[4];
108 _ISO2022Charset initg
[4];
111 #define F_8BIT 0x0001
112 #define F_NOOLD 0x0002
113 #define F_SI 0x0010 /*0F*/
114 #define F_SO 0x0020 /*0E*/
115 #define F_LS0 0x0010 /*0F*/
116 #define F_LS1 0x0020 /*0E*/
117 #define F_LS2 0x0040 /*ESC n*/
118 #define F_LS3 0x0080 /*ESC o*/
119 #define F_LS1R 0x0100 /*ESC ~*/
120 #define F_LS2R 0x0200 /*ESC }*/
121 #define F_LS3R 0x0400 /*ESC |*/
122 #define F_SS2 0x0800 /*ESC N*/
123 #define F_SS3 0x1000 /*ESC O*/
124 #define F_SS2R 0x2000 /*8E*/
125 #define F_SS3R 0x4000 /*8F*/
126 } _ISO2022EncodingInfo
;
128 _ISO2022EncodingInfo ei
;
130 /* for future multi-locale facility */
131 _ISO2022State s_mblen
;
132 _ISO2022State s_mbrlen
;
133 _ISO2022State s_mbrtowc
;
134 _ISO2022State s_mbtowc
;
135 _ISO2022State s_mbsrtowcs
;
136 _ISO2022State s_mbsnrtowcs
;
137 _ISO2022State s_wcrtomb
;
138 _ISO2022State s_wcsrtombs
;
139 _ISO2022State s_wcsnrtombs
;
140 _ISO2022State s_wctomb
;
144 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei)
145 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_
147 #define _FUNCNAME(m) _citrus_ISO2022_##m
148 #define _ENCODING_INFO _ISO2022EncodingInfo
149 #define _CTYPE_INFO _ISO2022CTypeInfo
150 #define _ENCODING_STATE _ISO2022State
151 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX
152 #define _ENCODING_IS_STATE_DEPENDENT 1
153 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) \
154 (!((_ps_)->flags & _ISO2022STATE_FLAG_INITIALIZED))
157 #define _ISO2022INVALID (wchar_t)-1
/* Nonzero iff x lies in the C0 control range 0x00..0x1f. */
static __inline int
isc0(__uint8_t x)
{
	return (x <= 0x1f);
}
/* Nonzero iff x lies in the C1 control range 0x80..0x9f. */
static __inline int
isc1(__uint8_t x)
{
	/* 0x80..0x9f are exactly the bytes whose top three bits are 100. */
	return ((x & 0xe0) == 0x80);
}
/* Nonzero iff x is DEL (0x7f) or a C0/C1 control byte. */
static __inline int
iscntl(__uint8_t x)
{
	if (x == 0x7f)
		return (1);
	return (isc0(x) || isc1(x));
}
/* Nonzero iff x is a valid byte of a 94-charset: 0x21..0x7e. */
static __inline int
is94(__uint8_t x)
{
	return (x > 0x20 && x < 0x7f);
}
/* Nonzero iff x is a valid byte of a 96-charset: 0x20..0x7f. */
static __inline int
is96(__uint8_t x)
{
	return (!(x < 0x20 || x > 0x7f));
}
/* Nonzero iff x lies in 0x30..0x7f (accepted as a final byte). */
static __inline int
isecma(__uint8_t x)
{
	return (!(x < 0x30 || x > 0x7f));
}
/* Nonzero iff x is an intermediate byte: 0x20..0x2f. */
static __inline int
isinterm(__uint8_t x)
{
	/* The range is exactly the bytes whose high nibble is 2. */
	return ((x & 0xf0) == 0x20);
}
/*
 * Nonzero iff x lies in 0x60..0x6f.  Callers apply this to a charset's
 * final byte to select the three-byte form of a multibyte charset.
 */
static __inline int
isthree(__uint8_t x)
{
	/* The range is exactly the bytes whose high nibble is 6. */
	return ((x & 0xf0) == 0x60);
}
/*
 * getcs: parse a textual charset designation into *cs.
 *
 * Accepted spellings of p are "94$F", "96$F", "94F" and "96F", where F is
 * exactly one non-NUL byte followed by the terminator.  F is stored in
 * cs->final; the "$" forms set cs->type to CS94MULTI / CS96MULTI.
 *
 * NOTE(review): the type assignment of the single-byte forms, the
 * interm/vers assignments and the return statements are not visible in
 * this listing.  Callers compare the result against 0 - presumably 0
 * means "parsed"; confirm.
 */
169 getcs(const char * __restrict p
, _ISO2022Charset
* __restrict cs
)
172 _DIAGASSERT(p
!= NULL
);
173 _DIAGASSERT(cs
!= NULL
);
175 if (!strncmp(p
, "94$", 3) && p
[3] && !p
[4]) {
176 cs
->final
= (u_char
)(p
[3] & 0xff);
179 cs
->type
= CS94MULTI
;
180 } else if (!strncmp(p
, "96$", 3) && p
[3] && !p
[4]) {
181 cs
->final
= (u_char
)(p
[3] & 0xff);
184 cs
->type
= CS96MULTI
;
185 } else if (!strncmp(p
, "94", 2) && p
[2] && !p
[3]) {
186 cs
->final
= (u_char
)(p
[2] & 0xff);
190 } else if (!strncmp(p
, "96", 2) && p
[2] && !p
[3]) {
191 cs
->final
= (u_char
)(p
[2] & 0xff);
/*
 * get_recommend: handle a VARIABLE token of the form "N=<charset>" with
 * N one of '0'..'3'.
 *
 * <charset> is first handed to getcs().  If that does not accept it, the
 * bare spellings "94", "96", "94$" and "96$" are tried; in that case
 * cs.final is read from the byte just past the matched text, which is the
 * string terminator because strcmp() matched the whole string.
 *
 * The resulting charset is appended to ei->recommend[i]: the array is
 * created with malloc() when empty, otherwise grown by one element with
 * realloc(), and ei->recommendsize[i] is incremented.
 *
 * NOTE(review): the computation of i and the return statements are not
 * visible in this listing; presumably i = token[0] - '0' - confirm.
 */
208 get_recommend(_ISO2022EncodingInfo
* __restrict ei
,
209 const char * __restrict token
)
212 _ISO2022Charset cs
, *p
;
214 if (!strchr("0123", token
[0]) || token
[1] != '=')
217 if (getcs(&token
[2], &cs
) == 0)
219 else if (!strcmp(&token
[2], "94")) {
220 cs
.final
= (u_char
)(token
[4]);
224 } else if (!strcmp(&token
[2], "96")) {
225 cs
.final
= (u_char
)(token
[4]);
229 } else if (!strcmp(&token
[2], "94$")) {
230 cs
.final
= (u_char
)(token
[5]);
234 } else if (!strcmp(&token
[2], "96$")) {
235 cs
.final
= (u_char
)(token
[5]);
244 if (!ei
->recommend
[i
]) {
245 ei
->recommend
[i
] = malloc(sizeof(_ISO2022Charset
));
247 p
= realloc(ei
->recommend
[i
],
248 sizeof(_ISO2022Charset
) * (ei
->recommendsize
[i
] + 1));
251 ei
->recommend
[i
] = p
;
253 if (!ei
->recommend
[i
])
255 ei
->recommendsize
[i
]++;
257 (ei
->recommend
[i
] + (ei
->recommendsize
[i
] - 1))->final
= cs
.final
;
258 (ei
->recommend
[i
] + (ei
->recommendsize
[i
] - 1))->interm
= cs
.interm
;
259 (ei
->recommend
[i
] + (ei
->recommendsize
[i
] - 1))->vers
= cs
.vers
;
260 (ei
->recommend
[i
] + (ei
->recommendsize
[i
] - 1))->type
= cs
.type
;
/*
 * get_initg: handle a VARIABLE token that starts with "INIT" followed by
 * a plane digit '0'..'3'; the charset text starting at token[6] is parsed
 * with getcs() and copied field by field into ei->initg[digit].
 *
 * NOTE(review): one condition of the prefix test (original line 273) and
 * the return statements are not visible in this listing; presumably
 * token[5] must be '=' - confirm.
 */
266 get_initg(_ISO2022EncodingInfo
* __restrict ei
,
267 const char * __restrict token
)
271 if (strncmp("INIT", &token
[0], 4) ||
272 !strchr("0123", token
[4]) ||
276 if (getcs(&token
[6], &cs
) != 0)
279 ei
->initg
[token
[4] - '0'].type
= cs
.type
;
280 ei
->initg
[token
[4] - '0'].final
= cs
.final
;
281 ei
->initg
[token
[4] - '0'].interm
= cs
.interm
;
282 ei
->initg
[token
[4] - '0'].vers
= cs
.vers
;
/*
 * get_max: recognise the VARIABLE tokens "MAX1", "MAX2" and "MAX3".
 *
 * NOTE(review): the statements executed for each token are not visible in
 * this listing; presumably they set ei->maxcharset, which
 * recommendation() reads - confirm.
 */
288 get_max(_ISO2022EncodingInfo
* __restrict ei
,
289 const char * __restrict token
)
291 if (!strcmp(token
, "MAX1")) {
293 } else if (!strcmp(token
, "MAX2")) {
295 } else if (!strcmp(token
, "MAX3")) {
/*
 * get_flags: look token up in a local table of { tag, flag } pairs (only
 * the "NOOLD" entry is visible in this listing) and OR the flag of the
 * matching entry into ei->flags.  The scan stops at the first entry whose
 * tag is null.
 *
 * NOTE(review): return statements are not visible here.
 */
305 get_flags(_ISO2022EncodingInfo
* __restrict ei
,
306 const char * __restrict token
)
315 { "NOOLD", F_NOOLD
},
332 for (i
= 0; tags
[i
].tag
; i
++) {
333 if (!strcmp(token
, tags
[i
].tag
)) {
334 ei
->flags
|= tags
[i
].flag
;
/*
 * _citrus_ISO2022_parse_variable: fill *ei from the VARIABLE section text
 * in var.
 *
 * The recommend[] / recommendsize[] slots are reset first.  The text is
 * then cut into tokens separated by spaces or tabs; each token is copied
 * into a local buffer (a token that does not fit is special-cased by the
 * length test) and offered in turn to get_recommend(), get_initg(),
 * get_max() and get_flags() until one of them returns something other
 * than _NOTMATCH.
 *
 * NOTE(review): the loop framing, the handling of the individual return
 * codes and the context of the four free() calls are not visible in this
 * listing; presumably the free() calls belong to the failure path -
 * confirm.
 */
344 _citrus_ISO2022_parse_variable(_ISO2022EncodingInfo
* __restrict ei
,
345 const void * __restrict var
, size_t lenvar
)
351 _DIAGASSERT(ei
!= NULL
);
355 * parse VARIABLE section.
361 v
= (const char *) var
;
363 /* initialize structure */
365 for (i
= 0; i
< 4; i
++) {
366 ei
->recommend
[i
] = NULL
;
367 ei
->recommendsize
[i
] = 0;
372 while (*v
== ' ' || *v
== '\t')
377 while (*e
&& *e
!= ' ' && *e
!= '\t')
383 if (len
>=sizeof(buf
))
385 snprintf(buf
, sizeof(buf
), "%.*s", len
, v
);
387 if ((ret
= get_recommend(ei
, buf
)) != _NOTMATCH
)
389 else if ((ret
= get_initg(ei
, buf
)) != _NOTMATCH
)
391 else if ((ret
= get_max(ei
, buf
)) != _NOTMATCH
)
393 else if ((ret
= get_flags(ei
, buf
)) != _NOTMATCH
)
406 free(ei
->recommend
[0]);
407 free(ei
->recommend
[1]);
408 free(ei
->recommend
[2]);
409 free(ei
->recommend
[3]);
/*
 * _citrus_ISO2022_init_state: put *s into the initial shift state.
 *
 * The whole state is zeroed, then:
 *   - gr is 1 when the F_8BIT flag is set in ei->flags, otherwise -1;
 *   - every plane i whose ei->initg[i].final is nonzero gets the type,
 *     final and interm of that initial charset;
 *   - both single-shift slots (singlegl, singlegr) are set to -1;
 *   - _ISO2022STATE_FLAG_INITIALIZED is set in s->flags.
 */
416 _citrus_ISO2022_init_state(_ISO2022EncodingInfo
* __restrict ei
,
417 _ISO2022State
* __restrict s
)
421 memset(s
, 0, sizeof(*s
));
423 s
->gr
= (ei
->flags
& F_8BIT
) ? 1 : -1;
425 for (i
= 0; i
< 4; i
++) {
426 if (ei
->initg
[i
].final
) {
427 s
->g
[i
].type
= ei
->initg
[i
].type
;
428 s
->g
[i
].final
= ei
->initg
[i
].final
;
429 s
->g
[i
].interm
= ei
->initg
[i
].interm
;
432 s
->singlegl
= s
->singlegr
= -1;
433 s
->flags
|= _ISO2022STATE_FLAG_INITIALIZED
;
/*
 * _citrus_ISO2022_pack_state: copy the whole state structure *s, byte for
 * byte, into the opaque buffer pspriv.  ei is not used by the visible
 * code.
 */
438 _citrus_ISO2022_pack_state(_ISO2022EncodingInfo
* __restrict ei
,
439 void * __restrict pspriv
,
440 const _ISO2022State
* __restrict s
)
442 memcpy(pspriv
, (const void *)s
, sizeof(*s
));
/*
 * _citrus_ISO2022_unpack_state: inverse of pack_state - copy
 * sizeof(*s) bytes from the opaque buffer pspriv into *s.  ei is not
 * used by the visible code.
 */
447 _citrus_ISO2022_unpack_state(_ISO2022EncodingInfo
* __restrict ei
,
448 _ISO2022State
* __restrict s
,
449 const void * __restrict pspriv
)
451 memcpy((void *)s
, pspriv
, sizeof(*s
));
/*
 * _citrus_ISO2022_encoding_module_init: module set-up hook.  Delegates to
 * _citrus_ISO2022_parse_variable() and returns its result unchanged.
 */
456 _citrus_ISO2022_encoding_module_init(_ISO2022EncodingInfo
* __restrict ei
,
457 const void * __restrict var
,
461 _DIAGASSERT(ei
!= NULL
);
463 return _citrus_ISO2022_parse_variable(ei
, var
, lenvar
);
/*
 * _citrus_ISO2022_encoding_module_uninit: module tear-down hook.
 * NOTE(review): only the signature is visible in this listing; presumably
 * the counterpart of _citrus_ISO2022_encoding_module_init - confirm what,
 * if anything, it releases.
 */
468 _citrus_ISO2022_encoding_module_uninit(_ISO2022EncodingInfo
*ei
)
/*
 * seqtable: patterns of the escape sequences understood by the decoder.
 *
 * Judging from the member accesses in _ISO2022_sgetwchar() and from the
 * rows themselves, each initialiser appears to be
 *   { type, csoff, finaloff, intermoff, versoff, len, chars }
 * where the *off values are byte offsets into the matched sequence (-1 =
 * not present), len is the pattern length and chars holds literal bytes
 * or character-class codes (ESC, ECMA, OECMA, INTERM, CS94, CS96).  Rows
 * with type -1 are pure shift sequences.  The scan loops stop at an entry
 * whose len is 0.
 * NOTE(review): the member declarations and the terminating row are not
 * visible in this listing - confirm the field order.
 */
476 static const struct seqtable
{
485 /* G0 94MULTI special */
486 { CS94MULTI
, -1, 2, -1, -1, 3, { ESC
, '$', OECMA
}, },
487 /* G0 94MULTI special with version identification */
488 { CS94MULTI
, -1, 5, -1, 2, 6, { ESC
, '&', ECMA
, ESC
, '$', OECMA
}, },
490 { CS94
, 1, 2, -1, -1, 3, { ESC
, CS94
, ECMA
, }, },
491 /* G? 94 with 2nd intermediate char */
492 { CS94
, 1, 3, 2, -1, 4, { ESC
, CS94
, INTERM
, ECMA
, }, },
494 { CS96
, 1, 2, -1, -1, 3, { ESC
, CS96
, ECMA
, }, },
495 /* G? 96 with 2nd intermediate char */
496 { CS96
, 1, 3, 2, -1, 4, { ESC
, CS96
, INTERM
, ECMA
, }, },
498 { CS94MULTI
, 2, 3, -1, -1, 4, { ESC
, '$', CS94
, ECMA
, }, },
500 { CS96MULTI
, 2, 3, -1, -1, 4, { ESC
, '$', CS96
, ECMA
, }, },
501 /* G? 94MULTI with version specification */
502 { CS94MULTI
, 5, 6, -1, 2, 7, { ESC
, '&', ECMA
, ESC
, '$', CS94
, ECMA
, }, },
504 { -1, -1, -1, -1, -1, 2, { ESC
, 'n', }, },
505 { -1, -1, -1, -1, -1, 2, { ESC
, 'o', }, },
507 { -1, -1, -1, -1, -1, 2, { ESC
, '~', }, },
508 { -1, -1, -1, -1, -1, 2, { ESC
, /*{*/ '}', }, },
509 { -1, -1, -1, -1, -1, 2, { ESC
, '|', }, },
511 { -1, -1, -1, -1, -1, 2, { ESC
, 'N', }, },
512 { -1, -1, -1, -1, -1, 2, { ESC
, 'O', }, },
/*
 * seqmatch: compare the first n bytes of s against the pattern
 * sp->chars and return how many pattern elements were matched (the
 * distance of the pattern cursor from sp->chars).  Matching stops after
 * n input bytes or sp->len pattern elements, whichever comes first.
 *
 * The visible class tests accept one of "@AB", "()*+" or ",-./".
 * NOTE(review): the switch that maps pattern codes to these tests is not
 * visible in this listing.
 */
518 seqmatch(const char * __restrict s
, size_t n
,
519 const struct seqtable
* __restrict sp
)
523 _DIAGASSERT(s
!= NULL
);
524 _DIAGASSERT(sp
!= NULL
);
527 while (p
- sp
->chars
< n
&& p
- sp
->chars
< sp
->len
) {
534 if (*s
&& strchr("@AB", *s
))
543 if (*s
&& strchr("()*+", *s
))
548 if (*s
&& strchr(",-./", *s
))
563 return p
- sp
->chars
;
/*
 * _ISO2022_sgetwchar: try to decode one wide character from the n bytes
 * at string, updating the shift state in *psenc.
 *
 * Visible processing order:
 *   1. single control bytes: \017 and \016, then \216 / \217, which set
 *      both single-shift slots to 2 or 3;
 *   2. escape sequences: each seqtable entry is tried with seqmatch(); a
 *      full match of a designation (type != -1) rewrites psenc->g[i]
 *      with the type and the final / interm / vers bytes taken from the
 *      offsets stored in the table entry;
 *   3. ESC n / ESC o set gl, ESC ~ } | set gr, ESC N / ESC O set
 *      singlegl;
 *   4. a partially matched escape sequence, or a multibyte character that
 *      is not complete yet, yields _ISO2022INVALID;
 *   5. otherwise the character is taken from the plane selected by the
 *      high bit of the first byte (gr/singlegr or gl/singlegl), range
 *      checked with is94()/is96(), and packed as the final byte, an
 *      optional (0x80 | interm) byte and the 7-bit data bytes, shifted in
 *      8 bits at a time.  The single-shift slots are reset to -1.
 *
 * result may be NULL.
 * NOTE(review): many statements of this function (returns, case labels,
 * updates of *result) are missing from this listing; the summary above
 * covers only what is visible.
 */
567 _ISO2022_sgetwchar(_ISO2022EncodingInfo
* __restrict ei
,
568 const char * __restrict string
, size_t n
,
569 const char ** __restrict result
,
570 _ISO2022State
* __restrict psenc
)
574 const struct seqtable
*sp
;
578 _DIAGASSERT(ei
!= NULL
);
579 _DIAGASSERT(psenc
!= NULL
);
580 _DIAGASSERT(string
!= NULL
);
581 /* result may be NULL */
585 if (1 <= n
&& string
[0] == '\017') {
591 if (1 <= n
&& string
[0] == '\016') {
599 if (1 <= n
&& string
[0] && strchr("\217\216", string
[0])) {
600 psenc
->singlegl
= psenc
->singlegr
=
601 (string
[0] - '\216') + 2;
607 /* eat the letter if this is not ESC */
608 if (1 <= n
&& string
[0] != '\033')
611 /* look for a perfect match from escape sequences */
612 for (sp
= &seqtable
[0]; sp
->len
; sp
++) {
613 nmatch
= seqmatch(string
, n
, sp
);
614 if (sp
->len
== nmatch
&& n
>= sp
->len
)
621 if (sp
->type
!= -1) {
628 i
= string
[sp
->csoff
] - '(';
632 i
= string
[sp
->csoff
] - ',';
635 return (_ISO2022INVALID
);
638 psenc
->g
[i
].type
= sp
->type
;
639 psenc
->g
[i
].final
= '\0';
640 psenc
->g
[i
].interm
= '\0';
641 psenc
->g
[i
].vers
= '\0';
642 /* sp->finaloff must not be -1 */
643 if (sp
->finaloff
!= -1)
644 psenc
->g
[i
].final
= string
[sp
->finaloff
];
645 if (sp
->intermoff
!= -1)
646 psenc
->g
[i
].interm
= string
[sp
->intermoff
];
647 if (sp
->versoff
!= -1)
648 psenc
->g
[i
].vers
= string
[sp
->versoff
];
656 if (2 <= n
&& string
[0] == '\033'
657 && string
[1] && strchr("no", string
[1])) {
658 psenc
->gl
= string
[1] - 'n' + 2;
665 /* XXX: { for vi showmatch */
666 if (2 <= n
&& string
[0] == '\033'
667 && string
[1] && strchr("~}|", string
[1])) {
668 psenc
->gr
= 3 - (string
[1] - '|');
675 if (2 <= n
&& string
[0] == '\033'
676 && string
[1] && strchr("NO", string
[1])) {
677 psenc
->singlegl
= (string
[1] - 'N') + 2;
685 * if we've got an unknown escape sequence, eat the ESC at the
686 * head. otherwise, wait till full escape sequence comes.
688 for (sp
= &seqtable
[0]; sp
->len
; sp
++) {
689 nmatch
= seqmatch(string
, n
, sp
);
694 * if we are in the middle of escape sequence,
695 * we still need to wait for more characters to come
701 return (_ISO2022INVALID
);
704 if (nmatch
== sp
->len
) {
705 /* this case should not happen */
715 /* no letter to eat */
719 return (_ISO2022INVALID
);
722 /* normal chars. always eat C0/C1 as is. */
723 if (iscntl(*string
& 0xff))
725 else if (*string
& 0x80) {
726 cur
= (psenc
->singlegr
== -1)
727 ? psenc
->gr
: psenc
->singlegr
;
729 cur
= (psenc
->singlegl
== -1)
730 ? psenc
->gl
: psenc
->singlegl
;
735 wchar
= *string
++ & 0xff;
738 /* reset single shift state */
739 psenc
->singlegr
= psenc
->singlegl
= -1;
743 /* length error check */
744 switch (psenc
->g
[cur
].type
) {
747 if (!isthree(psenc
->g
[cur
].final
)) {
749 && (string
[0] & 0x80) == (string
[1] & 0x80))
753 && (string
[0] & 0x80) == (string
[1] & 0x80)
754 && (string
[0] & 0x80) == (string
[2] & 0x80))
758 /* we still need to wait for more characters to come */
761 return (_ISO2022INVALID
);
768 /* we still need to wait for more characters to come */
771 return (_ISO2022INVALID
);
775 switch (psenc
->g
[cur
].type
) {
777 if (!(is94(string
[0] & 0x7f)))
780 if (!(is96(string
[0] & 0x7f)))
784 if (!(is94(string
[0] & 0x7f) && is94(string
[1] & 0x7f)))
788 if (!(is96(string
[0] & 0x7f) && is96(string
[1] & 0x7f)))
793 /* extract the character. */
794 switch (psenc
->g
[cur
].type
) {
796 /* special case for ASCII. */
797 if (psenc
->g
[cur
].final
== 'B' && !psenc
->g
[cur
].interm
) {
802 wchar
= psenc
->g
[cur
].final
;
803 wchar
= (wchar
<< 8);
804 wchar
|= (psenc
->g
[cur
].interm
? (0x80 | psenc
->g
[cur
].interm
) : 0);
805 wchar
= (wchar
<< 8);
806 wchar
= (wchar
<< 8) | (*string
++ & 0x7f);
809 /* special case for ISO-8859-1. */
810 if (psenc
->g
[cur
].final
== 'A' && !psenc
->g
[cur
].interm
) {
816 wchar
= psenc
->g
[cur
].final
;
817 wchar
= (wchar
<< 8);
818 wchar
|= (psenc
->g
[cur
].interm
? (0x80 | psenc
->g
[cur
].interm
) : 0);
819 wchar
= (wchar
<< 8);
820 wchar
= (wchar
<< 8) | (*string
++ & 0x7f);
825 wchar
= psenc
->g
[cur
].final
;
826 wchar
= (wchar
<< 8);
827 if (isthree(psenc
->g
[cur
].final
))
828 wchar
|= (*string
++ & 0x7f);
829 wchar
= (wchar
<< 8) | (*string
++ & 0x7f);
830 wchar
= (wchar
<< 8) | (*string
++ & 0x7f);
831 if (psenc
->g
[cur
].type
== CS96MULTI
)
838 /* reset single shift state */
839 psenc
->singlegr
= psenc
->singlegl
= -1;
/*
 * _citrus_ISO2022_mbrtowc_priv: restartable multibyte-to-wide conversion
 * built on _ISO2022_sgetwchar().
 *
 * Visible behaviour:
 *   - one path re-initialises *psenc and stores
 *     _ENCODING_IS_STATE_DEPENDENT in *nresult;
 *   - a chlen outside 0..sizeof(psenc->ch) causes the state to be
 *     re-initialised;
 *   - when bytes are already buffered in psenc->ch, input is appended to
 *     that buffer and decoding is attempted from it; after a successful
 *     decode the unconsumed tail is moved to the front with memmove();
 *   - otherwise decoding is attempted directly on the caller input, and
 *     an incomplete sequence shorter than the buffer is stashed in
 *     psenc->ch;
 *   - *nresult receives (size_t)-1, (size_t)-2 or c - chlenbak, i.e. the
 *     byte count minus the bytes that were already buffered on entry.
 *
 * NOTE(review): the conditions guarding these paths, the gotos/labels and
 * the return values are missing from this listing.
 */
846 _citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo
* __restrict ei
,
847 wchar_t * __restrict pwc
,
848 const char ** __restrict s
,
849 size_t n
, _ISO2022State
* __restrict psenc
,
850 size_t * __restrict nresult
)
853 const char *s0
, *p
, *result
;
857 _DIAGASSERT(nresult
!= 0);
858 _DIAGASSERT(ei
!= NULL
);
859 _DIAGASSERT(psenc
!= NULL
);
860 _DIAGASSERT(s
!= NULL
);
863 _citrus_ISO2022_init_state(ei
, psenc
);
864 *nresult
= _ENCODING_IS_STATE_DEPENDENT
;
869 chlenbak
= psenc
->chlen
;
872 * if we have something in buffer, use that.
873 * otherwise, skip here
875 if (psenc
->chlen
< 0 || psenc
->chlen
> sizeof(psenc
->ch
)) {
877 _citrus_ISO2022_init_state(ei
, psenc
);
880 if (psenc
->chlen
== 0)
883 /* buffer is not empty */
885 while (psenc
->chlen
< sizeof(psenc
->ch
)) {
887 psenc
->ch
[psenc
->chlen
++] = *s0
++;
891 wchar
= _ISO2022_sgetwchar(ei
, p
, psenc
->chlen
- (p
-psenc
->ch
),
894 if (wchar
!= _ISO2022INVALID
) {
895 if (psenc
->chlen
> c
)
896 memmove(psenc
->ch
, result
, psenc
->chlen
- c
);
897 if (psenc
->chlen
< c
)
905 if ((result
- p
) == psenc
->chlen
)
906 /* complete shift sequence. */
914 /* escape sequence too long? */
918 wchar
= _ISO2022_sgetwchar(ei
, s0
, n
, &result
, psenc
);
919 if (wchar
!= _ISO2022INVALID
) {
931 /* complete shift sequence. */
935 if (n
< sizeof(psenc
->ch
)) {
936 memcpy(psenc
->ch
, s0
- c
, n
);
942 /* escape sequence too long? */
946 *nresult
= (size_t)-1;
957 *nresult
= c
- chlenbak
;
963 *nresult
= (size_t)-2;
/*
 * recommendation: pick the plane (G0..G3) that charset *cs should be
 * designated to.
 *
 * Pass 1 scans ei->recommend[0..3] for an entry whose type, final and
 * interm all equal those of *cs.  Pass 2 repeats the scan but treats a
 * zero final or interm in *cs as a wildcard.  If neither pass finds an
 * entry, a guess is made that depends on ei->maxcharset.
 *
 * NOTE(review): the continue/return statements and the body of the guess
 * are not visible in this listing.
 */
969 recommendation(_ISO2022EncodingInfo
* __restrict ei
,
970 _ISO2022Charset
* __restrict cs
)
973 _ISO2022Charset
*recommend
;
975 _DIAGASSERT(ei
!= NULL
);
976 _DIAGASSERT(cs
!= NULL
);
978 /* first, try an exact match. */
979 for (i
= 0; i
< 4; i
++) {
980 recommend
= ei
->recommend
[i
];
981 for (j
= 0; j
< ei
->recommendsize
[i
]; j
++) {
982 if (cs
->type
!= recommend
[j
].type
)
984 if (cs
->final
!= recommend
[j
].final
)
986 if (cs
->interm
!= recommend
[j
].interm
)
993 /* then, try a wildcard match over final char. */
994 for (i
= 0; i
< 4; i
++) {
995 recommend
= ei
->recommend
[i
];
996 for (j
= 0; j
< ei
->recommendsize
[i
]; j
++) {
997 if (cs
->type
!= recommend
[j
].type
)
999 if (cs
->final
&& (cs
->final
!= recommend
[j
].final
))
1001 if (cs
->interm
&& (cs
->interm
!= recommend
[j
].interm
))
1008 /* there's no recommendation. make a guess. */
1009 if (ei
->maxcharset
== 0) {
/*
 * _ISO2022_sputwchar: encode wide character wc, including any escape or
 * shift sequences needed to reach its charset, into string (capacity n).
 *
 * Visible steps:
 *   1. derive the charset cs of wc: C0 controls use INIT0 or ASCII, C1
 *      controls use INIT1 or ISO-8859-1, other values below 0x100 get
 *      special treatment, and the general case takes final from bits
 *      24-30 and interm from bits 16-22 (when bit 23 is set), with the
 *      type chosen among CS94/CS96/CS94MULTI/CS96MULTI;
 *   2. ask recommendation() for the target plane;
 *   3. unless psenc->g[target] already holds cs, emit a designation
 *      sequence and record cs in psenc->g[target];
 *   4. unless the target plane is already invoked on GL (or on GR when
 *      F_8BIT is set), emit the locking or single shift permitted by
 *      ei->flags;
 *   5. emit the data bytes, 7 bits each, OR-ed with mask;
 *   6. reset the single-shift slots to -1 and copy the sequence from the
 *      local tmp buffer to string.
 *
 * On the visible failure paths *nresult is (size_t)-1 and, in one of
 * them, *result is a null pointer; on success *result = string + len.
 * result may be NULL according to the original comment.
 *
 * NOTE(review): many statements (the emitted bytes, the success value of
 * *nresult, return codes, the length check) are missing from this
 * listing.
 */
1025 _ISO2022_sputwchar(_ISO2022EncodingInfo
* __restrict ei
, wchar_t wc
,
1026 char * __restrict string
, size_t n
,
1027 char ** __restrict result
,
1028 _ISO2022State
* __restrict psenc
,
1029 size_t * __restrict nresult
)
1035 char tmp
[MB_LEN_MAX
];
1040 _DIAGASSERT(ei
!= NULL
);
1041 _DIAGASSERT(string
!= NULL
);
1042 /* result may be NULL */
1043 _DIAGASSERT(psenc
!= NULL
);
1044 _DIAGASSERT(nresult
!= NULL
);
1046 if (isc0(wc
& 0xff)) {
1047 /* go back to INIT0 or ASCII on control chars */
1048 cs
= ei
->initg
[0].final
? ei
->initg
[0] : ascii
;
1049 } else if (isc1(wc
& 0xff)) {
1050 /* go back to INIT1 or ISO-8859-1 on control chars */
1051 cs
= ei
->initg
[1].final
? ei
->initg
[1] : iso88591
;
1052 } else if (!(wc
& ~0xff)) {
1054 /* special treatment for ISO-8859-1 */
1057 /* special treatment for ASCII */
1061 cs
.final
= (wc
>> 24) & 0x7f;
1062 if ((wc
>> 16) & 0x80)
1063 cs
.interm
= (wc
>> 16) & 0x7f;
1067 cs
.type
= (wc
& 0x00007f00) ? CS96MULTI
: CS96
;
1069 cs
.type
= (wc
& 0x00007f00) ? CS94MULTI
: CS94
;
1071 target
= recommendation(ei
, &cs
);
1073 bit8
= ei
->flags
& F_8BIT
;
1075 /* designate the charset onto the target plane(G0/1/2/3). */
1076 if (psenc
->g
[target
].type
== cs
.type
1077 && psenc
->g
[target
].final
== cs
.final
1078 && psenc
->g
[target
].interm
== cs
.interm
)
1082 if (cs
.type
== CS94MULTI
|| cs
.type
== CS96MULTI
)
1084 if (target
== 0 && cs
.type
== CS94MULTI
&& strchr("@AB", cs
.final
)
1085 && !cs
.interm
&& !(ei
->flags
& F_NOOLD
))
1087 else if (cs
.type
== CS94
|| cs
.type
== CS94MULTI
)
1088 *p
++ = "()*+"[target
];
1090 *p
++ = ",-./"[target
];
1095 psenc
->g
[target
].type
= cs
.type
;
1096 psenc
->g
[target
].final
= cs
.final
;
1097 psenc
->g
[target
].interm
= cs
.interm
;
1100 /* invoke the plane onto GL or GR. */
1101 if (psenc
->gl
== target
)
1103 if (bit8
&& psenc
->gr
== target
)
1106 if (target
== 0 && (ei
->flags
& F_LS0
)) {
1109 } else if (target
== 1 && (ei
->flags
& F_LS1
)) {
1112 } else if (target
== 2 && (ei
->flags
& F_LS2
)) {
1116 } else if (target
== 3 && (ei
->flags
& F_LS3
)) {
1120 } else if (bit8
&& target
== 1 && (ei
->flags
& F_LS1R
)) {
1124 } else if (bit8
&& target
== 2 && (ei
->flags
& F_LS2R
)) {
1129 } else if (bit8
&& target
== 3 && (ei
->flags
& F_LS3R
)) {
1133 } else if (target
== 2 && (ei
->flags
& F_SS2
)) {
1136 psenc
->singlegl
= 2;
1137 } else if (target
== 3 && (ei
->flags
& F_SS3
)) {
1140 psenc
->singlegl
= 3;
1141 } else if (bit8
&& target
== 2 && (ei
->flags
& F_SS2R
)) {
1144 psenc
->singlegl
= psenc
->singlegr
= 2;
1145 } else if (bit8
&& target
== 3 && (ei
->flags
& F_SS3R
)) {
1148 psenc
->singlegl
= psenc
->singlegr
= 3;
1153 if (psenc
->singlegl
== target
)
1155 else if (psenc
->singlegr
== target
)
1157 else if (psenc
->gl
== target
)
1159 else if ((ei
->flags
& F_8BIT
) && psenc
->gr
== target
)
1171 i
= !iscntl(wc
& 0xff) ?
1172 (isthree(cs
.final
) ? 3 : 2) : 1;
1176 *p
++ = ((wc
>> (i
<< 3)) & 0x7f) | mask
;
1178 /* reset single shift state */
1179 psenc
->singlegl
= psenc
->singlegr
= -1;
1181 len
= (size_t)(p
- tmp
);
1184 *result
= (char *)0;
1185 *nresult
= (size_t)-1;
1189 *result
= string
+ len
;
1190 memcpy(string
, tmp
, len
);
1196 *nresult
= (size_t)-1;
/*
 * _citrus_ISO2022_put_state_reset: write to s the byte sequence that
 * returns the encoder to its initial shift state.
 *
 * A wide NUL is encoded into a local buffer with _ISO2022_sputwchar();
 * all bytes of that encoding except the last one (len - 1 bytes) are then
 * copied to s.  If the encoding does not fit the local buffer, or s has
 * room for fewer than len - 1 bytes, *nresult is set to (size_t)-1.
 * *psenc is modified by the encoding step even when the call fails (see
 * the XXX notes).
 *
 * NOTE(review): the computation of len, the success value of *nresult
 * and the return statements are not visible in this listing.
 */
1201 _citrus_ISO2022_put_state_reset(_ISO2022EncodingInfo
* __restrict ei
,
1202 char * __restrict s
, size_t n
,
1203 _ISO2022State
* __restrict psenc
,
1204 size_t * __restrict nresult
)
1206 char buf
[MB_LEN_MAX
];
1211 _DIAGASSERT(ei
!= NULL
);
1212 _DIAGASSERT(nresult
!= 0);
1213 _DIAGASSERT(s
!= NULL
);
1215 /* XXX state will be modified after this operation... */
1216 ret
= _ISO2022_sputwchar(ei
, L
'\0', buf
, sizeof(buf
), &result
, psenc
,
1223 if (sizeof(buf
) < len
|| n
< len
-1) {
1224 /* XXX should recover state? */
1225 *nresult
= (size_t)-1;
1229 memcpy(s
, buf
, len
-1);
/*
 * _citrus_ISO2022_wcrtomb_priv: convert wide character wc to its
 * multibyte form in s (capacity n).
 *
 * wc is encoded into a local buffer with _ISO2022_sputwchar() and the
 * len resulting bytes are copied to s.  If the encoding does not fit the
 * local buffer or n < len, *nresult is set to (size_t)-1.  *psenc is
 * modified by the encoding step even when the call fails (see the XXX
 * notes).
 *
 * NOTE(review): the computation of len, the success value of *nresult
 * and the return statements are not visible in this listing.
 */
1235 _citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo
* __restrict ei
,
1236 char * __restrict s
, size_t n
, wchar_t wc
,
1237 _ISO2022State
* __restrict psenc
,
1238 size_t * __restrict nresult
)
1240 char buf
[MB_LEN_MAX
];
1245 _DIAGASSERT(ei
!= NULL
);
1246 _DIAGASSERT(s
!= NULL
);
1247 _DIAGASSERT(psenc
!= NULL
);
1248 _DIAGASSERT(nresult
!= 0);
1250 /* XXX state will be modified after this operation... */
1251 ret
= _ISO2022_sputwchar(ei
, wc
, buf
, sizeof(buf
), &result
, psenc
,
1258 if (sizeof(buf
) < len
|| n
< len
) {
1259 /* XXX should recover state? */
1260 *nresult
= (size_t)-1;
1264 memcpy(s
, buf
, len
);
/*
 * _citrus_ISO2022_stdenc_wctocs: split wide character wc into a charset
 * id and an index within that charset.
 *
 * wc is separated into m = wc & 0x7FFF8080 and nm = wc & 0x007F7F7F; nm
 * is what is finally stored in *idx.  The visible tests look at bit 23
 * of m and at the second and third 7-bit groups of nm.
 *
 * NOTE(review): the statements inside those tests and the assignment to
 * *csid are not visible in this listing.
 */
1271 _citrus_ISO2022_stdenc_wctocs(_ISO2022EncodingInfo
* __restrict ei
,
1272 _csid_t
* __restrict csid
,
1273 _index_t
* __restrict idx
, wchar_t wc
)
1277 _DIAGASSERT(csid
!= NULL
&& idx
!= NULL
);
1279 m
= wc
& 0x7FFF8080;
1280 nm
= wc
& 0x007F7F7F;
1281 if (m
& 0x00800000) {
1286 if (nm
& 0x007F0000) {
1289 } else if (nm
& 0x00007F00) {
1294 *idx
= (_index_t
)nm
;
/*
 * _citrus_ISO2022_stdenc_cstowc: rebuild a wide character from a charset
 * id and an index: *wc = (csid & 0x7F808080) | idx.
 */
1301 _citrus_ISO2022_stdenc_cstowc(_ISO2022EncodingInfo
* __restrict ei
,
1302 wchar_t * __restrict wc
,
1303 _csid_t csid
, _index_t idx
)
1306 _DIAGASSERT(ei
!= NULL
&& wc
!= NULL
);
1308 *wc
= (wchar_t)(csid
& 0x7F808080) | (wchar_t)idx
;
/*
 * _citrus_ISO2022_stdenc_get_state_desc_generic: classify the conversion
 * state *psenc into *rstate.
 *
 *   no buffered bytes (chlen == 0)   _STDENC_SDGEN_STABLE
 *   buffer starts with ESC (\033)    _STDENC_SDGEN_INCOMPLETE_SHIFT
 *   anything else                    _STDENC_SDGEN_INCOMPLETE_CHAR
 */
1315 _citrus_ISO2022_stdenc_get_state_desc_generic(_ISO2022EncodingInfo
* __restrict ei
,
1316 _ISO2022State
* __restrict psenc
,
1317 int * __restrict rstate
)
1320 if (psenc
->chlen
== 0) {
1321 /* XXX: it should distinguish initial and stable. */
1322 *rstate
= _STDENC_SDGEN_STABLE
;
1324 if (psenc
->ch
[0] == '\033')
1325 *rstate
= _STDENC_SDGEN_INCOMPLETE_SHIFT
;
1327 *rstate
= _STDENC_SDGEN_INCOMPLETE_CHAR
;
1333 /* ----------------------------------------------------------------------
1334 * public interface for ctype
1337 _CITRUS_CTYPE_DECLS(ISO2022
);
1338 _CITRUS_CTYPE_DEF_OPS(ISO2022
);
1340 #include "citrus_ctype_template.h"
1342 /* ----------------------------------------------------------------------
1343 * public interface for stdenc
1346 _CITRUS_STDENC_DECLS(ISO2022
);
1347 _CITRUS_STDENC_DEF_OPS(ISO2022
);
1349 #include "citrus_stdenc_template.h"