1 /* $NetBSD: citrus_iso2022.c,v 1.22 2011/10/10 22:45:45 tnozaki Exp $ */
4 * Copyright (c)1999, 2002 Citrus Project,
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * $Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $
31 #include <sys/cdefs.h>
32 #if defined(LIBC_SCCS) && !defined(lint)
33 __RCSID("$NetBSD: citrus_iso2022.c,v 1.22 2011/10/10 22:45:45 tnozaki Exp $");
34 #endif /* LIBC_SCCS and not lint */
43 #include <sys/types.h>
46 #include "citrus_namespace.h"
47 #include "citrus_types.h"
48 #include "citrus_module.h"
49 #include "citrus_ctype.h"
50 #include "citrus_stdenc.h"
51 #include "citrus_iso2022.h"
54 /* ----------------------------------------------------------------------
55 * private stuff used by templates
61 * ASCII (ESC ( B) 00000000 00000000 00000000 0xxxxxxx
62 * iso-8859-1 (ESC , A) 00000000 00000000 00000000 1xxxxxxx
63 * 94 charset (ESC ( F) 0fffffff 00000000 00000000 0xxxxxxx
64 * 94 charset (ESC ( M F) 0fffffff 1mmmmmmm 00000000 0xxxxxxx
65 * 96 charset (ESC , F) 0fffffff 00000000 00000000 1xxxxxxx
66 * 96 charset (ESC , M F) 0fffffff 1mmmmmmm 00000000 1xxxxxxx
67 * 94x94 charset (ESC $ ( F) 0fffffff 00000000 0xxxxxxx 0xxxxxxx
68 * 96x96 charset (ESC $ , F) 0fffffff 00000000 0xxxxxxx 1xxxxxxx
69 * 94x94 charset (ESC & V ESC $ ( F)
70 * 0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx
71 * 94x94x94 charset (ESC $ ( F) 0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx
72 * 96x96x96 charset (ESC $ , F) 0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx
73 * reserved for UCS4 co-existence (UCS4 is 31bit encoding thanks to mohta bit)
74 * 1xxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
81 #define CS94MULTI (2U)
82 #define CS96MULTI (3U)
89 static const _ISO2022Charset ascii
= { CS94
, 'B', '\0', '\0' };
90 static const _ISO2022Charset iso88591
= { CS96
, 'A', '\0', '\0' };
94 /* need 3 bits to hold -1, 0, ..., 3 */
99 char ch
[7]; /* longest escape sequence (ESC & V ESC $ ( F) */
102 #define _ISO2022STATE_FLAG_INITIALIZED 1
106 _ISO2022Charset
*recommend
[4];
107 size_t recommendsize
[4];
108 _ISO2022Charset initg
[4];
111 #define F_8BIT 0x0001
112 #define F_NOOLD 0x0002
113 #define F_SI 0x0010 /*0F*/
114 #define F_SO 0x0020 /*0E*/
115 #define F_LS0 0x0010 /*0F*/
116 #define F_LS1 0x0020 /*0E*/
117 #define F_LS2 0x0040 /*ESC n*/
118 #define F_LS3 0x0080 /*ESC o*/
119 #define F_LS1R 0x0100 /*ESC ~*/
120 #define F_LS2R 0x0200 /*ESC }*/
121 #define F_LS3R 0x0400 /*ESC |*/
122 #define F_SS2 0x0800 /*ESC N*/
123 #define F_SS3 0x1000 /*ESC O*/
124 #define F_SS2R 0x2000 /*8E*/
125 #define F_SS3R 0x4000 /*8F*/
126 } _ISO2022EncodingInfo
;
128 _ISO2022EncodingInfo ei
;
130 /* for future multi-locale facility */
131 _ISO2022State s_mblen
;
132 _ISO2022State s_mbrlen
;
133 _ISO2022State s_mbrtowc
;
134 _ISO2022State s_mbtowc
;
135 _ISO2022State s_mbsrtowcs
;
136 _ISO2022State s_wcrtomb
;
137 _ISO2022State s_wcsrtombs
;
138 _ISO2022State s_wctomb
;
142 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei)
143 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_
145 #define _FUNCNAME(m) _citrus_ISO2022_##m
146 #define _ENCODING_INFO _ISO2022EncodingInfo
147 #define _CTYPE_INFO _ISO2022CTypeInfo
148 #define _ENCODING_STATE _ISO2022State
149 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX
150 #define _ENCODING_IS_STATE_DEPENDENT 1
151 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) \
152 (!((_ps_)->flags & _ISO2022STATE_FLAG_INITIALIZED))
/*
 * Sentinel wide-character value: _ISO2022_sgetwchar() returns it when no
 * character can be produced yet, and callers compare results against it.
 *
 * The replacement list is fully parenthesized so the macro always expands
 * to a single operand; without the outer parentheses an expression such
 * as "sizeof _ISO2022INVALID" would parse as "sizeof(wchar_t) - 1".
 */
#define _ISO2022INVALID ((wchar_t)-1)
/*
 * isc0 -- return 1 when x lies in 0x00..0x1f (the C0 range), else 0.
 * Equivalent to checking that none of the three high bits are set.
 */
static __inline int
isc0(__uint8_t x)
{
	return ((x >> 5) == 0);
}
/*
 * isc1 -- return 1 when x lies in 0x80..0x9f (the C1 range), else 0.
 */
static __inline int
isc1(__uint8_t x)
{
	/* every byte in 0x80..0x9f has the top three bits set to 100 */
	return ((x & 0xe0) == 0x80);
}
/*
 * iscntl -- return 1 when x is a control byte: 0x00..0x1f, 0x80..0x9f,
 * or 0x7f; return 0 for anything else.
 */
static __inline int
iscntl(__uint8_t x)
{
	if (x < 0x20)
		return (1);
	if (x >= 0x80 && x <= 0x9f)
		return (1);
	return (x == 0x7f);
}
/*
 * is94 -- return 1 when x lies in 0x21..0x7e, else 0.
 * Callers in this file mask the byte with 0x7f before testing bytes of
 * 94-type charsets.
 */
static __inline int
is94(__uint8_t x)
{
	if (x < 0x21 || x > 0x7e)
		return (0);
	return (1);
}
/*
 * is96 -- return 1 when x lies in 0x20..0x7f, else 0.
 * Callers in this file mask the byte with 0x7f before testing bytes of
 * 96-type charsets.
 */
static __inline int
is96(__uint8_t x)
{
	/* high bit clear bounds x at 0x7f; then only the lower edge remains */
	return (!(x & 0x80) && x >= 0x20);
}
/*
 * isecma -- return 1 when x lies in 0x30..0x7f, else 0.
 * NOTE(review): presumably the range accepted for an escape-sequence
 * final byte; the call sites are not visible here -- confirm.
 */
static __inline int
isecma(__uint8_t x)
{
	return (x > 0x2f && x < 0x80);
}
/*
 * isinterm -- return 1 when x lies in 0x20..0x2f, else 0.
 * NOTE(review): presumably the range of escape-sequence intermediate
 * bytes; the call sites are not visible here -- confirm.
 */
static __inline int
isinterm(__uint8_t x)
{
	/* 0x20..0x2f is exactly the set of bytes whose high nibble is 2 */
	return ((x & 0xf0) == 0x20);
}
/*
 * isthree -- return 1 when x lies in 0x60..0x6f, else 0.
 * The file applies this to a charset's final byte to choose between
 * the two-byte and three-byte forms of a multibyte charset.
 */
static __inline int
isthree(__uint8_t x)
{
	return ((x >> 4) == 0x6);
}
167 getcs(const char * __restrict p
, _ISO2022Charset
* __restrict cs
)
170 _DIAGASSERT(p
!= NULL
);
171 _DIAGASSERT(cs
!= NULL
);
173 if (!strncmp(p
, "94$", 3) && p
[3] && !p
[4]) {
174 cs
->final
= (u_char
)(p
[3] & 0xff);
177 cs
->type
= CS94MULTI
;
178 } else if (!strncmp(p
, "96$", 3) && p
[3] && !p
[4]) {
179 cs
->final
= (u_char
)(p
[3] & 0xff);
182 cs
->type
= CS96MULTI
;
183 } else if (!strncmp(p
, "94", 2) && p
[2] && !p
[3]) {
184 cs
->final
= (u_char
)(p
[2] & 0xff);
188 } else if (!strncmp(p
, "96", 2) && p
[2] && !p
[3]) {
189 cs
->final
= (u_char
)(p
[2] & 0xff);
206 get_recommend(_ISO2022EncodingInfo
* __restrict ei
,
207 const char * __restrict token
)
210 _ISO2022Charset cs
, *p
;
212 if (!strchr("0123", token
[0]) || token
[1] != '=')
215 if (getcs(&token
[2], &cs
) == 0)
217 else if (!strcmp(&token
[2], "94")) {
218 cs
.final
= (u_char
)(token
[4]);
222 } else if (!strcmp(&token
[2], "96")) {
223 cs
.final
= (u_char
)(token
[4]);
227 } else if (!strcmp(&token
[2], "94$")) {
228 cs
.final
= (u_char
)(token
[5]);
232 } else if (!strcmp(&token
[2], "96$")) {
233 cs
.final
= (u_char
)(token
[5]);
242 if (!ei
->recommend
[i
]) {
243 ei
->recommend
[i
] = malloc(sizeof(_ISO2022Charset
));
245 p
= realloc(ei
->recommend
[i
],
246 sizeof(_ISO2022Charset
) * (ei
->recommendsize
[i
] + 1));
249 ei
->recommend
[i
] = p
;
251 if (!ei
->recommend
[i
])
253 ei
->recommendsize
[i
]++;
255 (ei
->recommend
[i
] + (ei
->recommendsize
[i
] - 1))->final
= cs
.final
;
256 (ei
->recommend
[i
] + (ei
->recommendsize
[i
] - 1))->interm
= cs
.interm
;
257 (ei
->recommend
[i
] + (ei
->recommendsize
[i
] - 1))->vers
= cs
.vers
;
258 (ei
->recommend
[i
] + (ei
->recommendsize
[i
] - 1))->type
= cs
.type
;
264 get_initg(_ISO2022EncodingInfo
* __restrict ei
,
265 const char * __restrict token
)
269 if (strncmp("INIT", &token
[0], 4) ||
270 !strchr("0123", token
[4]) ||
274 if (getcs(&token
[6], &cs
) != 0)
277 ei
->initg
[token
[4] - '0'].type
= cs
.type
;
278 ei
->initg
[token
[4] - '0'].final
= cs
.final
;
279 ei
->initg
[token
[4] - '0'].interm
= cs
.interm
;
280 ei
->initg
[token
[4] - '0'].vers
= cs
.vers
;
286 get_max(_ISO2022EncodingInfo
* __restrict ei
,
287 const char * __restrict token
)
289 if (!strcmp(token
, "MAX1")) {
291 } else if (!strcmp(token
, "MAX2")) {
293 } else if (!strcmp(token
, "MAX3")) {
303 get_flags(_ISO2022EncodingInfo
* __restrict ei
,
304 const char * __restrict token
)
313 { "NOOLD", F_NOOLD
},
330 for (i
= 0; tags
[i
].tag
; i
++) {
331 if (!strcmp(token
, tags
[i
].tag
)) {
332 ei
->flags
|= tags
[i
].flag
;
342 _citrus_ISO2022_parse_variable(_ISO2022EncodingInfo
* __restrict ei
,
343 const void * __restrict var
, size_t lenvar
)
349 _DIAGASSERT(ei
!= NULL
);
353 * parse VARIABLE section.
359 v
= (const char *) var
;
361 /* initialize structure */
363 for (i
= 0; i
< 4; i
++) {
364 ei
->recommend
[i
] = NULL
;
365 ei
->recommendsize
[i
] = 0;
370 while (*v
== ' ' || *v
== '\t')
375 while (*e
&& *e
!= ' ' && *e
!= '\t')
381 if (len
>=sizeof(buf
))
383 snprintf(buf
, sizeof(buf
), "%.*s", len
, v
);
385 if ((ret
= get_recommend(ei
, buf
)) != _NOTMATCH
)
387 else if ((ret
= get_initg(ei
, buf
)) != _NOTMATCH
)
389 else if ((ret
= get_max(ei
, buf
)) != _NOTMATCH
)
391 else if ((ret
= get_flags(ei
, buf
)) != _NOTMATCH
)
404 free(ei
->recommend
[0]);
405 free(ei
->recommend
[1]);
406 free(ei
->recommend
[2]);
407 free(ei
->recommend
[3]);
414 _citrus_ISO2022_init_state(_ISO2022EncodingInfo
* __restrict ei
,
415 _ISO2022State
* __restrict s
)
419 memset(s
, 0, sizeof(*s
));
421 s
->gr
= (ei
->flags
& F_8BIT
) ? 1 : -1;
423 for (i
= 0; i
< 4; i
++) {
424 if (ei
->initg
[i
].final
) {
425 s
->g
[i
].type
= ei
->initg
[i
].type
;
426 s
->g
[i
].final
= ei
->initg
[i
].final
;
427 s
->g
[i
].interm
= ei
->initg
[i
].interm
;
430 s
->singlegl
= s
->singlegr
= -1;
431 s
->flags
|= _ISO2022STATE_FLAG_INITIALIZED
;
436 _citrus_ISO2022_pack_state(_ISO2022EncodingInfo
* __restrict ei
,
437 void * __restrict pspriv
,
438 const _ISO2022State
* __restrict s
)
440 memcpy(pspriv
, (const void *)s
, sizeof(*s
));
445 _citrus_ISO2022_unpack_state(_ISO2022EncodingInfo
* __restrict ei
,
446 _ISO2022State
* __restrict s
,
447 const void * __restrict pspriv
)
449 memcpy((void *)s
, pspriv
, sizeof(*s
));
454 _citrus_ISO2022_encoding_module_init(_ISO2022EncodingInfo
* __restrict ei
,
455 const void * __restrict var
,
459 _DIAGASSERT(ei
!= NULL
);
461 return _citrus_ISO2022_parse_variable(ei
, var
, lenvar
);
466 _citrus_ISO2022_encoding_module_uninit(_ISO2022EncodingInfo
*ei
)
474 static const struct seqtable
{
483 /* G0 94MULTI special */
484 { CS94MULTI
, -1, 2, -1, -1, 3, { ESC
, '$', OECMA
}, },
485 /* G0 94MULTI special with version identification */
486 { CS94MULTI
, -1, 5, -1, 2, 6, { ESC
, '&', ECMA
, ESC
, '$', OECMA
}, },
488 { CS94
, 1, 2, -1, -1, 3, { ESC
, CS94
, ECMA
, }, },
489 /* G? 94 with 2nd intermediate char */
490 { CS94
, 1, 3, 2, -1, 4, { ESC
, CS94
, INTERM
, ECMA
, }, },
492 { CS96
, 1, 2, -1, -1, 3, { ESC
, CS96
, ECMA
, }, },
493 /* G? 96 with 2nd intermediate char */
494 { CS96
, 1, 3, 2, -1, 4, { ESC
, CS96
, INTERM
, ECMA
, }, },
496 { CS94MULTI
, 2, 3, -1, -1, 4, { ESC
, '$', CS94
, ECMA
, }, },
498 { CS96MULTI
, 2, 3, -1, -1, 4, { ESC
, '$', CS96
, ECMA
, }, },
499 /* G? 94MULTI with version specification */
500 { CS94MULTI
, 5, 6, -1, 2, 7, { ESC
, '&', ECMA
, ESC
, '$', CS94
, ECMA
, }, },
502 { -1, -1, -1, -1, -1, 2, { ESC
, 'n', }, },
503 { -1, -1, -1, -1, -1, 2, { ESC
, 'o', }, },
505 { -1, -1, -1, -1, -1, 2, { ESC
, '~', }, },
506 { -1, -1, -1, -1, -1, 2, { ESC
, /*{*/ '}', }, },
507 { -1, -1, -1, -1, -1, 2, { ESC
, '|', }, },
509 { -1, -1, -1, -1, -1, 2, { ESC
, 'N', }, },
510 { -1, -1, -1, -1, -1, 2, { ESC
, 'O', }, },
516 seqmatch(const char * __restrict s
, size_t n
,
517 const struct seqtable
* __restrict sp
)
521 _DIAGASSERT(s
!= NULL
);
522 _DIAGASSERT(sp
!= NULL
);
525 while (p
- sp
->chars
< n
&& p
- sp
->chars
< sp
->len
) {
532 if (*s
&& strchr("@AB", *s
))
541 if (*s
&& strchr("()*+", *s
))
546 if (*s
&& strchr(",-./", *s
))
561 return p
- sp
->chars
;
565 _ISO2022_sgetwchar(_ISO2022EncodingInfo
* __restrict ei
,
566 const char * __restrict string
, size_t n
,
567 const char ** __restrict result
,
568 _ISO2022State
* __restrict psenc
)
572 const struct seqtable
*sp
;
576 _DIAGASSERT(ei
!= NULL
);
577 _DIAGASSERT(psenc
!= NULL
);
578 _DIAGASSERT(string
!= NULL
);
579 /* result may be NULL */
583 if (1 <= n
&& string
[0] == '\017') {
589 if (1 <= n
&& string
[0] == '\016') {
597 if (1 <= n
&& string
[0] && strchr("\217\216", string
[0])) {
598 psenc
->singlegl
= psenc
->singlegr
=
599 (string
[0] - '\216') + 2;
605 /* eat the letter if this is not ESC */
606 if (1 <= n
&& string
[0] != '\033')
609 /* look for a perfect match from escape sequences */
610 for (sp
= &seqtable
[0]; sp
->len
; sp
++) {
611 nmatch
= seqmatch(string
, n
, sp
);
612 if (sp
->len
== nmatch
&& n
>= sp
->len
)
619 if (sp
->type
!= -1) {
626 i
= string
[sp
->csoff
] - '(';
630 i
= string
[sp
->csoff
] - ',';
633 return (_ISO2022INVALID
);
636 psenc
->g
[i
].type
= sp
->type
;
637 psenc
->g
[i
].final
= '\0';
638 psenc
->g
[i
].interm
= '\0';
639 psenc
->g
[i
].vers
= '\0';
640 /* sp->finaloff must not be -1 */
641 if (sp
->finaloff
!= -1)
642 psenc
->g
[i
].final
= string
[sp
->finaloff
];
643 if (sp
->intermoff
!= -1)
644 psenc
->g
[i
].interm
= string
[sp
->intermoff
];
645 if (sp
->versoff
!= -1)
646 psenc
->g
[i
].vers
= string
[sp
->versoff
];
654 if (2 <= n
&& string
[0] == '\033'
655 && string
[1] && strchr("no", string
[1])) {
656 psenc
->gl
= string
[1] - 'n' + 2;
663 /* XXX: { for vi showmatch */
664 if (2 <= n
&& string
[0] == '\033'
665 && string
[1] && strchr("~}|", string
[1])) {
666 psenc
->gr
= 3 - (string
[1] - '|');
673 if (2 <= n
&& string
[0] == '\033'
674 && string
[1] && strchr("NO", string
[1])) {
675 psenc
->singlegl
= (string
[1] - 'N') + 2;
683 * if we've got an unknown escape sequence, eat the ESC at the
684 * head. otherwise, wait till full escape sequence comes.
686 for (sp
= &seqtable
[0]; sp
->len
; sp
++) {
687 nmatch
= seqmatch(string
, n
, sp
);
692 * if we are in the middle of escape sequence,
693 * we still need to wait for more characters to come
699 return (_ISO2022INVALID
);
702 if (nmatch
== sp
->len
) {
703 /* this case should not happen */
713 /* no letter to eat */
717 return (_ISO2022INVALID
);
720 /* normal chars. always eat C0/C1 as is. */
721 if (iscntl(*string
& 0xff))
723 else if (*string
& 0x80) {
724 cur
= (psenc
->singlegr
== -1)
725 ? psenc
->gr
: psenc
->singlegr
;
727 cur
= (psenc
->singlegl
== -1)
728 ? psenc
->gl
: psenc
->singlegl
;
733 wchar
= *string
++ & 0xff;
736 /* reset single shift state */
737 psenc
->singlegr
= psenc
->singlegl
= -1;
741 /* length error check */
742 switch (psenc
->g
[cur
].type
) {
745 if (!isthree(psenc
->g
[cur
].final
)) {
747 && (string
[0] & 0x80) == (string
[1] & 0x80))
751 && (string
[0] & 0x80) == (string
[1] & 0x80)
752 && (string
[0] & 0x80) == (string
[2] & 0x80))
756 /* we still need to wait for more characters to come */
759 return (_ISO2022INVALID
);
766 /* we still need to wait for more characters to come */
769 return (_ISO2022INVALID
);
773 switch (psenc
->g
[cur
].type
) {
775 if (!(is94(string
[0] & 0x7f)))
778 if (!(is96(string
[0] & 0x7f)))
782 if (!(is94(string
[0] & 0x7f) && is94(string
[1] & 0x7f)))
786 if (!(is96(string
[0] & 0x7f) && is96(string
[1] & 0x7f)))
791 /* extract the character. */
792 switch (psenc
->g
[cur
].type
) {
794 /* special case for ASCII. */
795 if (psenc
->g
[cur
].final
== 'B' && !psenc
->g
[cur
].interm
) {
800 wchar
= psenc
->g
[cur
].final
;
801 wchar
= (wchar
<< 8);
802 wchar
|= (psenc
->g
[cur
].interm
? (0x80 | psenc
->g
[cur
].interm
) : 0);
803 wchar
= (wchar
<< 8);
804 wchar
= (wchar
<< 8) | (*string
++ & 0x7f);
807 /* special case for ISO-8859-1. */
808 if (psenc
->g
[cur
].final
== 'A' && !psenc
->g
[cur
].interm
) {
814 wchar
= psenc
->g
[cur
].final
;
815 wchar
= (wchar
<< 8);
816 wchar
|= (psenc
->g
[cur
].interm
? (0x80 | psenc
->g
[cur
].interm
) : 0);
817 wchar
= (wchar
<< 8);
818 wchar
= (wchar
<< 8) | (*string
++ & 0x7f);
823 wchar
= psenc
->g
[cur
].final
;
824 wchar
= (wchar
<< 8);
825 if (isthree(psenc
->g
[cur
].final
))
826 wchar
|= (*string
++ & 0x7f);
827 wchar
= (wchar
<< 8) | (*string
++ & 0x7f);
828 wchar
= (wchar
<< 8) | (*string
++ & 0x7f);
829 if (psenc
->g
[cur
].type
== CS96MULTI
)
836 /* reset single shift state */
837 psenc
->singlegr
= psenc
->singlegl
= -1;
844 _citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo
* __restrict ei
,
845 wchar_t * __restrict pwc
,
846 const char ** __restrict s
,
847 size_t n
, _ISO2022State
* __restrict psenc
,
848 size_t * __restrict nresult
)
851 const char *s0
, *p
, *result
;
855 _DIAGASSERT(nresult
!= 0);
856 _DIAGASSERT(ei
!= NULL
);
857 _DIAGASSERT(psenc
!= NULL
);
858 _DIAGASSERT(s
!= NULL
);
861 _citrus_ISO2022_init_state(ei
, psenc
);
862 *nresult
= _ENCODING_IS_STATE_DEPENDENT
;
867 chlenbak
= psenc
->chlen
;
870 * if we have something in buffer, use that.
871 * otherwise, skip here
873 if (psenc
->chlen
< 0 || psenc
->chlen
> sizeof(psenc
->ch
)) {
875 _citrus_ISO2022_init_state(ei
, psenc
);
878 if (psenc
->chlen
== 0)
881 /* buffer is not empty */
883 while (psenc
->chlen
< sizeof(psenc
->ch
)) {
885 psenc
->ch
[psenc
->chlen
++] = *s0
++;
889 wchar
= _ISO2022_sgetwchar(ei
, p
, psenc
->chlen
- (p
-psenc
->ch
),
892 if (wchar
!= _ISO2022INVALID
) {
893 if (psenc
->chlen
> c
)
894 memmove(psenc
->ch
, result
, psenc
->chlen
- c
);
895 if (psenc
->chlen
< c
)
903 if ((result
- p
) == psenc
->chlen
)
904 /* complete shift sequence. */
912 /* escape sequence too long? */
916 wchar
= _ISO2022_sgetwchar(ei
, s0
, n
, &result
, psenc
);
917 if (wchar
!= _ISO2022INVALID
) {
929 /* complete shift sequence. */
933 if (n
< sizeof(psenc
->ch
)) {
934 memcpy(psenc
->ch
, s0
- c
, n
);
940 /* escape sequence too long? */
944 *nresult
= (size_t)-1;
955 *nresult
= c
- chlenbak
;
961 *nresult
= (size_t)-2;
967 recommendation(_ISO2022EncodingInfo
* __restrict ei
,
968 _ISO2022Charset
* __restrict cs
)
971 _ISO2022Charset
*recommend
;
973 _DIAGASSERT(ei
!= NULL
);
974 _DIAGASSERT(cs
!= NULL
);
976 /* first, try an exact match. */
977 for (i
= 0; i
< 4; i
++) {
978 recommend
= ei
->recommend
[i
];
979 for (j
= 0; j
< ei
->recommendsize
[i
]; j
++) {
980 if (cs
->type
!= recommend
[j
].type
)
982 if (cs
->final
!= recommend
[j
].final
)
984 if (cs
->interm
!= recommend
[j
].interm
)
991 /* then, try a wildcard match over final char. */
992 for (i
= 0; i
< 4; i
++) {
993 recommend
= ei
->recommend
[i
];
994 for (j
= 0; j
< ei
->recommendsize
[i
]; j
++) {
995 if (cs
->type
!= recommend
[j
].type
)
997 if (cs
->final
&& (cs
->final
!= recommend
[j
].final
))
999 if (cs
->interm
&& (cs
->interm
!= recommend
[j
].interm
))
1006 /* there's no recommendation. make a guess. */
1007 if (ei
->maxcharset
== 0) {
1023 _ISO2022_sputwchar(_ISO2022EncodingInfo
* __restrict ei
, wchar_t wc
,
1024 char * __restrict string
, size_t n
,
1025 char ** __restrict result
,
1026 _ISO2022State
* __restrict psenc
,
1027 size_t * __restrict nresult
)
1033 char tmp
[MB_LEN_MAX
];
1038 _DIAGASSERT(ei
!= NULL
);
1039 _DIAGASSERT(string
!= NULL
);
1040 /* result may be NULL */
1041 _DIAGASSERT(psenc
!= NULL
);
1042 _DIAGASSERT(nresult
!= NULL
);
1044 if (isc0(wc
& 0xff)) {
1045 /* go back to INIT0 or ASCII on control chars */
1046 cs
= ei
->initg
[0].final
? ei
->initg
[0] : ascii
;
1047 } else if (isc1(wc
& 0xff)) {
1048 /* go back to INIT1 or ISO-8859-1 on control chars */
1049 cs
= ei
->initg
[1].final
? ei
->initg
[1] : iso88591
;
1050 } else if (!(wc
& ~0xff)) {
1052 /* special treatment for ISO-8859-1 */
1055 /* special treatment for ASCII */
1059 cs
.final
= (wc
>> 24) & 0x7f;
1060 if ((wc
>> 16) & 0x80)
1061 cs
.interm
= (wc
>> 16) & 0x7f;
1065 cs
.type
= (wc
& 0x00007f00) ? CS96MULTI
: CS96
;
1067 cs
.type
= (wc
& 0x00007f00) ? CS94MULTI
: CS94
;
1069 target
= recommendation(ei
, &cs
);
1071 bit8
= ei
->flags
& F_8BIT
;
1073 /* designate the charset onto the target plane(G0/1/2/3). */
1074 if (psenc
->g
[target
].type
== cs
.type
1075 && psenc
->g
[target
].final
== cs
.final
1076 && psenc
->g
[target
].interm
== cs
.interm
)
1080 if (cs
.type
== CS94MULTI
|| cs
.type
== CS96MULTI
)
1082 if (target
== 0 && cs
.type
== CS94MULTI
&& strchr("@AB", cs
.final
)
1083 && !cs
.interm
&& !(ei
->flags
& F_NOOLD
))
1085 else if (cs
.type
== CS94
|| cs
.type
== CS94MULTI
)
1086 *p
++ = "()*+"[target
];
1088 *p
++ = ",-./"[target
];
1093 psenc
->g
[target
].type
= cs
.type
;
1094 psenc
->g
[target
].final
= cs
.final
;
1095 psenc
->g
[target
].interm
= cs
.interm
;
1098 /* invoke the plane onto GL or GR. */
1099 if (psenc
->gl
== target
)
1101 if (bit8
&& psenc
->gr
== target
)
1104 if (target
== 0 && (ei
->flags
& F_LS0
)) {
1107 } else if (target
== 1 && (ei
->flags
& F_LS1
)) {
1110 } else if (target
== 2 && (ei
->flags
& F_LS2
)) {
1114 } else if (target
== 3 && (ei
->flags
& F_LS3
)) {
1118 } else if (bit8
&& target
== 1 && (ei
->flags
& F_LS1R
)) {
1122 } else if (bit8
&& target
== 2 && (ei
->flags
& F_LS2R
)) {
1127 } else if (bit8
&& target
== 3 && (ei
->flags
& F_LS3R
)) {
1131 } else if (target
== 2 && (ei
->flags
& F_SS2
)) {
1134 psenc
->singlegl
= 2;
1135 } else if (target
== 3 && (ei
->flags
& F_SS3
)) {
1138 psenc
->singlegl
= 3;
1139 } else if (bit8
&& target
== 2 && (ei
->flags
& F_SS2R
)) {
1142 psenc
->singlegl
= psenc
->singlegr
= 2;
1143 } else if (bit8
&& target
== 3 && (ei
->flags
& F_SS3R
)) {
1146 psenc
->singlegl
= psenc
->singlegr
= 3;
1151 if (psenc
->singlegl
== target
)
1153 else if (psenc
->singlegr
== target
)
1155 else if (psenc
->gl
== target
)
1157 else if ((ei
->flags
& F_8BIT
) && psenc
->gr
== target
)
1169 i
= !iscntl(wc
& 0xff) ?
1170 (isthree(cs
.final
) ? 3 : 2) : 1;
1174 *p
++ = ((wc
>> (i
<< 3)) & 0x7f) | mask
;
1176 /* reset single shift state */
1177 psenc
->singlegl
= psenc
->singlegr
= -1;
1179 len
= (size_t)(p
- tmp
);
1182 *result
= (char *)0;
1183 *nresult
= (size_t)-1;
1187 *result
= string
+ len
;
1188 memcpy(string
, tmp
, len
);
1194 *nresult
= (size_t)-1;
1199 _citrus_ISO2022_put_state_reset(_ISO2022EncodingInfo
* __restrict ei
,
1200 char * __restrict s
, size_t n
,
1201 _ISO2022State
* __restrict psenc
,
1202 size_t * __restrict nresult
)
1204 char buf
[MB_LEN_MAX
];
1209 _DIAGASSERT(ei
!= NULL
);
1210 _DIAGASSERT(nresult
!= 0);
1211 _DIAGASSERT(s
!= NULL
);
1213 /* XXX state will be modified after this operation... */
1214 ret
= _ISO2022_sputwchar(ei
, L
'\0', buf
, sizeof(buf
), &result
, psenc
,
1221 if (sizeof(buf
) < len
|| n
< len
-1) {
1222 /* XXX should recover state? */
1223 *nresult
= (size_t)-1;
1227 memcpy(s
, buf
, len
-1);
1233 _citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo
* __restrict ei
,
1234 char * __restrict s
, size_t n
, wchar_t wc
,
1235 _ISO2022State
* __restrict psenc
,
1236 size_t * __restrict nresult
)
1238 char buf
[MB_LEN_MAX
];
1243 _DIAGASSERT(ei
!= NULL
);
1244 _DIAGASSERT(s
!= NULL
);
1245 _DIAGASSERT(psenc
!= NULL
);
1246 _DIAGASSERT(nresult
!= 0);
1248 /* XXX state will be modified after this operation... */
1249 ret
= _ISO2022_sputwchar(ei
, wc
, buf
, sizeof(buf
), &result
, psenc
,
1256 if (sizeof(buf
) < len
|| n
< len
) {
1257 /* XXX should recover state? */
1258 *nresult
= (size_t)-1;
1262 memcpy(s
, buf
, len
);
1269 _citrus_ISO2022_stdenc_wctocs(_ISO2022EncodingInfo
* __restrict ei
,
1270 _csid_t
* __restrict csid
,
1271 _index_t
* __restrict idx
, wchar_t wc
)
1275 _DIAGASSERT(csid
!= NULL
&& idx
!= NULL
);
1277 m
= wc
& 0x7FFF8080;
1278 nm
= wc
& 0x007F7F7F;
1279 if (m
& 0x00800000) {
1284 if (nm
& 0x007F0000) {
1287 } else if (nm
& 0x00007F00) {
1292 *idx
= (_index_t
)nm
;
1299 _citrus_ISO2022_stdenc_cstowc(_ISO2022EncodingInfo
* __restrict ei
,
1300 wchar_t * __restrict wc
,
1301 _csid_t csid
, _index_t idx
)
1304 _DIAGASSERT(ei
!= NULL
&& wc
!= NULL
);
1306 *wc
= (wchar_t)(csid
& 0x7F808080) | (wchar_t)idx
;
1313 _citrus_ISO2022_stdenc_get_state_desc_generic(_ISO2022EncodingInfo
* __restrict ei
,
1314 _ISO2022State
* __restrict psenc
,
1315 int * __restrict rstate
)
1318 if (psenc
->chlen
== 0) {
1319 /* XXX: it should distinguish initial and stable. */
1320 *rstate
= _STDENC_SDGEN_STABLE
;
1322 if (psenc
->ch
[0] == '\033')
1323 *rstate
= _STDENC_SDGEN_INCOMPLETE_SHIFT
;
1325 *rstate
= _STDENC_SDGEN_INCOMPLETE_CHAR
;
1331 /* ----------------------------------------------------------------------
1332 * public interface for ctype
1335 _CITRUS_CTYPE_DECLS(ISO2022
);
1336 _CITRUS_CTYPE_DEF_OPS(ISO2022
);
1338 #include "citrus_ctype_template.h"
1340 /* ----------------------------------------------------------------------
1341 * public interface for stdenc
1344 _CITRUS_STDENC_DECLS(ISO2022
);
1345 _CITRUS_STDENC_DEF_OPS(ISO2022
);
1347 #include "citrus_stdenc_template.h"