1 /* $NetBSD: citrus_hz.c,v 1.4 2014/06/24 22:24:18 spz Exp $ */
4 * Copyright (c)2004, 2006 Citrus Project,
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 #include <sys/cdefs.h>
31 #if defined(LIBC_SCCS) && !defined(lint)
32 __RCSID("$NetBSD: citrus_hz.c,v 1.4 2014/06/24 22:24:18 spz Exp $");
33 #endif /* LIBC_SCCS and not lint */
35 #include <sys/queue.h>
36 #include <sys/types.h>
46 #include "citrus_namespace.h"
47 #include "citrus_types.h"
48 #include "citrus_bcs.h"
49 #include "citrus_module.h"
50 #include "citrus_ctype.h"
51 #include "citrus_stdenc.h"
53 #include "citrus_hz.h"
54 #include "citrus_prop.h"
59 * CTRL/ASCII 00000000 00000000 00000000 gxxxxxxx
60 * GB2312 00000000 00000000 0xxxxxxx gxxxxxxx
61 * 94/96*n (~M) 0mmmmmmm 0xxxxxxx 0xxxxxxx gxxxxxxx
/* Byte that introduces an escape sequence; the converters below both test input against it and emit it. */
64 #define ESCAPE_CHAR '~'
/*
 * Character-set identifiers. Their numeric values are also used as
 * indices into ranges[] (ranges[(size_t)graphic->charset]).
 */
67 CTRL
= 0, ASCII
= 1, GB2312
= 2, CS94
= 3, CS96
= 4
/* Byte range [start, end]; RANGE() fills width with (end - start) + 1. */
71 int start
, end
, width
;
/*
 * Permitted byte range for each character set, listed in the same order
 * as the CTRL/ASCII/GB2312/CS94/CS96 constants so that a charset value
 * can index the table directly.
 */
74 static const range_t ranges
[] = {
/* Expands to a { start, end, width } initializer; arguments are not parenthesised, so pass simple constants only. */
75 #define RANGE(start, end) { start, end, (end - start) + 1 }
76 /* CTRL */ RANGE(0x00, 0x1F),
77 /* ASCII */ RANGE(0x20, 0x7F),
78 /* GB2312 */ RANGE(0x21, 0x7E),
79 /* CS94 */ RANGE(0x21, 0x7E),
80 /* CS96 */ RANGE(0x20, 0x7F),
/* Typedef for the escape record; graphic and escape records refer to each other (graphic->escape, escape->left/right). */
84 typedef struct escape_t escape_t
;
/* Tail-queue head type (sys/queue.h) for a list of escape_t records. */
92 typedef TAILQ_HEAD(escape_list
, escape_t
) escape_list
;
/* Queue linkage; the TAILQ_FOREACH/TAILQ_INSERT_TAIL/TAILQ_REMOVE calls in this file name this field. */
94 TAILQ_ENTRY(escape_t
) entry
;
/* Graphic sets reached through GL() and GR(); the decoder compares against GR() for bytes with bit 0x80 set. */
96 graphic_t
*left
, *right
;
/* Field accessors for an escape_t pointer. */
100 #define GL(escape) ((escape)->left)
101 #define GR(escape) ((escape)->right)
102 #define SET(escape) ((escape)->set)
103 #define ESC(escape) ((escape)->ch)
/* First entry of the list the escape belongs to. */
104 #define INIT(escape) (TAILQ_FIRST(SET(escape)))
/*
 * Scan the escape list `set` for the entry whose escape character
 * (ESC()) equals `ch`.
 * NOTE(review): callers compare the result against NULL, so presumably
 * NULL means "no such escape" -- confirm against the return statements.
 */
106 static __inline escape_t
*
107 find_escape(escape_list
*set
, int ch
)
111 _DIAGASSERT(set
!= NULL
);
113 TAILQ_FOREACH(escape
, set
, entry
) {
114 if (ESC(escape
) == ch
)
/*
 * Shortcuts to particular graphic sets. gb2312 is assigned by
 * _citrus_HZ_parse_graphic() when it parses a "GB2312" graphic;
 * presumably ascii is set the same way for "ASCII" -- confirm.
 */
123 graphic_t
*ascii
, *gb2312
;
/* Addresses of the two escape lists (e0, e1) stored in the encoding info. */
126 #define E0SET(ei) (&(ei)->e0)
127 #define E1SET(ei) (&(ei)->e1)
/* First entry of each list; INIT0(ei) is what the state is initialised to. */
128 #define INIT0(ei) (TAILQ_FIRST(E0SET(ei)))
129 #define INIT1(ei) (TAILQ_FIRST(E1SET(ei)))
140 /* for future multi-locale facility */
/* One conversion state per stateful function; _CEI_TO_STATE() selects a member by pasting the function name after "s_". */
145 _HZState s_mbsrtowcs
;
146 _HZState s_mbsnrtowcs
;
148 _HZState s_wcsrtombs
;
149 _HZState s_wcsnrtombs
;
/* Accessors from the ctype info: the embedded encoding info and the per-function state (states.s_<func>). */
154 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei)
155 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_
/* Names in this module are formed as _citrus_HZ_<m>. */
157 #define _FUNCNAME(m) _citrus_HZ_##m
/* Type names for this encoding -- presumably consumed by the citrus_*_template.h files included at the end of the file. */
158 #define _ENCODING_INFO _HZEncodingInfo
159 #define _CTYPE_INFO _HZCTypeInfo
160 #define _ENCODING_STATE _HZState
/* The argument is ignored: the maximum is always MB_LEN_MAX. */
161 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX
162 #define _ENCODING_IS_STATE_DEPENDENT 1
/* A state whose inuse pointer is NULL has not been initialised yet. */
163 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) ((_ps_)->inuse == NULL)
/*
 * Put a conversion state into the initial shift state: the active escape
 * (inuse) becomes the first entry of escape set 0.
 * TODO(review): confirm that the buffered byte count (chlen) is cleared
 * here as well.
 */
166 _citrus_HZ_init_state(_HZEncodingInfo
* __restrict ei
,
167 _HZState
* __restrict psenc
)
169 _DIAGASSERT(ei
!= NULL
);
170 _DIAGASSERT(psenc
!= NULL
);
173 psenc
->inuse
= INIT0(ei
);
/*
 * Serialise a conversion state: copies the whole _HZState object
 * (sizeof(*psenc) bytes) from psenc into the opaque buffer pspriv.
 * The caller must supply a buffer at least that large.
 */
178 _citrus_HZ_pack_state(_HZEncodingInfo
* __restrict ei
,
179 void *__restrict pspriv
, const _HZState
* __restrict psenc
)
181 /* ei may be unused */
182 _DIAGASSERT(pspriv
!= NULL
);
183 _DIAGASSERT(psenc
!= NULL
);
185 memcpy(pspriv
, (const void *)psenc
, sizeof(*psenc
));
/*
 * Inverse of pack_state: copies sizeof(*psenc) bytes from the opaque
 * buffer pspriv back into the _HZState object psenc.
 */
190 _citrus_HZ_unpack_state(_HZEncodingInfo
* __restrict ei
,
191 _HZState
* __restrict psenc
, const void * __restrict pspriv
)
193 /* ei may be unused */
194 _DIAGASSERT(psenc
!= NULL
);
195 _DIAGASSERT(pspriv
!= NULL
);
197 memcpy((void *)psenc
, pspriv
, sizeof(*psenc
));
/*
 * Decode one wide character from the multibyte input at *s (n bytes
 * available), using and updating the shift state in psenc. Input bytes
 * are staged in psenc->ch so a character or escape split across calls
 * can be resumed.
 *
 * The outcome is reported through *nresult: (size_t)-2 is stored when
 * the staged bytes run out, (size_t)-1 on the error path, 0 when the
 * decoded character is the null wide character, and len otherwise.
 * pwc may be NULL.
 */
201 _citrus_HZ_mbrtowc_priv(_HZEncodingInfo
* __restrict ei
,
202 wchar_t * __restrict pwc
, const char ** __restrict s
, size_t n
,
203 _HZState
* __restrict psenc
, size_t * __restrict nresult
)
207 int bit
, head
, tail
, len
, ch
;
209 escape_t
*candidate
, *init
;
210 const range_t
*range
;
212 _DIAGASSERT(ei
!= NULL
);
213 /* pwc may be null */
214 _DIAGASSERT(s
!= NULL
);
215 _DIAGASSERT(psenc
!= NULL
);
216 _DIAGASSERT(nresult
!= NULL
);
/* State reset path -- NOTE(review): presumably taken when *s is NULL; confirm the guard. */
219 _citrus_HZ_init_state(ei
, psenc
);
/* Sanity-check the saved state: a negative staged length or no active escape is invalid. */
224 if (psenc
->chlen
< 0 || psenc
->inuse
== NULL
)
228 bit
= head
= tail
= 0;
/* The for-header has no increment expression, so len has to be advanced inside the body. */
230 for (len
= 0; len
<= MB_LEN_MAX
; /**/) {
/* Every staged byte has been examined: another input byte is required. */
231 if (psenc
->chlen
== tail
) {
234 *nresult
= (size_t)-2;
/* Append the next input byte to the staging buffer. */
237 psenc
->ch
[psenc
->chlen
++] = *s0
++;
/* Fetch the next staged byte as an unsigned value. */
240 ch
= (unsigned char)psenc
->ch
[tail
++];
/* With bit 0x80 masked off, values up to 0x1F are control codes (0x00-0x1F and 0x80-0x9F). */
242 if ((ch
& ~0x80) <= 0x1F) {
243 if (psenc
->inuse
!= INIT0(ei
))
249 graphic
= GR(psenc
->inuse
);
253 graphic
= GL(psenc
->inuse
);
254 if (ch
== ESCAPE_CHAR
)
/* Second byte of a sequence whose first staged byte is the escape introducer. */
260 } else if (tail
== 2 && psenc
->ch
[0] == ESCAPE_CHAR
) {
261 if (tail
< psenc
->chlen
)
/* Escape introducer followed by itself -- presumably a literal '~'; confirm. */
263 if (ch
== ESCAPE_CHAR
) {
/* Escape introducer followed by a newline. */
265 } else if (ch
== '\n') {
266 if (psenc
->inuse
!= INIT0(ei
))
/* Discard the staged bytes. */
268 tail
= psenc
->chlen
= 0;
273 _DIAGASSERT(init
!= NULL
);
274 if (psenc
->inuse
== init
) {
276 } else if (INIT(psenc
->inuse
) == init
) {
/* Look ch up among the escapes of the set the active escape belongs to. */
281 if (candidate
== NULL
) {
282 candidate
= find_escape(
283 SET(psenc
->inuse
), ch
);
284 if (candidate
== NULL
) {
/* Switch to the escape that was found and drop the staged escape bytes. */
291 psenc
->inuse
= candidate
;
292 tail
= psenc
->chlen
= 0;
/* Bytes with bit 0x80 set are checked against GR of the active escape, the others against GL. */
295 } else if (ch
& 0x80) {
296 if (graphic
!= GR(psenc
->inuse
))
300 if (graphic
!= GL(psenc
->inuse
))
303 _DIAGASSERT(graphic
!= NULL
);
/* The byte must lie inside the range defined for the graphic's charset. */
304 range
= &ranges
[(size_t)graphic
->charset
];
305 if (range
->start
> ch
|| range
->end
< ch
)
/* The character is complete once (tail - head) bytes equal the graphic's length. */
309 if (graphic
->length
== (tail
- head
)) {
/* Charsets beyond GB2312 (CS94/CS96) carry the escape character in bits 24-31. */
310 if (graphic
->charset
> GB2312
)
311 bit
|= ESC(psenc
->inuse
) << 24;
/* Error path. */
316 *nresult
= (size_t)-1;
319 if (tail
< psenc
->chlen
)
/* Success: 0 for the null wide character, otherwise len. */
325 *nresult
= (wc
== 0) ? 0 : len
;
/*
 * Encode the wide character wc into the buffer s (n bytes available),
 * preceded by whatever escape sequence is needed to move psenc to the
 * required shift state. Output is staged in psenc->ch and then copied
 * to s; *nresult receives the number of bytes, or (size_t)-1 on the
 * error path.
 */
331 _citrus_HZ_wcrtomb_priv(_HZEncodingInfo
* __restrict ei
,
332 char * __restrict s
, size_t n
, wchar_t wc
,
333 _HZState
* __restrict psenc
, size_t * __restrict nresult
)
336 escape_t
*candidate
, *init
;
339 const range_t
*range
;
341 _DIAGASSERT(ei
!= NULL
);
342 _DIAGASSERT(s
!= NULL
);
343 _DIAGASSERT(psenc
!= NULL
);
344 _DIAGASSERT(nresult
!= NULL
);
/* Encoding requires a clean state: nothing staged and an active escape. */
346 if (psenc
->chlen
!= 0 || psenc
->inuse
== NULL
)
/* Control codes (<= 0x1F) use the initial escape of set 0 and the CTRL range. */
354 if ((uint32_t)wc
<= 0x1F) {
355 candidate
= INIT0(ei
);
357 ? candidate
->left
: candidate
->right
;
360 range
= &ranges
[(size_t)CTRL
];
/* Remaining 7-bit values -- NOTE(review): presumably encoded with ei->ascii; confirm which graphic is selected. */
362 } else if ((uint32_t)wc
<= 0x7F) {
366 candidate
= graphic
->escape
;
367 range
= &ranges
[(size_t)graphic
->charset
];
368 len
= graphic
->length
;
/* Values up to 0x7F7F are encoded with the GB2312 graphic. */
369 } else if ((uint32_t)wc
<= 0x7F7F) {
370 graphic
= ei
->gb2312
;
373 candidate
= graphic
->escape
;
374 range
= &ranges
[(size_t)graphic
->charset
];
375 len
= graphic
->length
;
/* Otherwise bits 24-31 of wc name the escape; look it up in set 0, then in set 1. */
377 ch
= (wc
>> 24) & 0xFF;
378 candidate
= find_escape(E0SET(ei
), ch
);
379 if (candidate
== NULL
) {
380 candidate
= find_escape(E1SET(ei
), ch
);
381 if (candidate
== NULL
)
386 ? candidate
->left
: candidate
->right
;
389 range
= &ranges
[(size_t)graphic
->charset
];
390 len
= graphic
->length
;
/* The active escape differs from the one needed: work out which shift sequence(s) to emit. */
392 if (psenc
->inuse
!= candidate
) {
394 if (SET(psenc
->inuse
) == SET(candidate
)) {
395 if (INIT(psenc
->inuse
) != init
||
396 psenc
->inuse
== init
|| candidate
== init
)
398 } else if (candidate
== (init
= INIT(candidate
))) {
/* Stage a shift to the initial escape of the set. */
405 psenc
->ch
[psenc
->chlen
++] = ESCAPE_CHAR
;
406 psenc
->ch
[psenc
->chlen
++] = ESC(init
);
/* Stage a shift to the target escape and record it as active. */
411 psenc
->ch
[psenc
->chlen
++] = ESCAPE_CHAR
;
412 psenc
->ch
[psenc
->chlen
++] = ESC(candidate
);
413 psenc
->inuse
= candidate
;
/* Extract one byte of wc (shift of len * 8 bits) and check it against the charset range. */
418 ch
= (wc
>> (len
* 8)) & 0xFF;
419 if (range
->start
> ch
|| range
->end
< ch
)
/* Stage the byte, OR-ing in `bit` -- presumably 0x80 for a GR graphic; confirm. */
421 psenc
->ch
[psenc
->chlen
++] = ch
| bit
;
/* Copy the staged bytes to the caller's buffer and report their count. */
423 memcpy(s
, psenc
->ch
, psenc
->chlen
);
424 *nresult
= psenc
->chlen
;
/* Error path. */
430 *nresult
= (size_t)-1;
/*
 * Produce the byte sequence that returns the encoder to its initial
 * shift state. If the active escape is not INIT0(ei), an escape sequence
 * (ESCAPE_CHAR followed by the initial escape's character) is staged in
 * psenc->ch. Staged bytes are copied to s, their count is stored in
 * *nresult, and psenc is reinitialised.
 */
435 _citrus_HZ_put_state_reset(_HZEncodingInfo
* __restrict ei
,
436 char * __restrict s
, size_t n
, _HZState
* __restrict psenc
,
437 size_t * __restrict nresult
)
441 _DIAGASSERT(ei
!= NULL
);
442 _DIAGASSERT(s
!= NULL
);
443 _DIAGASSERT(psenc
!= NULL
);
444 _DIAGASSERT(nresult
!= NULL
);
/* Requires a clean state: nothing staged and an active escape. */
446 if (psenc
->chlen
!= 0 || psenc
->inuse
== NULL
)
448 candidate
= INIT0(ei
);
/* Not in the initial state: stage the escape that switches back to it. */
449 if (psenc
->inuse
!= candidate
) {
453 psenc
->ch
[psenc
->chlen
++] = ESCAPE_CHAR
;
454 psenc
->ch
[psenc
->chlen
++] = ESC(candidate
);
/* Copy only when something was staged; *nresult may legitimately be 0. */
458 if (psenc
->chlen
> 0)
459 memcpy(s
, psenc
->ch
, psenc
->chlen
);
460 *nresult
= psenc
->chlen
;
461 _citrus_HZ_init_state(ei
, psenc
);
/*
 * Classify the conversion state and store the result in *rstate:
 *   - nothing staged and active escape == INIT0(ei): _STDENC_SDGEN_INITIAL
 *   - nothing staged, another escape active:          _STDENC_SDGEN_STABLE
 *   - staged bytes starting with ESCAPE_CHAR:         _STDENC_SDGEN_INCOMPLETE_SHIFT
 *   - any other staged bytes:                         _STDENC_SDGEN_INCOMPLETE_CHAR
 */
467 _citrus_HZ_stdenc_get_state_desc_generic(_HZEncodingInfo
* __restrict ei
,
468 _HZState
* __restrict psenc
, int * __restrict rstate
)
470 _DIAGASSERT(ei
!= NULL
);
471 _DIAGASSERT(psenc
!= NULL
);
472 _DIAGASSERT(rstate
!= NULL
);
/* Sanity-check the state: a negative staged length or no active escape is invalid. */
474 if (psenc
->chlen
< 0 || psenc
->inuse
== NULL
)
476 *rstate
= (psenc
->chlen
== 0)
477 ? ((psenc
->inuse
== INIT0(ei
))
478 ? _STDENC_SDGEN_INITIAL
479 : _STDENC_SDGEN_STABLE
)
480 : ((psenc
->ch
[0] == ESCAPE_CHAR
)
481 ? _STDENC_SDGEN_INCOMPLETE_SHIFT
482 : _STDENC_SDGEN_INCOMPLETE_CHAR
);
/*
 * Split a wide character into a character-set id (*csid) and an index
 * within that set (*idx), by value range:
 *   wc <= 0x7F   : csid = bit
 *   wc <= 0x7F7F : csid = bit | 0x8000
 *   otherwise    : csid = wc & ~0x00FFFF7F, idx = wc & 0x00FFFF7F
 * NOTE(review): `bit` is presumably derived from bit 0x80 of wc --
 * confirm where it is computed.
 */
489 _citrus_HZ_stdenc_wctocs(_HZEncodingInfo
* __restrict ei
,
490 _csid_t
* __restrict csid
, _index_t
* __restrict idx
, wchar_t wc
)
494 _DIAGASSERT(csid
!= NULL
);
495 _DIAGASSERT(idx
!= NULL
);
503 if ((uint32_t)wc
<= 0x7F) {
504 *csid
= (_csid_t
)bit
;
506 } else if ((uint32_t)wc
<= 0x7F7F) {
507 *csid
= (_csid_t
)(bit
| 0x8000);
/*
 * NOTE(review): the casts below look swapped -- *csid is cast to
 * _index_t and *idx to _csid_t. Harmless only if both typedefs name
 * the same integer type; confirm and swap for clarity.
 */
510 *csid
= (_index_t
)(wc
& ~0x00FFFF7F);
511 *idx
= (_csid_t
)(wc
& 0x00FFFF7F);
/*
 * Rebuild a wide character in *wc from a character-set id and an index.
 * The visible steps OR 0x80 and then csid into *wc.
 * NOTE(review): the conditions selecting each step need confirming;
 * presumably this mirrors the csid layout used by
 * _citrus_HZ_stdenc_wctocs().
 */
519 _citrus_HZ_stdenc_cstowc(_HZEncodingInfo
* __restrict ei
,
520 wchar_t * __restrict wc
, _csid_t csid
, _index_t idx
)
522 _DIAGASSERT(ei
!= NULL
);
523 _DIAGASSERT(wc
!= NULL
);
529 *wc
|= (wchar_t)0x80;
535 *wc
|= (wchar_t)csid
;
/*
 * Tear down the encoding info: repeatedly take the first entry of each
 * escape list (set 0, then set 1) and unlink it until both are empty.
 * NOTE(review): presumably each unlinked escape and its graphics are
 * freed inside the loops -- confirm.
 */
542 _citrus_HZ_encoding_module_uninit(_HZEncodingInfo
*ei
)
546 _DIAGASSERT(ei
!= NULL
);
547 while ((escape
= TAILQ_FIRST(E0SET(ei
))) != NULL
) {
548 TAILQ_REMOVE(E0SET(ei
), escape
, entry
);
553 while ((escape
= TAILQ_FIRST(E1SET(ei
))) != NULL
) {
554 TAILQ_REMOVE(E1SET(ei
), escape
, entry
);
/*
 * Property-parser callback registered for the "CH" key in escape_hints.
 * `context` is treated as an array of pointers whose first element is
 * the escape_t being configured; `s` is the value text.
 *
 * NOTE(review): `context` is declared void *, so the `*context` in the
 * first assertion dereferences a void pointer, which is not valid C
 * whenever _DIAGASSERT expands to a real expression. Confirm and cast
 * to void ** before dereferencing.
 */
562 _citrus_HZ_parse_char(void *context
, const char *name
, const char *s
)
567 _DIAGASSERT(context
!= NULL
&& *context
!= NULL
);
568 _DIAGASSERT(name
!= NULL
);
569 _DIAGASSERT(s
!= NULL
);
571 p
= (void **)context
;
572 escape
= (escape_t
*)p
[0];
/* A non-zero ch means the character was already set -- presumably a duplicate "CH" entry; confirm. */
573 if (escape
->ch
!= '\0')
/* Test for the escape introducer itself, or for text remaining in s. */
576 if (escape
->ch
== ESCAPE_CHAR
|| *s
!= '\0')
/*
 * Property-parser callback registered for the "GL" and "GR" keys in
 * escape_hints. `context` is treated as an array of pointers: element 0
 * is the escape_t being configured, element 1 the _HZEncodingInfo.
 *
 * A zero-filled graphic_t is allocated and attached to the left (GL) or
 * right (GR) slot of the escape, according to `name`. The value text `s`
 * selects the charset: "ASCII" and "GB2312" are matched
 * case-insensitively, "94*" and "96*" exactly. A digit '1'..'3' gives
 * the byte length.
 *
 * NOTE(review): `context` is declared void *, so `*context` in the first
 * assertion dereferences a void pointer; confirm and cast to void **.
 */
583 _citrus_HZ_parse_graphic(void *context
, const char *name
, const char *s
)
590 _DIAGASSERT(context
!= NULL
&& *context
!= NULL
);
591 _DIAGASSERT(name
!= NULL
);
592 _DIAGASSERT(s
!= NULL
);
594 p
= (void **)context
;
595 escape
= (escape_t
*)p
[0];
596 ei
= (_HZEncodingInfo
*)p
[1];
/* TODO(review): confirm the allocation result is checked before the memset below. */
597 graphic
= malloc(sizeof(*graphic
));
600 memset(graphic
, 0, sizeof(*graphic
));
/* Attach to the slot named by the key; a slot that is already non-NULL is tested first. */
601 if (strcmp("GL", name
) == 0) {
602 if (GL(escape
) != NULL
)
604 GL(escape
) = graphic
;
605 } else if (strcmp("GR", name
) == 0) {
606 if (GR(escape
) != NULL
)
608 GR(escape
) = graphic
;
/* Back-pointer from the graphic to its escape. */
614 graphic
->escape
= escape
;
615 if (_bcs_strncasecmp("ASCII", s
, 5) == 0) {
618 graphic
->charset
= ASCII
;
622 } else if (_bcs_strncasecmp("GB2312", s
, 6) == 0) {
625 graphic
->charset
= GB2312
;
/* Remember the GB2312 graphic in the encoding info; the encoder reads ei->gb2312. */
627 ei
->gb2312
= graphic
;
629 } else if (strncmp("94*", s
, 3) == 0) {
630 graphic
->charset
= CS94
;
631 } else if (strncmp("96*", s
, 3) == 0) {
632 graphic
->charset
= CS96
;
/* The digit following the prefix is the number of bytes per character. */
638 case '1': case '2': case '3':
639 graphic
->length
= (size_t)(*s
- '0');
/* Keys accepted inside one escape definition and the callback that handles each; passed to _citrus_prop_parse_variable() by _citrus_HZ_parse_escape(). */
649 static const _citrus_prop_hint_t escape_hints
[] = {
650 _CITRUS_PROP_HINT_STR("CH", &_citrus_HZ_parse_char
),
651 _CITRUS_PROP_HINT_STR("GL", &_citrus_HZ_parse_graphic
),
652 _CITRUS_PROP_HINT_STR("GR", &_citrus_HZ_parse_graphic
),
653 _CITRUS_PROP_HINT_END
/*
 * Property-parser callback registered for the "0" and "1" keys in
 * root_hints; `context` is the _HZEncodingInfo being built.
 *
 * Allocates a zero-filled escape_t, appends it to escape set 0 or set 1
 * according to `name`, then parses the value text `s` with escape_hints,
 * passing the new escape as element 0 of the context array. Returns the
 * result of that nested parse.
 */
657 _citrus_HZ_parse_escape(void *context
, const char *name
, const char *s
)
663 _DIAGASSERT(context
!= NULL
);
664 _DIAGASSERT(name
!= NULL
);
665 _DIAGASSERT(s
!= NULL
);
667 ei
= (_HZEncodingInfo
*)context
;
/* TODO(review): confirm the allocation result is checked before the memset below. */
668 escape
= malloc(sizeof(*escape
));
671 memset(escape
, 0, sizeof(*escape
));
/* Record the owning list in escape->set and append the escape to that list. */
672 if (strcmp("0", name
) == 0) {
673 escape
->set
= E0SET(ei
);
674 TAILQ_INSERT_TAIL(E0SET(ei
), escape
, entry
);
675 } else if (strcmp("1", name
) == 0) {
676 escape
->set
= E1SET(ei
);
677 TAILQ_INSERT_TAIL(E1SET(ei
), escape
, entry
);
/* Context array for the nested callbacks; element 0 is the escape (they also read element 1 as the encoding info). */
682 p
[0] = (void *)escape
;
684 return _citrus_prop_parse_variable(
685 escape_hints
, (void *)&p
[0], s
, strlen(s
));
/* Top-level keys of the module variable: "0" and "1" each define an escape in the corresponding set. */
688 static const _citrus_prop_hint_t root_hints
[] = {
689 _CITRUS_PROP_HINT_STR("0", &_citrus_HZ_parse_escape
),
690 _CITRUS_PROP_HINT_STR("1", &_citrus_HZ_parse_escape
),
691 _CITRUS_PROP_HINT_END
/*
 * Initialise the encoding info from the module variable `var` (lenvar
 * bytes): zero the structure, initialise both escape lists, and parse
 * the variable with root_hints.
 * NOTE(review): the uninit call is presumably the cleanup for a failed
 * parse or an unusable configuration -- confirm the guarding condition.
 */
695 _citrus_HZ_encoding_module_init(_HZEncodingInfo
* __restrict ei
,
696 const void * __restrict var
, size_t lenvar
)
700 _DIAGASSERT(ei
!= NULL
);
702 memset(ei
, 0, sizeof(*ei
));
703 TAILQ_INIT(E0SET(ei
));
704 TAILQ_INIT(E1SET(ei
));
705 errnum
= _citrus_prop_parse_variable(
706 root_hints
, (void *)ei
, var
, lenvar
);
708 _citrus_HZ_encoding_module_uninit(ei
);
712 /* ----------------------------------------------------------------------
713 * public interface for ctype
/* Declarations and operations table for the HZ ctype module -- presumably these macros come from citrus_ctype.h; confirm. */
716 _CITRUS_CTYPE_DECLS(HZ
);
717 _CITRUS_CTYPE_DEF_OPS(HZ
);
719 #include "citrus_ctype_template.h"
721 /* ----------------------------------------------------------------------
722 * public interface for stdenc
/* Declarations and operations table for the HZ stdenc module -- presumably these macros come from citrus_stdenc.h; confirm. */
725 _CITRUS_STDENC_DECLS(HZ
);
726 _CITRUS_STDENC_DEF_OPS(HZ
);
728 #include "citrus_stdenc_template.h"