1 /* $OpenBSD: chartype.c,v 1.15 2016/04/11 21:17:29 schwarze Exp $ */
2 /* $NetBSD: chartype.c,v 1.6 2011/07/28 00:48:21 christos Exp $ */
5 * Copyright (c) 2009 The NetBSD Foundation, Inc.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
18 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
21 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
31 * chartype.c: character classification and meta information
/* Element count used for the conversion buffers in this file: it is passed
 * as the initial minimum size to ct_conv_buff_resize() and is also the
 * increment by which the buffers are grown. */
41 #define CT_BUFSIZ 1024
/* Forward declaration.  The two size_t arguments are the minimum sizes
 * wanted for the char buffer and for the wchar_t buffer, in that order. */
43 static void ct_conv_buff_resize(ct_buffer_t
*, size_t, size_t);
/*
 * ct_conv_buff_resize(): grow the buffers of a ct_buffer_t on demand.
 *   conv     - conversion buffer holding cbuff/csize and wbuff/wsize
 *   mincsize - minimum number of char elements wanted in conv->cbuff
 *   minwsize - minimum number of wchar_t elements wanted in conv->wbuff
 * A buffer is only reallocated when the requested minimum is larger than
 * the size currently recorded, so passing 0 leaves that buffer alone.
 * NOTE(review): several lines of this function are not visible in this
 * excerpt, including what happens to the reallocarray() result p.  The
 * size field is updated before the allocation result is known - confirm
 * the failure path against the full file.
 */
46 ct_conv_buff_resize(ct_buffer_t
*conv
, size_t mincsize
, size_t minwsize
)
/* Narrow (char) buffer: act only when more room is requested. */
49 if (mincsize
> conv
->csize
) {
50 conv
->csize
= mincsize
;
/* csize is used as the element count, with sizeof(char) as element size. */
51 p
= reallocarray(conv
->cbuff
, conv
->csize
, sizeof(char));
/* Wide (wchar_t) buffer: same pattern as above. */
60 if (minwsize
> conv
->wsize
) {
61 conv
->wsize
= minwsize
;
/* wsize is used as the element count, with sizeof(wchar_t) as element size. */
62 p
= reallocarray(conv
->wbuff
, conv
->wsize
, sizeof(wchar_t));
/*
 * ct_encode_string(): encode the wide string s into conv->cbuff using
 * ct_encode_char() for each character.
 *   s    - wide-character input
 *   conv - conversion buffer whose char side (cbuff/csize) receives output
 * NOTE(review): the loop header, the guard around the resize and the
 * return statement are not visible in this excerpt.
 */
74 ct_encode_string(const wchar_t *s
, ct_buffer_t
*conv
)
/* Make sure the char buffer has at least CT_BUFSIZ elements; the wide
 * buffer is left alone (minimum 0). */
82 ct_conv_buff_resize(conv
, CT_BUFSIZ
, 0);
/* Here "used" temporarily holds the space still free after dst. */
88 used
= conv
->csize
- (dst
- conv
->cbuff
);
/* Remember the write offset, grow the buffer by CT_BUFSIZ, then rebuild
 * dst from conv->cbuff (see line 94), since the buffer was reallocated. */
90 used
= dst
- conv
->cbuff
;
91 ct_conv_buff_resize(conv
, conv
->csize
+ CT_BUFSIZ
, 0);
94 dst
= conv
->cbuff
+ used
;
/* 5 is given as the space available at dst - presumably guaranteed by
 * the (not visible) check on the free space above; TODO confirm. */
96 used
= ct_encode_char(dst
, 5, *s
);
97 if (used
== -1) /* failed to encode, need more buffer space */
/*
 * ct_decode_string(): convert the multibyte string s into wide characters
 * stored in conv->wbuff, using mbstowcs().
 *   s    - multibyte input
 *   conv - conversion buffer whose wide side (wbuff/wsize) receives output
 * NOTE(review): the statements taken on conversion failure and the return
 * statement are not visible in this excerpt.
 */
107 ct_decode_string(const char *s
, ct_buffer_t
*conv
)
/* Make sure the wide buffer has at least CT_BUFSIZ elements. */
114 ct_conv_buff_resize(conv
, 0, CT_BUFSIZ
);
/* Measuring pass: a NULL destination makes mbstowcs() only count the wide
 * characters needed, not including the terminating NUL. */
118 len
= mbstowcs(NULL
, s
, 0);
/* (size_t)-1 is mbstowcs()'s error return (invalid multibyte sequence). */
119 if (len
== (size_t)-1)
/* NOTE(review): with len == conv->wsize no resize happens, yet the
 * conversion below is limited to conv->wsize elements, so the terminating
 * NUL may not be written in that case - confirm against the full file. */
121 if (len
> conv
->wsize
)
122 ct_conv_buff_resize(conv
, 0, len
+ 1);
/* Real conversion into the wide buffer, limited to wsize elements. */
126 mbstowcs(conv
->wbuff
, s
, conv
->wsize
);
/*
 * ct_decode_argv(): convert an argv-style array of multibyte strings into
 * wide strings.  The converted text is placed in conv->wbuff and a newly
 * allocated array (wargv) of argc elements is created alongside it.
 *   argc - number of entries in argv
 *   argv - multibyte strings; entries may be NULL
 *   conv - conversion buffer whose wide side receives the converted text
 * NOTE(review): how wargv is filled, the error path and the return
 * statement are not visible in this excerpt.
 */
132 ct_decode_argv(int argc
, const char *argv
[], ct_buffer_t
*conv
)
140 /* Make sure we have enough space in the conversion buffer to store all
141 * the argv strings. */
/* bufspace = sum over non-NULL entries of (byte length + 1 for the NUL). */
142 for (i
= 0, bufspace
= 0; i
< argc
; ++i
)
143 bufspace
+= argv
[i
] ? strlen(argv
[i
]) + 1 : 0;
/* NOTE(review): the wide-buffer minimum appears to be counted in wchar_t
 * elements (see ct_conv_buff_resize), so multiplying by sizeof(*p) looks
 * like an over-allocation rather than a requirement - confirm. */
144 ct_conv_buff_resize(conv
, 0, bufspace
* sizeof(*p
));
/* Fresh allocation (NULL old pointer) of argc elements for the result. */
148 wargv
= reallocarray(NULL
, argc
, sizeof(*wargv
));
/* p walks through conv->wbuff as each argument is converted. */
150 for (i
= 0, p
= conv
->wbuff
; i
< argc
; ++i
) {
151 if (!argv
[i
]) { /* don't pass null pointers to mbstowcs */
/* Convert one argument at p, limited to bufspace wide characters. */
156 wlen
= mbstowcs(p
, argv
[i
], bufspace
);
/* A result equal to the limit means mbstowcs() stopped without writing
 * the terminating NUL. */
158 if (wlen
== (size_t)-1 || wlen
== bufspace
) {
159 /* Encoding error or not enough room for NUL. */
163 wlen
++; /* include NUL in the count */
/*
 * ct_enc_width(): width of the encoded form of the wide character c,
 * selected by a chain of range checks on c.
 * Returns 0 when none of the range checks match (invalid codepoint).
 * NOTE(review): the first checks and the values returned for each range
 * are not visible in this excerpt.
 */
173 ct_enc_width(wchar_t c
)
175 /* UTF-8 encoding specific values */
/* Range up to 0xFFFF. */
180 else if (c
< 0x10000)
/* Range up to 0x10FFFF. */
182 else if (c
< 0x110000)
185 return 0; /* not a valid codepoint */
/*
 * ct_encode_char(): encode the wide character c into dst.
 *   dst - destination buffer
 *   len - space available at dst
 *   c   - wide character to encode
 * The caller ct_encode_string() treats a return value of -1 as failure.
 * NOTE(review): everything after the length check, including the return
 * values, is not visible in this excerpt.
 */
189 ct_encode_char(char *dst
, size_t len
, wchar_t c
)
/* Check the available space against the width ct_enc_width() reports. */
192 if (len
< ct_enc_width(c
))
/*
 * ct_visual_string(): build the visual form of the wide string s by
 * calling ct_visual_char() for each character, writing into a buffer that
 * is grown in CT_BUFSIZ steps when ct_visual_char() reports lack of space.
 * buff and buffsize are static, so they keep their values across calls.
 * NOTE(review): presumably the returned pointer refers to this static
 * buffer, which a later call would overwrite - the return statement is
 * not visible here, confirm.
 */
203 protected const wchar_t *
204 ct_visual_string(const wchar_t *s
)
/* Output buffer and its size in wchar_t elements. */
206 static wchar_t *buff
= NULL
;
207 static size_t buffsize
= 0;
/* Initial allocation of CT_BUFSIZ elements. */
215 buffsize
= CT_BUFSIZ
;
216 buff
= reallocarray(NULL
, buffsize
, sizeof(*buff
));
/* Space offered to ct_visual_char() is whatever remains after dst. */
220 used
= ct_visual_char(dst
, buffsize
- (dst
- buff
), *s
);
221 if (used
== -1) { /* failed to encode, need more buffer space */
223 buffsize
+= CT_BUFSIZ
;
224 p
= reallocarray(buff
, buffsize
, sizeof(*buff
));
229 /* don't increment s here - we want to retry it! */
/* dst has reached the end of the buffer, so one more reallocation is
 * needed before anything else can be stored at dst. */
235 if (dst
>= (buff
+ buffsize
)) { /* sigh */
237 p
= reallocarray(buff
, buffsize
, sizeof(*buff
));
/* Point dst at the last element of the buffer. */
241 dst
= buff
+ buffsize
- 1;
/*
 * ct_visual_width(): number of display cells used by the visual form of
 * the wide character c, chosen by the class ct_chr_class() reports.
 * NOTE(review): several case labels and the computation of w are not
 * visible in this excerpt.
 */
254 ct_visual_width(wchar_t c
)
256 int t
= ct_chr_class(c
);
259 case CHTYPE_ASCIICTL
:
260 return 2; /* ^@ ^? etc. */
262 return 1; /* Hmm, this really needs to be handled outside! */
264 return 0; /* Should this be 1 instead? */
/* A width of -1 is reported as 0. */
267 return (w
== -1 ? 0 : w
);
268 case CHTYPE_NONPRINT
:
/* These widths match the return values of ct_visual_char() for
 * CHTYPE_NONPRINT: 8 cells with five hex digits, 7 with four. */
269 if (c
> 0xffff) /* prefer standard 4-byte display over 5-byte */
270 return 8; /* \U+12345 */
272 return 7; /* \U+1234 */
274 return 0; /* should not happen */
/*
 * ct_visual_char(): write the visual form of the wide character c to dst.
 *   dst - destination for the visual form
 *   len - number of wchar_t elements available at dst
 *   c   - character to render
 * Returns -1 when len is too small; for CHTYPE_NONPRINT it returns the
 * number of elements written (7 or 8).
 * NOTE(review): parts of every case are not visible in this excerpt.
 */
280 ct_visual_char(wchar_t *dst
, size_t len
, wchar_t c
)
282 int t
= ct_chr_class(c
);
/* Control characters are shown in caret notation. */
286 case CHTYPE_ASCIICTL
:
288 return -1; /* insufficient space */
291 *dst
= '?'; /* DEL -> ^? */
/* Setting bit 0100 (octal) maps a control code to its letter, e.g. 1 -> 'A'. */
293 *dst
= c
| 0100; /* uncontrolify it */
297 return -1; /* insufficient space */
300 case CHTYPE_NONPRINT
:
301 /* we only use single-width glyphs for display,
302 * so this is right */
303 if ((ssize_t
)len
< ct_visual_width(c
))
304 return -1; /* insufficient space */
/* Maps a value 0..15 to its uppercase hexadecimal digit. */
308 #define tohexdigit(v) "0123456789ABCDEF"[v]
/* The fifth (highest) hex digit is only written for c above 0xffff;
 * the remaining four digits are always written, high nibble first. */
309 if (c
> 0xffff) /* prefer standard 4-byte display over 5-byte */
310 *dst
++ = tohexdigit(((unsigned int) c
>> 16) & 0xf);
311 *dst
++ = tohexdigit(((unsigned int) c
>> 12) & 0xf);
312 *dst
++ = tohexdigit(((unsigned int) c
>> 8) & 0xf);
313 *dst
++ = tohexdigit(((unsigned int) c
>> 4) & 0xf);
314 *dst
= tohexdigit(((unsigned int) c
) & 0xf);
/* Same widths as ct_visual_width() reports for CHTYPE_NONPRINT. */
315 return (c
> 0xffff) ? 8 : 7;
317 /* these two should be handled outside this function */
318 default: /* we should never hit the default */
/*
 * ct_chr_class(): classify the wide character c into one of the CHTYPE_*
 * classes used by ct_visual_width() and ct_visual_char().
 * NOTE(review): the first branch(es) of the chain and the value returned
 * for printable characters are not visible in this excerpt.
 */
327 ct_chr_class(wchar_t c
)
/* Control characters below 0x100 get caret-style handling. */
333 else if (c
< 0x100 && iswcntrl(c
))
334 return CHTYPE_ASCIICTL
;
335 else if (iswprint(c
))
/* Anything not matched above is treated as non-printable. */
338 return CHTYPE_NONPRINT
;