3 static const unsigned char utf8_length
[256] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
7 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
8 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
9 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
11 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
12 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3,
13 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5,
16 static const unsigned char utf8_mask
[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03,
20 tb_utf8_char_length(char c
)
22 return utf8_length
[(unsigned char)c
];
26 tb_utf8_char_to_unicode(uint32_t *out
, const char *c
)
32 unsigned char len
= tb_utf8_char_length(*c
);
33 unsigned char mask
= utf8_mask
[len
- 1];
34 uint32_t result
= c
[0] & mask
;
35 for (i
= 1; i
< len
; ++i
) {
37 result
|= c
[i
] & 0x3f;
45 tb_utf8_unicode_to_char(char *out
, uint32_t c
)
54 } else if (c
< 0x800) {
57 } else if (c
< 0x10000) {
60 } else if (c
< 0x200000) {
63 } else if (c
< 0x4000000) {
71 for (i
= len
- 1; i
> 0; --i
) {
72 out
[i
] = (c
& 0x3f) | 0x80;