2 * The authors of this software are Rob Pike and Ken Thompson.
3 * Copyright (c) 2002 by Lucent Technologies.
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose without fee is hereby granted, provided that this entire notice
6 * is included in all copies of any software which is or includes a copy
7 * or modification of this software and in all copies of the supporting
8 * documentation for such software.
9 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
10 * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
11 * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
12 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
29 T1
= ((1<<(Bit1
+1))-1) ^ 0xFF, /* 0000 0000 */
30 Tx
= ((1<<(Bitx
+1))-1) ^ 0xFF, /* 1000 0000 */
31 T2
= ((1<<(Bit2
+1))-1) ^ 0xFF, /* 1100 0000 */
32 T3
= ((1<<(Bit3
+1))-1) ^ 0xFF, /* 1110 0000 */
33 T4
= ((1<<(Bit4
+1))-1) ^ 0xFF, /* 1111 0000 */
34 T5
= ((1<<(Bit5
+1))-1) ^ 0xFF, /* 1111 1000 */
36 Rune1
= (1<<(Bit1
+0*Bitx
))-1, /* 0000 0000 0111 1111 */
37 Rune2
= (1<<(Bit2
+1*Bitx
))-1, /* 0000 0111 1111 1111 */
38 Rune3
= (1<<(Bit3
+2*Bitx
))-1, /* 1111 1111 1111 1111 */
39 Rune4
= (1<<(Bit4
+3*Bitx
))-1,
40 /* 0001 1111 1111 1111 1111 1111 */
42 Maskx
= (1<<Bitx
)-1, /* 0011 1111 */
43 Testx
= Maskx
^ 0xFF, /* 1100 0000 */
49 chartorune(Rune
*rune
, const char *str
)
55 * one character sequence
58 c
= *(unsigned char*)str
;
65 * two character sequence
68 c1
= *(unsigned char*)(str
+1) ^ Tx
;
74 l
= ((c
<< Bitx
) | c1
) & Rune2
;
82 * three character sequence
83 * 0800-FFFF => T3 Tx Tx
85 c2
= *(unsigned char*)(str
+2) ^ Tx
;
89 l
= ((((c
<< Bitx
) | c1
) << Bitx
) | c2
) & Rune3
;
97 * four character sequence (21-bit value)
98 * 10000-1FFFFF => T4 Tx Tx Tx
100 c3
= *(unsigned char*)(str
+3) ^ Tx
;
104 l
= ((((((c
<< Bitx
) | c1
) << Bitx
) | c2
) << Bitx
) | c3
) & Rune4
;
112 * Support for 5-byte or longer UTF-8 would go here, but
113 * since we don't have that, we'll just fall through to bad.
125 runetochar(char *str
, const Rune
*rune
)
127 /* Runes are signed, so convert to unsigned for range check. */
131 * one character sequence
132 * 00000-0007F => 00-7F
141 * two character sequence
145 str
[0] = T2
| (c
>> 1*Bitx
);
146 str
[1] = Tx
| (c
& Maskx
);
151 * If the Rune is out of range, convert it to the error rune.
152 * Do this test here because the error rune encodes to three bytes.
153 * Doing it earlier would duplicate work, since an out of range
154 * Rune wouldn't have fit in one or two bytes.
160 * three character sequence
161 * 0800-FFFF => T3 Tx Tx
164 str
[0] = T3
| (c
>> 2*Bitx
);
165 str
[1] = Tx
| ((c
>> 1*Bitx
) & Maskx
);
166 str
[2] = Tx
| (c
& Maskx
);
171 * four character sequence (21-bit value)
172 * 10000-1FFFFF => T4 Tx Tx Tx
174 str
[0] = T4
| (c
>> 3*Bitx
);
175 str
[1] = Tx
| ((c
>> 2*Bitx
) & Maskx
);
176 str
[2] = Tx
| ((c
>> 1*Bitx
) & Maskx
);
177 str
[3] = Tx
| (c
& Maskx
);
186 return runetochar(str
, &rune
);
190 fullrune(const char *str
, int n
)
193 int c
= *(unsigned char*)str
;
210 utflen(const char *s
)
218 c
= *(unsigned char*)s
;
224 s
+= chartorune(&rune
, s
);
231 utfrune(const char *s
, Rune c
)
237 if(c
< Runesync
) /* not part of utf sequence */
238 return strchr((char*)s
, c
);
241 c1
= *(unsigned char*)s
;
242 if(c1
< Runeself
) { /* one byte rune */
250 n
= chartorune(&r
, s
);