2 * Copyright 2009, Axel Dörfler, axeld@pinc-software.de.
3 * Distributed under the terms of the MIT License.
7 #include "UnicodeBlocks.h"
11 #undef B_TRANSLATION_CONTEXT
12 #define B_TRANSLATION_CONTEXT "UnicodeBlocks"
14 const unicode_block kNoBlock
;
16 const struct unicode_block_entry kUnicodeBlocks
[] = {
17 {B_TRANSLATE("Basic Latin"), 0x0000, 0x007f, false, B_BASIC_LATIN_BLOCK
},
18 {B_TRANSLATE("Latin-1 supplement"), 0x0080, 0x00ff, false,
19 B_LATIN1_SUPPLEMENT_BLOCK
},
20 {B_TRANSLATE("Latin extended A"), 0x0100, 0x017f, false,
21 B_LATIN_EXTENDED_A_BLOCK
},
22 {B_TRANSLATE("Latin extended B"), 0x0180, 0x024f, false,
23 B_LATIN_EXTENDED_B_BLOCK
},
24 {B_TRANSLATE("IPA extensions"), 0x0250, 0x02af, false,
25 B_IPA_EXTENSIONS_BLOCK
},
26 {B_TRANSLATE("Spacing modifier letters"), 0x02b0, 0x02ff, false,
27 B_SPACING_MODIFIER_LETTERS_BLOCK
},
28 {B_TRANSLATE("Combining diacritical marks"), 0x0300, 0x036f, false,
29 B_COMBINING_DIACRITICAL_MARKS_BLOCK
},
30 {B_TRANSLATE("Greek and Coptic"), 0x0370, 0x03ff, false,
31 B_BASIC_GREEK_BLOCK
| B_GREEK_SYMBOLS_AND_COPTIC_BLOCK
},
32 {B_TRANSLATE("Cyrillic"), 0x0400, 0x04ff, false, B_CYRILLIC_BLOCK
},
33 {B_TRANSLATE("Cyrillic supplement"), 0x0500, 0x052f, false,
35 {B_TRANSLATE("Armenian"), 0x0530, 0x058f, false, B_ARMENIAN_BLOCK
},
36 {B_TRANSLATE("Hebrew"), 0x0590, 0x05ff, false,
37 B_BASIC_HEBREW_BLOCK
| B_HEBREW_EXTENDED_BLOCK
},
38 {B_TRANSLATE("Arabic"), 0x0600, 0x06ff, false,
39 B_BASIC_ARABIC_BLOCK
| B_ARABIC_EXTENDED_BLOCK
},
40 {B_TRANSLATE("Syriac"), 0x0700, 0x074f, false, kNoBlock
},
41 {B_TRANSLATE("Arabic supplement"), 0x0750, 0x077f, false, kNoBlock
},
42 {B_TRANSLATE("Thaana"), 0x0780, 0x07bf, false, kNoBlock
},
43 {B_TRANSLATE("N'Ko"), 0x07c0, 0x07ff, false, kNoBlock
},
44 {B_TRANSLATE("Devanagari"), 0x0900, 0x097f, false, B_DEVANAGARI_BLOCK
},
45 {B_TRANSLATE("Bengali"), 0x0980, 0x09ff, false, B_BENGALI_BLOCK
},
46 {B_TRANSLATE("Gurmukhi"), 0x0a00, 0x0a7f, false, B_GURMUKHI_BLOCK
},
47 {B_TRANSLATE("Gujarati"), 0x0a80, 0x0aff, false, B_GUJARATI_BLOCK
},
48 {B_TRANSLATE("Oriya"), 0x0b00, 0x0b7f, false, B_ORIYA_BLOCK
},
49 {B_TRANSLATE("Tamil"), 0x0b80, 0x0bff, false, B_TAMIL_BLOCK
},
50 {B_TRANSLATE("Telugu"), 0x0c00, 0x0c7f, false, B_TELUGU_BLOCK
},
51 {B_TRANSLATE("Kannada"), 0x0c80, 0x0cff, false, B_KANNADA_BLOCK
},
52 {B_TRANSLATE("Malayalam"), 0x0d00, 0x0d7f, false, B_MALAYALAM_BLOCK
},
53 {B_TRANSLATE("Sinhala"), 0x0d80, 0x0dff, false, kNoBlock
},
54 {B_TRANSLATE("Thai"), 0x0e00, 0x0e7f, false, B_THAI_BLOCK
},
55 {B_TRANSLATE("Lao"), 0x0e80, 0x0eff, false, B_LAO_BLOCK
},
56 {B_TRANSLATE("Tibetan"), 0x0f00, 0x0fff, false, kNoBlock
},
57 {B_TRANSLATE("Myanmar"), 0x1000, 0x109f, false, kNoBlock
},
58 {B_TRANSLATE("Georgian"), 0x10a0, 0x10ff, false, B_BASIC_GEORGIAN_BLOCK
},
59 {B_TRANSLATE("Hangul Jamo"), 0x1100, 0x11ff, false, kNoBlock
},
61 {B_TRANSLATE("Ethiopic"), 0x1200, 0x137f, false, kNoBlock
},
62 {B_TRANSLATE("Ethiopic supplement"), 0x1380, 0x139f, false, kNoBlock
},
63 {B_TRANSLATE("Cherokee"), 0x13a0, 0x13ff, false, kNoBlock
},
64 {B_TRANSLATE("Unified Canadian Aboriginal syllabics"),
65 0x1400, 0x167f, false, kNoBlock
},
66 {B_TRANSLATE("Ogham"), 0x1680, 0x169f, false, kNoBlock
},
67 {B_TRANSLATE("Runic"), 0x16a0, 0x16ff, false, kNoBlock
},
68 {B_TRANSLATE("Tagalog"), 0x1700, 0x171f, false, kNoBlock
},
69 {B_TRANSLATE("Hanunoo"), 0x1720, 0x173f, false, kNoBlock
},
70 {B_TRANSLATE("Buhid"), 0x1740, 0x175f, false, kNoBlock
},
71 {B_TRANSLATE("Tagbanwa"), 0x1760, 0x177f, false, kNoBlock
},
72 {B_TRANSLATE("Khmer"), 0x1780, 0x17ff, false, kNoBlock
},
73 {B_TRANSLATE("Mongolian"), 0x1800, 0x18af, false, kNoBlock
},
74 {B_TRANSLATE("Limbu"), 0x1900, 0x194f, false, kNoBlock
},
75 {B_TRANSLATE("Tai Le"), 0x1950, 0x197f, false, kNoBlock
},
76 {B_TRANSLATE("New Tai Lue"), 0x1980, 0x19df, false, kNoBlock
},
77 {B_TRANSLATE("Khmer symbols"), 0x19e0, 0x19ff, false, kNoBlock
},
78 {B_TRANSLATE("Buginese"), 0x1a00, 0x1a1f, false, kNoBlock
},
79 {B_TRANSLATE("Balinese"), 0x1b00, 0x1b7f, false, kNoBlock
},
80 {B_TRANSLATE("Sundanese"), 0x1b80, 0x1bbf, false, kNoBlock
},
81 {B_TRANSLATE("Lepcha"), 0x1c00, 0x1c4f, false, kNoBlock
},
82 {B_TRANSLATE("Ol Chiki"), 0x1c50, 0x1c7f, false, kNoBlock
},
83 {B_TRANSLATE("Cyrillic extended C"), 0x1c80, 0x1c8f, false, kNoBlock
},
84 {B_TRANSLATE("Phonetic extensions"), 0x1d00, 0x1d7f, false, kNoBlock
},
85 {B_TRANSLATE("Phonetic extensions supplement"),
86 0x1d80, 0x1dbf, false, kNoBlock
},
87 {B_TRANSLATE("Combining diacritical marks supplement"),
88 0x1dc0, 0x1dff, false, kNoBlock
},
89 {B_TRANSLATE("Latin extended additional"),
90 0x1e00, 0x1eff, false, kNoBlock
},
91 {B_TRANSLATE("Greek extended"), 0x1f00, 0x1fff, false, kNoBlock
},
92 {B_TRANSLATE("General punctuation"), 0x2000, 0x206f, false, kNoBlock
},
93 {B_TRANSLATE("Superscripts and subscripts"),
94 0x2070, 0x209f, false, kNoBlock
},
95 {B_TRANSLATE("Currency symbols"), 0x20a0, 0x20cf, false, kNoBlock
},
96 {B_TRANSLATE("Combining diacritical marks for symbols"),
97 0x20d0, 0x20ff, false, kNoBlock
},
98 {B_TRANSLATE("Letterlike symbols"), 0x2100, 0x214f, false, kNoBlock
},
99 {B_TRANSLATE("Number forms"), 0x2150, 0x218f, false, kNoBlock
},
100 {B_TRANSLATE("Arrows"), 0x2190, 0x21ff, false, kNoBlock
},
101 {B_TRANSLATE("Mathematical operators"), 0x2200, 0x22ff, false, kNoBlock
},
102 {B_TRANSLATE("Miscellaneous technical"), 0x2300, 0x23ff, false, kNoBlock
},
103 {B_TRANSLATE("Control pictures"), 0x2400, 0x243f, false, kNoBlock
},
104 {B_TRANSLATE("Optical character recognition"),
105 0x2440, 0x245f, false, kNoBlock
},
106 {B_TRANSLATE("Enclosed alphanumerics"), 0x2460, 0x24ff, false, kNoBlock
},
107 {B_TRANSLATE("Box drawing"), 0x2500, 0x257f, false, kNoBlock
},
108 {B_TRANSLATE("Block elements"), 0x2580, 0x259f, false, kNoBlock
},
109 {B_TRANSLATE("Geometric shapes"), 0x25a0, 0x25ff, false, kNoBlock
},
110 {B_TRANSLATE("Miscellaneous symbols"), 0x2600, 0x26ff, false, kNoBlock
},
111 {B_TRANSLATE("Dingbats"), 0x2700, 0x27bf, false, kNoBlock
},
112 {B_TRANSLATE("Miscellaneous mathematical symbols A"),
113 0x27c0, 0x27ef, false, kNoBlock
},
114 {B_TRANSLATE("Supplemental arrows A"), 0x27f0, 0x27ff, false, kNoBlock
},
115 {B_TRANSLATE("Braille patterns"), 0x2800, 0x28ff, false, kNoBlock
},
116 {B_TRANSLATE("Supplemental arrows B"), 0x2900, 0x297f, false, kNoBlock
},
117 {B_TRANSLATE("Miscellaneous mathematical symbols B"),
118 0x2980, 0x29ff, false, kNoBlock
},
119 {B_TRANSLATE("Supplemental mathematical operators"),
120 0x2a00, 0x2aff, false, kNoBlock
},
121 {B_TRANSLATE("Miscellaneous symbols and arrows"),
122 0x2b00, 0x2bff, false, kNoBlock
},
123 {B_TRANSLATE("Glagotic"), 0x2c00, 0x2c5f, false, kNoBlock
},
124 {B_TRANSLATE("Latin extended C"), 0x2c60, 0x2c7f, false, kNoBlock
},
125 {B_TRANSLATE("Coptic"), 0x2c80, 0x2cff, false, kNoBlock
},
126 {B_TRANSLATE("Georgian supplement"), 0x2d00, 0x2d2f, false, kNoBlock
},
127 {B_TRANSLATE("Tifinagh"), 0x2d30, 0x2d7f, false, kNoBlock
},
128 {B_TRANSLATE("Ethiopic extended"), 0x2d80, 0x2ddf, false, kNoBlock
},
129 {B_TRANSLATE("Cyrillic extended A"), 0x2de0, 0x2dff, false, kNoBlock
},
130 {B_TRANSLATE("Supplement punctuation"), 0x2e00, 0x2e7f, false, kNoBlock
},
131 {B_TRANSLATE("CJK radicals supplement"), 0x2e80, 0x2eff, false, kNoBlock
},
132 {B_TRANSLATE("Kangxi radicals"), 0x2f00, 0x2fdf, false, kNoBlock
},
133 {B_TRANSLATE("Ideographic description characters"),
134 0x2ff0, 0x2fff, false, kNoBlock
},
135 {B_TRANSLATE("CJK symbols and punctuation"),
136 0x3000, 0x303f, false, kNoBlock
},
137 {B_TRANSLATE("Hiragana"), 0x3040, 0x309f, false, kNoBlock
},
138 {B_TRANSLATE("Katakana"), 0x30a0, 0x30ff, false, kNoBlock
},
139 {B_TRANSLATE("Bopomofo"), 0x3100, 0x312f, false, kNoBlock
},
140 {B_TRANSLATE("Hangul compatibility Jamo"), 0x3130, 0x318f, false, kNoBlock
},
141 {B_TRANSLATE("Kanbun"), 0x3190, 0x319f, false, kNoBlock
},
142 {B_TRANSLATE("Bopomofo extended"), 0x31a0, 0x31bf, false, kNoBlock
},
143 {B_TRANSLATE("CJK strokes"), 0x31c0, 0x31ef, false, kNoBlock
},
144 {B_TRANSLATE("Katakana phonetic extensions"),
145 0x31f0, 0x31ff, false, kNoBlock
},
146 {B_TRANSLATE("Enclosed CJK letters and months"),
147 0x2ff0, 0x2fff, false, kNoBlock
},
148 {B_TRANSLATE("CJK compatibility"), 0x3300, 0x33ff, false, kNoBlock
},
149 {B_TRANSLATE("CJK unified ideographs extension A"),
150 0x3400, 0x4dbf, false, kNoBlock
},
151 {B_TRANSLATE("Yijing hexagram symbols"), 0x4dc0, 0x4dff, false, kNoBlock
},
152 {B_TRANSLATE("CJK unified ideographs"), 0x4e00, 0x9fff, false, kNoBlock
},
153 {B_TRANSLATE("Yi syllables"), 0xa000, 0xa48f, false, kNoBlock
},
154 {B_TRANSLATE("Yi Radicals"), 0xa490, 0xa4cf, false, kNoBlock
},
155 {B_TRANSLATE("Vai"), 0xa500, 0xa63f, false, kNoBlock
},
156 {B_TRANSLATE("Cyrillic extended B"), 0xa640, 0xa69f, false, kNoBlock
},
157 {B_TRANSLATE("Modifier tone letters"), 0xa700, 0xa71f, false, kNoBlock
},
158 {B_TRANSLATE("Latin extended D"), 0xa720, 0xa7ff, false, kNoBlock
},
159 {B_TRANSLATE("Syloti Nagri"), 0xa800, 0xa82f, false, kNoBlock
},
160 {B_TRANSLATE("Phags-pa"), 0xa840, 0xa87f, false, kNoBlock
},
161 {B_TRANSLATE("Saurashtra"), 0xa880, 0xa8df, false, kNoBlock
},
162 {B_TRANSLATE("Kayah Li"), 0xa900, 0xa92f, false, kNoBlock
},
163 {B_TRANSLATE("Rejang"), 0xa930, 0xa95f, false, kNoBlock
},
164 {B_TRANSLATE("Cham"), 0xaa00, 0xaa5f, false, kNoBlock
},
165 {B_TRANSLATE("Cherokee supplement"), 0xab70, 0xabbf, false, kNoBlock
},
166 {B_TRANSLATE("Hangul syllables"), 0xac00, 0xd7af, false, kNoBlock
},
167 // {B_TRANSLATE("High surrogates"), 0xd800, 0xdb7f, false, kNoBlock},
168 // {B_TRANSLATE("High private use surrogates"), 0xdb80, 0xdbff, false, kNoBlock},
169 // {B_TRANSLATE("Low surrogates"), 0xdc00, 0xdfff, false, kNoBlock},
170 {B_TRANSLATE("Private use area"), 0xe000, 0xf8ff, true, kNoBlock
},
171 {B_TRANSLATE("CJK compatibility ideographs"),
172 0xf900, 0xfaff, false, kNoBlock
},
173 {B_TRANSLATE("Alphabetic presentation forms"),
174 0xfb00, 0xfb4f, false, kNoBlock
},
175 {B_TRANSLATE("Arabic presentation forms A"),
176 0xfb50, 0xfdff, false, kNoBlock
},
177 {B_TRANSLATE("Variation selectors"), 0xfe00, 0xfe0f, false, kNoBlock
},
178 {B_TRANSLATE("Vertical forms"), 0xfe10, 0xfe1f, false, kNoBlock
},
179 {B_TRANSLATE("Combining half marks"), 0xfe20, 0xfe2f, false, kNoBlock
},
180 {B_TRANSLATE("CJK compatibility forms"), 0xfe30, 0xfe4f, false, kNoBlock
},
181 {B_TRANSLATE("Small form variants"), 0xfe50, 0xfe6f, false, kNoBlock
},
182 {B_TRANSLATE("Arabic presentation forms B"),
183 0xfe70, 0xfeff, false, kNoBlock
},
184 {B_TRANSLATE("Halfwidth and fullwidth forms"),
185 0xff00, 0xffef, false, kNoBlock
},
186 {B_TRANSLATE("Specials"), 0xfff0, 0xffff, false, kNoBlock
},
187 {B_TRANSLATE("Linear B syllabary"), 0x010000, 0x01007f, false, kNoBlock
},
188 {B_TRANSLATE("Linear B ideograms"), 0x010080, 0x0100ff, false, kNoBlock
},
189 {B_TRANSLATE("Aegean numbers"), 0x010100, 0x01013f, false, kNoBlock
},
190 {B_TRANSLATE("Ancient Greek numbers"),
191 0x010140, 0x01018f, false, kNoBlock
},
192 {B_TRANSLATE("Ancient symbols"), 0x010190, 0x0101cf, false, kNoBlock
},
193 {B_TRANSLATE("Phaistos disc"), 0x0101d0, 0x0101ff, false, kNoBlock
},
194 {B_TRANSLATE("Lycian"), 0x010280, 0x01029f, false, kNoBlock
},
195 {B_TRANSLATE("Carian"), 0x0102a0, 0x0102df, false, kNoBlock
},
196 {B_TRANSLATE("Old italic"), 0x010300, 0x01032f, false, kNoBlock
},
197 {B_TRANSLATE("Gothic"), 0x010330, 0x01034f, false, kNoBlock
},
198 {B_TRANSLATE("Ugaritic"), 0x010380, 0x01039f, false, kNoBlock
},
199 {B_TRANSLATE("Old Persian"), 0x0103a0, 0x0103df, false, kNoBlock
},
200 {B_TRANSLATE("Deseret"), 0x010400, 0x01044f, false, kNoBlock
},
201 {B_TRANSLATE("Shavian"), 0x010450, 0x01047f, false, kNoBlock
},
202 {B_TRANSLATE("Osmanya"), 0x010480, 0x0104af, false, kNoBlock
},
203 {B_TRANSLATE("Osage"), 0x0104b0, 0x0104ff, false, kNoBlock
},
204 {B_TRANSLATE("Cypriot syllabary"), 0x010800, 0x01083f, false, kNoBlock
},
205 {B_TRANSLATE("Hatran"), 0x0108e0, 0x0108ff, false, kNoBlock
},
206 {B_TRANSLATE("Phoenician"), 0x010900, 0x01091f, false, kNoBlock
},
207 {B_TRANSLATE("Lydian"), 0x010920, 0x01093f, false, kNoBlock
},
208 {B_TRANSLATE("Kharoshthi"), 0x010a00, 0x010a5f, false, kNoBlock
},
209 {B_TRANSLATE("Old Hungarian"), 0x010c80, 0x010cff, false, kNoBlock
},
210 {B_TRANSLATE("Multani"), 0x011280, 0x0112af, false, kNoBlock
},
211 {B_TRANSLATE("Newa"), 0x011400, 0x01147f, false, kNoBlock
},
212 {B_TRANSLATE("Mongolian supplement"), 0x011660, 0x01167f, false, kNoBlock
},
213 {B_TRANSLATE("Ahom"), 0x011700, 0x01173f, false, kNoBlock
},
214 {B_TRANSLATE("Bhaiksuki"), 0x011c00, 0x011c6f, false, kNoBlock
},
215 {B_TRANSLATE("Marchen"), 0x011c70, 0x011cbf, false, kNoBlock
},
216 {B_TRANSLATE("Cuneiform"), 0x012000, 0x0123ff, false, kNoBlock
},
217 {B_TRANSLATE("Cuneiform numbers and punctuation"),
218 0x012400, 0x01247f, false, kNoBlock
},
219 {B_TRANSLATE("Early Dynastic Cuneiform"),
220 0x012480, 0x01254f, false, kNoBlock
},
221 {B_TRANSLATE("Anatolian hieroglyphs"), 0x014400, 0x01467f, false, kNoBlock
},
222 {B_TRANSLATE("Ideographic symbols and punctuation"),
223 0x016fe0, 0x016fff, false, kNoBlock
},
224 {B_TRANSLATE("Tangut"), 0x017000, 0x0187ff, false, kNoBlock
},
225 {B_TRANSLATE("Tangut components"), 0x018800, 0x018aff, false, kNoBlock
},
226 {B_TRANSLATE("Byzantine musical symbols"),
227 0x01d000, 0x01d0ff, false, kNoBlock
},
228 {B_TRANSLATE("Musical symbols"), 0x01d100, 0x01d1ff, false, kNoBlock
},
229 {B_TRANSLATE("Ancient Greek musical notation"),
230 0x01d200, 0x01d24f, false, kNoBlock
},
231 {B_TRANSLATE("Tai Xuan Jing symbols"),
232 0x01d300, 0x01d35f, false, kNoBlock
},
233 {B_TRANSLATE("Counting rod numerals"),
234 0x01d360, 0x01d37f, false, kNoBlock
},
235 {B_TRANSLATE("Mathematical alphanumeric symbols"),
236 0x01d400, 0x01d7ff, false, kNoBlock
},
237 {B_TRANSLATE("Sutton SignWriting"), 0x01d800, 0x01daaf, false, kNoBlock
},
238 {B_TRANSLATE("Glagolitic supplement"), 0x01e000, 0x01e02f, false, kNoBlock
},
239 {B_TRANSLATE("Adlam"), 0x01e900, 0x01e95f, false, kNoBlock
},
240 {B_TRANSLATE("Mahjong tiles"), 0x01f000, 0x01f02f, false, kNoBlock
},
241 {B_TRANSLATE("Domino tiles"), 0x01f030, 0x01f09f, false, kNoBlock
},
242 {B_TRANSLATE("Supplemental Symbols and Pictographs"), 0x01f900, 0x01f9ff, false, kNoBlock
},
243 {B_TRANSLATE("CJK unified ideographs extension B"),
244 0x020000, 0x02a6df, false, kNoBlock
},
245 {B_TRANSLATE("CJK unified ideographs extension E"),
246 0x02b820, 0x02ceaf, false, kNoBlock
},
247 {B_TRANSLATE("CJK compatibility ideographs Supplement"),
248 0x02f800, 0x02fa1f, false, kNoBlock
},
249 {B_TRANSLATE("Tags"), 0x0e0000, 0x0e007f, false, kNoBlock
},
250 {B_TRANSLATE("Variation selectors supplement"),
251 0x0e0100, 0x0e01ef, false, kNoBlock
},
252 {B_TRANSLATE("Supplementary private use area A"),
253 0x0f0000, 0x0fffff, true, kNoBlock
},
254 {B_TRANSLATE("Supplementary private use area B"),
255 0x100000, 0x10ffff, true, kNoBlock
},
258 const uint32 kNumUnicodeBlocks
259 = sizeof(kUnicodeBlocks
) / sizeof(kUnicodeBlocks
[0]);
263 BlockForCharacter(const uint32 character
)
266 uint32 max
= kNumUnicodeBlocks
;
267 uint32 guess
= (max
+ min
) / 2;
269 while ((max
>= min
) && (guess
< kNumUnicodeBlocks
)) {
270 uint32 start
= kUnicodeBlocks
[guess
].start
;
271 uint32 end
= kUnicodeBlocks
[guess
].end
;
273 if (start
<= character
&& end
>= character
)
276 if (end
< character
) {
282 guess
= (max
+ min
) / 2;