4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
15 ** DO NOT EDIT THIS MACHINE GENERATED FILE.
18 #if defined(SQLITE_ENABLE_FTS4_UNICODE61)
19 #if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)
24 ** Return true if the argument corresponds to a unicode codepoint
25 ** classified as either a letter or a number. Otherwise false.
27 ** The results are undefined if the value passed to this function
30 int sqlite3FtsUnicodeIsalnum(int c
){
31 /* Each unsigned integer in the following array corresponds to a contiguous
32 ** range of unicode codepoints that are not either letters or numbers (i.e.
33 ** codepoints for which this function should return 0).
35 ** The most significant 22 bits in each 32-bit value contain the first
36 ** codepoint in the range. The least significant 10 bits are used to store
37 ** the size of the range (always at least 1). In other words, the value
38 ** ((C<<10) + N) represents a range of N codepoints starting with codepoint
39 ** C. It is not possible to represent a range larger than 1023 codepoints
42 const static unsigned int aEntry
[] = {
43 0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07,
44 0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01,
45 0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401,
46 0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01,
47 0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01,
48 0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802,
49 0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F,
50 0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401,
51 0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804,
52 0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403,
53 0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812,
54 0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001,
55 0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802,
56 0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805,
57 0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401,
58 0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03,
59 0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807,
60 0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001,
61 0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01,
62 0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804,
63 0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001,
64 0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802,
65 0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01,
66 0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06,
67 0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007,
68 0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006,
69 0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417,
70 0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14,
71 0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07,
72 0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01,
73 0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001,
74 0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802,
75 0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F,
76 0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002,
77 0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802,
78 0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006,
79 0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D,
80 0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802,
81 0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027,
82 0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403,
83 0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805,
84 0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04,
85 0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401,
86 0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005,
87 0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B,
88 0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A,
89 0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001,
90 0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59,
91 0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807,
92 0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01,
93 0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E,
94 0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100,
95 0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10,
96 0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402,
97 0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804,
98 0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012,
99 0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004,
100 0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002,
101 0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803,
102 0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07,
103 0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02,
104 0x037FFC02, 0x03E3FC01, 0x03EC7801, 0x03ECA401, 0x03EEC810,
105 0x03F4F802, 0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023,
106 0x03F95013, 0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807,
107 0x03FCEC06, 0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405,
108 0x04040003, 0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E,
109 0x040E7C01, 0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01,
110 0x04280403, 0x04281402, 0x04283004, 0x0428E003, 0x0428FC01,
111 0x04294009, 0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016,
112 0x04420003, 0x0442C012, 0x04440003, 0x04449C0E, 0x04450004,
113 0x04460003, 0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004,
114 0x05BD442E, 0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5,
115 0x07480046, 0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01,
116 0x075C5401, 0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401,
117 0x075EA401, 0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064,
118 0x07C2800F, 0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F,
119 0x07C4C03C, 0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009,
120 0x07C94002, 0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014,
121 0x07CE8025, 0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001,
122 0x07D108B6, 0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018,
123 0x07D7EC46, 0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401,
124 0x38008060, 0x380400F0, 0x3C000001, 0x3FFFF401, 0x40000001,
127 static const unsigned int aAscii
[4] = {
128 0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
132 return ( (aAscii
[c
>> 5] & (1 << (c
& 0x001F)))==0 );
133 }else if( c
<(1<<22) ){
134 unsigned int key
= (((unsigned int)c
)<<10) | 0x000003FF;
136 int iHi
= sizeof(aEntry
)/sizeof(aEntry
[0]) - 1;
139 int iTest
= (iHi
+ iLo
) / 2;
140 if( key
>= aEntry
[iTest
] ){
147 assert( aEntry
[0]<key
);
148 assert( key
>=aEntry
[iRes
] );
149 return (c
>= ((aEntry
[iRes
]>>10) + (aEntry
[iRes
]&0x3FF)));
156 ** If the argument is a codepoint corresponding to a lowercase letter
157 ** in the ASCII range with a diacritic added, return the codepoint
158 ** of the ASCII letter only. For example, if passed 235 - "LATIN
159 ** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER
160 ** E"). The results of passing a codepoint that corresponds to an
161 ** uppercase letter are undefined.
/*
** Map a codepoint covered by the aDia[] ranges below to a replacement
** character, or return it unchanged if no range covers it.
**
** For example, if passed 235 - "LATIN SMALL LETTER E WITH DIAERESIS" -
** this function returns 101 ('e', "LATIN SMALL LETTER E"). A codepoint
** that is not covered by any range (plain 'e', or 247 "DIVISION SIGN")
** is returned as is. A few ranges have '\0' as their replacement, in
** which case 0 is returned.
*/
static int remove_diacritic(int c){
  /* Each aDia[] entry describes one range of codepoints. The value
  ** (aDia[i]>>3) is the first codepoint of the range and (aDia[i]&0x07)
  ** is the number of further codepoints that follow it. The entries are
  ** sorted in ascending order so the array can be binary searched.
  **
  ** Both tables are read-only, so they are declared "static const"
  ** rather than being rebuilt on the stack by every call. */
  static const unsigned short aDia[] = {
        0,  1797,  1848,  1859,  1891,  1928,  1940,  1995,
     2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286,
     2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732,
     2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336,
     3456,  3696,  3712,  3728,  3744,  3896,  3912,  3928,
     3968,  4008,  4040,  4106,  4138,  4170,  4202,  4234,
     4266,  4296,  4312,  4344,  4408,  4424,  4472,  4504,
     6148,  6198,  6264,  6280,  6360,  6429,  6505,  6529,
    61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726,
    61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122,
    62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536,
    62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730,
    62924, 63050, 63082, 63274, 63390,
  };

  /* aChar[i] is the value returned for any codepoint that falls within
  ** the range described by aDia[i]. */
  static const char aChar[] = {
    '\0', 'a',  'c',  'e',  'i',  'n',  'o',  'u',  'y',  'y',  'a',  'c',
    'd',  'e',  'e',  'g',  'h',  'i',  'j',  'k',  'l',  'n',  'o',  'r',
    's',  't',  'u',  'u',  'w',  'y',  'z',  'o',  'u',  'a',  'i',  'o',
    'u',  'g',  'k',  'o',  'j',  'g',  'n',  'a',  'e',  'i',  'o',  'r',
    'u',  's',  't',  'h',  'a',  'e',  'o',  'y',  '\0', '\0', '\0', '\0',
    '\0', '\0', '\0', '\0', 'a',  'b',  'd',  'd',  'e',  'f',  'g',  'h',
    'h',  'i',  'k',  'l',  'l',  'm',  'n',  'p',  'r',  'r',  's',  't',
    'u',  'v',  'w',  'w',  'x',  'y',  'z',  'h',  't',  'w',  'y',  'a',
    'e',  'i',  'o',  'u',  'y',
  };

  /* Setting the low three bits of the key makes it compare greater than
  ** or equal to every entry whose first codepoint is c, whatever size is
  ** stored in that entry's low bits. */
  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;

  /* Binary search for the last entry that is less than or equal to key. */
  while( iHi>=iLo ){
    int iTest = (iHi + iLo) / 2;
    if( key >= aDia[iTest] ){
      iRes = iTest;
      iLo = iTest+1;
    }else{
      iHi = iTest-1;
    }
  }
  assert( key>=aDia[iRes] );

  /* If c lies beyond the end of the range found, it is not covered by
  ** any range and is returned unmodified. */
  return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);
}
210 ** Return true if the argument interpreted as a unicode codepoint
211 ** is a diacritical modifier character.
/*
** Return non-zero if the argument, interpreted as a unicode codepoint,
** is a diacritical modifier character. Otherwise return zero.
**
** Only codepoints in the range 768..817 can be reported as diacritics.
** Bit N of mask0 is set if codepoint (768+N) is one, and bit N of mask1
** is set if codepoint (800+N) is one.
*/
int sqlite3FtsUnicodeIsdiacritic(int c){
  unsigned int mask0 = 0x08029FDF;
  unsigned int mask1 = 0x000361F8;
  if( c<768 || c>817 ) return 0;

  /* The value shifted is an unsigned 1. When c==799 the shift count is
  ** 31, and left-shifting a signed 1 into the sign bit is undefined
  ** behaviour. No mask bit above 27 is set, so the result always fits
  ** in an int. */
  return (c < 768+32) ?
      (int)(mask0 & (1u << (c-768))) :
      (int)(mask1 & (1u << (c-768-32)));
}
224 ** Interpret the argument as a unicode codepoint. If the codepoint
225 ** is an upper case character that has a lower case equivalent,
226 ** return the codepoint corresponding to the lower case version.
227 ** Otherwise, return a copy of the argument.
229 ** The results are undefined if the value passed to this function
230 ** is less than zero.
232 int sqlite3FtsUnicodeFold(int c
, int bRemoveDiacritic
){
233 /* Each entry in the following array defines a rule for folding a range
234 ** of codepoints to lower case. The rule applies to a range of nRange
235 ** codepoints starting at codepoint iCode.
237 ** If the least significant bit in flags is clear, then the rule applies
238 ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
239 ** need to be folded). Or, if it is set, then the rule only applies to
240 ** every second codepoint in the range, starting with codepoint C.
242 ** The 7 most significant bits in flags are an index into the aiOff[]
243 ** array. If a specific codepoint C does require folding, then its lower
244 ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
246 ** The contents of this array are generated by parsing the CaseFolding.txt
247 ** file distributed as part of the "Unicode Character Database". See
248 ** http://www.unicode.org for details.
250 static const struct TableEntry
{
251 unsigned short iCode
;
253 unsigned char nRange
;
255 {65, 14, 26}, {181, 64, 1}, {192, 14, 23},
256 {216, 14, 7}, {256, 1, 48}, {306, 1, 6},
257 {313, 1, 16}, {330, 1, 46}, {376, 116, 1},
258 {377, 1, 6}, {383, 104, 1}, {385, 50, 1},
259 {386, 1, 4}, {390, 44, 1}, {391, 0, 1},
260 {393, 42, 2}, {395, 0, 1}, {398, 32, 1},
261 {399, 38, 1}, {400, 40, 1}, {401, 0, 1},
262 {403, 42, 1}, {404, 46, 1}, {406, 52, 1},
263 {407, 48, 1}, {408, 0, 1}, {412, 52, 1},
264 {413, 54, 1}, {415, 56, 1}, {416, 1, 6},
265 {422, 60, 1}, {423, 0, 1}, {425, 60, 1},
266 {428, 0, 1}, {430, 60, 1}, {431, 0, 1},
267 {433, 58, 2}, {435, 1, 4}, {439, 62, 1},
268 {440, 0, 1}, {444, 0, 1}, {452, 2, 1},
269 {453, 0, 1}, {455, 2, 1}, {456, 0, 1},
270 {458, 2, 1}, {459, 1, 18}, {478, 1, 18},
271 {497, 2, 1}, {498, 1, 4}, {502, 122, 1},
272 {503, 134, 1}, {504, 1, 40}, {544, 110, 1},
273 {546, 1, 18}, {570, 70, 1}, {571, 0, 1},
274 {573, 108, 1}, {574, 68, 1}, {577, 0, 1},
275 {579, 106, 1}, {580, 28, 1}, {581, 30, 1},
276 {582, 1, 10}, {837, 36, 1}, {880, 1, 4},
277 {886, 0, 1}, {902, 18, 1}, {904, 16, 3},
278 {908, 26, 1}, {910, 24, 2}, {913, 14, 17},
279 {931, 14, 9}, {962, 0, 1}, {975, 4, 1},
280 {976, 140, 1}, {977, 142, 1}, {981, 146, 1},
281 {982, 144, 1}, {984, 1, 24}, {1008, 136, 1},
282 {1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1},
283 {1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1},
284 {1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32},
285 {1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1},
286 {1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38},
287 {4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1},
288 {7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1},
289 {7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6},
290 {7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6},
291 {8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8},
292 {8088, 150, 8}, {8104, 150, 8}, {8120, 150, 2},
293 {8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1},
294 {8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2},
295 {8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2},
296 {8172, 152, 1}, {8184, 112, 2}, {8186, 114, 2},
297 {8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1},
298 {8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16},
299 {8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47},
300 {11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1},
301 {11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1},
302 {11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1},
303 {11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2},
304 {11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1},
305 {42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14},
306 {42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1},
307 {42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1},
308 {42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1},
311 static const unsigned short aiOff
[] = {
312 1, 2, 8, 15, 16, 26, 28, 32,
313 37, 38, 40, 48, 63, 64, 69, 71,
314 79, 80, 116, 202, 203, 205, 206, 207,
315 209, 210, 211, 213, 214, 217, 218, 219,
316 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721,
317 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274,
318 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406,
319 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462,
320 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511,
321 65514, 65521, 65527, 65528, 65529,
327 assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
330 if( c
>='A' && c
<='Z' ) ret
= c
+ ('a' - 'A');
332 int iHi
= sizeof(aEntry
)/sizeof(aEntry
[0]) - 1;
337 int iTest
= (iHi
+ iLo
) / 2;
338 int cmp
= (c
- aEntry
[iTest
].iCode
);
346 assert( iRes
<0 || c
>=aEntry
[iRes
].iCode
);
349 const struct TableEntry
*p
= &aEntry
[iRes
];
350 if( c
<(p
->iCode
+ p
->nRange
) && 0==(0x01 & p
->flags
& (p
->iCode
^ c
)) ){
351 ret
= (c
+ (aiOff
[p
->flags
>>1])) & 0x0000FFFF;
356 if( bRemoveDiacritic
) ret
= remove_diacritic(ret
);
359 else if( c
>=66560 && c
<66600 ){
365 #endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */
366 #endif /* defined(SQLITE_ENABLE_FTS4_UNICODE61) */