Update mojo sdk to rev 1dc8a9a5db73d3718d99917fadf31f5fb2ebad4f
[chromium-blink-merge.git] / third_party / sqlite / src / ext / fts3 / unicode / mkunicode.tcl
blobc3083ee36863c29a98a61f4af3c7ea9932ff1c18
#
# Parameter $zName must be a path to the file UnicodeData.txt. This command
# reads the file and returns a list of mappings required to remove all
# diacritical marks from a unicode string. Each mapping is itself a list
# consisting of two elements - the unicode codepoint and the single ASCII
# character that it should be replaced with, or an empty string if the
# codepoint should simply be removed from the input. Examples:
#
#   { 224 a  }     (replace codepoint 224 with "a")
#   { 769 "" }     (remove codepoint 769 from input)
#
# Mappings are only returned for non-upper case codepoints. It is assumed
# that the input has already been folded to lower case: any codepoint that
# has an entry in the global array tl_lookup_table is skipped.
#
# The returned list is sorted by codepoint. An error ("parse error: <line>")
# is raised if a non-empty line does not contain exactly 15 fields. The
# file is closed before the command returns, whether or not an error occurs.
#
proc rd_load_unicodedata_text {zName} {
  global tl_lookup_table

  # Names of the semicolon-separated fields of a UnicodeData.txt record, in
  # file order. Each record is unpacked into local variables of these names.
  set lField {
    code
    character_name
    general_category
    canonical_combining_classes
    bidirectional_category
    character_decomposition_mapping
    decimal_digit_value
    digit_value
    numeric_value
    mirrored
    unicode_1_name
    iso10646_comment_field
    uppercase_mapping
    lowercase_mapping
    titlecase_mapping
  }
  set lRet [list]

  set fd [open $zName]

  # The whole parse runs under [catch] so that the channel can be closed
  # before any error is propagated to the caller.
  set rc [catch {
    while { ![eof $fd] } {
      set line [gets $fd]
      if {$line == ""} continue

      set fields [split $line ";"]
      if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
      foreach $lField $fields {}

      # Only decompositions of the form "<hex> <hex>" are of interest.
      if { [llength $character_decomposition_mapping]!=2
        || [string is xdigit [lindex $character_decomposition_mapping 0]]==0
      } {
        continue
      }

      # Convert the hex fields with [format] rather than an unbraced [expr],
      # so that text read from the file is never evaluated as an expression.
      set iCode  [format %d "0x$code"]
      set iAscii [format %d "0x[lindex $character_decomposition_mapping 0]"]
      set iDia   [format %d "0x[lindex $character_decomposition_mapping 1]"]

      # Skip codepoints that are folded to something else (upper case).
      if {[info exists tl_lookup_table($iCode)]} continue

      # Keep the mapping only if the first decomposition element is an
      # ASCII letter (a-z or A-Z); remember the second as a diacritic.
      if { ($iAscii >= 97 && $iAscii <= 122)
        || ($iAscii >= 65 && $iAscii <= 90)
      } {
        lappend lRet [list $iCode [string tolower [format %c $iAscii]]]
        set dia($iDia) 1
      }
    }
  } msg]

  if {$rc} {
    set savedInfo $::errorInfo
    set savedCode $::errorCode
    catch {close $fd}
    error $msg $savedInfo $savedCode
  }
  close $fd

  # Each diacritic seen above maps to the empty string (i.e. is removed).
  foreach d [array names dia] {
    lappend lRet [list $d ""]
  }
  return [lsort -integer -index 0 $lRet]
}
#
# Write the C function remove_diacritic() to stdout.
#
# Parameter $map is a list of {codepoint char} pairs sorted by codepoint, as
# returned by [rd_load_unicodedata_text]. Adjacent entries that map to the
# same character are merged into a single range of at most 8 codepoints,
# provided every codepoint in the gap between them has an entry in the
# global array tl_lookup_table. Each range is emitted as the 16-bit value
# ((first codepoint << 3) + (range size - 1)) in aDia[], with the matching
# replacement character in aChar[].
#
# An error is raised, before anything is printed, if a range starts at a
# codepoint too large to be stored in 13 bits.
#
proc print_rd {map} {
  global tl_lookup_table
  set aChar [list]
  set lRange [list]

  set nRange 1
  set iFirst [lindex $map 0 0]
  set cPrev [lindex $map 0 1]

  foreach m [lrange $map 1 end] {
    foreach {i c} $m {}

    if {$cPrev == $c} {
      # Check that every codepoint between the end of the current range and
      # $i is one that gets folded away (has a tl_lookup_table entry).
      for {set j [expr {$iFirst+$nRange}]} {$j<$i} {incr j} {
        if {[info exists tl_lookup_table($j)]==0} break
      }

      if {$j==$i} {
        # Only 3 bits are available for the range size, hence the limit of 8.
        set nNew [expr {(1 + $i - $iFirst)}]
        if {$nNew<=8} {
          set nRange $nNew
          continue
        }
      }
    }

    lappend lRange [list $iFirst $nRange]
    lappend aChar $cPrev

    set iFirst $i
    set cPrev $c
    set nRange 1
  }

  lappend lRange [list $iFirst $nRange]
  lappend aChar $cPrev

  # aDia[] is an array of unsigned short, so (iCode<<3) must fit in 16 bits.
  foreach r $lRange {
    foreach {iCode nRange} $r {}
    if {$iCode >= (1<<13)} {error "codepoint is too large for format"}
  }

  puts "/*"
  puts "** If the argument is a codepoint corresponding to a lowercase letter"
  puts "** in the ASCII range with a diacritic added, return the codepoint"
  puts "** of the ASCII letter only. For example, if passed 235 - \"LATIN"
  puts "** SMALL LETTER E WITH DIAERESIS\" - return 101 (\"LATIN SMALL LETTER"
  puts "** E\"). The results of passing a codepoint that corresponds to an"
  puts "** uppercase letter are undefined."
  puts "*/"
  puts "static int remove_diacritic(int c)\{"
  puts " unsigned short aDia\[\] = \{"
  puts -nonewline " 0, "
  set i 1
  foreach r $lRange {
    foreach {iCode nRange} $r {}
    if {($i % 8)==0} {puts "" ; puts -nonewline " " }
    incr i

    puts -nonewline [format "%5d" [expr {($iCode<<3) + $nRange-1}]]
    puts -nonewline ", "
  }
  puts ""
  puts " \};"
  puts " char aChar\[\] = \{"
  puts -nonewline " '\\0', "
  set i 1
  foreach c $aChar {
    set str "'$c', "
    if {$c == ""} { set str "'\\0', " }

    if {($i % 12)==0} {puts "" ; puts -nonewline " " }
    incr i
    puts -nonewline "$str"
  }
  puts ""
  puts " \};"
  puts {
  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;
  while( iHi>=iLo ){
    int iTest = (iHi + iLo) / 2;
    if( key >= aDia[iTest] ){
      iRes = iTest;
      iLo = iTest+1;
    }else{
      iHi = iTest-1;
    }
  }
  assert( key>=aDia[iRes] );
  return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);}
  puts "\}"
}
#
# Write a C function named $zFunc to stdout that returns true if its
# argument is a diacritical modifier codepoint.
#
# Parameter $map is a list of {codepoint char} pairs. Entries with a
# non-zero codepoint and an empty char are the diacritics. They are encoded
# as two 32-bit masks relative to the smallest such codepoint, so they must
# all lie within a window of 64 consecutive codepoints; an error is raised
# if they do not, or if $map contains no diacritics at all.
#
proc print_isdiacritic {zFunc map} {

  set lCode [list]
  foreach m $map {
    foreach {code char} $m {}
    if {$code && $char == ""} { lappend lCode $code }
  }
  set lCode [lsort -integer $lCode]
  if {[llength $lCode]==0} {
    error "no diacritic codepoints found in map"
  }
  set iFirst [lindex $lCode 0]
  set iLast [lindex $lCode end]
  if {($iLast - $iFirst) >= 64} {
    error "diacritic codepoints span too large a range for format"
  }

  # Bit N of i1 (N<32) or bit N-32 of i2 is set if ($iFirst+N) is a diacritic.
  set i1 0
  set i2 0

  foreach c $lCode {
    set i [expr {$c - $iFirst}]
    if {$i < 32} {
      set i1 [expr {$i1 | (1<<$i)}]
    } else {
      set i2 [expr {$i2 | (1<<($i-32))}]
    }
  }

  puts "/*"
  puts "** Return true if the argument interpreted as a unicode codepoint"
  puts "** is a diacritical modifier character."
  puts "*/"
  puts "int ${zFunc}\(int c)\{"
  puts " unsigned int mask0 = [format "0x%08X" $i1];"
  puts " unsigned int mask1 = [format "0x%08X" $i2];"

  # The generated code shifts an unsigned 1: shifting a signed int into its
  # sign bit (bit 31) is undefined behaviour in C.
  puts " if( c<$iFirst || c>$iLast ) return 0;"
  puts " return (c < $iFirst+32) ?"
  puts " (mask0 & ((unsigned int)1 << (c-$iFirst))) :"
  puts " (mask1 & ((unsigned int)1 << (c-$iFirst-32)));"
  puts "\}"
}
#-------------------------------------------------------------------------

# Parameter $zName must be a path to the file UnicodeData.txt. This command
# reads the file and returns a list of codepoints (integers), in file order.
# The list contains all codepoints in the UnicodeData.txt assigned to any
# "General Category" that is not a "Letter" (L*) or "Number" (N*), with the
# exception of category "Co", which is treated like a letter or number.
#
# An error ("parse error: <line>") is raised if a non-empty line does not
# contain exactly 15 fields. The file is closed before the command returns,
# whether or not an error occurs.
#
proc an_load_unicodedata_text {zName} {
  # Names of the semicolon-separated fields of a UnicodeData.txt record, in
  # file order. Each record is unpacked into local variables of these names.
  set lField {
    code
    character_name
    general_category
    canonical_combining_classes
    bidirectional_category
    character_decomposition_mapping
    decimal_digit_value
    digit_value
    numeric_value
    mirrored
    unicode_1_name
    iso10646_comment_field
    uppercase_mapping
    lowercase_mapping
    titlecase_mapping
  }
  set lRet [list]

  set fd [open $zName]

  # The whole parse runs under [catch] so that the channel can be closed
  # before any error is propagated to the caller.
  set rc [catch {
    while { ![eof $fd] } {
      set line [gets $fd]
      if {$line == ""} continue

      set fields [split $line ";"]
      if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
      foreach $lField $fields {}

      # Convert the hex field with [format] rather than an unbraced [expr],
      # so that text read from the file is never evaluated as an expression.
      set iCode [format %d "0x$code"]
      set bAlnum [expr {
           [lsearch {L N} [string range $general_category 0 0]] >= 0
        || $general_category=="Co"
      }]

      if { !$bAlnum } { lappend lRet $iCode }
    }
  } msg]

  if {$rc} {
    set savedInfo $::errorInfo
    set savedCode $::errorCode
    catch {close $fd}
    error $msg $savedInfo $savedCode
  }
  close $fd
  return $lRet
}
#
# Load the non-alphanumeric codepoints from the file named by the global
# variable "unicodedata.txt" (via [an_load_unicodedata_text]) and collapse
# them into runs of consecutive codepoints. Returns a list of
# {iFirst nRange} pairs, one per run, in the order the codepoints were
# returned by the loader.
#
proc an_load_separator_ranges {} {
  global unicodedata.txt

  set lRange [list]
  unset -nocomplain iFirst
  unset -nocomplain nRange

  foreach sep [an_load_unicodedata_text ${unicodedata.txt}] {
    if {[info exists iFirst]} {
      # Extend the current run if $sep immediately follows it.
      if {$sep == ($iFirst+$nRange)} {
        incr nRange
        continue
      }
      # Otherwise the current run is complete.
      lappend lRange [list $iFirst $nRange]
    }
    set iFirst $sep
    set nRange 1
  }

  # Flush the final run.
  lappend lRange [list $iFirst $nRange]
  return $lRange
}
#
# Write the C declaration of the aEntry[] array used by the generated
# isalnum() function to stdout.
#
# Parameter $lRange is a list of {iFirst nRange} pairs. Each pair is
# emitted as the 32-bit value ((iFirst<<10) + nRange). An error is raised,
# before anything is printed, if any iFirst needs more than 22 bits or any
# nRange needs more than 10 bits.
#
proc an_print_range_array {lRange} {
  set iFirstMax 0
  set nRangeMax 0
  foreach range $lRange {
    foreach {iFirst nRange} $range {}
    if {$iFirst > $iFirstMax} {set iFirstMax $iFirst}
    if {$nRange > $nRangeMax} {set nRangeMax $nRange}
  }
  if {$iFirstMax >= (1<<22)} {error "first-max is too large for format"}
  if {$nRangeMax >= (1<<10)} {error "range-max is too large for format"}

  # The formula in the emitted comment must match the encoding used below
  # and the decoding in the generated C code, i.e. a shift of 10 bits.
  puts -nonewline " "
  puts [string trim {
  /* Each unsigned integer in the following array corresponds to a contiguous
  ** range of unicode codepoints that are not either letters or numbers (i.e.
  ** codepoints for which this function should return 0).
  **
  ** The most significant 22 bits in each 32-bit value contain the first
  ** codepoint in the range. The least significant 10 bits are used to store
  ** the size of the range (always at least 1). In other words, the value
  ** ((C<<10) + N) represents a range of N codepoints starting with codepoint
  ** C. It is not possible to represent a range larger than 1023 codepoints
  ** using this format.
  */
  }]
  puts -nonewline " static const unsigned int aEntry\[\] = \{"
  set i 0
  foreach range $lRange {
    foreach {iFirst nRange} $range {}
    set u32 [format "0x%08X" [expr {($iFirst<<10) + $nRange}]]

    # Five values per output line.
    if {($i % 5)==0} {puts "" ; puts -nonewline " "}
    puts -nonewline " $u32,"
    incr i
  }
  puts ""
  puts " \};"
}
#
# Write the C declaration of the aAscii[4] bitmap to stdout.
#
# Parameter $lRange is a list of {iFirst nRange} pairs. Bit (C & 0x1F) of
# word (C >> 5) is set for every codepoint C <= 127 covered by a range.
#
proc an_print_ascii_bitmap {lRange} {
  set aAscii [list 0 0 0 0]

  foreach range $lRange {
    foreach {iFirst nRange} $range {}
    set iEnd [expr {$iFirst + $nRange}]
    for {set cp $iFirst} {$cp < $iEnd} {incr cp} {
      if {$cp > 127} continue
      set iWord [expr {$cp >> 5}]
      set mask [expr {1 << ($cp & 0x001F)}]
      lset aAscii $iWord [expr {[lindex $aAscii $iWord] | $mask}]
    }
  }

  puts " static const unsigned int aAscii\[4\] = \{"
  set zRow " "
  foreach word $aAscii {
    append zRow [format " 0x%08X," $word]
  }
  puts $zRow
  puts " \};"
}
#
# Write a C function named $zFunc to stdout that returns true if its
# argument is a codepoint classified as a letter or number.
#
# Parameter $lRange is a list of {iFirst nRange} pairs describing the
# codepoints that are NOT letters or numbers. It is passed to
# [an_print_range_array] and [an_print_ascii_bitmap], which emit the
# aEntry[] and aAscii[] tables used by the generated function body.
#
proc print_isalnum {zFunc lRange} {
  puts "/*"
  puts "** Return true if the argument corresponds to a unicode codepoint"
  puts "** classified as either a letter or a number. Otherwise false."
  puts "**"
  puts "** The results are undefined if the value passed to this function"
  puts "** is less than zero."
  puts "*/"
  puts "int ${zFunc}\(int c)\{"
  an_print_range_array $lRange
  an_print_ascii_bitmap $lRange

  # The ASCII bitmap test shifts an unsigned 1: (c & 0x1F) may be 31, and
  # shifting a signed int into its sign bit is undefined behaviour in C.
  puts {
  if( c<128 ){
    return ( (aAscii[c >> 5] & ((unsigned int)1 << (c & 0x001F)))==0 );
  }else if( c<(1<<22) ){
    unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
    int iRes = 0;
    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
    int iLo = 0;
    while( iHi>=iLo ){
      int iTest = (iHi + iLo) / 2;
      if( key >= aEntry[iTest] ){
        iRes = iTest;
        iLo = iTest+1;
      }else{
        iHi = iTest-1;
      }
    }
    assert( aEntry[0]<key );
    assert( key>=aEntry[iRes] );
    return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));
  }
  return 1;}
  puts "\}"
}
#
# Write the C function isalnum_test() to stdout. The generated function
# checks the function named $zFunc against tables built from $lRange, a
# list of {iFirst nRange} pairs describing the separator codepoints:
#
#   aAlnum[]       expected result for every codepoint below 70000
#   aLargeSep[]    separator codepoints at or above 70000
#   aLargeOther[]  non-separator neighbours of the aLargeSep[] codepoints
#
proc print_test_isalnum {zFunc lRange} {
  # a($cp) exists for every separator codepoint $cp.
  foreach range $lRange {
    foreach {iFirst nRange} $range {}
    set iEnd [expr {$iFirst + $nRange}]
    for {set cp $iFirst} {$cp < $iEnd} {incr cp} { set a($cp) 1 }
  }

  puts "static int isalnum_test(int *piCode)\{"
  puts -nonewline " unsigned char aAlnum\[\] = \{"
  for {set i 0} {$i < 70000} {incr i} {
    if {($i % 32)==0} { puts "" ; puts -nonewline " " }
    if {[info exists a($i)]} {
      puts -nonewline "0,"
    } else {
      puts -nonewline "1,"
    }
  }
  puts ""
  puts " \};"

  # Sorted separator codepoints that are too large for aAlnum[].
  set lLarge [list]
  foreach iSep [lsort -integer [array names a]] {
    if {$iSep >= 70000} { lappend lLarge $iSep }
  }

  puts -nonewline " int aLargeSep\[\] = \{"
  set i 0
  foreach iSep $lLarge {
    if {($i % 8)==0} { puts "" ; puts -nonewline " " }
    puts -nonewline " $iSep,"
    incr i
  }
  puts ""
  puts " \};"

  puts -nonewline " int aLargeOther\[\] = \{"
  set i 0
  foreach iSep $lLarge {
    # Emit the codepoint before and the codepoint after $iSep, in that
    # order, whenever that neighbour is not itself a separator.
    foreach iNear [list [expr {$iSep-1}] [expr {$iSep+1}]] {
      if {[info exists a($iNear)]} continue
      if {($i % 8)==0} { puts "" ; puts -nonewline " " }
      puts -nonewline " $iNear,"
      incr i
    }
  }
  puts ""
  puts " \};"

  puts [subst -nocommands {
  int i;
  for(i=0; i<sizeof(aAlnum)/sizeof(aAlnum[0]); i++){
    if( ${zFunc}(i)!=aAlnum[i] ){
      *piCode = i;
      return 1;
    }
  }
  for(i=0; i<sizeof(aLargeSep)/sizeof(aLargeSep[0]); i++){
    if( ${zFunc}(aLargeSep[i])!=0 ){
      *piCode = aLargeSep[i];
      return 1;
    }
  }
  for(i=0; i<sizeof(aLargeOther)/sizeof(aLargeOther[0]); i++){
    if( ${zFunc}(aLargeOther[i])!=1 ){
      *piCode = aLargeOther[i];
      return 1;
    }
  }
  }]
  puts " return 0;"
  puts "\}"
}
#-------------------------------------------------------------------------

#
# Parameter $zName must be a path to the file CaseFolding.txt. This command
# reads the file and, for every record whose status field is "C" or "S",
# sets an entry in the global array tl_lookup_table that maps the source
# codepoint to the codepoint it folds to (both as decimal integers). Blank
# lines and lines starting with "#" are ignored.
#
# The file is closed before the command returns, whether or not an error
# occurs.
#
proc tl_load_casefolding_txt {zName} {
  global tl_lookup_table

  set fd [open $zName]

  # The whole parse runs under [catch] so that the channel can be closed
  # before any error is propagated to the caller.
  set rc [catch {
    while { ![eof $fd] } {
      set line [gets $fd]
      if {[string range $line 0 0] == "#"} continue
      if {$line == ""} continue

      # Fields are: code; status; mapping; # comment
      foreach x {a b c d} {unset -nocomplain $x}
      foreach {a b c d} [split $line ";"] {}

      # Convert the hex fields with [format] rather than an unbraced [expr],
      # so that text read from the file is never evaluated as an expression.
      set a2 [list]
      set c2 [list]
      foreach elem $a { lappend a2 [format %d "0x[string trim $elem]"] }
      foreach elem $c { lappend c2 [format %d "0x[string trim $elem]"] }
      set b [string trim $b]

      if {$b=="C" || $b=="S"} { set tl_lookup_table($a2) $c2 }
    }
  } msg]

  if {$rc} {
    set savedInfo $::errorInfo
    set savedCode $::errorCode
    catch {close $fd}
    error $msg $savedInfo $savedCode
  }
  close $fd
}
#
# Compress the global array tl_lookup_table into a list of records and
# return it. Each record is a list {iFirst nIncr nRange nOff} describing
# nRange codepoints, starting at iFirst and spaced nIncr (1 or 2) apart,
# each of which maps to itself plus nOff.
#
proc tl_create_records {} {
  global tl_lookup_table

  set lRecord [list]

  # State of the record currently being accumulated. An empty iFirst means
  # that no record has been started yet.
  set iFirst ""
  set nIncr 0
  set nRange 0
  set nOff 0

  foreach code [lsort -integer [array names tl_lookup_table]] {
    set delta [expr {$tl_lookup_table($code) - $code}]

    if {$iFirst != ""} {
      # Distance from the last codepoint of the current record.
      set step [expr {$code - ($iFirst + ($nIncr * ($nRange - 1)))}]

      # A record holding a single codepoint may still choose its stride.
      if {$nRange==1 && ($step==1 || $step==2)} {
        set nIncr $step
      }

      if {$step == $nIncr && $delta == $nOff} {
        incr nRange
        continue
      }

      # $code does not fit: flush the current record.
      if {$nRange==1} { set nIncr 1 }
      lappend lRecord [list $iFirst $nIncr $nRange $nOff]
    }

    # Start a new record at $code.
    set iFirst $code
    set nOff $delta
    set nRange 1
    set nIncr 1
  }

  lappend lRecord [list $iFirst $nIncr $nRange $nOff]
  return $lRecord
}
#
# Write the explanatory comment and the opening of the C declaration of
# the aEntry[] case-folding table to stdout.
#
proc tl_print_table_header {} {
  set zComment [string trim {
  /* Each entry in the following array defines a rule for folding a range
  ** of codepoints to lower case. The rule applies to a range of nRange
  ** codepoints starting at codepoint iCode.
  **
  ** If the least significant bit in flags is clear, then the rule applies
  ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
  ** need to be folded). Or, if it is set, then the rule only applies to
  ** every second codepoint in the range, starting with codepoint C.
  **
  ** The 7 most significant bits in flags are an index into the aiOff[]
  ** array. If a specific codepoint C does require folding, then its lower
  ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
  **
  ** The contents of this array are generated by parsing the CaseFolding.txt
  ** file distributed as part of the "Unicode Character Database". See
  ** http://www.unicode.org for details.
  */
  }]
  puts " $zComment"

  foreach zLine [list \
    " static const struct TableEntry \{" \
    " unsigned short iCode;" \
    " unsigned char flags;" \
    " unsigned char nRange;" \
    " \} aEntry\[\] = \{" \
  ] {
    puts $zLine
  }
}
#
# Write one initializer of the aEntry[] case-folding table to stdout.
#
#   togglevar  Name of a variable in the caller's scope holding the output
#              column (0, 1 or 2). It is created if it does not exist.
#   entry      A record {iFirst nIncr nRange nOff}.
#   liOff      The list of distinct offsets; the position of the record's
#              offset in this list is stored in the entry's flags field.
#
# Returns 1, without printing anything, if the record starts at a codepoint
# that does not fit in the 16-bit iCode field (65536 or larger). Otherwise
# prints the entry, three to a line, and returns 0. An error is raised if
# the range does not fit in the 8-bit nRange field or if the offset is not
# present in $liOff.
#
proc tl_print_table_entry {togglevar entry liOff} {
  upvar $togglevar t
  foreach {iFirst nIncr nRange nOff} $entry {}

  # iCode is an unsigned short, so 65536 itself is already out of range.
  if {$iFirst >= (1<<16)} { return 1 }

  if {[info exists t]==0} {set t 0}

  # Bit 0 of flags selects "every second codepoint"; the stored range is
  # then twice the number of codepoints folded.
  set flags 0
  if {$nIncr==2} { set flags 1 ; set nRange [expr {$nRange * 2}] }
  if {$nRange > 255} { error "range is too large for format" }

  # Negative offsets are stored modulo 2^16.
  if {$nOff<0} { incr nOff [expr {1<<16}] }

  set idx [lsearch -exact $liOff $nOff]
  if {$idx<0} {error "malfunction generating aiOff"}
  set flags [expr {$flags + $idx*2}]

  if {$t==0} { puts -nonewline " " }
  set txt "{$iFirst, $flags, $nRange},"
  if {$t==2} {
    puts $txt
  } else {
    puts -nonewline [format "% -23s" $txt]
  }
  set t [expr {($t+1)%3}]

  return 0
}
#
# Write the closing of the aEntry[] case-folding table to stdout.
# Parameter $togglevar names the caller's output-column variable; if it is
# non-zero the current line of entries is terminated first.
#
proc tl_print_table_footer {togglevar} {
  upvar 1 $togglevar t
  if {$t == 0} {
    puts " \};"
    return
  }
  puts ""
  puts " \};"
}
#
# Write a C "else if" clause to stdout that folds the codepoints of one
# record {iFirst nIncr nRange nOff} by adding nOff. Records with a stride
# of 2 are not supported and raise an error.
#
proc tl_print_if_entry {entry} {
  set iFirst [lindex $entry 0]
  set nIncr  [lindex $entry 1]
  set nRange [lindex $entry 2]
  set nOff   [lindex $entry 3]

  if {$nIncr==2} {error "tl_print_if_entry needs improvement!"}

  # One past the last codepoint covered by the record.
  set iEnd [expr {$iFirst + $nRange}]

  puts " else if( c>=$iFirst && c<$iEnd )\{"
  puts " ret = c + $nOff;"
  puts " \}"
}
#
# Return the sorted list of distinct offsets used by the records in
# $lRecord (each a list {iFirst nIncr nRange iOff}). Negative offsets are
# stored modulo 2^16. An error is raised if there are more than 128
# distinct offsets.
#
proc tl_generate_ioff_table {lRecord} {
  set lSeen [list]
  foreach entry $lRecord {
    set iOff [lindex $entry 3]
    if {$iOff<0} { incr iOff [expr {1<<16}] }
    lappend lSeen $iOff
  }

  set liOff [lsort -integer -unique $lSeen]
  if {[llength $liOff]>128} { error "Too many distinct ioffs" }
  return $liOff
}
#
# Write the C declaration of the aiOff[] offset table to stdout, eight
# values per line. Parameter $liOff is the list of offsets to emit.
#
proc tl_print_ioff_table {liOff} {
  set zOut " static const unsigned short aiOff\[\] = \{"

  set nDone 0
  foreach off $liOff {
    # Start a new output line before every eighth value.
    if {($nDone % 8)==0} { append zOut "\n" " " }
    append zOut [format "% -7s" "$off,"]
    incr nDone
  }

  puts $zOut
  puts " \};"
}
#
# Write a C function named $zFunc to stdout that folds a codepoint to
# lower case and, if its second argument is true, passes the result for
# codepoints in the range 128..65535 through remove_diacritic().
#
# The folding rules come from [tl_create_records]. Records that
# [tl_print_table_entry] reports as too large for the aEntry[] table are
# emitted afterwards as a chain of "else if" clauses.
#
proc print_fold {zFunc} {

  set lRecord [tl_create_records]

  # Records that do not fit in the table.
  set lOverflow [list]

  foreach zLine [list \
    "/*" \
    "** Interpret the argument as a unicode codepoint. If the codepoint" \
    "** is an upper case character that has a lower case equivalent," \
    "** return the codepoint corresponding to the lower case version." \
    "** Otherwise, return a copy of the argument." \
    "**" \
    "** The results are undefined if the value passed to this function" \
    "** is less than zero." \
    "*/" \
    "int ${zFunc}\(int c, int bRemoveDiacritic)\{" \
  ] {
    puts $zLine
  }

  set liOff [tl_generate_ioff_table $lRecord]
  tl_print_table_header
  foreach entry $lRecord {
    set bTooLarge [tl_print_table_entry iColumn $entry $liOff]
    if {$bTooLarge} {
      lappend lOverflow $entry
    }
  }
  tl_print_table_footer iColumn
  tl_print_ioff_table $liOff

  puts {
  int ret = c;

  assert( c>=0 );
  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );

  if( c<128 ){
    if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
  }else if( c<65536 ){
    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
    int iLo = 0;
    int iRes = -1;

    while( iHi>=iLo ){
      int iTest = (iHi + iLo) / 2;
      int cmp = (c - aEntry[iTest].iCode);
      if( cmp>=0 ){
        iRes = iTest;
        iLo = iTest+1;
      }else{
        iHi = iTest-1;
      }
    }
    assert( iRes<0 || c>=aEntry[iRes].iCode );

    if( iRes>=0 ){
      const struct TableEntry *p = &aEntry[iRes];
      if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
        ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
        assert( ret>0 );
      }
    }

    if( bRemoveDiacritic ) ret = remove_diacritic(ret);
  }
  }

  foreach entry $lOverflow {
    tl_print_if_entry $entry
  }

  puts ""
  puts " return ret;"
  puts "\}"
}
#
# Write the C function fold_test() to stdout. The generated function
# checks the function named $zFunc, with and without diacritic removal,
# for every codepoint below 70000.
#
#   zFunc     Name of the C fold function under test.
#   mappings  List of {codepoint char} pairs; an empty char stands for 0.
#
# The expected folded values are taken from the global array
# tl_lookup_table.
#
proc print_fold_test {zFunc mappings} {
  global tl_lookup_table

  # extra($cp) is the value $cp becomes when diacritics are removed.
  foreach m $mappings {
    set cp [lindex $m 0]
    set ch [lindex $m 1]
    if {$ch == ""} {
      set extra($cp) 0
    } else {
      scan $ch %c iAscii
      set extra($cp) $iAscii
    }
  }

  puts "static int fold_test(int *piCode)\{"
  puts -nonewline " static int aLookup\[\] = \{"
  for {set i 0} {$i < 70000} {incr i} {

    # Expected result of plain folding.
    if {[info exists tl_lookup_table($i)]} {
      set expected $tl_lookup_table($i)
    } else {
      set expected $i
    }

    # Expected result of folding followed by diacritic removal.
    if {[info exists extra($expected)]} {
      set expected2 $extra($expected)
    } else {
      set expected2 $expected
    }

    if {($i % 4)==0} { puts "" ; puts -nonewline " " }
    puts -nonewline "$expected, $expected2, "
  }
  puts " \};"

  foreach zLine [list \
    " int i;" \
    " for(i=0; i<sizeof(aLookup)/sizeof(aLookup\[0\]); i++)\{" \
    " int iCode = (i/2);" \
    " int bFlag = i & 0x0001;" \
    " if( ${zFunc}\(iCode, bFlag)!=aLookup\[i\] )\{" \
    " *piCode = iCode;" \
    " return 1;" \
    " \}" \
    " \}" \
    " return 0;" \
    "\}" \
  ] {
    puts $zLine
  }
}
#
# Write the header of the generated C file to stdout: the copyright
# blessing, a "do not edit" notice, the opening preprocessor guards and
# the #include of <assert.h>.
#
proc print_fileheader {} {
  set zBanner [string trim {
/*
** 2012 May 25
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
*/

/*
** DO NOT EDIT THIS MACHINE GENERATED FILE.
*/
  }]
  puts $zBanner

  foreach zLine [list \
    "" \
    "#ifndef SQLITE_DISABLE_FTS3_UNICODE" \
    "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)" \
    "" \
    "#include <assert.h>" \
    "" \
  ] {
    puts $zLine
  }
}
#
# Write a C main() function to stdout that runs isalnum_test() and
# fold_test(), reports the outcome of each with printf(), and returns
# non-zero if either test failed.
#
proc print_test_main {} {
  foreach zLine [list \
    "" \
    "#include <stdio.h>" \
    "" \
    "int main(int argc, char **argv)\{" \
    " int r1, r2;" \
    " int code;" \
    " r1 = isalnum_test(&code);" \
    " if( r1 ) printf(\"isalnum(): Problem with code %d\\n\",code);" \
    " else printf(\"isalnum(): test passed\\n\");" \
    " r2 = fold_test(&code);" \
    " if( r2 ) printf(\"fold(): Problem with code %d\\n\",code);" \
    " else printf(\"fold(): test passed\\n\");" \
    " return (r1 || r2);" \
    "\}" \
  ] {
    puts $zLine
  }
}
# Process the command line arguments. Exit early if they are not to
# our liking.
#
# [usage] writes the usage message to stderr and exits with status 1.
#
proc usage {} {
  set zMsg "Usage: $::argv0 ?-test? "
  append zMsg "<CaseFolding.txt file> <UnicodeData.txt file>"
  puts stderr $zMsg
  exit 1
}
# Accepted command lines are:
#
#   <CaseFolding.txt file> <UnicodeData.txt file>
#   -test <CaseFolding.txt file> <UnicodeData.txt file>
#
# Anything else prints the usage message and exits.
#
switch -- [llength $argv] {
  2 {
    set generate_test_code 0
  }
  3 {
    if {[lindex $argv 0]!="-test"} usage
    set generate_test_code 1
  }
  default usage
}
set unicodedata.txt [lindex $argv end]
set casefolding.txt [lindex $argv end-1]

print_fileheader

# Print the isalnum() function to stdout.
#
set lRange [an_load_separator_ranges]
print_isalnum sqlite3FtsUnicodeIsalnum $lRange

# Leave a gap (two blank lines) between the two generated C functions.
#
puts "\n"

# Load the fold data. This is used by the [rd_XXX] commands
# as well as [print_fold].
tl_load_casefolding_txt ${casefolding.txt}

# Print remove_diacritic() and the isdiacritic() function to stdout,
# each followed by two blank lines.
#
set mappings [rd_load_unicodedata_text ${unicodedata.txt}]
print_rd $mappings
puts "\n"
print_isdiacritic sqlite3FtsUnicodeIsdiacritic $mappings
puts "\n"

# Print the fold() function to stdout.
#
print_fold sqlite3FtsUnicodeFold

# Print the test routines and main() function to stdout, if -test
# was specified.
#
if {$generate_test_code} {
  print_test_isalnum sqlite3FtsUnicodeIsalnum $lRange
  print_fold_test sqlite3FtsUnicodeFold $mappings
  print_test_main
}

# Close the preprocessor guards opened by [print_fileheader].
puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */"
puts "#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */"