3 # The author disclaims copyright to this source code. In place of
4 # a legal notice, here is a blessing:
6 # May you do good and not evil.
7 # May you find forgiveness for yourself and forgive others.
8 # May you share freely, never taking more than you give.
10 #***********************************************************************
13 if {![info exists testdir
]} {
14 set testdir
[file join [file dirname
[info script
]] .. .. .. test
]
16 source $testdir/tester.tcl
19 proc return_if_no_fts5
{} {
25 proc return_if_no_fts5
{} {}
29 sqlite3_fts5_may_be_corrupt
0
33 proc fts5_test_poslist
{cmd
} {
35 for {set i
0} {$i < [$cmd xInstCount
]} {incr i
} {
36 lappend res
[string map
{{ } .
} [$cmd xInst
$i]]
41 proc fts5_test_poslist2
{cmd
} {
44 for {set i
0} {$i < [$cmd xPhraseCount
]} {incr i
} {
45 $cmd xPhraseForeach
$i c o
{
54 proc fts5_test_collist
{cmd
} {
57 for {set i
0} {$i < [$cmd xPhraseCount
]} {incr i
} {
58 $cmd xPhraseColumnForeach
$i c
{ lappend res
$i.
$c }
64 proc fts5_collist
{cmd iPhrase
} {
66 $cmd xPhraseColumnForeach
$iPhrase c
{ lappend res
$c }
70 proc fts5_test_columnsize
{cmd
} {
72 for {set i
0} {$i < [$cmd xColumnCount
]} {incr i
} {
73 lappend res
[$cmd xColumnSize
$i]
78 proc fts5_columntext
{cmd iCol
} {
79 $cmd xColumnText
$iCol
82 proc fts5_test_columntext
{cmd
} {
84 for {set i
0} {$i < [$cmd xColumnCount
]} {incr i
} {
85 lappend res
[$cmd xColumnText
$i]
90 proc fts5_test_columntotalsize
{cmd
} {
92 for {set i
0} {$i < [$cmd xColumnCount
]} {incr i
} {
93 lappend res
[$cmd xColumnTotalSize
$i]
98 proc test_append_token
{varname token iStart iEnd
} {
103 proc fts5_test_tokenize
{cmd
} {
105 for {set i
0} {$i < [$cmd xColumnCount
]} {incr i
} {
107 $cmd xTokenize
[$cmd xColumnText
$i] [list test_append_token tokens
]
113 proc fts5_test_rowcount
{cmd
} {
117 proc test_queryphrase_cb
{cnt cmd
} {
119 for {set i
0} {$i < [$cmd xInstCount
]} {incr i
} {
120 foreach {ip ic io
} [$cmd xInst
$i] break
123 foreach ic
[array names A
] {
124 lset L
$ic [expr {[lindex $L $ic] + 1}]
127 proc fts5_test_queryphrase
{cmd
} {
129 for {set i
0} {$i < [$cmd xPhraseCount
]} {incr i
} {
131 for {set j
0} {$j < [$cmd xColumnCount
]} {incr j
} { lappend cnt
0 }
132 $cmd xQueryPhrase
$i [list test_queryphrase_cb cnt
]
138 proc fts5_queryphrase
{cmd iPhrase
} {
140 for {set j
0} {$j < [$cmd xColumnCount
]} {incr j
} { lappend cnt
0 }
141 $cmd xQueryPhrase
$iPhrase [list test_queryphrase_cb cnt
]
145 proc fts5_test_phrasecount
{cmd
} {
149 proc fts5_test_all
{cmd
} {
151 lappend res columnsize
[fts5_test_columnsize
$cmd]
152 lappend res columntext
[fts5_test_columntext
$cmd]
153 lappend res columntotalsize
[fts5_test_columntotalsize
$cmd]
154 lappend res poslist
[fts5_test_poslist
$cmd]
155 lappend res tokenize
[fts5_test_tokenize
$cmd]
156 lappend res rowcount
[fts5_test_rowcount
$cmd]
160 proc fts5_aux_test_functions
{db
} {
164 fts5_test_columntotalsize
172 fts5_test_queryphrase
173 fts5_test_phrasecount
178 sqlite3_fts5_create_function
$db $f $f
182 proc fts5_segcount
{tbl
} {
184 foreach n
[fts5_level_segs
$tbl] { incr N
$n }
188 proc fts5_level_segs
{tbl
} {
189 set sql
"SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"
191 foreach L
[lrange [db one
$sql] 1 end
] {
192 lappend ret
[expr [llength $L] - 3]
197 proc fts5_level_segids
{tbl
} {
198 set sql
"SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"
200 foreach L
[lrange [db one
$sql] 1 end
] {
202 foreach S
[lrange $L 3 end
] {
203 regexp {id
=([1234567890]*)} $S -> segid
211 proc fts5_rnddoc
{n
} {
212 set map
[list 0 a
1 b
2 c
3 d
4 e
5 f
6 g
7 h
8 i
9 j
]
214 for {set i
0} {$i < $n} {incr i
} {
215 lappend doc
"x[string map $map [format %.3d [expr int(rand()*1000)]]]"
220 #-------------------------------------------------------------------------
223 # nearset aCol ?-pc VARNAME? ?-near N? ?-col C? -- phrase1 phrase2...
225 # This command is used to test if a document (set of column values) matches
226 # the logical equivalent of a single FTS5 NEAR() clump and, if so, return
227 # the equivalent of an FTS5 position list.
229 # Parameter $aCol is passed a list of the column values for the document
230 # to test. Parameters $phrase1 and so on are the phrases.
232 # The result is a list of phrase hits. Each phrase hit is formatted as
233 # three integers separated by "." characters, in the following format:
235 # <phrase number> . <column number> . <token offset>
239 # -near N (NEAR distance. Default 10)
240 # -col C (List of column indexes to match against)
241 # -pc VARNAME (variable in caller frame to use for phrase numbering)
242 # -dict VARNAME (array in caller frame to use for synonyms)
244 proc nearset
{aCol args
} {
246 # Process the command line options.
253 set nOpt
[lsearch -exact $args --]
254 if {$nOpt<0} { error "no -- option" }
256 # Set $lPhrase to be a list of phrases. $nPhrase its length.
257 set lPhrase
[lrange $args [expr $nOpt+1] end
]
258 set nPhrase
[llength $lPhrase]
260 foreach {k v
} [lrange $args 0 [expr $nOpt-1]] {
261 if {[info exists O
($k)]==0} { error "unrecognized option $k" }
268 upvar $O(-pc) counter
271 if {$O(-dict)!=""} { upvar $O(-dict) aDict
}
273 for {set j
0} {$j < [llength $aCol]} {incr j
} {
274 for {set i
0} {$i < $nPhrase} {incr i
} {
279 # Loop through each column of the current row.
280 for {set iCol
0} {$iCol < [llength $aCol]} {incr iCol
} {
282 # If there is a column filter, test whether this column is excluded. If
283 # so, skip to the next iteration of this loop. Otherwise, set zCol to the
284 # column value and nToken to the number of tokens that comprise it.
285 if {$O(-col)!="" && [lsearch $O(-col) $iCol]<0} continue
286 set zCol
[lindex $aCol $iCol]
287 set nToken
[llength $zCol]
289 # Each iteration of the following loop searches a substring of the
290 # column value for phrase matches. The last token of the substring
291 # is token $iLast of the column value. The first token is:
293 # iFirst = ($iLast - $O(-near) - 1)
295 # where $sz is the length of the phrase being searched for. A phrase
296 # counts as matching the substring if its first token lies on or before
297 # $iLast and its last token on or after $iFirst.
299 # For example, if the query is "NEAR(a+b c, 2)" and the column value:
301 # "x x x x A B x x C x"
302 # 0 1 2 3 4 5 6 7 8 9"
304 # when (iLast==8 && iFirst=5) the range will contain both phrases and
305 # so both instances can be added to the output poslists.
307 set iLast
[expr $O(-near) >= $nToken ?
$nToken - 1 : $O(-near)]
308 for { } {$iLast < $nToken} {incr iLast
} {
310 catch { array unset B
}
312 for {set iPhrase
0} {$iPhrase<$nPhrase} {incr iPhrase
} {
313 set p
[lindex $lPhrase $iPhrase]
314 set nPm1
[expr {[llength $p] - 1}]
315 set iFirst
[expr $iLast - $O(-near) - [llength $p]]
317 for {set i
$iFirst} {$i <= $iLast} {incr i
} {
318 set lCand
[lrange $zCol $i [expr $i+$nPm1]]
320 foreach tok
$p term
$lCand {
321 if {[nearset_match aDict
$tok $term]==0} { set bMatch
0 ; break }
323 if {$bMatch} { lappend B
($iPhrase) $i }
326 if {![info exists B
($iPhrase)]} break
329 if {$iPhrase==$nPhrase} {
330 for {set iPhrase
0} {$iPhrase<$nPhrase} {incr iPhrase
} {
331 set A
($iCol,$iPhrase) [concat $A($iCol,$iPhrase) $B($iPhrase)]
332 set A
($iCol,$iPhrase) [lsort -integer -uniq $A($iCol,$iPhrase)]
339 #puts [array names A]
341 for {set iPhrase
0} {$iPhrase<$nPhrase} {incr iPhrase
} {
342 for {set iCol
0} {$iCol < [llength $aCol]} {incr iCol
} {
343 foreach a
$A($iCol,$iPhrase) {
344 lappend res
"$counter.$iCol.$a"
350 #puts "$aCol -> $res"
354 proc nearset_match
{aDictVar tok term
} {
355 if {[string match
$tok $term]} { return 1 }
357 upvar $aDictVar aDict
358 if {[info exists aDict
($tok)]} {
359 foreach s
$aDict($tok) {
360 if {[string match
$s $term]} { return 1 }
366 #-------------------------------------------------------------------------
371 # Sort a position list of the type returned by command [nearset]
373 proc sort_poslist
{L
} {
374 lsort -command instcompare
$L
376 proc instcompare
{lhs rhs
} {
377 foreach {p1 c1 o1
} [split $lhs .
] {}
378 foreach {p2 c2 o2
} [split $rhs .
] {}
380 set res
[expr $c1 - $c2]
381 if {$res==0} { set res
[expr $o1 - $o2] }
382 if {$res==0} { set res
[expr $p1 - $p2] }
387 #-------------------------------------------------------------------------
388 # Logical operators used by the commands returned by fts5_tcl_expr().
392 if {[llength $a]==0} { return [list] }
394 sort_poslist
[concat {*}$args]
397 sort_poslist
[concat {*}$args]
400 if {[llength $b]>0} { return [list] }
404 #-------------------------------------------------------------------------
405 # This command is similar to [split], except that it also provides the
406 # start and end offsets of each token. For example:
408 # [fts5_tokenize_split "abc d ef"] -> {abc 0 3 d 4 5 ef 6 8}
411 proc gobble_whitespace
{textvar
} {
413 regexp {([ ]*)(.
*)} $t -> space t
414 return [string length
$space]
417 proc gobble_text
{textvar wordvar
} {
420 regexp {([^
]*)(.
*)} $t -> w t
421 return [string length
$w]
424 proc fts5_tokenize_split
{text} {
427 set iOff
[gobble_whitespace
text]
428 while {[set nToken
[gobble_text
text word
]]} {
429 lappend ret
$word $iOff [expr $iOff+$nToken]
431 incr iOff
[gobble_whitespace
text]
437 #-------------------------------------------------------------------------
439 proc foreach_detail_mode
{prefix script
} {
440 set saved
$::testprefix
441 foreach d
[list full col none
] {
442 set s
[string map
[list %DETAIL
% $d] $script]
444 set ::testprefix "$prefix-$d"
449 set ::testprefix $saved
452 proc detail_check
{} {
453 if {$::detail != "none" && $::detail!="full" && $::detail!="col"} {
454 error "not in foreach_detail_mode {...} block"
457 proc detail_is_none
{} { detail_check
; expr {$::detail == "none"} }
458 proc detail_is_col
{} { detail_check
; expr {$::detail == "col" } }
459 proc detail_is_full
{} { detail_check
; expr {$::detail == "full"} }
461 proc foreach_tokenizer_mode
{prefix script
} {
462 set saved
$::testprefix
463 foreach {d mapping
} {
465 "-origintext" {, tokenize
="origintext unicode61", tokendata
=1}
467 set s
[string map
[list %TOKENIZER
% $mapping] $script]
468 set ::testprefix "$prefix$d"
470 sqlite3_fts5_register_origintext db
473 set ::testprefix $saved
476 #-------------------------------------------------------------------------
477 # Convert a poslist of the type returned by fts5_test_poslist() to a
478 # collist as returned by fts5_test_collist().
480 proc fts5_poslist2collist
{poslist
} {
483 regexp {(.
*)\.
[1234567890]+} $h -> cand
486 set res
[lsort -command fts5_collist_elem_compare
-unique $res]
490 # Comparison function used by fts5_poslist2collist to sort collist entries.
491 proc fts5_collist_elem_compare
{a b
} {
492 foreach {a1 a2
} [split $a .
] {}
493 foreach {b1 b2
} [split $b .
] {}
495 if {$a1==$b1} { return [expr $a2 - $b2] }
496 return [expr $a1 - $b1]
500 #--------------------------------------------------------------------------
501 # Construct and return a tcl list equivalent to that returned by the SQL
502 # query executed against database handle [db]:
506 # fts5_test_poslist($tbl),
507 # fts5_test_collist($tbl)
509 # ORDER BY rowid $order;
511 proc fts5_query_data
{expr tbl
{order ASC
} {aDictVar
""}} {
513 # Figure out the set of columns in the FTS5 table. This routine does
514 # not handle tables with UNINDEXED columns, but if it did, it would
516 db
eval "PRAGMA table_info = $tbl" x
{ lappend lCols
$x(name
) }
519 if {$aDictVar != ""} {
520 upvar $aDictVar aDict
525 foreach e
$lCols { append cols
", '$e'" }
526 set tclexpr
[db one
[subst -novar {
527 SELECT fts5_expr_tcl
( $expr, 'nearset
$cols -dict $d -pc ::pc'
[set cols
] )
531 db
eval "SELECT rowid, * FROM $tbl ORDER BY rowid $order" x
{
533 foreach col
$lCols { lappend cols
$x($col) }
536 set rowdata
[eval $tclexpr]
537 if {$rowdata != ""} {
538 lappend res
$x(rowid
) $rowdata [fts5_poslist2collist
$rowdata]
545 #-------------------------------------------------------------------------
546 # Similar to [fts5_query_data], but omit the collist field.
548 proc fts5_poslist_data
{expr tbl
{order ASC
} {aDictVar
""}} {
552 upvar $aDictVar aDict
558 foreach {rowid poslist collist
} [fts5_query_data
$expr $tbl $order $dict] {
559 lappend res
$rowid $poslist
564 proc fts5_collist_data
{expr tbl
{order ASC
} {aDictVar
""}} {
568 upvar $aDictVar aDict
574 foreach {rowid poslist collist
} [fts5_query_data
$expr $tbl $order $dict] {
575 lappend res
$rowid $collist
580 #-------------------------------------------------------------------------
583 # This command will only work inside a [foreach_detail_mode] block. It tests
584 # whether or not expression $expr run on FTS5 table $tbl is supported by
585 # the current mode. If so, 1 is returned. If not, 0.
587 # detail=full (all queries supported)
588 # detail=col (all but phrase queries and NEAR queries)
589 # detail=none (all but phrase queries, NEAR queries, and column filters)
591 proc fts5_expr_ok
{expr tbl
} {
593 if {![detail_is_full
]} {
594 set nearset
"nearset_rc"
595 if {[detail_is_col
]} { set nearset
"nearset_rf" }
598 db
eval "PRAGMA table_info = $tbl" x
{ lappend lCols
$x(name
) }
601 foreach e
$lCols { append cols
", '$e'" }
603 set tclexpr
[db one
[subst -novar {
604 SELECT fts5_expr_tcl
( $expr, '
[set nearset
] $cols -pc ::pc'
[set cols
] )
607 if {$::expr_not_ok} { return 0 }
613 # Helper for [fts5_expr_ok]
614 proc nearset_rf
{aCol args
} {
615 set idx
[lsearch -exact $args --]
616 if {$idx != [llength $args]-2 ||
[llength [lindex $args end
]]!=1} {
622 # Helper for [fts5_expr_ok]
623 proc nearset_rc
{aCol args
} {
624 nearset_rf
$aCol {*}$args
625 if {[lsearch $args -col]>=0} {
632 execsql_pp
"SELECT * FROM ${tname}_idx"
633 execsql_pp
"SELECT id, quote(block), fts5_decode(id,block) FROM ${tname}_data"
636 #-------------------------------------------------------------------------
637 # Code for a simple Tcl tokenizer that supports synonyms at query time.
639 proc tclnum_tokenize
{mode tflags
text} {
640 foreach {w iStart iEnd
} [fts5_tokenize_split
$text] {
641 sqlite3_fts5_token
$w $iStart $iEnd
642 if {$tflags == $mode && [info exists
::tclnum_syn($w)]} {
643 foreach s
$::tclnum_syn($w) { sqlite3_fts5_token
-colo $s $iStart $iEnd }
648 proc tclnum_create
{args
} {
650 if {[llength $args]} {
651 set mode
[lindex $args 0]
653 if {$mode != "query" && $mode != "document"} { error "bad mode: $mode" }
654 return [list tclnum_tokenize
$mode]
657 proc fts5_tclnum_register
{db
} {
670 {a1 a2 a3 a4 a5 a6 a7 a8 a9
}
671 {b1 b2 b3 b4 b5 b6 b7 b8 b9
}
672 {c1 c2 c3 c4 c5 c6 c7 c8 c9
}
676 foreach x
$SYNDICT {if {$x!=$s} {lappend o
$x}}
677 set ::tclnum_syn($s) $o
680 sqlite3_fts5_create_tokenizer db tclnum tclnum_create
683 # End of tokenizer code.
684 #-------------------------------------------------------------------------