3 # May you do good and not evil.
4 # May you find forgiveness for yourself and forgive others.
5 # May you share freely, never taking more than you give.
7 #***********************************************************************
9 # Brute force (random data) tests for FTS3.
12 #-------------------------------------------------------------------------
14 # The FTS3 tests implemented in this file focus on testing that FTS3
15 # returns the correct set of documents for various types of full-text
16 # query. This is done using pseudo-randomly generated data and queries.
17 # The expected result of each query is calculated using Tcl code.
19 # 1. The database is initialized to contain a single table with three
20 # columns. 100 rows are inserted into the table. Each of the three
21 # values in each row is a document consisting of between 0 and 99
22 # terms. Terms are selected from a vocabulary of $G(nVocab) terms.
24 # 2. The following is performed 100 times:
26 # a. A row is inserted into the database. The row contents are
27 # generated as in step 1. The docid is a pseudo-randomly selected
28 # value between 0 and 1000000.
30 # b. A pseudo-randomly selected row is updated. One of its columns is
31 # set to contain a new document generated in the same way as the
32 # documents in step 1.
34 # c. A pseudo-randomly selected row is deleted.
36 # d. For each of several types of fts3 queries, 10 SELECT queries
39 # SELECT docid FROM <tbl> WHERE <tbl> MATCH '<query>'
41 # are evaluated. The results are compared to those calculated by
42 # Tcl code in this file. The patterns used for the different query
47 # 3. query = "<term> <term>"
48 # 4. query = "<term> <term> <term>"
49 # 5. query = "<prefix> <prefix> <prefix>"
50 # 6. query = <term> NEAR <term>
51 # 7. query = <term> NEAR/11 <term> NEAR/11 <term>
52 # 8. query = <term> OR <term>
53 # 9. query = <term> NOT <term>
54 # 10. query = <term> AND <term>
55 # 11. query = <term> NEAR <term> OR <term> NEAR <term>
56 # 12. query = <term> NEAR <term> NOT <term> NEAR <term>
57 # 13. query = <term> NEAR <term> AND <term> NEAR <term>
59 # where <term> is a term pseudo-randomly selected from the vocabulary
60 # and prefix is all but the last character of such a term followed by
63 # Every second iteration, steps (a) through (d) above are performed
64 # within a single transaction. This forces the queries in (d) to
65 # read data from both the database and the in-memory hash table
66 # that caches the full-text index entries created by steps (a), (b)
67 # and (c) until the transaction is committed.
69 # The procedure above is run 4 times, using advisory fts3 node sizes of 50,
70 # 500, 1000 and 2000 bytes.
72 # After the test using an advisory node-size of 50, an OOM test is run using
73 # the database. This test is similar to step (d) above, except that it tests
74 # the effects of transient and persistent OOM conditions encountered while
75 # executing each query.
78 set testdir [file dirname $argv0]
79 source $testdir/tester.tcl
81 # If this build does not include FTS3, skip the tests in this file.
83 ifcapable !fts3 { finish_test ; return }
84 source $testdir/fts3_common.tcl
85 source $testdir/malloc_common.tcl
94 # Generate a vocabulary of nVocab words. Each word is 2 to 4 characters long.
# Alphabet of 26 lowercase letters. Every character of a vocabulary word is
# taken from this list using a pseudo-random index in the range 0..25.
96 set lChar {a b c d e f g h i j k l m n o p q r s t u v w x y z}
97 for {set i 0} {$i < $nVocab} {incr i} {
# $len evaluates to 2, 3 or 4. The first two letters are always appended;
# the third and fourth are appended only when $len exceeds 2 and 3.
98 set len [expr int(rand()*3)+2]
99 set word [lindex $lChar [expr int(rand()*26)]]
100 append word [lindex $lChar [expr int(rand()*26)]]
101 if {$len>2} { append word [lindex $lChar [expr int(rand()*26)]] }
102 if {$len>3} { append word [lindex $lChar [expr int(rand()*26)]] }
# Return one element of the global list ::lVocab, selected by a
# pseudo-random index in the range 0 .. $::nVocab-1.
106 proc random_term {} {
107 lindex $::lVocab [expr {int(rand()*$::nVocab)}]
110 # Return a document consisting of $nWord arbitrarily selected terms
111 # from the $::lVocab list.
113 proc generate_doc {nWord} {
# Each term is drawn independently with [random_term], so the same term may
# appear more than once in the document.
115 for {set i 0} {$i < $nWord} {incr i} {
116 lappend doc [random_term]
123 # Primitives to update the table.
# Insert a row with docid $rowid into table t1. Each of the columns a, b
# and c receives a newly generated document of int(rand()*100) terms.
126 proc insert_row {rowid} {
127 set a [generate_doc [expr int((rand()*100))]]
128 set b [generate_doc [expr int((rand()*100))]]
129 set c [generate_doc [expr int((rand()*100))]]
130 execsql { INSERT INTO t1(docid, a, b, c) VALUES($rowid, $a, $b, $c) }
# Record the same three documents in the ::t1 array, keyed by rowid, so the
# Tcl array mirrors the table contents.
131 set ::t1($rowid) [list $a $b $c]
# Delete the row with the given rowid from table t1 and remove the matching
# entry from the ::t1 array.
133 proc delete_row {rowid} {
134 execsql { DELETE FROM t1 WHERE rowid = $rowid }
# [catch] suppresses the error [unset] raises when the element does not exist.
135 catch {unset ::t1($rowid)}
# Replace one pseudo-randomly chosen column (index 0, 1 or 2) of row $rowid
# with a newly generated document, in both the ::t1 array and table t1.
137 proc update_row {rowid} {
139 set iCol [expr int(rand()*3)]
140 set doc [generate_doc [expr int((rand()*100))]]
141 lset ::t1($rowid) $iCol $doc
# The column name is substituted into the SQL text by Tcl. The backslashes
# keep "$doc" and "$rowid" as literal text in the string passed to execsql.
142 execsql "UPDATE t1 SET [lindex $cols $iCol] = \$doc WHERE rowid = \$rowid"
# Scan the ::t1 array for rows whose text matches $zPrefix and collect the
# keys of the matching rows in $ret.
145 proc simple_phrase {zPrefix} {
# Turn every "*" in the pattern into the regular expression "[^ ]*", which
# matches any run of non-space characters.
148 set reg [string map {* {[^ ]*}} $zPrefix]
# Keys are visited in ascending integer order.
151 foreach key [lsort -integer [array names ::t1]] {
152 set value $::t1($key)
# The text under test is padded with one space on each side. On a match the
# key is recorded and [break] leaves the innermost enclosing loop.
# NOTE(review): that loop is presumably the one over the row's columns,
# which would stop a key being appended twice - confirm.
155 if {[regexp $reg " $col "]} { lappend ret $key ; break }
159 #lsort -uniq -integer $ret
163 # This [proc] is used to test the FTS3 matchinfo() function.
165 proc simple_token_matchinfo {zToken} {
# First pass: count the occurrences of $zToken in each column of every row.
175 foreach key [array names ::t1] {
176 set value $::t1($key)
178 foreach i {0 1 2} col $value {
# $hit is the number of elements of this column's document matching $zToken.
179 set hit [llength [lsearch -all $col $zToken]]
# nDoc($i) counts the rows in which column $i contains the token at least once.
182 if {$hit>0} { incr nDoc($i) }
# Second pass: build the result list in ascending docid order.
187 foreach docid [lsort -integer [array names a]] {
# Only rows whose largest per-column hit count is non-zero are reported.
188 if { [lindex [lsort -integer $a($docid)] end] } {
# Each matchinfo list starts with the values 1 and 3 (presumably the number
# of phrases and of columns - confirm), followed by three values per column:
# this row's hit count, then $nHit($i) and $nDoc($i).
189 set matchinfo [list 1 3]
190 foreach i {0 1 2} hit $a($docid) {
191 lappend matchinfo $hit $nHit($i) $nDoc($i)
193 lappend ret $docid $matchinfo
# Tcl-side calculation used to check NEAR queries. $termlist holds the
# query terms in order and $nNear is the NEAR distance. The entries of the
# ::t1 array are scanned and results accumulate in $ret.
200 proc simple_near {termlist nNear} {
203 foreach {key value} [array get ::t1] {
# $l starts out as every position of the first term within $v.
206 set l [lsearch -exact -all $v [lindex $termlist 0]]
# The remaining terms are processed one at a time.
207 foreach T [lrange $termlist 1 end] {
# Search window around position $i: $nNear+1 elements on either side, with
# the lower bound clamped to the start of the list.
210 set iStart [expr $i - $nNear - 1]
211 set iEnd [expr $i + $nNear + 1]
212 if {$iStart < 0} {set iStart 0}
213 foreach i2 [lsearch -exact -all [lrange $v $iStart $iEnd] $T] {
# A hit at position $i itself is not recorded.
215 if {$i2 != $i} { lappend l2 $i2 }
# The positions collected for $T become the candidate list for the next term.
218 set l [lsort -uniq -integer $l2]
222 #puts "MATCH($key): $v"
# The result is $ret sorted as integers with duplicates removed.
228 lsort -unique -integer $ret
231 # The following three procs:
237 # each take two arguments. Both arguments must be lists of integer values
238 # sorted by value. The return value is the list produced by evaluating
239 # the equivalent of "A op B", where op is the FTS3 operator NOT, OR or
# "A NOT B": collect in $ret the elements of A that do not occur in B,
# keeping the order in which they appear in A.
242 proc setop_not {A B} {
# Store each value of B as an array key so membership can be tested with
# [info exists].
243 foreach b $B { set n($b) {} }
245 foreach a $A { if {![info exists n($a)]} {lappend ret $a} }
# "A OR B": the concatenation of both lists, sorted as integers with
# duplicate values removed.
248 proc setop_or {A B} {
249 lsort -integer -uniq [concat $A $B]
# "A AND B": collect in $ret the elements of A that also occur in B,
# keeping the order in which they appear in A.
251 proc setop_and {A B} {
# Store each value of B as an array key so membership can be tested with
# [info exists].
252 foreach b $B { set n($b) {} }
254 foreach a $A { if {[info exists n($a)]} {lappend ret $a} }
# Choose the [binary scan] format for 32-bit integers that matches the host
# byte order ("i*" is little-endian, "I*" is big-endian), then decode all of
# $blob into the list variable r.
259 set scan(littleEndian) i*
260 set scan(bigEndian) I*
261 binary scan $blob $scan($::tcl_platform(byteOrder)) r
266 set sqlite_fts3_enable_parentheses 1
268 foreach nodesize {50 500 1000 2000} {
269 catch { array unset ::t1 }
271 # Create the FTS3 table. Populate it (and the Tcl array) with 100 rows.
274 catchsql { DROP TABLE t1 }
275 execsql "CREATE VIRTUAL TABLE t1 USING fts3(a, b, c)"
276 execsql "INSERT INTO t1(t1) VALUES('nodesize=$nodesize')"
277 for {set i 0} {$i < 100} {incr i} { insert_row $i }
280 for {set iTest 1} {$iTest <= 100} {incr iTest} {
285 if {$iTest==100 && $nodesize==50} {
290 # Delete one row, update one row and insert one row.
292 set rows [array names ::t1]
293 set nRow [llength $rows]
294 set iUpdate [lindex $rows [expr {int(rand()*$nRow)}]]
296 while {$iDelete == $iUpdate} {
297 set iDelete [lindex $rows [expr {int(rand()*$nRow)}]]
300 while {[info exists ::t1($iInsert)]} {
301 set iInsert [expr {int(rand()*1000000)}]
307 if {0==($iTest%2)} { execsql COMMIT }
310 do_test fts3rnd-1.$nodesize.$iTest.0 { fts3_integrity_check t1 } ok
313 # Pick 10 terms from the vocabulary. Check that the results of querying
314 # the database for the set of documents containing each of these terms
315 # is the same as the result obtained by scanning the contents of the Tcl
316 # array for each term.
318 for {set i 0} {$i < 10} {incr i} {
319 set term [random_term]
320 do_select_test fts3rnd-1.$nodesize.$iTest.1.$i {
321 SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH $term
322 } [simple_token_matchinfo $term]
325 # This time, use all but the last character of each term as a term prefix
326 # to query for. Test that querying the Tcl array produces the same results
327 # as querying the FTS3 table for the prefix.
329 for {set i 0} {$i < $nRep} {incr i} {
330 set prefix [string range [random_term] 0 end-1]
331 set match "${prefix}*"
332 do_select_test fts3rnd-1.$nodesize.$iTest.2.$i {
333 SELECT docid FROM t1 WHERE t1 MATCH $match
334 } [simple_phrase $match]
337 # Similar to the above, except for phrase queries.
339 for {set i 0} {$i < $nRep} {incr i} {
340 set term [list [random_term] [random_term]]
341 set match "\"$term\""
342 do_select_test fts3rnd-1.$nodesize.$iTest.3.$i {
343 SELECT docid FROM t1 WHERE t1 MATCH $match
344 } [simple_phrase $term]
347 # Three word phrases.
349 for {set i 0} {$i < $nRep} {incr i} {
350 set term [list [random_term] [random_term] [random_term]]
351 set match "\"$term\""
352 do_select_test fts3rnd-1.$nodesize.$iTest.4.$i {
353 SELECT docid FROM t1 WHERE t1 MATCH $match
354 } [simple_phrase $term]
357 # Three word phrases made up of term-prefixes.
359 for {set i 0} {$i < $nRep} {incr i} {
360 set query "[string range [random_term] 0 end-1]* "
361 append query "[string range [random_term] 0 end-1]* "
362 append query "[string range [random_term] 0 end-1]*"
364 set match "\"$query\""
365 do_select_test fts3rnd-1.$nodesize.$iTest.5.$i {
366 SELECT docid FROM t1 WHERE t1 MATCH $match
367 } [simple_phrase $query]
370 # A NEAR query with terms as the arguments.
372 for {set i 0} {$i < $nRep} {incr i} {
373 set terms [list [random_term] [random_term]]
374 set match [join $terms " NEAR "]
375 do_select_test fts3rnd-1.$nodesize.$iTest.6.$i {
376 SELECT docid FROM t1 WHERE t1 MATCH $match
377 } [simple_near $terms 10]
380 # A 3-way NEAR query with terms as the arguments.
382 for {set i 0} {$i < $nRep} {incr i} {
383 set terms [list [random_term] [random_term] [random_term]]
385 set match [join $terms " NEAR/$nNear "]
386 do_select_test fts3rnd-1.$nodesize.$iTest.7.$i {
387 SELECT docid FROM t1 WHERE t1 MATCH $match
388 } [simple_near $terms $nNear]
391 # Set operations on simple term queries.
393 foreach {tn op proc} {
398 for {set i 0} {$i < $nRep} {incr i} {
399 set term1 [random_term]
400 set term2 [random_term]
401 set match "$term1 $op $term2"
402 do_select_test fts3rnd-1.$nodesize.$iTest.$tn.$i {
403 SELECT docid FROM t1 WHERE t1 MATCH $match
404 } [$proc [simple_phrase $term1] [simple_phrase $term2]]
408 # Set operations on NEAR queries.
410 foreach {tn op proc} {
415 for {set i 0} {$i < $nRep} {incr i} {
416 set term1 [random_term]
417 set term2 [random_term]
418 set term3 [random_term]
419 set term4 [random_term]
420 set match "$term1 NEAR $term2 $op $term3 NEAR $term4"
421 do_select_test fts3rnd-1.$nodesize.$iTest.$tn.$i {
422 SELECT docid FROM t1 WHERE t1 MATCH $match
424 [simple_near [list $term1 $term2] 10] \
425 [simple_near [list $term3 $term4] 10]