3 # The author disclaims copyright to this source code. In place of
4 # a legal notice, here is a blessing:
6 # May you do good and not evil.
7 # May you find forgiveness for yourself and forgive others.
8 # May you share freely, never taking more than you give.
10 #***********************************************************************
12 # This file contains common code used the fts3 tests. At one point
13 # equivalent functionality was implemented in C code. But it is easier
17 #-------------------------------------------------------------------------
18 # USAGE: fts3_integrity_check TBL
20 # This proc is used to verify that the full-text index is consistent with
21 # the contents of the fts3 table. In other words, it checks that the
22 # data in the %_contents table matches that in the %_segdir and %_segments
25 # This is not an efficient procedure. It uses a lot of memory and a lot
26 # of CPU. But it is better than not checking at all.
30 # 1) Read the entire full-text index from the %_segdir and %_segments
31 # tables into memory. For each entry in the index, the following is
34 # set C($iDocid,$iCol,$iPosition) $zTerm
36 # 2) Iterate through each column of each row of the %_content table.
37 # Tokenize all documents, and check that for each token there is
38 # a corresponding entry in the $C array. After checking a token,
39 # [unset] the $C array entry.
41 # 3) Check that array $C is now empty.
44 proc fts3_integrity_check
{tbl
} {
48 foreach zTerm
[array names A
] {
49 foreach doclist
$A($zTerm) {
51 while {[string length
$doclist]>0} {
57 # First varint of a doclist-entry is the docid. Delta-compressed
58 # with respect to the docid of the previous entry.
60 incr docid
[gobble_varint doclist
]
61 if {[info exists D
($zTerm,$docid)]} {
62 while {[set iDelta
[gobble_varint doclist
]] != 0} {}
65 set D
($zTerm,$docid) 1
67 # Gobble varints until the 0x00 that terminates the doclist-entry
69 while {[set iDelta
[gobble_varint doclist
]] > 0} {
71 set iCol
[gobble_varint doclist
]
76 set C
($docid,$iCol,$iPos) $zTerm
83 foreach key
[array names C
] {
84 #puts "$key -> $C($key)"
88 db
eval "SELECT * FROM ${tbl}_content" E
{
91 foreach col
[lrange $E(*) 1 end
] {
93 set sql
{SELECT fts3_tokenizer_test
('simple'
, $c)}
95 foreach {pos term dummy
} [db one
$sql] {
96 if {![info exists C
($iDoc,$iCol,$pos)]} {
97 set es
"Error at docid=$iDoc col=$iCol pos=$pos. Index is missing"
100 if {$C($iDoc,$iCol,$pos) != "$term"} {
101 set es
"Error at docid=$iDoc col=$iCol pos=$pos. Index "
102 append es
"has \"$C($iDoc,$iCol,$pos)\", document has \"$term\""
105 unset C
($iDoc,$iCol,$pos)
112 foreach c
[array names C
] {
113 lappend errors
"Bad index entry: $c -> $C($c)"
116 if {[info exists errors
]} { return [join $errors "\n"] }
120 # USAGE: fts3_terms TBL WHERE
122 # Argument TBL must be the name of an FTS3 table. Argument WHERE is an
123 # SQL expression that will be used as the WHERE clause when scanning
124 # the %_segdir table. As in the following query:
126 # "SELECT * FROM ${TBL}_segdir WHERE ${WHERE}"
128 # This function returns a list of all terms present in the segments
129 # selected by the statement above.
131 proc fts3_terms
{tbl where
} {
132 fts3_read
$tbl $where a
133 return [lsort [array names a
]]
137 # USAGE: fts3_doclist TBL TERM WHERE
139 # Argument TBL must be the name of an FTS3 table. TERM is a term that may
140 # or may not be present in the table. Argument WHERE is used to select a
141 # subset of the b-tree segments in the associated full-text index as
142 # described above for [fts3_terms].
144 # This function returns the results of merging the doclists associated
145 # with TERM in the selected segments. Each doclist is an element of the
146 # returned list. Each doclist is formatted as follows:
148 # [$docid ?$col[$off1 $off2...]?...]
150 # The formatting is odd for a Tcl command in order to be compatible with
151 # the original C-language implementation. If argument WHERE is "1", then
152 # any empty doclists are omitted from the returned list.
154 proc fts3_doclist
{tbl term where
} {
155 fts3_read
$tbl $where a
158 foreach doclist
$a($term) {
161 while {[string length
$doclist]>0} {
166 incr docid
[gobble_varint doclist
]
168 while {[set iDelta
[gobble_varint doclist
]] > 0} {
170 lappend lCol
[list $iCol $lPos]
173 set iCol
[gobble_varint doclist
]
181 if {[llength $lPos]>0} {
182 lappend lCol
[list $iCol $lPos]
185 if {$where != "1" ||
[llength $lCol]>0} {
186 set ret
($docid) $lCol
188 unset -nocomplain ret
($docid)
194 foreach docid
[lsort -integer [array names ret
]] {
197 foreach col
$ret($docid) {
198 foreach {iCol lPos
} $col {}
199 append cols
" $iCol\[[join $lPos { }]\]"
201 lappend lDoc
"\[${docid}${cols}\]"
207 ###########################################################################
209 proc gobble_varint
{varname
} {
211 set n
[read_fts3varint
$blob ret
]
212 set blob
[string range
$blob $n end
]
215 proc gobble_string
{varname nLength
} {
217 set ret
[string range
$blob 0 [expr $nLength-1]]
218 set blob
[string range
$blob $nLength end
]
222 # The argument is a blob of data representing an FTS3 segment leaf.
223 # Return a list consisting of alternating terms (strings) and doclists
226 proc fts3_readleaf
{blob
} {
230 while {[string length
$blob] > 0} {
231 set nPrefix
[gobble_varint blob
]
232 set nSuffix
[gobble_varint blob
]
234 set zTerm
[string range
$zPrev 0 [expr $nPrefix-1]]
235 append zTerm
[gobble_string blob
$nSuffix]
236 set doclist
[gobble_string blob
[gobble_varint blob
]]
238 lappend terms
$zTerm $doclist
245 proc fts3_read2
{tbl where varname
} {
248 db
eval " SELECT start_block, leaves_end_block, root
249 FROM ${tbl}_segdir WHERE $where
250 ORDER BY level ASC, idx DESC
252 if {$start_block == 0} {
253 foreach {t d
} [fts3_readleaf
$root] { lappend a
($t) $d }
255 db
eval " SELECT block
257 WHERE blockid>=$start_block AND blockid<=$leaves_end_block
260 foreach {t d
} [fts3_readleaf
$block] { lappend a
($t) $d }
267 proc fts3_read
{tbl where varname
} {
270 db
eval " SELECT start_block, leaves_end_block, root
271 FROM ${tbl}_segdir WHERE $where
272 ORDER BY level DESC, idx ASC
274 if {$start_block == 0} {
275 foreach {t d
} [fts3_readleaf
$root] { lappend a
($t) $d }
277 db
eval " SELECT block
279 WHERE blockid>=$start_block AND blockid<$leaves_end_block
282 foreach {t d
} [fts3_readleaf
$block] { lappend a
($t) $d }
289 ##########################################################################