4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
13 ** Low level access to the FTS index stored in the database file. The
14 ** routines in this file implement all read and write access to the
15 ** %_data table. Other parts of the system access this functionality via
16 ** the interface defined in fts5Int.h.
25 ** The %_data table contains all the FTS indexes for an FTS5 virtual table.
26 ** As well as the main term index, there may be up to 31 prefix indexes.
27 ** The format is similar to FTS3/4, except that:
29 ** * all segment b-tree leaf data is stored in fixed size page records
30 ** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is
31 ** taken to ensure it is possible to iterate in either direction through
32 ** the entries in a doclist, or to seek to a specific entry within a
33 ** doclist, without loading it into memory.
35 ** * large doclists that span many pages have associated "doclist index"
36 ** records that contain a copy of the first rowid on each page spanned by
37 ** the doclist. This is used to speed up seek operations, and merges of
38 ** large doclists with very small doclists.
40 ** * extra fields in the "structure record" record the state of ongoing
41 ** incremental merge operations.
/*
** Tuning constants for the index writer and merge code.
*/
#define FTS5_OPT_WORK_UNIT  1000  /* Number of leaf pages per optimize step */
#define FTS5_WORK_UNIT      64    /* Number of leaf pages in unit of work */

#define FTS5_MIN_DLIDX_SIZE 4     /* Add dlidx if this many empty pages */

#define FTS5_MAIN_PREFIX '0'

/* The file format allows for at most 31 prefix indexes in addition to the
** main term index. Refuse to build if configured with more. */
#if FTS5_MAX_PREFIX_INDEXES > 31
# error "FTS5_MAX_PREFIX_INDEXES is too large"
#endif

#define FTS5_MAX_LEVEL 64
60 ** There are two versions of the format used for the structure record:
62 ** 1. the legacy format, that may be read by all fts5 versions, and
64 ** 2. the V2 format, which is used by contentless_delete=1 databases.
66 ** Both begin with a 4-byte "configuration cookie" value. Then, a legacy
67 ** format structure record contains a varint - the number of levels in
68 ** the structure. Whereas a V2 structure record contains the constant
69 ** 4 bytes [0xff 0x00 0x00 0x01]. This is unambiguous as the value of a
70 ** varint has to be at least 16256 to begin with "0xFF". And the default
71 ** maximum number of levels is 64.
73 ** See below for more on structure record formats.
75 #define FTS5_STRUCTURE_V2 "\xFF\x00\x00\x01"
80 ** The %_data table managed by this module,
82 ** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB);
84 ** , contains the following 6 types of records. See the comments surrounding
85 ** the FTS5_*_ROWID macros below for a description of how %_data rowids are
86 ** assigned to each of them.
88 ** 1. Structure Records:
90 ** The set of segments that make up an index - the index structure - are
91 ** recorded in a single record within the %_data table. The record consists
92 ** of a single 32-bit configuration cookie value followed by a list of
95 ** If the structure record is a V2 record, the configuration cookie is
96 ** followed by the following 4 bytes: [0xFF 0x00 0x00 0x01].
98 ** Next, the record continues with three varints:
100 ** + number of levels,
101 ** + total number of segments on all levels,
102 ** + value of write counter.
104 ** Then, for each level from 0 to nMax:
106 ** + number of input segments in ongoing merge.
107 ** + total number of segments in level.
108 ** + for each segment from oldest to newest:
109 ** + segment id (always > 0)
110 ** + first leaf page number (often 1, always greater than 0)
111 ** + final leaf page number
113 ** Then, for V2 structures only:
115 ** + lower origin counter value,
116 ** + upper origin counter value,
117 ** + the number of tombstone hash pages.
119 ** 2. The Averages Record:
121 ** A single record within the %_data table. The data is a list of varints.
122 ** The first value is the number of rows in the index. Then, for each column
123 ** from left to right, the total number of tokens in the column for all
124 ** rows of the table.
126 ** 3. Segment leaves:
128 ** TERM/DOCLIST FORMAT:
130 ** Most of each segment leaf is taken up by term/doclist data. The
131 ** general format of term/doclist, starting with the first term
132 ** on the leaf page, is:
134 ** varint : size of first term
135 ** blob: first term data
136 ** doclist: first doclist
138 ** varint: number of bytes in common with previous term
139 ** varint: number of bytes of new term data (nNew)
140 ** blob: nNew bytes of new term data
141 ** doclist: next doclist
146 ** varint: first rowid
147 ** poslist: first poslist
149 ** varint: rowid delta (always > 0)
150 ** poslist: next poslist
155 ** varint: size of poslist in bytes multiplied by 2, not including
156 ** this field. Plus 1 if this entry carries the "delete" flag.
157 ** collist: collist for column 0
160 ** varint: column number (I)
161 ** collist: collist for column I
166 ** varint: first offset + 2
168 ** varint: offset delta + 2
173 ** Each leaf page begins with a 4-byte header containing 2 16-bit
174 ** unsigned integer fields in big-endian format. They are:
176 ** * The byte offset of the first rowid on the page, if it exists
177 ** and occurs before the first term (otherwise 0).
179 ** * The byte offset of the start of the page footer. If the page
180 ** footer is 0 bytes in size, then this field is the same as the
181 ** size of the leaf page in bytes.
183 ** The page footer consists of a single varint for each term located
184 ** on the page. Each varint is the byte offset of the current term
185 ** within the page, delta-compressed against the previous value. In
186 ** other words, the first varint in the footer is the byte offset of
187 ** the first term, the second is the byte offset of the second less that
188 ** of the first, and so on.
190 ** The term/doclist format described above is accurate if the entire
191 ** term/doclist data fits on a single leaf page. If this is not the case,
192 ** the format is changed in two ways:
194 ** + if the first rowid on a page occurs before the first term, it
195 ** is stored as a literal value:
197 ** varint: first rowid
199 ** + the first term on each page is stored in the same way as the
200 ** very first term of the segment:
202 ** varint : size of first term
203 ** blob: first term data
205 ** 5. Segment doclist indexes:
207 ** Doclist indexes are themselves b-trees, however they usually consist of
208 ** a single leaf record only. The format of each doclist index leaf page
211 ** * Flags byte. Bits are:
212 ** 0x01: Clear if leaf is also the root page, otherwise set.
214 ** * Page number of fts index leaf page. As a varint.
216 ** * First rowid on page indicated by previous field. As a varint.
218 ** * A list of varints, one for each subsequent termless page. A
219 ** positive delta if the termless page contains at least one rowid,
220 ** or an 0x00 byte otherwise.
222 ** Internal doclist index nodes are:
224 ** * Flags byte. Bits are:
225 ** 0x01: Clear for root page, otherwise set.
227 ** * Page number of first child page. As a varint.
229 ** * Copy of first rowid on page indicated by previous field. As a varint.
231 ** * A list of delta-encoded varints - the first rowid on each subsequent
234 ** 6. Tombstone Hash Page
236 ** These records are only ever present in contentless_delete=1 tables.
237 ** There are zero or more of these associated with each segment. They
238 ** are used to store the tombstone rowids for rows contained in the
239 ** associated segments.
241 ** The set of nHashPg tombstone hash pages associated with a single
242 ** segment together form a single hash table containing tombstone rowids.
243 ** To find the page of the hash on which a key might be stored:
245 ** iPg = (rowid % nHashPg)
247 ** Then, within page iPg, which has nSlot slots:
249 ** iSlot = (rowid / nHashPg) % nSlot
251 ** Each tombstone hash page begins with an 8 byte header:
253 ** 1-byte: Key-size (the size in bytes of each slot). Either 4 or 8.
254 ** 1-byte: rowid-0-tombstone flag. This flag is only valid on the
255 ** first tombstone hash page for each segment (iPg=0). If set,
256 ** the hash table contains rowid 0. If clear, it does not.
257 ** Rowid 0 is handled specially.
259 ** 4-bytes: Big-endian integer containing number of entries on page.
261 ** Following this are nSlot 4 or 8 byte slots (depending on the key-size
262 ** in the first byte of the page header). The number of slots may be
263 ** determined based on the size of the page record and the key-size:
265 ** nSlot = (nByte - 8) / key-size
269 ** Rowids for the averages and structure records in the %_data table.
271 #define FTS5_AVERAGES_ROWID 1 /* Rowid used for the averages record */
272 #define FTS5_STRUCTURE_ROWID 10 /* The structure record */
/*
** Macros determining the rowids used by segment leaves and dlidx leaves
** and nodes. All nodes and leaves are stored in the %_data table with large
** positive rowids.
**
** Each segment has a unique non-zero 16-bit id.
**
** The rowid for each segment leaf is found by passing the segment id and
** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered
** sequentially starting from 1.
**
** fts5_dri() packs its four arguments into a single 64-bit value. From the
** least significant bit upwards:
**
**     FTS5_DATA_PAGE_B   bits:  page number
**     FTS5_DATA_HEIGHT_B bits:  dlidx tree height
**     FTS5_DATA_DLI_B    bits:  doclist-index flag
**     remaining bits:           segment id
*/
#define FTS5_DATA_ID_B     16     /* Max seg id number 65535 */
#define FTS5_DATA_DLI_B     1     /* Doclist-index flag (1 bit) */
#define FTS5_DATA_HEIGHT_B  5     /* Max dlidx tree height of 32 */
#define FTS5_DATA_PAGE_B   31     /* Max page number of 2147483648 */

#define fts5_dri(segid, dlidx, height, pgno) (                                 \
 ((i64)(segid)  << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) +    \
 ((i64)(dlidx)  << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) +                  \
 ((i64)(height) << (FTS5_DATA_PAGE_B)) +                                       \
 ((i64)(pgno))                                                                 \
)

#define FTS5_SEGMENT_ROWID(segid, pgno)       fts5_dri(segid, 0, 0, pgno)
#define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)

/* Tombstone hash pages are keyed as if they belonged to segment
** (segid + 65536), which keeps them clear of the rowids of every real
** 16-bit segment id. */
#define FTS5_TOMBSTONE_ROWID(segid,ipg)       fts5_dri((segid)+(1<<16), 0, 0, ipg)
302 int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB
; }
307 ** Each time a blob is read from the %_data table, it is padded with this
308 ** many zero bytes. This makes it easier to decode the various record formats
309 ** without overreading if the records are corrupt.
311 #define FTS5_DATA_ZERO_PADDING 8
312 #define FTS5_DATA_PADDING 20
/* Short names for the structure types used by this module. */
typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;
typedef struct Fts5DlidxWriter Fts5DlidxWriter;
typedef struct Fts5Iter Fts5Iter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;
typedef struct Fts5TokenDataIter Fts5TokenDataIter;
typedef struct Fts5TokenDataMap Fts5TokenDataMap;
typedef struct Fts5TombstoneArray Fts5TombstoneArray;
331 u8
*p
; /* Pointer to buffer containing record */
332 int nn
; /* Size of record in bytes */
333 int szLeaf
; /* Size of leaf without page-index */
337 ** One object per %_data table.
339 ** nContentlessDelete:
340 ** The number of contentless delete operations since the most recent
341 ** call to fts5IndexFlush() or fts5IndexDiscardData(). This is tracked
342 ** so that extra auto-merge work can be done by fts5IndexFlush() to
343 ** account for the delete operations.
346 Fts5Config
*pConfig
; /* Virtual table configuration */
347 char *zDataTbl
; /* Name of %_data table */
348 int nWorkUnit
; /* Leaf pages in a "unit" of work */
351 ** Variables related to the accumulation of tokens and doclists within the
352 ** in-memory hash tables before they are flushed to disk.
354 Fts5Hash
*pHash
; /* Hash table for in-memory data */
355 int nPendingData
; /* Current bytes of pending data */
356 i64 iWriteRowid
; /* Rowid for current doc being written */
357 int bDelete
; /* Current write is a delete */
358 int nContentlessDelete
; /* Number of contentless delete ops */
359 int nPendingRow
; /* Number of INSERT in hash table */
362 int rc
; /* Current error code */
365 /* State used by the fts5DataXXX() functions. */
366 sqlite3_blob
*pReader
; /* RO incr-blob open on %_data table */
367 sqlite3_stmt
*pWriter
; /* "INSERT ... %_data VALUES(?,?)" */
368 sqlite3_stmt
*pDeleter
; /* "DELETE FROM %_data ... id>=? AND id<=?" */
369 sqlite3_stmt
*pIdxWriter
; /* "INSERT ... %_idx VALUES(?,?,?,?)" */
370 sqlite3_stmt
*pIdxDeleter
; /* "DELETE FROM %_idx WHERE segid=?" */
371 sqlite3_stmt
*pIdxSelect
;
372 sqlite3_stmt
*pIdxNextSelect
;
373 int nRead
; /* Total number of blocks read */
375 sqlite3_stmt
*pDeleteFromIdx
;
377 sqlite3_stmt
*pDataVersion
;
378 i64 iStructVersion
; /* data_version when pStruct read */
379 Fts5Structure
*pStruct
; /* Current db structure (or NULL) */
382 struct Fts5DoclistIter
{
383 u8
*aEof
; /* Pointer to 1 byte past end of doclist */
385 /* Output variables. aPoslist==0 at EOF */
393 ** The contents of the "structure" record for each index are represented
394 ** using an Fts5Structure record in memory. Which uses instances of the
395 ** other Fts5StructureXXX types as components.
398 ** This value is set to non-zero for structure records created for
399 ** contentlessdelete=1 tables only. In that case it represents the
400 ** origin value to apply to the next top-level segment created.
402 struct Fts5StructureSegment
{
403 int iSegid
; /* Segment id */
404 int pgnoFirst
; /* First leaf page number in segment */
405 int pgnoLast
; /* Last leaf page number in segment */
407 /* contentlessdelete=1 tables only: */
410 int nPgTombstone
; /* Number of tombstone hash table pages */
411 u64 nEntryTombstone
; /* Number of tombstone entries that "count" */
412 u64 nEntry
; /* Number of rows in this segment */
414 struct Fts5StructureLevel
{
415 int nMerge
; /* Number of segments in incr-merge */
416 int nSeg
; /* Total number of segments on level */
417 Fts5StructureSegment
*aSeg
; /* Array of segments. aSeg[0] is oldest. */
419 struct Fts5Structure
{
420 int nRef
; /* Object reference count */
421 u64 nWriteCounter
; /* Total leaves written to level 0 */
422 u64 nOriginCntr
; /* Origin value for next top-level segment */
423 int nSegment
; /* Total segments in this structure */
424 int nLevel
; /* Number of levels in this index */
425 Fts5StructureLevel aLevel
[1]; /* Array of nLevel level objects */
429 ** An object of type Fts5SegWriter is used to write to segments.
431 struct Fts5PageWriter
{
432 int pgno
; /* Page number for this page */
433 int iPrevPgidx
; /* Previous value written into pgidx */
434 Fts5Buffer buf
; /* Buffer containing leaf data */
435 Fts5Buffer pgidx
; /* Buffer containing page-index */
436 Fts5Buffer term
; /* Buffer containing previous term on page */
438 struct Fts5DlidxWriter
{
439 int pgno
; /* Page number for this page */
440 int bPrevValid
; /* True if iPrev is valid */
441 i64 iPrev
; /* Previous rowid value written to page */
442 Fts5Buffer buf
; /* Buffer containing page data */
444 struct Fts5SegWriter
{
445 int iSegid
; /* Segid to write to */
446 Fts5PageWriter writer
; /* PageWriter object */
447 i64 iPrevRowid
; /* Previous rowid written to current leaf */
448 u8 bFirstRowidInDoclist
; /* True if next rowid is first in doclist */
449 u8 bFirstRowidInPage
; /* True if next rowid is first in page */
450 /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
451 u8 bFirstTermInPage
; /* True if next term will be first in leaf */
452 int nLeafWritten
; /* Number of leaf pages written */
453 int nEmpty
; /* Number of contiguous term-less nodes */
455 int nDlidx
; /* Allocated size of aDlidx[] array */
456 Fts5DlidxWriter
*aDlidx
; /* Array of Fts5DlidxWriter objects */
458 /* Values to insert into the %_idx table */
459 Fts5Buffer btterm
; /* Next term to insert into %_idx table */
460 int iBtPage
; /* Page number corresponding to btterm */
463 typedef struct Fts5CResult Fts5CResult
;
465 u16 iFirst
; /* aSeg[] index of firstest iterator */
466 u8 bTermEq
; /* True if the terms are equal */
470 ** Object for iterating through a single segment, visiting each term/rowid
471 ** pair in the segment.
474 ** The segment to iterate through.
477 ** Current leaf page number within segment.
480 ** Byte offset within the current leaf that is the first byte of the
481 ** position list data (one byte past the position-list size field).
484 ** Buffer containing current leaf page data. Set to NULL at EOF.
486 ** iTermLeafPgno, iTermLeafOffset:
487 ** Leaf page number containing the last term read from the segment. And
488 ** the offset immediately following the term data.
491 ** Mask of FTS5_SEGITER_XXX values. Interpreted as follows:
493 ** FTS5_SEGITER_ONETERM:
494 ** If set, set the iterator to point to EOF after the current doclist
495 ** has been exhausted. Do not proceed to the next term in the segment.
497 ** FTS5_SEGITER_REVERSE:
498 ** This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If
499 ** it is set, iterate through rowid in descending order instead of the
500 ** default ascending order.
502 ** iRowidOffset/nRowidOffset/aRowidOffset:
503 ** These are used if the FTS5_SEGITER_REVERSE flag is set.
505 ** For each rowid on the page corresponding to the current term, the
506 ** corresponding aRowidOffset[] entry is set to the byte offset of the
507 ** start of the "position-list-size" field within the page.
510 ** Index of current term on iTermLeafPgno.
512 ** apTombstone/nTombstone:
513 ** These are used for contentless_delete=1 tables only. When the cursor
514 ** is first allocated, the apTombstone[] array is allocated so that it
515 ** is large enough for all tombstones hash pages associated with the
516 ** segment. The pages themselves are loaded lazily from the database as
517 ** they are required.
520 Fts5StructureSegment
*pSeg
; /* Segment to iterate through */
521 int flags
; /* Mask of configuration flags */
522 int iLeafPgno
; /* Current leaf page number */
523 Fts5Data
*pLeaf
; /* Current leaf data */
524 Fts5Data
*pNextLeaf
; /* Leaf page (iLeafPgno+1) */
525 i64 iLeafOffset
; /* Byte offset within current leaf */
526 Fts5TombstoneArray
*pTombArray
; /* Array of tombstone pages */
529 void (*xNext
)(Fts5Index
*, Fts5SegIter
*, int*);
531 /* The page and offset from which the current term was read. The offset
532 ** is the offset of the first rowid in the current doclist. */
536 int iPgidxOff
; /* Next offset in pgidx */
539 /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
540 int iRowidOffset
; /* Current entry in aRowidOffset[] */
541 int nRowidOffset
; /* Allocated size of aRowidOffset[] array */
542 int *aRowidOffset
; /* Array of offset to rowid fields */
544 Fts5DlidxIter
*pDlidx
; /* If there is a doclist-index */
546 /* Variables populated based on current entry. */
547 Fts5Buffer term
; /* Current term */
548 i64 iRowid
; /* Current rowid */
549 int nPos
; /* Number of bytes in current position list */
550 u8 bDel
; /* True if the delete flag is set */
554 ** Array of tombstone pages. Reference counted.
556 struct Fts5TombstoneArray
{
557 int nRef
; /* Number of pointers to this object */
559 Fts5Data
*apTombstone
[1]; /* Array of tombstone pages */
/*
** Argument is a pointer to an Fts5Data structure that contains a
** leaf page. Assert that its szLeaf field is consistent: it must equal
** either the total record size (nn) or the 16-bit value stored at byte
** offset 2 of the page.
*/
#define ASSERT_SZLEAF_OK(x) assert( \
    (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
)

/* Bits for Fts5SegIter.flags */
#define FTS5_SEGITER_ONETERM 0x01
#define FTS5_SEGITER_REVERSE 0x02

/*
** Argument is a pointer to an Fts5Data structure that contains a leaf
** page. This macro evaluates to true if the leaf contains no terms, or
** false if it contains at least one term.
*/
#define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)

/* Read the 16-bit big-endian value stored 2*i bytes past offset szLeaf. */
#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))

/* Read the first header field of the page: the offset of the first rowid. */
#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))
585 ** Object for iterating through the merged results of one or more segments,
586 ** visiting each term/rowid pair in the merged data.
588 ** nSeg is always a power of two greater than or equal to the number of
589 ** segments that this object is merging data from. Both the aSeg[] and
590 ** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded
591 ** with zeroed objects - these are handled as if they were iterators opened
592 ** on empty segments.
594 ** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an
595 ** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the
596 ** comparison in this context is the index of the iterator that currently
597 ** points to the smaller term/rowid combination. Iterators at EOF are
598 ** considered to be greater than all other iterators.
600 ** aFirst[1] contains the index in aSeg[] of the iterator that points to
601 ** the smallest key overall. aFirst[0] is unused.
604 ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
605 ** There is no way to tell if this is populated or not.
608 ** If not NULL, points to an object containing a set of column indices.
609 ** Only matches that occur in one of these columns will be returned.
610 ** The Fts5Iter does not own the Fts5Colset object, and so it is not
611 ** freed when the iterator is closed - it is owned by the upper layer.
614 Fts5IndexIter base
; /* Base class containing output vars */
615 Fts5TokenDataIter
*pTokenDataIter
;
617 Fts5Index
*pIndex
; /* Index that owns this iterator */
618 Fts5Buffer poslist
; /* Buffer containing current poslist */
619 Fts5Colset
*pColset
; /* Restrict matches to these columns */
621 /* Invoked to set output variables. */
622 void (*xSetOutputs
)(Fts5Iter
*, Fts5SegIter
*);
624 int nSeg
; /* Size of aSeg[] array */
625 int bRev
; /* True to iterate in reverse order */
626 u8 bSkipEmpty
; /* True to skip deleted entries */
628 i64 iSwitchRowid
; /* Firstest rowid of other than aFirst[1] */
629 Fts5CResult
*aFirst
; /* Current merge state (see above) */
630 Fts5SegIter aSeg
[1]; /* Array of segment iterators */
634 ** An instance of the following type is used to iterate through the contents
635 ** of a doclist-index record.
638 ** Record containing the doclist-index data.
641 ** Set to true once iterator has reached EOF.
644 ** Set to the current offset within record pData.
646 struct Fts5DlidxLvl
{
647 Fts5Data
*pData
; /* Data for current page of this level */
648 int iOff
; /* Current offset into pData */
649 int bEof
; /* At EOF already */
650 int iFirstOff
; /* Used by reverse iterators */
652 /* Output variables */
653 int iLeafPgno
; /* Page number of current leaf page */
654 i64 iRowid
; /* First rowid on leaf iLeafPgno */
656 struct Fts5DlidxIter
{
659 Fts5DlidxLvl aLvl
[1];
/*
** Write iVal, formatted as a 16-bit big-endian unsigned integer, to the
** two bytes aOut[0] and aOut[1]. This is the inverse of fts5GetU16().
*/
static void fts5PutU16(u8 *aOut, u16 iVal){
  aOut[0] = (u8)(iVal>>8);        /* Most significant byte first */
  aOut[1] = (u8)(iVal&0xFF);
}
/*
** The only argument points to a buffer at least 2 bytes in size. Interpret
** the first 2 bytes as a 16-bit big-endian unsigned integer and return it.
*/
static u16 fts5GetU16(const u8 *aIn){
  return (u16)(((u16)aIn[0] << 8) + aIn[1]);
}
/*
** The only argument points to a buffer at least 8 bytes in size. This
** function interprets the first 8 bytes of the buffer as a 64-bit big-endian
** unsigned integer and returns the result.
*/
static u64 fts5GetU64(const u8 *a){
  return ((u64)a[0] << 56)
       + ((u64)a[1] << 48)
       + ((u64)a[2] << 40)
       + ((u64)a[3] << 32)
       + ((u64)a[4] << 24)
       + ((u64)a[5] << 16)
       + ((u64)a[6] << 8)
       + ((u64)a[7] << 0);
}
/*
** The only argument points to a buffer at least 4 bytes in size. This
** function interprets the first 4 bytes of the buffer as a 32-bit big-endian
** unsigned integer and returns the result.
*/
static u32 fts5GetU32(const u8 *a){
  return ((u32)a[0] << 24)
       + ((u32)a[1] << 16)
       + ((u32)a[2] << 8)
       + ((u32)a[3] << 0);
}
/*
** Write iVal, formatted as a 64-bit big-endian unsigned integer, to the
** buffer indicated by the first argument. Exactly 8 bytes are written.
*/
static void fts5PutU64(u8 *a, u64 iVal){
  int i;
  /* a[0] receives bits 56..63, a[7] receives bits 0..7 */
  for(i=0; i<8; i++){
    a[i] = (u8)((iVal >> (56 - 8*i)) & 0xFF);
  }
}
/*
** Write iVal, formatted as a 32-bit big-endian unsigned integer, to the
** buffer indicated by the first argument. Exactly 4 bytes are written.
*/
static void fts5PutU32(u8 *a, u32 iVal){
  int i;
  /* a[0] receives bits 24..31, a[3] receives bits 0..7 */
  for(i=0; i<4; i++){
    a[i] = (u8)((iVal >> (24 - 8*i)) & 0xFF);
  }
}
726 ** Allocate and return a buffer at least nByte bytes in size.
728 ** If an OOM error is encountered, return NULL and set the error code in
729 ** the Fts5Index handle passed as the first argument.
731 static void *fts5IdxMalloc(Fts5Index
*p
, sqlite3_int64 nByte
){
732 return sqlite3Fts5MallocZero(&p
->rc
, nByte
);
736 ** Compare the contents of the pLeft buffer with the pRight/nRight blob.
738 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
739 ** +ve if pRight is smaller than pLeft. In other words:
741 ** res = *pLeft - *pRight
744 static int fts5BufferCompareBlob(
745 Fts5Buffer
*pLeft
, /* Left hand side of comparison */
746 const u8
*pRight
, int nRight
/* Right hand side of comparison */
748 int nCmp
= MIN(pLeft
->n
, nRight
);
749 int res
= memcmp(pLeft
->p
, pRight
, nCmp
);
750 return (res
==0 ? (pLeft
->n
- nRight
) : res
);
755 ** Compare the contents of the two buffers using memcmp(). If one buffer
756 ** is a prefix of the other, it is considered the lesser.
758 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
759 ** +ve if pRight is smaller than pLeft. In other words:
761 ** res = *pLeft - *pRight
763 static int fts5BufferCompare(Fts5Buffer
*pLeft
, Fts5Buffer
*pRight
){
765 nCmp
= MIN(pLeft
->n
, pRight
->n
);
766 assert( nCmp
<=0 || pLeft
->p
!=0 );
767 assert( nCmp
<=0 || pRight
->p
!=0 );
768 res
= fts5Memcmp(pLeft
->p
, pRight
->p
, nCmp
);
769 return (res
==0 ? (pLeft
->n
- pRight
->n
) : res
);
772 static int fts5LeafFirstTermOff(Fts5Data
*pLeaf
){
774 fts5GetVarint32(&pLeaf
->p
[pLeaf
->szLeaf
], ret
);
779 ** Close the read-only blob handle, if it is open.
781 void sqlite3Fts5IndexCloseReader(Fts5Index
*p
){
783 sqlite3_blob
*pReader
= p
->pReader
;
785 sqlite3_blob_close(pReader
);
790 ** Retrieve a record from the %_data table.
792 ** If an error occurs, NULL is returned and an error left in the
795 static Fts5Data
*fts5DataRead(Fts5Index
*p
, i64 iRowid
){
797 if( p
->rc
==SQLITE_OK
){
801 /* This call may return SQLITE_ABORT if there has been a savepoint
802 ** rollback since it was last used. In this case a new blob handle
804 sqlite3_blob
*pBlob
= p
->pReader
;
806 rc
= sqlite3_blob_reopen(pBlob
, iRowid
);
807 assert( p
->pReader
==0 );
810 sqlite3Fts5IndexCloseReader(p
);
812 if( rc
==SQLITE_ABORT
) rc
= SQLITE_OK
;
815 /* If the blob handle is not open at this point, open it and seek
816 ** to the requested entry. */
817 if( p
->pReader
==0 && rc
==SQLITE_OK
){
818 Fts5Config
*pConfig
= p
->pConfig
;
819 rc
= sqlite3_blob_open(pConfig
->db
,
820 pConfig
->zDb
, p
->zDataTbl
, "block", iRowid
, 0, &p
->pReader
824 /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls
825 ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead.
826 ** All the reasons those functions might return SQLITE_ERROR - missing
827 ** table, missing row, non-blob/text in block column - indicate
828 ** backing store corruption. */
829 if( rc
==SQLITE_ERROR
) rc
= FTS5_CORRUPT
;
832 u8
*aOut
= 0; /* Read blob data into this buffer */
833 int nByte
= sqlite3_blob_bytes(p
->pReader
);
834 sqlite3_int64 nAlloc
= sizeof(Fts5Data
) + nByte
+ FTS5_DATA_PADDING
;
835 pRet
= (Fts5Data
*)sqlite3_malloc64(nAlloc
);
838 aOut
= pRet
->p
= (u8
*)&pRet
[1];
844 rc
= sqlite3_blob_read(p
->pReader
, aOut
, nByte
, 0);
850 /* TODO1: Fix this */
851 pRet
->p
[nByte
] = 0x00;
852 pRet
->p
[nByte
+1] = 0x00;
853 pRet
->szLeaf
= fts5GetU16(&pRet
->p
[2]);
860 assert( (pRet
==0)==(p
->rc
!=SQLITE_OK
) );
866 ** Release a reference to data record returned by an earlier call to
869 static void fts5DataRelease(Fts5Data
*pData
){
873 static Fts5Data
*fts5LeafRead(Fts5Index
*p
, i64 iRowid
){
874 Fts5Data
*pRet
= fts5DataRead(p
, iRowid
);
876 if( pRet
->nn
<4 || pRet
->szLeaf
>pRet
->nn
){
877 p
->rc
= FTS5_CORRUPT
;
878 fts5DataRelease(pRet
);
885 static int fts5IndexPrepareStmt(
887 sqlite3_stmt
**ppStmt
,
890 if( p
->rc
==SQLITE_OK
){
892 p
->rc
= sqlite3_prepare_v3(p
->pConfig
->db
, zSql
, -1,
893 SQLITE_PREPARE_PERSISTENT
|SQLITE_PREPARE_NO_VTAB
,
896 p
->rc
= SQLITE_NOMEM
;
905 ** INSERT OR REPLACE a record into the %_data table.
907 static void fts5DataWrite(Fts5Index
*p
, i64 iRowid
, const u8
*pData
, int nData
){
908 if( p
->rc
!=SQLITE_OK
) return;
911 Fts5Config
*pConfig
= p
->pConfig
;
912 fts5IndexPrepareStmt(p
, &p
->pWriter
, sqlite3_mprintf(
913 "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)",
914 pConfig
->zDb
, pConfig
->zName
919 sqlite3_bind_int64(p
->pWriter
, 1, iRowid
);
920 sqlite3_bind_blob(p
->pWriter
, 2, pData
, nData
, SQLITE_STATIC
);
921 sqlite3_step(p
->pWriter
);
922 p
->rc
= sqlite3_reset(p
->pWriter
);
923 sqlite3_bind_null(p
->pWriter
, 2);
927 ** Execute the following SQL:
929 ** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast
931 static void fts5DataDelete(Fts5Index
*p
, i64 iFirst
, i64 iLast
){
932 if( p
->rc
!=SQLITE_OK
) return;
934 if( p
->pDeleter
==0 ){
935 Fts5Config
*pConfig
= p
->pConfig
;
936 char *zSql
= sqlite3_mprintf(
937 "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?",
938 pConfig
->zDb
, pConfig
->zName
940 if( fts5IndexPrepareStmt(p
, &p
->pDeleter
, zSql
) ) return;
943 sqlite3_bind_int64(p
->pDeleter
, 1, iFirst
);
944 sqlite3_bind_int64(p
->pDeleter
, 2, iLast
);
945 sqlite3_step(p
->pDeleter
);
946 p
->rc
= sqlite3_reset(p
->pDeleter
);
950 ** Remove all records associated with segment iSegid.
952 static void fts5DataRemoveSegment(Fts5Index
*p
, Fts5StructureSegment
*pSeg
){
953 int iSegid
= pSeg
->iSegid
;
954 i64 iFirst
= FTS5_SEGMENT_ROWID(iSegid
, 0);
955 i64 iLast
= FTS5_SEGMENT_ROWID(iSegid
+1, 0)-1;
956 fts5DataDelete(p
, iFirst
, iLast
);
958 if( pSeg
->nPgTombstone
){
959 i64 iTomb1
= FTS5_TOMBSTONE_ROWID(iSegid
, 0);
960 i64 iTomb2
= FTS5_TOMBSTONE_ROWID(iSegid
, pSeg
->nPgTombstone
-1);
961 fts5DataDelete(p
, iTomb1
, iTomb2
);
963 if( p
->pIdxDeleter
==0 ){
964 Fts5Config
*pConfig
= p
->pConfig
;
965 fts5IndexPrepareStmt(p
, &p
->pIdxDeleter
, sqlite3_mprintf(
966 "DELETE FROM '%q'.'%q_idx' WHERE segid=?",
967 pConfig
->zDb
, pConfig
->zName
970 if( p
->rc
==SQLITE_OK
){
971 sqlite3_bind_int(p
->pIdxDeleter
, 1, iSegid
);
972 sqlite3_step(p
->pIdxDeleter
);
973 p
->rc
= sqlite3_reset(p
->pIdxDeleter
);
978 ** Release a reference to an Fts5Structure object returned by an earlier
979 ** call to fts5StructureRead() or fts5StructureDecode().
981 static void fts5StructureRelease(Fts5Structure
*pStruct
){
982 if( pStruct
&& 0>=(--pStruct
->nRef
) ){
984 assert( pStruct
->nRef
==0 );
985 for(i
=0; i
<pStruct
->nLevel
; i
++){
986 sqlite3_free(pStruct
->aLevel
[i
].aSeg
);
988 sqlite3_free(pStruct
);
992 static void fts5StructureRef(Fts5Structure
*pStruct
){
996 void *sqlite3Fts5StructureRef(Fts5Index
*p
){
997 fts5StructureRef(p
->pStruct
);
998 return (void*)p
->pStruct
;
1000 void sqlite3Fts5StructureRelease(void *p
){
1002 fts5StructureRelease((Fts5Structure
*)p
);
1005 int sqlite3Fts5StructureTest(Fts5Index
*p
, void *pStruct
){
1006 if( p
->pStruct
!=(Fts5Structure
*)pStruct
){
1007 return SQLITE_ABORT
;
1013 ** Ensure that structure object (*pp) is writable.
1015 ** This function is a no-op if (*pRc) is not SQLITE_OK when it is called. If
1016 ** an error occurs, (*pRc) is set to an SQLite error code before returning.
1018 static void fts5StructureMakeWritable(int *pRc
, Fts5Structure
**pp
){
1019 Fts5Structure
*p
= *pp
;
1020 if( *pRc
==SQLITE_OK
&& p
->nRef
>1 ){
1021 i64 nByte
= sizeof(Fts5Structure
)+(p
->nLevel
-1)*sizeof(Fts5StructureLevel
);
1022 Fts5Structure
*pNew
;
1023 pNew
= (Fts5Structure
*)sqlite3Fts5MallocZero(pRc
, nByte
);
1026 memcpy(pNew
, p
, nByte
);
1027 for(i
=0; i
<p
->nLevel
; i
++) pNew
->aLevel
[i
].aSeg
= 0;
1028 for(i
=0; i
<p
->nLevel
; i
++){
1029 Fts5StructureLevel
*pLvl
= &pNew
->aLevel
[i
];
1030 nByte
= sizeof(Fts5StructureSegment
) * pNew
->aLevel
[i
].nSeg
;
1031 pLvl
->aSeg
= (Fts5StructureSegment
*)sqlite3Fts5MallocZero(pRc
, nByte
);
1032 if( pLvl
->aSeg
==0 ){
1033 for(i
=0; i
<p
->nLevel
; i
++){
1034 sqlite3_free(pNew
->aLevel
[i
].aSeg
);
1039 memcpy(pLvl
->aSeg
, p
->aLevel
[i
].aSeg
, nByte
);
1049 ** Deserialize and return the structure record currently stored in serialized
1050 ** form within buffer pData/nData.
1052 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
1053 ** are over-allocated by one slot. This allows the structure contents
1054 ** to be more easily edited.
1056 ** If an error occurs, *ppOut is set to NULL and an SQLite error code
1057 ** returned. Otherwise, *ppOut is set to point to the new object and
1058 ** SQLITE_OK returned.
1060 static int fts5StructureDecode(
1061 const u8
*pData
, /* Buffer containing serialized structure */
1062 int nData
, /* Size of buffer pData in bytes */
1063 int *piCookie
, /* Configuration cookie value */
1064 Fts5Structure
**ppOut
/* OUT: Deserialized object */
1071 sqlite3_int64 nByte
; /* Bytes of space to allocate at pRet */
1072 Fts5Structure
*pRet
= 0; /* Structure object to return */
1073 int bStructureV2
= 0; /* True for FTS5_STRUCTURE_V2 */
1074 u64 nOriginCntr
= 0; /* Largest origin value seen so far */
1076 /* Grab the cookie value */
1077 if( piCookie
) *piCookie
= sqlite3Fts5Get32(pData
);
1080 /* Check if this is a V2 structure record. Set bStructureV2 if it is. */
1081 if( 0==memcmp(&pData
[i
], FTS5_STRUCTURE_V2
, 4) ){
1086 /* Read the total number of levels and segments from the start of the
1087 ** structure record. */
1088 i
+= fts5GetVarint32(&pData
[i
], nLevel
);
1089 i
+= fts5GetVarint32(&pData
[i
], nSegment
);
1090 if( nLevel
>FTS5_MAX_SEGMENT
|| nLevel
<0
1091 || nSegment
>FTS5_MAX_SEGMENT
|| nSegment
<0
1093 return FTS5_CORRUPT
;
1096 sizeof(Fts5Structure
) + /* Main structure */
1097 sizeof(Fts5StructureLevel
) * (nLevel
-1) /* aLevel[] array */
1099 pRet
= (Fts5Structure
*)sqlite3Fts5MallocZero(&rc
, nByte
);
1103 pRet
->nLevel
= nLevel
;
1104 pRet
->nSegment
= nSegment
;
1105 i
+= sqlite3Fts5GetVarint(&pData
[i
], &pRet
->nWriteCounter
);
1107 for(iLvl
=0; rc
==SQLITE_OK
&& iLvl
<nLevel
; iLvl
++){
1108 Fts5StructureLevel
*pLvl
= &pRet
->aLevel
[iLvl
];
1115 i
+= fts5GetVarint32(&pData
[i
], pLvl
->nMerge
);
1116 i
+= fts5GetVarint32(&pData
[i
], nTotal
);
1117 if( nTotal
<pLvl
->nMerge
) rc
= FTS5_CORRUPT
;
1118 pLvl
->aSeg
= (Fts5StructureSegment
*)sqlite3Fts5MallocZero(&rc
,
1119 nTotal
* sizeof(Fts5StructureSegment
)
1124 if( rc
==SQLITE_OK
){
1125 pLvl
->nSeg
= nTotal
;
1126 for(iSeg
=0; iSeg
<nTotal
; iSeg
++){
1127 Fts5StructureSegment
*pSeg
= &pLvl
->aSeg
[iSeg
];
1133 i
+= fts5GetVarint32(&pData
[i
], pSeg
->iSegid
);
1134 i
+= fts5GetVarint32(&pData
[i
], pSeg
->pgnoFirst
);
1135 i
+= fts5GetVarint32(&pData
[i
], pSeg
->pgnoLast
);
1137 i
+= fts5GetVarint(&pData
[i
], &pSeg
->iOrigin1
);
1138 i
+= fts5GetVarint(&pData
[i
], &pSeg
->iOrigin2
);
1139 i
+= fts5GetVarint32(&pData
[i
], pSeg
->nPgTombstone
);
1140 i
+= fts5GetVarint(&pData
[i
], &pSeg
->nEntryTombstone
);
1141 i
+= fts5GetVarint(&pData
[i
], &pSeg
->nEntry
);
1142 nOriginCntr
= MAX(nOriginCntr
, pSeg
->iOrigin2
);
1144 if( pSeg
->pgnoLast
<pSeg
->pgnoFirst
){
1149 if( iLvl
>0 && pLvl
[-1].nMerge
&& nTotal
==0 ) rc
= FTS5_CORRUPT
;
1150 if( iLvl
==nLevel
-1 && pLvl
->nMerge
) rc
= FTS5_CORRUPT
;
1153 if( nSegment
!=0 && rc
==SQLITE_OK
) rc
= FTS5_CORRUPT
;
1155 pRet
->nOriginCntr
= nOriginCntr
+1;
1158 if( rc
!=SQLITE_OK
){
1159 fts5StructureRelease(pRet
);
1169 ** Add a level to the Fts5Structure.aLevel[] array of structure object
1172 static void fts5StructureAddLevel(int *pRc
, Fts5Structure
**ppStruct
){
1173 fts5StructureMakeWritable(pRc
, ppStruct
);
1174 assert( (ppStruct
!=0 && (*ppStruct
)!=0) || (*pRc
)!=SQLITE_OK
);
1175 if( *pRc
==SQLITE_OK
){
1176 Fts5Structure
*pStruct
= *ppStruct
;
1177 int nLevel
= pStruct
->nLevel
;
1178 sqlite3_int64 nByte
= (
1179 sizeof(Fts5Structure
) + /* Main structure */
1180 sizeof(Fts5StructureLevel
) * (nLevel
+1) /* aLevel[] array */
1183 pStruct
= sqlite3_realloc64(pStruct
, nByte
);
1185 memset(&pStruct
->aLevel
[nLevel
], 0, sizeof(Fts5StructureLevel
));
1187 *ppStruct
= pStruct
;
1189 *pRc
= SQLITE_NOMEM
;
1195 ** Extend level iLvl so that there is room for at least nExtra more
1198 static void fts5StructureExtendLevel(
1200 Fts5Structure
*pStruct
,
1205 if( *pRc
==SQLITE_OK
){
1206 Fts5StructureLevel
*pLvl
= &pStruct
->aLevel
[iLvl
];
1207 Fts5StructureSegment
*aNew
;
1208 sqlite3_int64 nByte
;
1210 nByte
= (pLvl
->nSeg
+ nExtra
) * sizeof(Fts5StructureSegment
);
1211 aNew
= sqlite3_realloc64(pLvl
->aSeg
, nByte
);
1214 memset(&aNew
[pLvl
->nSeg
], 0, sizeof(Fts5StructureSegment
) * nExtra
);
1216 int nMove
= pLvl
->nSeg
* sizeof(Fts5StructureSegment
);
1217 memmove(&aNew
[nExtra
], aNew
, nMove
);
1218 memset(aNew
, 0, sizeof(Fts5StructureSegment
) * nExtra
);
1222 *pRc
= SQLITE_NOMEM
;
1227 static Fts5Structure
*fts5StructureReadUncached(Fts5Index
*p
){
1228 Fts5Structure
*pRet
= 0;
1229 Fts5Config
*pConfig
= p
->pConfig
;
1230 int iCookie
; /* Configuration cookie */
1233 pData
= fts5DataRead(p
, FTS5_STRUCTURE_ROWID
);
1234 if( p
->rc
==SQLITE_OK
){
1235 /* TODO: Do we need this if the leaf-index is appended? Probably... */
1236 memset(&pData
->p
[pData
->nn
], 0, FTS5_DATA_PADDING
);
1237 p
->rc
= fts5StructureDecode(pData
->p
, pData
->nn
, &iCookie
, &pRet
);
1238 if( p
->rc
==SQLITE_OK
&& (pConfig
->pgsz
==0 || pConfig
->iCookie
!=iCookie
) ){
1239 p
->rc
= sqlite3Fts5ConfigLoad(pConfig
, iCookie
);
1241 fts5DataRelease(pData
);
1242 if( p
->rc
!=SQLITE_OK
){
1243 fts5StructureRelease(pRet
);
1251 static i64
fts5IndexDataVersion(Fts5Index
*p
){
1254 if( p
->rc
==SQLITE_OK
){
1255 if( p
->pDataVersion
==0 ){
1256 p
->rc
= fts5IndexPrepareStmt(p
, &p
->pDataVersion
,
1257 sqlite3_mprintf("PRAGMA %Q.data_version", p
->pConfig
->zDb
)
1259 if( p
->rc
) return 0;
1262 if( SQLITE_ROW
==sqlite3_step(p
->pDataVersion
) ){
1263 iVersion
= sqlite3_column_int64(p
->pDataVersion
, 0);
1265 p
->rc
= sqlite3_reset(p
->pDataVersion
);
1272 ** Read, deserialize and return the structure record.
1274 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
1275 ** are over-allocated as described for function fts5StructureDecode()
1278 ** If an error occurs, NULL is returned and an error code left in the
1279 ** Fts5Index handle. If an error has already occurred when this function
1280 ** is called, it is a no-op.
1282 static Fts5Structure
*fts5StructureRead(Fts5Index
*p
){
1284 if( p
->pStruct
==0 ){
1285 p
->iStructVersion
= fts5IndexDataVersion(p
);
1286 if( p
->rc
==SQLITE_OK
){
1287 p
->pStruct
= fts5StructureReadUncached(p
);
1293 Fts5Structure
*pTest
= fts5StructureReadUncached(p
);
1296 assert_nc( p
->pStruct
->nSegment
==pTest
->nSegment
);
1297 assert_nc( p
->pStruct
->nLevel
==pTest
->nLevel
);
1298 for(i
=0; i
<pTest
->nLevel
; i
++){
1299 assert_nc( p
->pStruct
->aLevel
[i
].nMerge
==pTest
->aLevel
[i
].nMerge
);
1300 assert_nc( p
->pStruct
->aLevel
[i
].nSeg
==pTest
->aLevel
[i
].nSeg
);
1301 for(j
=0; j
<pTest
->aLevel
[i
].nSeg
; j
++){
1302 Fts5StructureSegment
*p1
= &pTest
->aLevel
[i
].aSeg
[j
];
1303 Fts5StructureSegment
*p2
= &p
->pStruct
->aLevel
[i
].aSeg
[j
];
1304 assert_nc( p1
->iSegid
==p2
->iSegid
);
1305 assert_nc( p1
->pgnoFirst
==p2
->pgnoFirst
);
1306 assert_nc( p1
->pgnoLast
==p2
->pgnoLast
);
1309 fts5StructureRelease(pTest
);
1314 if( p
->rc
!=SQLITE_OK
) return 0;
1315 assert( p
->iStructVersion
!=0 );
1316 assert( p
->pStruct
!=0 );
1317 fts5StructureRef(p
->pStruct
);
1321 static void fts5StructureInvalidate(Fts5Index
*p
){
1323 fts5StructureRelease(p
->pStruct
);
1329 ** Return the total number of segments in index structure pStruct. This
1330 ** function is only ever used as part of assert() conditions.
1333 static int fts5StructureCountSegments(Fts5Structure
*pStruct
){
1334 int nSegment
= 0; /* Total number of segments */
1336 int iLvl
; /* Used to iterate through levels */
1337 for(iLvl
=0; iLvl
<pStruct
->nLevel
; iLvl
++){
1338 nSegment
+= pStruct
->aLevel
[iLvl
].nSeg
;
/*
** Append nBlob bytes read from pBlob to buffer pBuf without resizing it.
** The caller must already have reserved enough space; this is verified
** only by the assert(). pBuf must expose fields p, n and nSpace.
**
** Every macro argument is parenthesized so that expression arguments
** (e.g. "c ? a : b") are evaluated as a unit. Note that pBuf and nBlob
** are each evaluated more than once, so they must be free of side effects.
*/
#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) {          \
  assert( (pBuf)->nSpace>=((pBuf)->n+(nBlob)) );                \
  memcpy(&(pBuf)->p[(pBuf)->n], (pBlob), (nBlob));              \
  (pBuf)->n += (nBlob);                                         \
}
/*
** Append iVal to buffer pBuf as a varint, written by
** sqlite3Fts5PutVarint(), without resizing the buffer. The assert()
** checks afterwards that the write did not run past nSpace, so the caller
** must have reserved enough space beforehand. pBuf is evaluated more than
** once.
*/
#define fts5BufferSafeAppendVarint(pBuf, iVal) {                      \
  (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal));   \
  assert( (pBuf)->nSpace>=(pBuf)->n );                                \
}
1359 ** Serialize and store the "structure" record.
1361 ** If an error occurs, leave an error code in the Fts5Index object. If an
1362 ** error has already occurred, this function is a no-op.
1364 static void fts5StructureWrite(Fts5Index
*p
, Fts5Structure
*pStruct
){
1365 if( p
->rc
==SQLITE_OK
){
1366 Fts5Buffer buf
; /* Buffer to serialize record into */
1367 int iLvl
; /* Used to iterate through levels */
1368 int iCookie
; /* Cookie value to store */
1369 int nHdr
= (pStruct
->nOriginCntr
>0 ? (4+4+9+9+9) : (4+9+9));
1371 assert( pStruct
->nSegment
==fts5StructureCountSegments(pStruct
) );
1372 memset(&buf
, 0, sizeof(Fts5Buffer
));
1374 /* Append the current configuration cookie */
1375 iCookie
= p
->pConfig
->iCookie
;
1376 if( iCookie
<0 ) iCookie
= 0;
1378 if( 0==sqlite3Fts5BufferSize(&p
->rc
, &buf
, nHdr
) ){
1379 sqlite3Fts5Put32(buf
.p
, iCookie
);
1381 if( pStruct
->nOriginCntr
>0 ){
1382 fts5BufferSafeAppendBlob(&buf
, FTS5_STRUCTURE_V2
, 4);
1384 fts5BufferSafeAppendVarint(&buf
, pStruct
->nLevel
);
1385 fts5BufferSafeAppendVarint(&buf
, pStruct
->nSegment
);
1386 fts5BufferSafeAppendVarint(&buf
, (i64
)pStruct
->nWriteCounter
);
1389 for(iLvl
=0; iLvl
<pStruct
->nLevel
; iLvl
++){
1390 int iSeg
; /* Used to iterate through segments */
1391 Fts5StructureLevel
*pLvl
= &pStruct
->aLevel
[iLvl
];
1392 fts5BufferAppendVarint(&p
->rc
, &buf
, pLvl
->nMerge
);
1393 fts5BufferAppendVarint(&p
->rc
, &buf
, pLvl
->nSeg
);
1394 assert( pLvl
->nMerge
<=pLvl
->nSeg
);
1396 for(iSeg
=0; iSeg
<pLvl
->nSeg
; iSeg
++){
1397 Fts5StructureSegment
*pSeg
= &pLvl
->aSeg
[iSeg
];
1398 fts5BufferAppendVarint(&p
->rc
, &buf
, pSeg
->iSegid
);
1399 fts5BufferAppendVarint(&p
->rc
, &buf
, pSeg
->pgnoFirst
);
1400 fts5BufferAppendVarint(&p
->rc
, &buf
, pSeg
->pgnoLast
);
1401 if( pStruct
->nOriginCntr
>0 ){
1402 fts5BufferAppendVarint(&p
->rc
, &buf
, pSeg
->iOrigin1
);
1403 fts5BufferAppendVarint(&p
->rc
, &buf
, pSeg
->iOrigin2
);
1404 fts5BufferAppendVarint(&p
->rc
, &buf
, pSeg
->nPgTombstone
);
1405 fts5BufferAppendVarint(&p
->rc
, &buf
, pSeg
->nEntryTombstone
);
1406 fts5BufferAppendVarint(&p
->rc
, &buf
, pSeg
->nEntry
);
1411 fts5DataWrite(p
, FTS5_STRUCTURE_ROWID
, buf
.p
, buf
.n
);
1412 fts5BufferFree(&buf
);
1417 static void fts5DebugStructure(int*,Fts5Buffer
*,Fts5Structure
*);
1418 static void fts5PrintStructure(const char *zCaption
, Fts5Structure
*pStruct
){
1421 memset(&buf
, 0, sizeof(buf
));
1422 fts5DebugStructure(&rc
, &buf
, pStruct
);
1423 fprintf(stdout
, "%s: %s\n", zCaption
, buf
.p
);
1425 fts5BufferFree(&buf
);
1428 # define fts5PrintStructure(x,y)
1431 static int fts5SegmentSize(Fts5StructureSegment
*pSeg
){
1432 return 1 + pSeg
->pgnoLast
- pSeg
->pgnoFirst
;
1436 ** Return a copy of index structure pStruct. Except, promote as many
1437 ** segments as possible to level iPromote. If an OOM occurs, NULL is
1440 static void fts5StructurePromoteTo(
1444 Fts5Structure
*pStruct
1447 Fts5StructureLevel
*pOut
= &pStruct
->aLevel
[iPromote
];
1449 if( pOut
->nMerge
==0 ){
1450 for(il
=iPromote
+1; il
<pStruct
->nLevel
; il
++){
1451 Fts5StructureLevel
*pLvl
= &pStruct
->aLevel
[il
];
1452 if( pLvl
->nMerge
) return;
1453 for(is
=pLvl
->nSeg
-1; is
>=0; is
--){
1454 int sz
= fts5SegmentSize(&pLvl
->aSeg
[is
]);
1455 if( sz
>szPromote
) return;
1456 fts5StructureExtendLevel(&p
->rc
, pStruct
, iPromote
, 1, 1);
1458 memcpy(pOut
->aSeg
, &pLvl
->aSeg
[is
], sizeof(Fts5StructureSegment
));
1467 ** A new segment has just been written to level iLvl of index structure
1468 ** pStruct. This function determines if any segments should be promoted
1469 ** as a result. Segments are promoted in two scenarios:
1471 ** a) If the segment just written is smaller than one or more segments
1472 ** within the previous populated level, it is promoted to the previous
1475 ** b) If the segment just written is larger than the newest segment on
1476 ** the next populated level, then that segment, and any other adjacent
1477 ** segments that are also smaller than the one just written, are
1480 ** If one or more segments are promoted, the structure object is updated
1483 static void fts5StructurePromote(
1484 Fts5Index
*p
, /* FTS5 backend object */
1485 int iLvl
, /* Index level just updated */
1486 Fts5Structure
*pStruct
/* Index structure */
1488 if( p
->rc
==SQLITE_OK
){
1491 int szPromote
= 0; /* Promote anything this size or smaller */
1492 Fts5StructureSegment
*pSeg
; /* Segment just written */
1493 int szSeg
; /* Size of segment just written */
1494 int nSeg
= pStruct
->aLevel
[iLvl
].nSeg
;
1496 if( nSeg
==0 ) return;
1497 pSeg
= &pStruct
->aLevel
[iLvl
].aSeg
[pStruct
->aLevel
[iLvl
].nSeg
-1];
1498 szSeg
= (1 + pSeg
->pgnoLast
- pSeg
->pgnoFirst
);
1500 /* Check for condition (a) */
1501 for(iTst
=iLvl
-1; iTst
>=0 && pStruct
->aLevel
[iTst
].nSeg
==0; iTst
--);
1505 Fts5StructureLevel
*pTst
= &pStruct
->aLevel
[iTst
];
1506 assert( pTst
->nMerge
==0 );
1507 for(i
=0; i
<pTst
->nSeg
; i
++){
1508 int sz
= pTst
->aSeg
[i
].pgnoLast
- pTst
->aSeg
[i
].pgnoFirst
+ 1;
1509 if( sz
>szMax
) szMax
= sz
;
1512 /* Condition (a) is true. Promote the newest segment on level
1513 ** iLvl to level iTst. */
1519 /* If condition (a) is not met, assume (b) is true. StructurePromoteTo()
1520 ** is a no-op if it is not. */
1525 fts5StructurePromoteTo(p
, iPromote
, szPromote
, pStruct
);
1531 ** Advance the iterator passed as the only argument. If the end of the
1532 ** doclist-index page is reached, return non-zero.
1534 static int fts5DlidxLvlNext(Fts5DlidxLvl
*pLvl
){
1535 Fts5Data
*pData
= pLvl
->pData
;
1537 if( pLvl
->iOff
==0 ){
1538 assert( pLvl
->bEof
==0 );
1540 pLvl
->iOff
+= fts5GetVarint32(&pData
->p
[1], pLvl
->iLeafPgno
);
1541 pLvl
->iOff
+= fts5GetVarint(&pData
->p
[pLvl
->iOff
], (u64
*)&pLvl
->iRowid
);
1542 pLvl
->iFirstOff
= pLvl
->iOff
;
1545 for(iOff
=pLvl
->iOff
; iOff
<pData
->nn
; iOff
++){
1546 if( pData
->p
[iOff
] ) break;
1549 if( iOff
<pData
->nn
){
1551 pLvl
->iLeafPgno
+= (iOff
- pLvl
->iOff
) + 1;
1552 iOff
+= fts5GetVarint(&pData
->p
[iOff
], &iVal
);
1553 pLvl
->iRowid
+= iVal
;
1564 ** Advance the iterator passed as the only argument.
1566 static int fts5DlidxIterNextR(Fts5Index
*p
, Fts5DlidxIter
*pIter
, int iLvl
){
1567 Fts5DlidxLvl
*pLvl
= &pIter
->aLvl
[iLvl
];
1569 assert( iLvl
<pIter
->nLvl
);
1570 if( fts5DlidxLvlNext(pLvl
) ){
1571 if( (iLvl
+1) < pIter
->nLvl
){
1572 fts5DlidxIterNextR(p
, pIter
, iLvl
+1);
1573 if( pLvl
[1].bEof
==0 ){
1574 fts5DataRelease(pLvl
->pData
);
1575 memset(pLvl
, 0, sizeof(Fts5DlidxLvl
));
1576 pLvl
->pData
= fts5DataRead(p
,
1577 FTS5_DLIDX_ROWID(pIter
->iSegid
, iLvl
, pLvl
[1].iLeafPgno
)
1579 if( pLvl
->pData
) fts5DlidxLvlNext(pLvl
);
1584 return pIter
->aLvl
[0].bEof
;
1586 static int fts5DlidxIterNext(Fts5Index
*p
, Fts5DlidxIter
*pIter
){
1587 return fts5DlidxIterNextR(p
, pIter
, 0);
1591 ** The iterator passed as the first argument has the following fields set
1592 ** as follows. This function sets up the rest of the iterator so that it
1593 ** points to the first rowid in the doclist-index.
1596 ** pointer to doclist-index record,
1598 ** When this function is called pIter->iLeafPgno is the page number the
1599 ** doclist is associated with (the one featuring the term).
1601 static int fts5DlidxIterFirst(Fts5DlidxIter
*pIter
){
1603 for(i
=0; i
<pIter
->nLvl
; i
++){
1604 fts5DlidxLvlNext(&pIter
->aLvl
[i
]);
1606 return pIter
->aLvl
[0].bEof
;
1610 static int fts5DlidxIterEof(Fts5Index
*p
, Fts5DlidxIter
*pIter
){
1611 return p
->rc
!=SQLITE_OK
|| pIter
->aLvl
[0].bEof
;
1614 static void fts5DlidxIterLast(Fts5Index
*p
, Fts5DlidxIter
*pIter
){
1617 /* Advance each level to the last entry on the last page */
1618 for(i
=pIter
->nLvl
-1; p
->rc
==SQLITE_OK
&& i
>=0; i
--){
1619 Fts5DlidxLvl
*pLvl
= &pIter
->aLvl
[i
];
1620 while( fts5DlidxLvlNext(pLvl
)==0 );
1624 Fts5DlidxLvl
*pChild
= &pLvl
[-1];
1625 fts5DataRelease(pChild
->pData
);
1626 memset(pChild
, 0, sizeof(Fts5DlidxLvl
));
1627 pChild
->pData
= fts5DataRead(p
,
1628 FTS5_DLIDX_ROWID(pIter
->iSegid
, i
-1, pLvl
->iLeafPgno
)
1635 ** Move the iterator passed as the only argument to the previous entry.
1637 static int fts5DlidxLvlPrev(Fts5DlidxLvl
*pLvl
){
1638 int iOff
= pLvl
->iOff
;
1640 assert( pLvl
->bEof
==0 );
1641 if( iOff
<=pLvl
->iFirstOff
){
1644 u8
*a
= pLvl
->pData
->p
;
1647 fts5DlidxLvlNext(pLvl
);
1650 int ii
= pLvl
->iOff
;
1657 ii
+= sqlite3Fts5GetVarint(&a
[ii
], &delta
);
1659 if( ii
>=iOff
) break;
1660 pLvl
->iLeafPgno
+= nZero
+1;
1661 pLvl
->iRowid
+= delta
;
1669 static int fts5DlidxIterPrevR(Fts5Index
*p
, Fts5DlidxIter
*pIter
, int iLvl
){
1670 Fts5DlidxLvl
*pLvl
= &pIter
->aLvl
[iLvl
];
1672 assert( iLvl
<pIter
->nLvl
);
1673 if( fts5DlidxLvlPrev(pLvl
) ){
1674 if( (iLvl
+1) < pIter
->nLvl
){
1675 fts5DlidxIterPrevR(p
, pIter
, iLvl
+1);
1676 if( pLvl
[1].bEof
==0 ){
1677 fts5DataRelease(pLvl
->pData
);
1678 memset(pLvl
, 0, sizeof(Fts5DlidxLvl
));
1679 pLvl
->pData
= fts5DataRead(p
,
1680 FTS5_DLIDX_ROWID(pIter
->iSegid
, iLvl
, pLvl
[1].iLeafPgno
)
1683 while( fts5DlidxLvlNext(pLvl
)==0 );
1690 return pIter
->aLvl
[0].bEof
;
1692 static int fts5DlidxIterPrev(Fts5Index
*p
, Fts5DlidxIter
*pIter
){
1693 return fts5DlidxIterPrevR(p
, pIter
, 0);
1697 ** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
1699 static void fts5DlidxIterFree(Fts5DlidxIter
*pIter
){
1702 for(i
=0; i
<pIter
->nLvl
; i
++){
1703 fts5DataRelease(pIter
->aLvl
[i
].pData
);
1705 sqlite3_free(pIter
);
1709 static Fts5DlidxIter
*fts5DlidxIterInit(
1710 Fts5Index
*p
, /* Fts5 Backend to iterate within */
1711 int bRev
, /* True for ORDER BY ASC */
1712 int iSegid
, /* Segment id */
1713 int iLeafPg
/* Leaf page number to load dlidx for */
1715 Fts5DlidxIter
*pIter
= 0;
1719 for(i
=0; p
->rc
==SQLITE_OK
&& bDone
==0; i
++){
1720 sqlite3_int64 nByte
= sizeof(Fts5DlidxIter
) + i
* sizeof(Fts5DlidxLvl
);
1721 Fts5DlidxIter
*pNew
;
1723 pNew
= (Fts5DlidxIter
*)sqlite3_realloc64(pIter
, nByte
);
1725 p
->rc
= SQLITE_NOMEM
;
1727 i64 iRowid
= FTS5_DLIDX_ROWID(iSegid
, i
, iLeafPg
);
1728 Fts5DlidxLvl
*pLvl
= &pNew
->aLvl
[i
];
1730 memset(pLvl
, 0, sizeof(Fts5DlidxLvl
));
1731 pLvl
->pData
= fts5DataRead(p
, iRowid
);
1732 if( pLvl
->pData
&& (pLvl
->pData
->p
[0] & 0x0001)==0 ){
1739 if( p
->rc
==SQLITE_OK
){
1740 pIter
->iSegid
= iSegid
;
1742 fts5DlidxIterFirst(pIter
);
1744 fts5DlidxIterLast(p
, pIter
);
1748 if( p
->rc
!=SQLITE_OK
){
1749 fts5DlidxIterFree(pIter
);
1756 static i64
fts5DlidxIterRowid(Fts5DlidxIter
*pIter
){
1757 return pIter
->aLvl
[0].iRowid
;
1759 static int fts5DlidxIterPgno(Fts5DlidxIter
*pIter
){
1760 return pIter
->aLvl
[0].iLeafPgno
;
1764 ** Load the next leaf page into the segment iterator.
1766 static void fts5SegIterNextPage(
1767 Fts5Index
*p
, /* FTS5 backend object */
1768 Fts5SegIter
*pIter
/* Iterator to advance to next page */
1771 Fts5StructureSegment
*pSeg
= pIter
->pSeg
;
1772 fts5DataRelease(pIter
->pLeaf
);
1774 if( pIter
->pNextLeaf
){
1775 pIter
->pLeaf
= pIter
->pNextLeaf
;
1776 pIter
->pNextLeaf
= 0;
1777 }else if( pIter
->iLeafPgno
<=pSeg
->pgnoLast
){
1778 pIter
->pLeaf
= fts5LeafRead(p
,
1779 FTS5_SEGMENT_ROWID(pSeg
->iSegid
, pIter
->iLeafPgno
)
1784 pLeaf
= pIter
->pLeaf
;
1787 pIter
->iPgidxOff
= pLeaf
->szLeaf
;
1788 if( fts5LeafIsTermless(pLeaf
) ){
1789 pIter
->iEndofDoclist
= pLeaf
->nn
+1;
1791 pIter
->iPgidxOff
+= fts5GetVarint32(&pLeaf
->p
[pIter
->iPgidxOff
],
1792 pIter
->iEndofDoclist
1799 ** Argument p points to a buffer containing a varint to be interpreted as a
1800 ** position list size field. Read the varint and return the number of bytes
1801 ** read. Before returning, set *pnSz to the number of bytes in the position
1802 ** list, and *pbDel to true if the delete flag is set, or false otherwise.
1804 static int fts5GetPoslistSize(const u8
*p
, int *pnSz
, int *pbDel
){
1807 fts5FastGetVarint32(p
, n
, nSz
);
1808 assert_nc( nSz
>=0 );
1810 *pbDel
= nSz
& 0x0001;
1815 ** Fts5SegIter.iLeafOffset currently points to the first byte of a
1816 ** position-list size field. Read the value of the field and store it
1817 ** in the following variables:
1822 ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the
1823 ** position list content (if any).
1825 static void fts5SegIterLoadNPos(Fts5Index
*p
, Fts5SegIter
*pIter
){
1826 if( p
->rc
==SQLITE_OK
){
1827 int iOff
= pIter
->iLeafOffset
; /* Offset to read at */
1828 ASSERT_SZLEAF_OK(pIter
->pLeaf
);
1829 if( p
->pConfig
->eDetail
==FTS5_DETAIL_NONE
){
1830 int iEod
= MIN(pIter
->iEndofDoclist
, pIter
->pLeaf
->szLeaf
);
1833 if( iOff
<iEod
&& pIter
->pLeaf
->p
[iOff
]==0 ){
1836 if( iOff
<iEod
&& pIter
->pLeaf
->p
[iOff
]==0 ){
1845 fts5FastGetVarint32(pIter
->pLeaf
->p
, iOff
, nSz
);
1846 pIter
->bDel
= (nSz
& 0x0001);
1847 pIter
->nPos
= nSz
>>1;
1848 assert_nc( pIter
->nPos
>=0 );
1850 pIter
->iLeafOffset
= iOff
;
1854 static void fts5SegIterLoadRowid(Fts5Index
*p
, Fts5SegIter
*pIter
){
1855 u8
*a
= pIter
->pLeaf
->p
; /* Buffer to read data from */
1856 i64 iOff
= pIter
->iLeafOffset
;
1858 ASSERT_SZLEAF_OK(pIter
->pLeaf
);
1859 while( iOff
>=pIter
->pLeaf
->szLeaf
){
1860 fts5SegIterNextPage(p
, pIter
);
1861 if( pIter
->pLeaf
==0 ){
1862 if( p
->rc
==SQLITE_OK
) p
->rc
= FTS5_CORRUPT
;
1866 a
= pIter
->pLeaf
->p
;
1868 iOff
+= sqlite3Fts5GetVarint(&a
[iOff
], (u64
*)&pIter
->iRowid
);
1869 pIter
->iLeafOffset
= iOff
;
1873 ** Fts5SegIter.iLeafOffset currently points to the first byte of the
1874 ** "nSuffix" field of a term. Function parameter nKeep contains the value
1875 ** of the "nPrefix" field (if there was one - it is passed 0 if this is
1876 ** the first term in the segment).
1878 ** This function populates:
1881 ** Fts5SegIter.rowid
1883 ** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of
1884 ** the first position list. The position list belonging to document
1885 ** (Fts5SegIter.iRowid).
1887 static void fts5SegIterLoadTerm(Fts5Index
*p
, Fts5SegIter
*pIter
, int nKeep
){
1888 u8
*a
= pIter
->pLeaf
->p
; /* Buffer to read data from */
1889 i64 iOff
= pIter
->iLeafOffset
; /* Offset to read at */
1890 int nNew
; /* Bytes of new data */
1892 iOff
+= fts5GetVarint32(&a
[iOff
], nNew
);
1893 if( iOff
+nNew
>pIter
->pLeaf
->szLeaf
|| nKeep
>pIter
->term
.n
|| nNew
==0 ){
1894 p
->rc
= FTS5_CORRUPT
;
1897 pIter
->term
.n
= nKeep
;
1898 fts5BufferAppendBlob(&p
->rc
, &pIter
->term
, nNew
, &a
[iOff
]);
1899 assert( pIter
->term
.n
<=pIter
->term
.nSpace
);
1901 pIter
->iTermLeafOffset
= iOff
;
1902 pIter
->iTermLeafPgno
= pIter
->iLeafPgno
;
1903 pIter
->iLeafOffset
= iOff
;
1905 if( pIter
->iPgidxOff
>=pIter
->pLeaf
->nn
){
1906 pIter
->iEndofDoclist
= pIter
->pLeaf
->nn
+1;
1909 pIter
->iPgidxOff
+= fts5GetVarint32(&a
[pIter
->iPgidxOff
], nExtra
);
1910 pIter
->iEndofDoclist
+= nExtra
;
1913 fts5SegIterLoadRowid(p
, pIter
);
1916 static void fts5SegIterNext(Fts5Index
*, Fts5SegIter
*, int*);
1917 static void fts5SegIterNext_Reverse(Fts5Index
*, Fts5SegIter
*, int*);
1918 static void fts5SegIterNext_None(Fts5Index
*, Fts5SegIter
*, int*);
1920 static void fts5SegIterSetNext(Fts5Index
*p
, Fts5SegIter
*pIter
){
1921 if( pIter
->flags
& FTS5_SEGITER_REVERSE
){
1922 pIter
->xNext
= fts5SegIterNext_Reverse
;
1923 }else if( p
->pConfig
->eDetail
==FTS5_DETAIL_NONE
){
1924 pIter
->xNext
= fts5SegIterNext_None
;
1926 pIter
->xNext
= fts5SegIterNext
;
1931 ** Allocate a tombstone hash page array object (pIter->pTombArray) for
1932 ** the iterator passed as the second argument. If an OOM error occurs,
1933 ** leave an error in the Fts5Index object.
1935 static void fts5SegIterAllocTombstone(Fts5Index
*p
, Fts5SegIter
*pIter
){
1936 const int nTomb
= pIter
->pSeg
->nPgTombstone
;
1938 int nByte
= nTomb
* sizeof(Fts5Data
*) + sizeof(Fts5TombstoneArray
);
1939 Fts5TombstoneArray
*pNew
;
1940 pNew
= (Fts5TombstoneArray
*)sqlite3Fts5MallocZero(&p
->rc
, nByte
);
1942 pNew
->nTombstone
= nTomb
;
1944 pIter
->pTombArray
= pNew
;
1950 ** Initialize the iterator object pIter to iterate through the entries in
1951 ** segment pSeg. The iterator is left pointing to the first entry when
1952 ** this function returns.
1954 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
1955 ** an error has already occurred when this function is called, it is a no-op.
1957 static void fts5SegIterInit(
1958 Fts5Index
*p
, /* FTS index object */
1959 Fts5StructureSegment
*pSeg
, /* Description of segment */
1960 Fts5SegIter
*pIter
/* Object to populate */
1962 if( pSeg
->pgnoFirst
==0 ){
1963 /* This happens if the segment is being used as an input to an incremental
1964 ** merge and all data has already been "trimmed". See function
1965 ** fts5TrimSegments() for details. In this case leave the iterator empty.
1966 ** The caller will see the (pIter->pLeaf==0) and assume the iterator is
1967 ** at EOF already. */
1968 assert( pIter
->pLeaf
==0 );
1972 if( p
->rc
==SQLITE_OK
){
1973 memset(pIter
, 0, sizeof(*pIter
));
1974 fts5SegIterSetNext(p
, pIter
);
1976 pIter
->iLeafPgno
= pSeg
->pgnoFirst
-1;
1978 fts5SegIterNextPage(p
, pIter
);
1979 }while( p
->rc
==SQLITE_OK
&& pIter
->pLeaf
&& pIter
->pLeaf
->nn
==4 );
1982 if( p
->rc
==SQLITE_OK
&& pIter
->pLeaf
){
1983 pIter
->iLeafOffset
= 4;
1984 assert( pIter
->pLeaf
!=0 );
1985 assert_nc( pIter
->pLeaf
->nn
>4 );
1986 assert_nc( fts5LeafFirstTermOff(pIter
->pLeaf
)==4 );
1987 pIter
->iPgidxOff
= pIter
->pLeaf
->szLeaf
+1;
1988 fts5SegIterLoadTerm(p
, pIter
, 0);
1989 fts5SegIterLoadNPos(p
, pIter
);
1990 fts5SegIterAllocTombstone(p
, pIter
);
1995 ** This function is only ever called on iterators created by calls to
1996 ** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set.
1998 ** The iterator is in an unusual state when this function is called: the
1999 ** Fts5SegIter.iLeafOffset variable is set to the offset of the start of
2000 ** the position-list size field for the first relevant rowid on the page.
2001 ** Fts5SegIter.rowid is set, but nPos and bDel are not.
2003 ** This function advances the iterator so that it points to the last
2004 ** relevant rowid on the page and, if necessary, initializes the
2005 ** aRowidOffset[] and iRowidOffset variables. At this point the iterator
2006 ** is in its regular state - Fts5SegIter.iLeafOffset points to the first
2007 ** byte of the position list content associated with said rowid.
2009 static void fts5SegIterReverseInitPage(Fts5Index
*p
, Fts5SegIter
*pIter
){
2010 int eDetail
= p
->pConfig
->eDetail
;
2011 int n
= pIter
->pLeaf
->szLeaf
;
2012 int i
= pIter
->iLeafOffset
;
2013 u8
*a
= pIter
->pLeaf
->p
;
2014 int iRowidOffset
= 0;
2016 if( n
>pIter
->iEndofDoclist
){
2017 n
= pIter
->iEndofDoclist
;
2020 ASSERT_SZLEAF_OK(pIter
->pLeaf
);
2024 if( eDetail
==FTS5_DETAIL_NONE
){
2026 if( i
<n
&& a
[i
]==0 ){
2028 if( i
<n
&& a
[i
]==0 ) i
++;
2033 i
+= fts5GetPoslistSize(&a
[i
], &nPos
, &bDummy
);
2037 i
+= fts5GetVarint(&a
[i
], &iDelta
);
2038 pIter
->iRowid
+= iDelta
;
2040 /* If necessary, grow the pIter->aRowidOffset[] array. */
2041 if( iRowidOffset
>=pIter
->nRowidOffset
){
2042 int nNew
= pIter
->nRowidOffset
+ 8;
2043 int *aNew
= (int*)sqlite3_realloc64(pIter
->aRowidOffset
,nNew
*sizeof(int));
2045 p
->rc
= SQLITE_NOMEM
;
2048 pIter
->aRowidOffset
= aNew
;
2049 pIter
->nRowidOffset
= nNew
;
2052 pIter
->aRowidOffset
[iRowidOffset
++] = pIter
->iLeafOffset
;
2053 pIter
->iLeafOffset
= i
;
2055 pIter
->iRowidOffset
= iRowidOffset
;
2056 fts5SegIterLoadNPos(p
, pIter
);
2062 static void fts5SegIterReverseNewPage(Fts5Index
*p
, Fts5SegIter
*pIter
){
2063 assert( pIter
->flags
& FTS5_SEGITER_REVERSE
);
2064 assert( pIter
->flags
& FTS5_SEGITER_ONETERM
);
2066 fts5DataRelease(pIter
->pLeaf
);
2068 while( p
->rc
==SQLITE_OK
&& pIter
->iLeafPgno
>pIter
->iTermLeafPgno
){
2071 pNew
= fts5DataRead(p
, FTS5_SEGMENT_ROWID(
2072 pIter
->pSeg
->iSegid
, pIter
->iLeafPgno
2075 /* iTermLeafOffset may be equal to szLeaf if the term is the last
2076 ** thing on the page - i.e. the first rowid is on the following page.
2077 ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */
2078 if( pIter
->iLeafPgno
==pIter
->iTermLeafPgno
){
2079 assert( pIter
->pLeaf
==0 );
2080 if( pIter
->iTermLeafOffset
<pNew
->szLeaf
){
2081 pIter
->pLeaf
= pNew
;
2082 pIter
->iLeafOffset
= pIter
->iTermLeafOffset
;
2086 iRowidOff
= fts5LeafFirstRowidOff(pNew
);
2088 if( iRowidOff
>=pNew
->szLeaf
){
2089 p
->rc
= FTS5_CORRUPT
;
2091 pIter
->pLeaf
= pNew
;
2092 pIter
->iLeafOffset
= iRowidOff
;
2098 u8
*a
= &pIter
->pLeaf
->p
[pIter
->iLeafOffset
];
2099 pIter
->iLeafOffset
+= fts5GetVarint(a
, (u64
*)&pIter
->iRowid
);
2102 fts5DataRelease(pNew
);
2108 pIter
->iEndofDoclist
= pIter
->pLeaf
->nn
+1;
2109 fts5SegIterReverseInitPage(p
, pIter
);
2114 ** Return true if the iterator passed as the second argument currently
2115 ** points to a delete marker. A delete marker is an entry with a 0 byte
2118 static int fts5MultiIterIsEmpty(Fts5Index
*p
, Fts5Iter
*pIter
){
2119 Fts5SegIter
*pSeg
= &pIter
->aSeg
[pIter
->aFirst
[1].iFirst
];
2120 return (p
->rc
==SQLITE_OK
&& pSeg
->pLeaf
&& pSeg
->nPos
==0);
2124 ** Advance iterator pIter to the next entry.
2126 ** This version of fts5SegIterNext() is only used by reverse iterators.
2128 static void fts5SegIterNext_Reverse(
2129 Fts5Index
*p
, /* FTS5 backend object */
2130 Fts5SegIter
*pIter
, /* Iterator to advance */
2131 int *pbUnused
/* Unused */
2133 assert( pIter
->flags
& FTS5_SEGITER_REVERSE
);
2134 assert( pIter
->pNextLeaf
==0 );
2135 UNUSED_PARAM(pbUnused
);
2137 if( pIter
->iRowidOffset
>0 ){
2138 u8
*a
= pIter
->pLeaf
->p
;
2142 pIter
->iRowidOffset
--;
2143 pIter
->iLeafOffset
= pIter
->aRowidOffset
[pIter
->iRowidOffset
];
2144 fts5SegIterLoadNPos(p
, pIter
);
2145 iOff
= pIter
->iLeafOffset
;
2146 if( p
->pConfig
->eDetail
!=FTS5_DETAIL_NONE
){
2147 iOff
+= pIter
->nPos
;
2149 fts5GetVarint(&a
[iOff
], &iDelta
);
2150 pIter
->iRowid
-= iDelta
;
2152 fts5SegIterReverseNewPage(p
, pIter
);
2157 ** Advance iterator pIter to the next entry.
2159 ** This version of fts5SegIterNext() is only used if detail=none and the
2160 ** iterator is not a reverse direction iterator.
2162 static void fts5SegIterNext_None(
2163 Fts5Index
*p
, /* FTS5 backend object */
2164 Fts5SegIter
*pIter
, /* Iterator to advance */
2165 int *pbNewTerm
/* OUT: Set for new term */
2169 assert( p
->rc
==SQLITE_OK
);
2170 assert( (pIter
->flags
& FTS5_SEGITER_REVERSE
)==0 );
2171 assert( p
->pConfig
->eDetail
==FTS5_DETAIL_NONE
);
2173 ASSERT_SZLEAF_OK(pIter
->pLeaf
);
2174 iOff
= pIter
->iLeafOffset
;
2176 /* Next entry is on the next page */
2177 while( pIter
->pSeg
&& iOff
>=pIter
->pLeaf
->szLeaf
){
2178 fts5SegIterNextPage(p
, pIter
);
2179 if( p
->rc
|| pIter
->pLeaf
==0 ) return;
2184 if( iOff
<pIter
->iEndofDoclist
){
2185 /* Next entry is on the current page */
2187 iOff
+= sqlite3Fts5GetVarint(&pIter
->pLeaf
->p
[iOff
], (u64
*)&iDelta
);
2188 pIter
->iLeafOffset
= iOff
;
2189 pIter
->iRowid
+= iDelta
;
2190 }else if( (pIter
->flags
& FTS5_SEGITER_ONETERM
)==0 ){
2193 if( iOff
!=fts5LeafFirstTermOff(pIter
->pLeaf
) ){
2194 iOff
+= fts5GetVarint32(&pIter
->pLeaf
->p
[iOff
], nKeep
);
2196 pIter
->iLeafOffset
= iOff
;
2197 fts5SegIterLoadTerm(p
, pIter
, nKeep
);
2199 const u8
*pList
= 0;
2200 const char *zTerm
= 0;
2203 sqlite3Fts5HashScanNext(p
->pHash
);
2204 sqlite3Fts5HashScanEntry(p
->pHash
, &zTerm
, &nTerm
, &pList
, &nList
);
2205 if( pList
==0 ) goto next_none_eof
;
2206 pIter
->pLeaf
->p
= (u8
*)pList
;
2207 pIter
->pLeaf
->nn
= nList
;
2208 pIter
->pLeaf
->szLeaf
= nList
;
2209 pIter
->iEndofDoclist
= nList
;
2210 sqlite3Fts5BufferSet(&p
->rc
,&pIter
->term
, nTerm
, (u8
*)zTerm
);
2211 pIter
->iLeafOffset
= fts5GetVarint(pList
, (u64
*)&pIter
->iRowid
);
2214 if( pbNewTerm
) *pbNewTerm
= 1;
2219 fts5SegIterLoadNPos(p
, pIter
);
2223 fts5DataRelease(pIter
->pLeaf
);
2229 ** Advance iterator pIter to the next entry.
2231 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. It
2232 ** is not considered an error if the iterator reaches EOF. If an error has
2233 ** already occurred when this function is called, it is a no-op.
2235 static void fts5SegIterNext(
2236 Fts5Index
*p
, /* FTS5 backend object */
2237 Fts5SegIter
*pIter
, /* Iterator to advance */
2238 int *pbNewTerm
/* OUT: Set for new term */
2240 Fts5Data
*pLeaf
= pIter
->pLeaf
;
2247 assert( pbNewTerm
==0 || *pbNewTerm
==0 );
2248 assert( p
->pConfig
->eDetail
!=FTS5_DETAIL_NONE
);
2250 /* Search for the end of the position list within the current page. */
2254 ASSERT_SZLEAF_OK(pLeaf
);
2255 iOff
= pIter
->iLeafOffset
+ pIter
->nPos
;
2258 /* The next entry is on the current page. */
2259 assert_nc( iOff
<=pIter
->iEndofDoclist
);
2260 if( iOff
>=pIter
->iEndofDoclist
){
2262 if( iOff
!=fts5LeafFirstTermOff(pLeaf
) ){
2263 iOff
+= fts5GetVarint32(&a
[iOff
], nKeep
);
2267 iOff
+= sqlite3Fts5GetVarint(&a
[iOff
], &iDelta
);
2268 pIter
->iRowid
+= iDelta
;
2269 assert_nc( iDelta
>0 );
2271 pIter
->iLeafOffset
= iOff
;
2273 }else if( pIter
->pSeg
==0 ){
2274 const u8
*pList
= 0;
2275 const char *zTerm
= 0;
2278 assert( (pIter
->flags
& FTS5_SEGITER_ONETERM
) || pbNewTerm
);
2279 if( 0==(pIter
->flags
& FTS5_SEGITER_ONETERM
) ){
2280 sqlite3Fts5HashScanNext(p
->pHash
);
2281 sqlite3Fts5HashScanEntry(p
->pHash
, &zTerm
, &nTerm
, &pList
, &nList
);
2284 fts5DataRelease(pIter
->pLeaf
);
2287 pIter
->pLeaf
->p
= (u8
*)pList
;
2288 pIter
->pLeaf
->nn
= nList
;
2289 pIter
->pLeaf
->szLeaf
= nList
;
2290 pIter
->iEndofDoclist
= nList
+1;
2291 sqlite3Fts5BufferSet(&p
->rc
, &pIter
->term
, nTerm
, (u8
*)zTerm
);
2292 pIter
->iLeafOffset
= fts5GetVarint(pList
, (u64
*)&pIter
->iRowid
);
2297 /* Next entry is not on the current page */
2299 fts5SegIterNextPage(p
, pIter
);
2300 pLeaf
= pIter
->pLeaf
;
2301 if( pLeaf
==0 ) break;
2302 ASSERT_SZLEAF_OK(pLeaf
);
2303 if( (iOff
= fts5LeafFirstRowidOff(pLeaf
)) && iOff
<pLeaf
->szLeaf
){
2304 iOff
+= sqlite3Fts5GetVarint(&pLeaf
->p
[iOff
], (u64
*)&pIter
->iRowid
);
2305 pIter
->iLeafOffset
= iOff
;
2307 if( pLeaf
->nn
>pLeaf
->szLeaf
){
2308 pIter
->iPgidxOff
= pLeaf
->szLeaf
+ fts5GetVarint32(
2309 &pLeaf
->p
[pLeaf
->szLeaf
], pIter
->iEndofDoclist
2313 else if( pLeaf
->nn
>pLeaf
->szLeaf
){
2314 pIter
->iPgidxOff
= pLeaf
->szLeaf
+ fts5GetVarint32(
2315 &pLeaf
->p
[pLeaf
->szLeaf
], iOff
2317 pIter
->iLeafOffset
= iOff
;
2318 pIter
->iEndofDoclist
= iOff
;
2321 assert_nc( iOff
<pLeaf
->szLeaf
);
2322 if( iOff
>pLeaf
->szLeaf
){
2323 p
->rc
= FTS5_CORRUPT
;
2329 /* Check if the iterator is now at EOF. If so, return early. */
2332 if( pIter
->flags
& FTS5_SEGITER_ONETERM
){
2333 fts5DataRelease(pIter
->pLeaf
);
2336 fts5SegIterLoadTerm(p
, pIter
, nKeep
);
2337 fts5SegIterLoadNPos(p
, pIter
);
2338 if( pbNewTerm
) *pbNewTerm
= 1;
2341 /* The following could be done by calling fts5SegIterLoadNPos(). But
2342 ** this block is particularly performance critical, so equivalent
2343 ** code is inlined. */
2345 assert_nc( pIter
->iLeafOffset
<=pIter
->pLeaf
->nn
);
2346 fts5FastGetVarint32(pIter
->pLeaf
->p
, pIter
->iLeafOffset
, nSz
);
2347 pIter
->bDel
= (nSz
& 0x0001);
2348 pIter
->nPos
= nSz
>>1;
2349 assert_nc( pIter
->nPos
>=0 );
2354 #define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; }
2356 #define fts5IndexSkipVarint(a, iOff) { \
2357 int iEnd = iOff+9; \
2358 while( (a[iOff++] & 0x80) && iOff<iEnd ); \
2362 ** Iterator pIter currently points to the first rowid in a doclist. This
2363 ** function sets the iterator up so that it iterates in reverse order through
2366 static void fts5SegIterReverse(Fts5Index
*p
, Fts5SegIter
*pIter
){
2367 Fts5DlidxIter
*pDlidx
= pIter
->pDlidx
;
2368 Fts5Data
*pLast
= 0;
2371 if( pDlidx
&& p
->pConfig
->iVersion
==FTS5_CURRENT_VERSION
){
2372 int iSegid
= pIter
->pSeg
->iSegid
;
2373 pgnoLast
= fts5DlidxIterPgno(pDlidx
);
2374 pLast
= fts5LeafRead(p
, FTS5_SEGMENT_ROWID(iSegid
, pgnoLast
));
2376 Fts5Data
*pLeaf
= pIter
->pLeaf
; /* Current leaf data */
2378 /* Currently, Fts5SegIter.iLeafOffset points to the first byte of
2379 ** position-list content for the current rowid. Back it up so that it
2380 ** points to the start of the position-list size field. */
2382 if( pIter
->iTermLeafPgno
==pIter
->iLeafPgno
){
2383 iPoslist
= pIter
->iTermLeafOffset
;
2387 fts5IndexSkipVarint(pLeaf
->p
, iPoslist
);
2388 pIter
->iLeafOffset
= iPoslist
;
2390 /* If this condition is true then the largest rowid for the current
2391 ** term may not be stored on the current page. So search forward to
2392 ** see where said rowid really is. */
2393 if( pIter
->iEndofDoclist
>=pLeaf
->szLeaf
){
2395 Fts5StructureSegment
*pSeg
= pIter
->pSeg
;
2397 /* The last rowid in the doclist may not be on the current page. Search
2398 ** forward to find the page containing the last rowid. */
2399 for(pgno
=pIter
->iLeafPgno
+1; !p
->rc
&& pgno
<=pSeg
->pgnoLast
; pgno
++){
2400 i64 iAbs
= FTS5_SEGMENT_ROWID(pSeg
->iSegid
, pgno
);
2401 Fts5Data
*pNew
= fts5LeafRead(p
, iAbs
);
2403 int iRowid
, bTermless
;
2404 iRowid
= fts5LeafFirstRowidOff(pNew
);
2405 bTermless
= fts5LeafIsTermless(pNew
);
2407 SWAPVAL(Fts5Data
*, pNew
, pLast
);
2410 fts5DataRelease(pNew
);
2411 if( bTermless
==0 ) break;
2417 /* If pLast is NULL at this point, then the last rowid for this doclist
2418 ** lies on the page currently indicated by the iterator. In this case
2419 ** pIter->iLeafOffset is already set to point to the position-list size
2420 ** field associated with the first relevant rowid on the page.
2422 ** Or, if pLast is non-NULL, then it is the page that contains the last
2423 ** rowid. In this case configure the iterator so that it points to the
2424 ** first rowid on this page.
2428 fts5DataRelease(pIter
->pLeaf
);
2429 pIter
->pLeaf
= pLast
;
2430 pIter
->iLeafPgno
= pgnoLast
;
2431 iOff
= fts5LeafFirstRowidOff(pLast
);
2432 if( iOff
>pLast
->szLeaf
){
2433 p
->rc
= FTS5_CORRUPT
;
2436 iOff
+= fts5GetVarint(&pLast
->p
[iOff
], (u64
*)&pIter
->iRowid
);
2437 pIter
->iLeafOffset
= iOff
;
2439 if( fts5LeafIsTermless(pLast
) ){
2440 pIter
->iEndofDoclist
= pLast
->nn
+1;
2442 pIter
->iEndofDoclist
= fts5LeafFirstTermOff(pLast
);
2446 fts5SegIterReverseInitPage(p
, pIter
);
2450 ** Iterator pIter currently points to the first rowid of a doclist.
2451 ** There is a doclist-index associated with the final term on the current
2452 ** page. If the current term is the last term on the page, load the
2453 ** doclist-index from disk and initialize an iterator at (pIter->pDlidx).
2455 static void fts5SegIterLoadDlidx(Fts5Index
*p
, Fts5SegIter
*pIter
){
2456 int iSeg
= pIter
->pSeg
->iSegid
;
2457 int bRev
= (pIter
->flags
& FTS5_SEGITER_REVERSE
);
2458 Fts5Data
*pLeaf
= pIter
->pLeaf
; /* Current leaf data */
2460 assert( pIter
->flags
& FTS5_SEGITER_ONETERM
);
2461 assert( pIter
->pDlidx
==0 );
2463 /* Check if the current doclist ends on this page. If it does, return
2464 ** early without loading the doclist-index (as it belongs to a different
2466 if( pIter
->iTermLeafPgno
==pIter
->iLeafPgno
2467 && pIter
->iEndofDoclist
<pLeaf
->szLeaf
2472 pIter
->pDlidx
= fts5DlidxIterInit(p
, bRev
, iSeg
, pIter
->iTermLeafPgno
);
2476 ** The iterator object passed as the second argument currently contains
2477 ** no valid values except for the Fts5SegIter.pLeaf member variable. This
2478 ** function searches the leaf page for a term matching (pTerm/nTerm).
2480 ** If the specified term is found on the page, then the iterator is left
2481 ** pointing to it. If argument bGe is zero and the term is not found,
2482 ** the iterator is left pointing at EOF.
2484 ** If bGe is non-zero and the specified term is not found, then the
2485 ** iterator is left pointing to the smallest term in the segment that
2486 ** is larger than the specified term, even if this term is not on the
2489 static void fts5LeafSeek(
2490 Fts5Index
*p
, /* Leave any error code here */
2491 int bGe
, /* True for a >= search */
2492 Fts5SegIter
*pIter
, /* Iterator to seek */
2493 const u8
*pTerm
, int nTerm
/* Term to search for */
2496 const u8
*a
= pIter
->pLeaf
->p
;
2497 u32 n
= (u32
)pIter
->pLeaf
->nn
;
2503 u32 iPgidx
; /* Current offset in pgidx */
2506 assert( p
->rc
==SQLITE_OK
);
2508 iPgidx
= (u32
)pIter
->pLeaf
->szLeaf
;
2509 iPgidx
+= fts5GetVarint32(&a
[iPgidx
], iTermOff
);
2512 p
->rc
= FTS5_CORRUPT
;
2518 /* Figure out how many new bytes are in this term */
2519 fts5FastGetVarint32(a
, iOff
, nNew
);
2524 assert( nKeep
>=nMatch
);
2525 if( nKeep
==nMatch
){
2528 nCmp
= (u32
)MIN(nNew
, nTerm
-nMatch
);
2529 for(i
=0; i
<nCmp
; i
++){
2530 if( a
[iOff
+i
]!=pTerm
[nMatch
+i
] ) break;
2534 if( (u32
)nTerm
==nMatch
){
2536 goto search_success
;
2540 }else if( i
<nNew
&& a
[iOff
+i
]>pTerm
[nMatch
] ){
2550 iPgidx
+= fts5GetVarint32(&a
[iPgidx
], nKeep
);
2555 p
->rc
= FTS5_CORRUPT
;
2559 /* Read the nKeep field of the next term. */
2560 fts5FastGetVarint32(a
, iOff
, nKeep
);
2565 fts5DataRelease(pIter
->pLeaf
);
2568 }else if( bEndOfPage
){
2570 fts5SegIterNextPage(p
, pIter
);
2571 if( pIter
->pLeaf
==0 ) return;
2572 a
= pIter
->pLeaf
->p
;
2573 if( fts5LeafIsTermless(pIter
->pLeaf
)==0 ){
2574 iPgidx
= (u32
)pIter
->pLeaf
->szLeaf
;
2575 iPgidx
+= fts5GetVarint32(&pIter
->pLeaf
->p
[iPgidx
], iOff
);
2576 if( iOff
<4 || (i64
)iOff
>=pIter
->pLeaf
->szLeaf
){
2577 p
->rc
= FTS5_CORRUPT
;
2582 n
= (u32
)pIter
->pLeaf
->nn
;
2583 iOff
+= fts5GetVarint32(&a
[iOff
], nNew
);
2591 if( (i64
)iOff
+nNew
>n
|| nNew
<1 ){
2592 p
->rc
= FTS5_CORRUPT
;
2595 pIter
->iLeafOffset
= iOff
+ nNew
;
2596 pIter
->iTermLeafOffset
= pIter
->iLeafOffset
;
2597 pIter
->iTermLeafPgno
= pIter
->iLeafPgno
;
2599 fts5BufferSet(&p
->rc
, &pIter
->term
, nKeep
, pTerm
);
2600 fts5BufferAppendBlob(&p
->rc
, &pIter
->term
, nNew
, &a
[iOff
]);
2603 pIter
->iEndofDoclist
= pIter
->pLeaf
->nn
+1;
2606 iPgidx
+= fts5GetVarint32(&a
[iPgidx
], nExtra
);
2607 pIter
->iEndofDoclist
= iTermOff
+ nExtra
;
2609 pIter
->iPgidxOff
= iPgidx
;
2611 fts5SegIterLoadRowid(p
, pIter
);
2612 fts5SegIterLoadNPos(p
, pIter
);
2615 static sqlite3_stmt
*fts5IdxSelectStmt(Fts5Index
*p
){
2616 if( p
->pIdxSelect
==0 ){
2617 Fts5Config
*pConfig
= p
->pConfig
;
2618 fts5IndexPrepareStmt(p
, &p
->pIdxSelect
, sqlite3_mprintf(
2619 "SELECT pgno FROM '%q'.'%q_idx' WHERE "
2620 "segid=? AND term<=? ORDER BY term DESC LIMIT 1",
2621 pConfig
->zDb
, pConfig
->zName
2624 return p
->pIdxSelect
;
2628 ** Initialize the object pIter to point to term pTerm/nTerm within segment
2629 ** pSeg. If there is no such term in the index, the iterator is set to EOF.
2631 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
2632 ** an error has already occurred when this function is called, it is a no-op.
2634 static void fts5SegIterSeekInit(
2635 Fts5Index
*p
, /* FTS5 backend */
2636 const u8
*pTerm
, int nTerm
, /* Term to seek to */
2637 int flags
, /* Mask of FTS5INDEX_XXX flags */
2638 Fts5StructureSegment
*pSeg
, /* Description of segment */
2639 Fts5SegIter
*pIter
/* Object to populate */
2642 int bGe
= (flags
& FTS5INDEX_QUERY_SCAN
);
2643 int bDlidx
= 0; /* True if there is a doclist-index */
2644 sqlite3_stmt
*pIdxSelect
= 0;
2646 assert( bGe
==0 || (flags
& FTS5INDEX_QUERY_DESC
)==0 );
2647 assert( pTerm
&& nTerm
);
2648 memset(pIter
, 0, sizeof(*pIter
));
2651 /* This block sets stack variable iPg to the leaf page number that may
2652 ** contain term (pTerm/nTerm), if it is present in the segment. */
2653 pIdxSelect
= fts5IdxSelectStmt(p
);
2655 sqlite3_bind_int(pIdxSelect
, 1, pSeg
->iSegid
);
2656 sqlite3_bind_blob(pIdxSelect
, 2, pTerm
, nTerm
, SQLITE_STATIC
);
2657 if( SQLITE_ROW
==sqlite3_step(pIdxSelect
) ){
2658 i64 val
= sqlite3_column_int(pIdxSelect
, 0);
2659 iPg
= (int)(val
>>1);
2660 bDlidx
= (val
& 0x0001);
2662 p
->rc
= sqlite3_reset(pIdxSelect
);
2663 sqlite3_bind_null(pIdxSelect
, 2);
2665 if( iPg
<pSeg
->pgnoFirst
){
2666 iPg
= pSeg
->pgnoFirst
;
2670 pIter
->iLeafPgno
= iPg
- 1;
2671 fts5SegIterNextPage(p
, pIter
);
2674 fts5LeafSeek(p
, bGe
, pIter
, pTerm
, nTerm
);
2677 if( p
->rc
==SQLITE_OK
&& (bGe
==0 || (flags
& FTS5INDEX_QUERY_SCANONETERM
)) ){
2678 pIter
->flags
|= FTS5_SEGITER_ONETERM
;
2680 if( flags
& FTS5INDEX_QUERY_DESC
){
2681 pIter
->flags
|= FTS5_SEGITER_REVERSE
;
2684 fts5SegIterLoadDlidx(p
, pIter
);
2686 if( flags
& FTS5INDEX_QUERY_DESC
){
2687 fts5SegIterReverse(p
, pIter
);
2692 fts5SegIterSetNext(p
, pIter
);
2693 if( 0==(flags
& FTS5INDEX_QUERY_SCANONETERM
) ){
2694 fts5SegIterAllocTombstone(p
, pIter
);
2699 ** 1) an error has occurred, or
2700 ** 2) the iterator points to EOF, or
2701 ** 3) the iterator points to an entry with term (pTerm/nTerm), or
2702 ** 4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points
2703 ** to an entry with a term greater than or equal to (pTerm/nTerm).
2705 assert_nc( p
->rc
!=SQLITE_OK
/* 1 */
2706 || pIter
->pLeaf
==0 /* 2 */
2707 || fts5BufferCompareBlob(&pIter
->term
, pTerm
, nTerm
)==0 /* 3 */
2708 || (bGe
&& fts5BufferCompareBlob(&pIter
->term
, pTerm
, nTerm
)>0) /* 4 */
2714 ** SQL used by fts5SegIterNextInit() to find the page to open.
2716 static sqlite3_stmt
*fts5IdxNextStmt(Fts5Index
*p
){
2717 if( p
->pIdxNextSelect
==0 ){
2718 Fts5Config
*pConfig
= p
->pConfig
;
2719 fts5IndexPrepareStmt(p
, &p
->pIdxNextSelect
, sqlite3_mprintf(
2720 "SELECT pgno FROM '%q'.'%q_idx' WHERE "
2721 "segid=? AND term>? ORDER BY term ASC LIMIT 1",
2722 pConfig
->zDb
, pConfig
->zName
2726 return p
->pIdxNextSelect
;
2730 ** This is similar to fts5SegIterSeekInit(), except that it initializes
2731 ** the segment iterator to point to the first term following the page
2732 ** with pTerm/nTerm on it.
2734 static void fts5SegIterNextInit(
2736 const char *pTerm
, int nTerm
,
2737 Fts5StructureSegment
*pSeg
, /* Description of segment */
2738 Fts5SegIter
*pIter
/* Object to populate */
2740 int iPg
= -1; /* Page of segment to open */
2742 sqlite3_stmt
*pSel
= 0; /* SELECT to find iPg */
2744 pSel
= fts5IdxNextStmt(p
);
2746 assert( p
->rc
==SQLITE_OK
);
2747 sqlite3_bind_int(pSel
, 1, pSeg
->iSegid
);
2748 sqlite3_bind_blob(pSel
, 2, pTerm
, nTerm
, SQLITE_STATIC
);
2750 if( sqlite3_step(pSel
)==SQLITE_ROW
){
2751 i64 val
= sqlite3_column_int64(pSel
, 0);
2752 iPg
= (int)(val
>>1);
2753 bDlidx
= (val
& 0x0001);
2755 p
->rc
= sqlite3_reset(pSel
);
2756 sqlite3_bind_null(pSel
, 2);
2760 memset(pIter
, 0, sizeof(*pIter
));
2762 pIter
->flags
|= FTS5_SEGITER_ONETERM
;
2764 pIter
->iLeafPgno
= iPg
- 1;
2765 fts5SegIterNextPage(p
, pIter
);
2766 fts5SegIterSetNext(p
, pIter
);
2769 const u8
*a
= pIter
->pLeaf
->p
;
2772 pIter
->iPgidxOff
= pIter
->pLeaf
->szLeaf
;
2773 pIter
->iPgidxOff
+= fts5GetVarint32(&a
[pIter
->iPgidxOff
], iTermOff
);
2774 pIter
->iLeafOffset
= iTermOff
;
2775 fts5SegIterLoadTerm(p
, pIter
, 0);
2776 fts5SegIterLoadNPos(p
, pIter
);
2777 if( bDlidx
) fts5SegIterLoadDlidx(p
, pIter
);
2779 assert( p
->rc
!=SQLITE_OK
||
2780 fts5BufferCompareBlob(&pIter
->term
, (const u8
*)pTerm
, nTerm
)>0
2786 ** Initialize the object pIter to point to term pTerm/nTerm within the
2787 ** in-memory hash table. If there is no such term in the hash-table, the
2788 ** iterator is set to EOF.
2790 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
2791 ** an error has already occurred when this function is called, it is a no-op.
2793 static void fts5SegIterHashInit(
2794 Fts5Index
*p
, /* FTS5 backend */
2795 const u8
*pTerm
, int nTerm
, /* Term to seek to */
2796 int flags
, /* Mask of FTS5INDEX_XXX flags */
2797 Fts5SegIter
*pIter
/* Object to populate */
2802 Fts5Data
*pLeaf
= 0;
2805 assert( p
->rc
==SQLITE_OK
);
2807 if( pTerm
==0 || (flags
& FTS5INDEX_QUERY_SCAN
) ){
2808 const u8
*pList
= 0;
2810 p
->rc
= sqlite3Fts5HashScanInit(p
->pHash
, (const char*)pTerm
, nTerm
);
2811 sqlite3Fts5HashScanEntry(p
->pHash
, (const char**)&z
, &n
, &pList
, &nList
);
2813 pLeaf
= fts5IdxMalloc(p
, sizeof(Fts5Data
));
2815 pLeaf
->p
= (u8
*)pList
;
2819 /* The call to sqlite3Fts5HashScanInit() causes the hash table to
2820 ** fill the size field of all existing position lists. This means they
2821 ** can no longer be appended to. Since the only scenario in which they
2822 ** can be appended to is if the previous operation on this table was
2823 ** a DELETE, by clearing the Fts5Index.bDelete flag we can avoid this
2824 ** possibility altogether. */
2827 p
->rc
= sqlite3Fts5HashQuery(p
->pHash
, sizeof(Fts5Data
),
2828 (const char*)pTerm
, nTerm
, (void**)&pLeaf
, &nList
2831 pLeaf
->p
= (u8
*)&pLeaf
[1];
2835 pIter
->flags
|= FTS5_SEGITER_ONETERM
;
2839 sqlite3Fts5BufferSet(&p
->rc
, &pIter
->term
, n
, z
);
2840 pLeaf
->nn
= pLeaf
->szLeaf
= nList
;
2841 pIter
->pLeaf
= pLeaf
;
2842 pIter
->iLeafOffset
= fts5GetVarint(pLeaf
->p
, (u64
*)&pIter
->iRowid
);
2843 pIter
->iEndofDoclist
= pLeaf
->nn
;
2845 if( flags
& FTS5INDEX_QUERY_DESC
){
2846 pIter
->flags
|= FTS5_SEGITER_REVERSE
;
2847 fts5SegIterReverseInitPage(p
, pIter
);
2849 fts5SegIterLoadNPos(p
, pIter
);
2853 fts5SegIterSetNext(p
, pIter
);
2857 ** Array ap[] contains n elements. Release each of these elements using
2858 ** fts5DataRelease(). Then free the array itself using sqlite3_free().
2860 static void fts5IndexFreeArray(Fts5Data
**ap
, int n
){
2863 for(ii
=0; ii
<n
; ii
++){
2864 fts5DataRelease(ap
[ii
]);
2871 ** Decrement the ref-count of the object passed as the only argument. If it
2872 ** reaches 0, free it and its contents.
2874 static void fts5TombstoneArrayDelete(Fts5TombstoneArray
*p
){
2879 for(ii
=0; ii
<p
->nTombstone
; ii
++){
2880 fts5DataRelease(p
->apTombstone
[ii
]);
2888 ** Zero the iterator passed as the only argument.
2890 static void fts5SegIterClear(Fts5SegIter
*pIter
){
2891 fts5BufferFree(&pIter
->term
);
2892 fts5DataRelease(pIter
->pLeaf
);
2893 fts5DataRelease(pIter
->pNextLeaf
);
2894 fts5TombstoneArrayDelete(pIter
->pTombArray
);
2895 fts5DlidxIterFree(pIter
->pDlidx
);
2896 sqlite3_free(pIter
->aRowidOffset
);
2897 memset(pIter
, 0, sizeof(Fts5SegIter
));
2903 ** This function is used as part of the big assert() procedure implemented by
2904 ** fts5AssertMultiIterSetup(). It ensures that the result currently stored
2905 ** in *pRes is the correct result of comparing the current positions of the
2908 static void fts5AssertComparisonResult(
2914 int i1
= p1
- pIter
->aSeg
;
2915 int i2
= p2
- pIter
->aSeg
;
2917 if( p1
->pLeaf
|| p2
->pLeaf
){
2919 assert( pRes
->iFirst
==i2
);
2920 }else if( p2
->pLeaf
==0 ){
2921 assert( pRes
->iFirst
==i1
);
2923 int nMin
= MIN(p1
->term
.n
, p2
->term
.n
);
2924 int res
= fts5Memcmp(p1
->term
.p
, p2
->term
.p
, nMin
);
2925 if( res
==0 ) res
= p1
->term
.n
- p2
->term
.n
;
2928 assert( pRes
->bTermEq
==1 );
2929 assert( p1
->iRowid
!=p2
->iRowid
);
2930 res
= ((p1
->iRowid
> p2
->iRowid
)==pIter
->bRev
) ? -1 : 1;
2932 assert( pRes
->bTermEq
==0 );
2936 assert( pRes
->iFirst
==i1
);
2938 assert( pRes
->iFirst
==i2
);
2945 ** This function is a no-op unless SQLITE_DEBUG is defined when this module
2946 ** is compiled. In that case, this function is essentially an assert()
2947 ** statement used to verify that the contents of the pIter->aFirst[] array
2950 static void fts5AssertMultiIterSetup(Fts5Index
*p
, Fts5Iter
*pIter
){
2951 if( p
->rc
==SQLITE_OK
){
2952 Fts5SegIter
*pFirst
= &pIter
->aSeg
[ pIter
->aFirst
[1].iFirst
];
2955 assert( (pFirst
->pLeaf
==0)==pIter
->base
.bEof
);
2957 /* Check that pIter->iSwitchRowid is set correctly. */
2958 for(i
=0; i
<pIter
->nSeg
; i
++){
2959 Fts5SegIter
*p1
= &pIter
->aSeg
[i
];
2962 || fts5BufferCompare(&pFirst
->term
, &p1
->term
)
2963 || p1
->iRowid
==pIter
->iSwitchRowid
2964 || (p1
->iRowid
<pIter
->iSwitchRowid
)==pIter
->bRev
2968 for(i
=0; i
<pIter
->nSeg
; i
+=2){
2969 Fts5SegIter
*p1
= &pIter
->aSeg
[i
];
2970 Fts5SegIter
*p2
= &pIter
->aSeg
[i
+1];
2971 Fts5CResult
*pRes
= &pIter
->aFirst
[(pIter
->nSeg
+ i
) / 2];
2972 fts5AssertComparisonResult(pIter
, p1
, p2
, pRes
);
2975 for(i
=1; i
<(pIter
->nSeg
/ 2); i
+=2){
2976 Fts5SegIter
*p1
= &pIter
->aSeg
[ pIter
->aFirst
[i
*2].iFirst
];
2977 Fts5SegIter
*p2
= &pIter
->aSeg
[ pIter
->aFirst
[i
*2+1].iFirst
];
2978 Fts5CResult
*pRes
= &pIter
->aFirst
[i
];
2979 fts5AssertComparisonResult(pIter
, p1
, p2
, pRes
);
2984 # define fts5AssertMultiIterSetup(x,y)
2988 ** Do the comparison necessary to populate pIter->aFirst[iOut].
2990 ** If the returned value is non-zero, then it is the index of an entry
2991 ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing
2992 ** to a key that is a duplicate of another, higher priority,
2993 ** segment-iterator in the pIter->aSeg[] array.
2995 static int fts5MultiIterDoCompare(Fts5Iter
*pIter
, int iOut
){
2996 int i1
; /* Index of left-hand Fts5SegIter */
2997 int i2
; /* Index of right-hand Fts5SegIter */
2999 Fts5SegIter
*p1
; /* Left-hand Fts5SegIter */
3000 Fts5SegIter
*p2
; /* Right-hand Fts5SegIter */
3001 Fts5CResult
*pRes
= &pIter
->aFirst
[iOut
];
3003 assert( iOut
<pIter
->nSeg
&& iOut
>0 );
3004 assert( pIter
->bRev
==0 || pIter
->bRev
==1 );
3006 if( iOut
>=(pIter
->nSeg
/2) ){
3007 i1
= (iOut
- pIter
->nSeg
/2) * 2;
3010 i1
= pIter
->aFirst
[iOut
*2].iFirst
;
3011 i2
= pIter
->aFirst
[iOut
*2+1].iFirst
;
3013 p1
= &pIter
->aSeg
[i1
];
3014 p2
= &pIter
->aSeg
[i2
];
3017 if( p1
->pLeaf
==0 ){ /* If p1 is at EOF */
3019 }else if( p2
->pLeaf
==0 ){ /* If p2 is at EOF */
3022 int res
= fts5BufferCompare(&p1
->term
, &p2
->term
);
3027 if( p1
->iRowid
==p2
->iRowid
){
3030 res
= ((p1
->iRowid
> p2
->iRowid
)==pIter
->bRev
) ? -1 : +1;
3040 pRes
->iFirst
= (u16
)iRes
;
3045 ** Move the seg-iter so that it points to the first rowid on page iLeafPgno.
3046 ** It is an error if leaf iLeafPgno does not exist. Unless the db is
3047 ** a 'secure-delete' db, if it contains no rowids then this is also an error.
3049 static void fts5SegIterGotoPage(
3050 Fts5Index
*p
, /* FTS5 backend object */
3051 Fts5SegIter
*pIter
, /* Iterator to advance */
3054 assert( iLeafPgno
>pIter
->iLeafPgno
);
3056 if( iLeafPgno
>pIter
->pSeg
->pgnoLast
){
3057 p
->rc
= FTS5_CORRUPT
;
3059 fts5DataRelease(pIter
->pNextLeaf
);
3060 pIter
->pNextLeaf
= 0;
3061 pIter
->iLeafPgno
= iLeafPgno
-1;
3063 while( p
->rc
==SQLITE_OK
){
3065 fts5SegIterNextPage(p
, pIter
);
3066 if( pIter
->pLeaf
==0 ) break;
3067 iOff
= fts5LeafFirstRowidOff(pIter
->pLeaf
);
3069 u8
*a
= pIter
->pLeaf
->p
;
3070 int n
= pIter
->pLeaf
->szLeaf
;
3071 if( iOff
<4 || iOff
>=n
){
3072 p
->rc
= FTS5_CORRUPT
;
3074 iOff
+= fts5GetVarint(&a
[iOff
], (u64
*)&pIter
->iRowid
);
3075 pIter
->iLeafOffset
= iOff
;
3076 fts5SegIterLoadNPos(p
, pIter
);
3085 ** Advance the iterator passed as the second argument until it is at or
3086 ** past rowid iFrom. Regardless of the value of iFrom, the iterator is
3087 ** always advanced at least once.
3089 static void fts5SegIterNextFrom(
3090 Fts5Index
*p
, /* FTS5 backend object */
3091 Fts5SegIter
*pIter
, /* Iterator to advance */
3092 i64 iMatch
/* Advance iterator at least this far */
3094 int bRev
= (pIter
->flags
& FTS5_SEGITER_REVERSE
);
3095 Fts5DlidxIter
*pDlidx
= pIter
->pDlidx
;
3096 int iLeafPgno
= pIter
->iLeafPgno
;
3099 assert( pIter
->flags
& FTS5_SEGITER_ONETERM
);
3100 assert( pIter
->pDlidx
);
3101 assert( pIter
->pLeaf
);
3104 while( !fts5DlidxIterEof(p
, pDlidx
) && iMatch
>fts5DlidxIterRowid(pDlidx
) ){
3105 iLeafPgno
= fts5DlidxIterPgno(pDlidx
);
3106 fts5DlidxIterNext(p
, pDlidx
);
3108 assert_nc( iLeafPgno
>=pIter
->iLeafPgno
|| p
->rc
);
3109 if( iLeafPgno
>pIter
->iLeafPgno
){
3110 fts5SegIterGotoPage(p
, pIter
, iLeafPgno
);
3114 assert( pIter
->pNextLeaf
==0 );
3115 assert( iMatch
<pIter
->iRowid
);
3116 while( !fts5DlidxIterEof(p
, pDlidx
) && iMatch
<fts5DlidxIterRowid(pDlidx
) ){
3117 fts5DlidxIterPrev(p
, pDlidx
);
3119 iLeafPgno
= fts5DlidxIterPgno(pDlidx
);
3121 assert( fts5DlidxIterEof(p
, pDlidx
) || iLeafPgno
<=pIter
->iLeafPgno
);
3123 if( iLeafPgno
<pIter
->iLeafPgno
){
3124 pIter
->iLeafPgno
= iLeafPgno
+1;
3125 fts5SegIterReverseNewPage(p
, pIter
);
3131 if( bMove
&& p
->rc
==SQLITE_OK
) pIter
->xNext(p
, pIter
, 0);
3132 if( pIter
->pLeaf
==0 ) break;
3133 if( bRev
==0 && pIter
->iRowid
>=iMatch
) break;
3134 if( bRev
!=0 && pIter
->iRowid
<=iMatch
) break;
3136 }while( p
->rc
==SQLITE_OK
);
3140 ** Free the iterator object passed as the second argument.
3142 static void fts5MultiIterFree(Fts5Iter
*pIter
){
3145 for(i
=0; i
<pIter
->nSeg
; i
++){
3146 fts5SegIterClear(&pIter
->aSeg
[i
]);
3148 fts5BufferFree(&pIter
->poslist
);
3149 sqlite3_free(pIter
);
3153 static void fts5MultiIterAdvanced(
3154 Fts5Index
*p
, /* FTS5 backend to iterate within */
3155 Fts5Iter
*pIter
, /* Iterator to update aFirst[] array for */
3156 int iChanged
, /* Index of sub-iterator just advanced */
3157 int iMinset
/* Minimum entry in aFirst[] to set */
3160 for(i
=(pIter
->nSeg
+iChanged
)/2; i
>=iMinset
&& p
->rc
==SQLITE_OK
; i
=i
/2){
3162 if( (iEq
= fts5MultiIterDoCompare(pIter
, i
)) ){
3163 Fts5SegIter
*pSeg
= &pIter
->aSeg
[iEq
];
3164 assert( p
->rc
==SQLITE_OK
);
3165 pSeg
->xNext(p
, pSeg
, 0);
3166 i
= pIter
->nSeg
+ iEq
;
3172 ** Sub-iterator iChanged of iterator pIter has just been advanced. It still
3173 ** points to the same term though - just a different rowid. This function
3174 ** attempts to update the contents of the pIter->aFirst[] accordingly.
3175 ** If it does so successfully, 0 is returned. Otherwise 1.
3177 ** If non-zero is returned, the caller should call fts5MultiIterAdvanced()
3178 ** on the iterator instead. That function does the same as this one, except
3179 ** that it deals with more complicated cases as well.
3181 static int fts5MultiIterAdvanceRowid(
3182 Fts5Iter
*pIter
, /* Iterator to update aFirst[] array for */
3183 int iChanged
, /* Index of sub-iterator just advanced */
3184 Fts5SegIter
**ppFirst
3186 Fts5SegIter
*pNew
= &pIter
->aSeg
[iChanged
];
3188 if( pNew
->iRowid
==pIter
->iSwitchRowid
3189 || (pNew
->iRowid
<pIter
->iSwitchRowid
)==pIter
->bRev
3192 Fts5SegIter
*pOther
= &pIter
->aSeg
[iChanged
^ 0x0001];
3193 pIter
->iSwitchRowid
= pIter
->bRev
? SMALLEST_INT64
: LARGEST_INT64
;
3194 for(i
=(pIter
->nSeg
+iChanged
)/2; 1; i
=i
/2){
3195 Fts5CResult
*pRes
= &pIter
->aFirst
[i
];
3197 assert( pNew
->pLeaf
);
3198 assert( pRes
->bTermEq
==0 || pOther
->pLeaf
);
3200 if( pRes
->bTermEq
){
3201 if( pNew
->iRowid
==pOther
->iRowid
){
3203 }else if( (pOther
->iRowid
>pNew
->iRowid
)==pIter
->bRev
){
3204 pIter
->iSwitchRowid
= pOther
->iRowid
;
3206 }else if( (pOther
->iRowid
>pIter
->iSwitchRowid
)==pIter
->bRev
){
3207 pIter
->iSwitchRowid
= pOther
->iRowid
;
3210 pRes
->iFirst
= (u16
)(pNew
- pIter
->aSeg
);
3213 pOther
= &pIter
->aSeg
[ pIter
->aFirst
[i
^ 0x0001].iFirst
];
3222 ** Set the pIter->bEof variable based on the state of the sub-iterators.
3224 static void fts5MultiIterSetEof(Fts5Iter
*pIter
){
3225 Fts5SegIter
*pSeg
= &pIter
->aSeg
[ pIter
->aFirst
[1].iFirst
];
3226 pIter
->base
.bEof
= pSeg
->pLeaf
==0;
3227 pIter
->iSwitchRowid
= pSeg
->iRowid
;
/*
** The argument to these macros must be a pointer to an Fts5Data structure
** containing a tombstone hash page.
**
** TOMBSTONE_KEYSIZE() evaluates to the key-size of the hash page: 4 if the
** first byte of the page is 4, otherwise 8.
**
** TOMBSTONE_NSLOT() evaluates to the number of slots on the page: 1 if the
** page is 16 bytes or smaller, otherwise the number of keys that fit after
** the first 8 bytes.
**
** The macro parameter is parenthesized in the expansions so that any
** pointer-valued expression (e.g. "&pg" or "ap[i]") may be passed safely.
*/
#define TOMBSTONE_KEYSIZE(pPg) ((pPg)->p[0]==4 ? 4 : 8)

#define TOMBSTONE_NSLOT(pPg)   \
  (((pPg)->nn > 16) ? (((pPg)->nn-8) / TOMBSTONE_KEYSIZE(pPg)) : 1)
3240 ** Query a single tombstone hash table for rowid iRowid. Return true if
3241 ** it is found or false otherwise. The tombstone hash table is one of
3242 ** nHashTable tables.
3244 static int fts5IndexTombstoneQuery(
3245 Fts5Data
*pHash
, /* Hash table page to query */
3246 int nHashTable
, /* Number of pages attached to segment */
3247 u64 iRowid
/* Rowid to query hash for */
3249 const int szKey
= TOMBSTONE_KEYSIZE(pHash
);
3250 const int nSlot
= TOMBSTONE_NSLOT(pHash
);
3251 int iSlot
= (iRowid
/ nHashTable
) % nSlot
;
3252 int nCollide
= nSlot
;
3256 }else if( szKey
==4 ){
3257 u32
*aSlot
= (u32
*)&pHash
->p
[8];
3258 while( aSlot
[iSlot
] ){
3259 if( fts5GetU32((u8
*)&aSlot
[iSlot
])==iRowid
) return 1;
3260 if( nCollide
--==0 ) break;
3261 iSlot
= (iSlot
+1)%nSlot
;
3264 u64
*aSlot
= (u64
*)&pHash
->p
[8];
3265 while( aSlot
[iSlot
] ){
3266 if( fts5GetU64((u8
*)&aSlot
[iSlot
])==iRowid
) return 1;
3267 if( nCollide
--==0 ) break;
3268 iSlot
= (iSlot
+1)%nSlot
;
3276 ** Return true if the iterator passed as the only argument points
3277 ** to a segment entry for which there is a tombstone. Return false
3278 ** if there is no tombstone or if the iterator is already at EOF.
3280 static int fts5MultiIterIsDeleted(Fts5Iter
*pIter
){
3281 int iFirst
= pIter
->aFirst
[1].iFirst
;
3282 Fts5SegIter
*pSeg
= &pIter
->aSeg
[iFirst
];
3283 Fts5TombstoneArray
*pArray
= pSeg
->pTombArray
;
3285 if( pSeg
->pLeaf
&& pArray
){
3286 /* Figure out which page the rowid might be present on. */
3287 int iPg
= ((u64
)pSeg
->iRowid
) % pArray
->nTombstone
;
3290 /* If tombstone hash page iPg has not yet been loaded from the
3291 ** database, load it now. */
3292 if( pArray
->apTombstone
[iPg
]==0 ){
3293 pArray
->apTombstone
[iPg
] = fts5DataRead(pIter
->pIndex
,
3294 FTS5_TOMBSTONE_ROWID(pSeg
->pSeg
->iSegid
, iPg
)
3296 if( pArray
->apTombstone
[iPg
]==0 ) return 0;
3299 return fts5IndexTombstoneQuery(
3300 pArray
->apTombstone
[iPg
],
3310 ** Move the iterator to the next entry.
3312 ** If an error occurs, an error code is left in Fts5Index.rc. It is not
3313 ** considered an error if the iterator reaches EOF, or if it is already at
3314 ** EOF when this function is called.
3316 static void fts5MultiIterNext(
3319 int bFrom
, /* True if argument iFrom is valid */
3320 i64 iFrom
/* Advance at least as far as this */
3322 int bUseFrom
= bFrom
;
3323 assert( pIter
->base
.bEof
==0 );
3324 while( p
->rc
==SQLITE_OK
){
3325 int iFirst
= pIter
->aFirst
[1].iFirst
;
3327 Fts5SegIter
*pSeg
= &pIter
->aSeg
[iFirst
];
3328 assert( p
->rc
==SQLITE_OK
);
3329 if( bUseFrom
&& pSeg
->pDlidx
){
3330 fts5SegIterNextFrom(p
, pSeg
, iFrom
);
3332 pSeg
->xNext(p
, pSeg
, &bNewTerm
);
3335 if( pSeg
->pLeaf
==0 || bNewTerm
3336 || fts5MultiIterAdvanceRowid(pIter
, iFirst
, &pSeg
)
3338 fts5MultiIterAdvanced(p
, pIter
, iFirst
, 1);
3339 fts5MultiIterSetEof(pIter
);
3340 pSeg
= &pIter
->aSeg
[pIter
->aFirst
[1].iFirst
];
3341 if( pSeg
->pLeaf
==0 ) return;
3344 fts5AssertMultiIterSetup(p
, pIter
);
3345 assert( pSeg
==&pIter
->aSeg
[pIter
->aFirst
[1].iFirst
] && pSeg
->pLeaf
);
3346 if( (pIter
->bSkipEmpty
==0 || pSeg
->nPos
)
3347 && 0==fts5MultiIterIsDeleted(pIter
)
3349 pIter
->xSetOutputs(pIter
, pSeg
);
3356 static void fts5MultiIterNext2(
3359 int *pbNewTerm
/* OUT: True if *might* be new term */
3361 assert( pIter
->bSkipEmpty
);
3362 if( p
->rc
==SQLITE_OK
){
3365 int iFirst
= pIter
->aFirst
[1].iFirst
;
3366 Fts5SegIter
*pSeg
= &pIter
->aSeg
[iFirst
];
3369 assert( p
->rc
==SQLITE_OK
);
3370 pSeg
->xNext(p
, pSeg
, &bNewTerm
);
3371 if( pSeg
->pLeaf
==0 || bNewTerm
3372 || fts5MultiIterAdvanceRowid(pIter
, iFirst
, &pSeg
)
3374 fts5MultiIterAdvanced(p
, pIter
, iFirst
, 1);
3375 fts5MultiIterSetEof(pIter
);
3378 fts5AssertMultiIterSetup(p
, pIter
);
3380 }while( (fts5MultiIterIsEmpty(p
, pIter
) || fts5MultiIterIsDeleted(pIter
))
3381 && (p
->rc
==SQLITE_OK
)
3386 static void fts5IterSetOutputs_Noop(Fts5Iter
*pUnused1
, Fts5SegIter
*pUnused2
){
3387 UNUSED_PARAM2(pUnused1
, pUnused2
);
3390 static Fts5Iter
*fts5MultiIterAlloc(
3391 Fts5Index
*p
, /* FTS5 backend to iterate within */
3395 i64 nSlot
; /* Power of two >= nSeg */
3397 for(nSlot
=2; nSlot
<nSeg
; nSlot
=nSlot
*2);
3398 pNew
= fts5IdxMalloc(p
,
3399 sizeof(Fts5Iter
) + /* pNew */
3400 sizeof(Fts5SegIter
) * (nSlot
-1) + /* pNew->aSeg[] */
3401 sizeof(Fts5CResult
) * nSlot
/* pNew->aFirst[] */
3405 pNew
->aFirst
= (Fts5CResult
*)&pNew
->aSeg
[nSlot
];
3407 pNew
->xSetOutputs
= fts5IterSetOutputs_Noop
;
3412 static void fts5PoslistCallback(
3415 const u8
*pChunk
, int nChunk
3417 UNUSED_PARAM(pUnused
);
3418 assert_nc( nChunk
>=0 );
3420 fts5BufferSafeAppendBlob((Fts5Buffer
*)pContext
, pChunk
, nChunk
);
3424 typedef struct PoslistCallbackCtx PoslistCallbackCtx
;
3425 struct PoslistCallbackCtx
{
3426 Fts5Buffer
*pBuf
; /* Append to this buffer */
3427 Fts5Colset
*pColset
; /* Restrict matches to this column */
3428 int eState
; /* See above */
3431 typedef struct PoslistOffsetsCtx PoslistOffsetsCtx
;
3432 struct PoslistOffsetsCtx
{
3433 Fts5Buffer
*pBuf
; /* Append to this buffer */
3434 Fts5Colset
*pColset
; /* Restrict matches to this column */
3440 ** TODO: Make this more efficient!
3442 static int fts5IndexColsetTest(Fts5Colset
*pColset
, int iCol
){
3444 for(i
=0; i
<pColset
->nCol
; i
++){
3445 if( pColset
->aiCol
[i
]==iCol
) return 1;
3450 static void fts5PoslistOffsetsCallback(
3453 const u8
*pChunk
, int nChunk
3455 PoslistOffsetsCtx
*pCtx
= (PoslistOffsetsCtx
*)pContext
;
3456 UNUSED_PARAM(pUnused
);
3457 assert_nc( nChunk
>=0 );
3462 i
+= fts5GetVarint32(&pChunk
[i
], iVal
);
3463 iVal
+= pCtx
->iRead
- 2;
3465 if( fts5IndexColsetTest(pCtx
->pColset
, iVal
) ){
3466 fts5BufferSafeAppendVarint(pCtx
->pBuf
, iVal
+ 2 - pCtx
->iWrite
);
3467 pCtx
->iWrite
= iVal
;
3473 static void fts5PoslistFilterCallback(
3476 const u8
*pChunk
, int nChunk
3478 PoslistCallbackCtx
*pCtx
= (PoslistCallbackCtx
*)pContext
;
3479 UNUSED_PARAM(pUnused
);
3480 assert_nc( nChunk
>=0 );
3482 /* Search through to find the first varint with value 1. This is the
3483 ** start of the next column's hits. */
3487 if( pCtx
->eState
==2 ){
3489 fts5FastGetVarint32(pChunk
, i
, iCol
);
3490 if( fts5IndexColsetTest(pCtx
->pColset
, iCol
) ){
3492 fts5BufferSafeAppendVarint(pCtx
->pBuf
, 1);
3499 while( i
<nChunk
&& pChunk
[i
]!=0x01 ){
3500 while( pChunk
[i
] & 0x80 ) i
++;
3504 fts5BufferSafeAppendBlob(pCtx
->pBuf
, &pChunk
[iStart
], i
-iStart
);
3513 fts5FastGetVarint32(pChunk
, i
, iCol
);
3514 pCtx
->eState
= fts5IndexColsetTest(pCtx
->pColset
, iCol
);
3516 fts5BufferSafeAppendBlob(pCtx
->pBuf
, &pChunk
[iStart
], i
-iStart
);
3525 static void fts5ChunkIterate(
3526 Fts5Index
*p
, /* Index object */
3527 Fts5SegIter
*pSeg
, /* Poslist of this iterator */
3528 void *pCtx
, /* Context pointer for xChunk callback */
3529 void (*xChunk
)(Fts5Index
*, void*, const u8
*, int)
3531 int nRem
= pSeg
->nPos
; /* Number of bytes still to come */
3532 Fts5Data
*pData
= 0;
3533 u8
*pChunk
= &pSeg
->pLeaf
->p
[pSeg
->iLeafOffset
];
3534 int nChunk
= MIN(nRem
, pSeg
->pLeaf
->szLeaf
- pSeg
->iLeafOffset
);
3535 int pgno
= pSeg
->iLeafPgno
;
3538 /* This function does not work with detail=none databases. */
3539 assert( p
->pConfig
->eDetail
!=FTS5_DETAIL_NONE
);
3541 if( (pSeg
->flags
& FTS5_SEGITER_REVERSE
)==0 ){
3546 xChunk(p
, pCtx
, pChunk
, nChunk
);
3548 fts5DataRelease(pData
);
3551 }else if( pSeg
->pSeg
==0 ){
3552 p
->rc
= FTS5_CORRUPT
;
3556 pData
= fts5LeafRead(p
, FTS5_SEGMENT_ROWID(pSeg
->pSeg
->iSegid
, pgno
));
3557 if( pData
==0 ) break;
3558 pChunk
= &pData
->p
[4];
3559 nChunk
= MIN(nRem
, pData
->szLeaf
- 4);
3560 if( pgno
==pgnoSave
){
3561 assert( pSeg
->pNextLeaf
==0 );
3562 pSeg
->pNextLeaf
= pData
;
3570 ** Iterator pIter currently points to a valid entry (not EOF). This
3571 ** function appends the position list data for the current entry to
3572 ** buffer pBuf. It does not make a copy of the position-list size
3575 static void fts5SegiterPoslist(
3578 Fts5Colset
*pColset
,
3583 if( 0==fts5BufferGrow(&p
->rc
, pBuf
, pSeg
->nPos
+FTS5_DATA_ZERO_PADDING
) ){
3584 assert( pBuf
->p
!=0 );
3585 assert( pBuf
->nSpace
>= pBuf
->n
+pSeg
->nPos
+FTS5_DATA_ZERO_PADDING
);
3586 memset(&pBuf
->p
[pBuf
->n
+pSeg
->nPos
], 0, FTS5_DATA_ZERO_PADDING
);
3588 fts5ChunkIterate(p
, pSeg
, (void*)pBuf
, fts5PoslistCallback
);
3590 if( p
->pConfig
->eDetail
==FTS5_DETAIL_FULL
){
3591 PoslistCallbackCtx sCtx
;
3593 sCtx
.pColset
= pColset
;
3594 sCtx
.eState
= fts5IndexColsetTest(pColset
, 0);
3595 assert( sCtx
.eState
==0 || sCtx
.eState
==1 );
3596 fts5ChunkIterate(p
, pSeg
, (void*)&sCtx
, fts5PoslistFilterCallback
);
3598 PoslistOffsetsCtx sCtx
;
3599 memset(&sCtx
, 0, sizeof(sCtx
));
3601 sCtx
.pColset
= pColset
;
3602 fts5ChunkIterate(p
, pSeg
, (void*)&sCtx
, fts5PoslistOffsetsCallback
);
3609 ** Parameter pPos points to a buffer containing a position list, size nPos.
3610 ** This function filters it according to pColset (which must be non-NULL)
3611 ** and sets pIter->base.pData/nData to point to the new position list.
3612 ** If memory is required for the new position list, use buffer pIter->poslist.
3613 ** Or, if the new position list is a contiguous subset of the input, set
3614 ** pIter->base.pData/nData to point directly to it.
3616 ** This function is a no-op if *pRc is other than SQLITE_OK when it is
3617 ** called. If an OOM error is encountered, *pRc is set to SQLITE_NOMEM
3618 ** before returning.
3620 static void fts5IndexExtractColset(
3622 Fts5Colset
*pColset
, /* Colset to filter on */
3623 const u8
*pPos
, int nPos
, /* Position list */
3626 if( *pRc
==SQLITE_OK
){
3628 const u8
*aCopy
= p
;
3629 const u8
*pEnd
= &p
[nPos
]; /* One byte past end of position list */
3633 if( pColset
->nCol
>1 && sqlite3Fts5BufferSize(pRc
, &pIter
->poslist
, nPos
) ){
3638 while( pColset
->aiCol
[i
]<iCurrent
){
3640 if( i
==pColset
->nCol
){
3641 pIter
->base
.pData
= pIter
->poslist
.p
;
3642 pIter
->base
.nData
= pIter
->poslist
.n
;
3647 /* Advance pointer p until it points to pEnd or an 0x01 byte that is
3648 ** not part of a varint */
3649 while( p
<pEnd
&& *p
!=0x01 ){
3650 while( *p
++ & 0x80 );
3653 if( pColset
->aiCol
[i
]==iCurrent
){
3654 if( pColset
->nCol
==1 ){
3655 pIter
->base
.pData
= aCopy
;
3656 pIter
->base
.nData
= p
-aCopy
;
3659 fts5BufferSafeAppendBlob(&pIter
->poslist
, aCopy
, p
-aCopy
);
3662 pIter
->base
.pData
= pIter
->poslist
.p
;
3663 pIter
->base
.nData
= pIter
->poslist
.n
;
3668 if( iCurrent
& 0x80 ){
3670 p
+= fts5GetVarint32(p
, iCurrent
);
3678 ** xSetOutputs callback used by detail=none tables.
3680 static void fts5IterSetOutputs_None(Fts5Iter
*pIter
, Fts5SegIter
*pSeg
){
3681 assert( pIter
->pIndex
->pConfig
->eDetail
==FTS5_DETAIL_NONE
);
3682 pIter
->base
.iRowid
= pSeg
->iRowid
;
3683 pIter
->base
.nData
= pSeg
->nPos
;
3687 ** xSetOutputs callback used by detail=full and detail=col tables when no
3688 ** column filters are specified.
3690 static void fts5IterSetOutputs_Nocolset(Fts5Iter
*pIter
, Fts5SegIter
*pSeg
){
3691 pIter
->base
.iRowid
= pSeg
->iRowid
;
3692 pIter
->base
.nData
= pSeg
->nPos
;
3694 assert( pIter
->pIndex
->pConfig
->eDetail
!=FTS5_DETAIL_NONE
);
3695 assert( pIter
->pColset
==0 );
3697 if( pSeg
->iLeafOffset
+pSeg
->nPos
<=pSeg
->pLeaf
->szLeaf
){
3698 /* All data is stored on the current page. Populate the output
3699 ** variables to point into the body of the page object. */
3700 pIter
->base
.pData
= &pSeg
->pLeaf
->p
[pSeg
->iLeafOffset
];
3702 /* The data is distributed over two or more pages. Copy it into the
3703 ** Fts5Iter.poslist buffer and then set the output pointer to point
3704 ** to this buffer. */
3705 fts5BufferZero(&pIter
->poslist
);
3706 fts5SegiterPoslist(pIter
->pIndex
, pSeg
, 0, &pIter
->poslist
);
3707 pIter
->base
.pData
= pIter
->poslist
.p
;
3712 ** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match
3713 ** against no columns at all).
3715 static void fts5IterSetOutputs_ZeroColset(Fts5Iter
*pIter
, Fts5SegIter
*pSeg
){
3717 pIter
->base
.nData
= 0;
3721 ** xSetOutputs callback used by detail=col when there is a column filter
3722 ** and there are more than 100 columns. Also called as a fallback from
3723 ** fts5IterSetOutputs_Col100 if the column-list spans more than one page.
3725 static void fts5IterSetOutputs_Col(Fts5Iter
*pIter
, Fts5SegIter
*pSeg
){
3726 fts5BufferZero(&pIter
->poslist
);
3727 fts5SegiterPoslist(pIter
->pIndex
, pSeg
, pIter
->pColset
, &pIter
->poslist
);
3728 pIter
->base
.iRowid
= pSeg
->iRowid
;
3729 pIter
->base
.pData
= pIter
->poslist
.p
;
3730 pIter
->base
.nData
= pIter
->poslist
.n
;
3734 ** xSetOutputs callback used when:
3737 ** * there is a column filter, and
3738 ** * the table contains 100 or fewer columns.
3740 ** The last point is to ensure all column numbers are stored as
3741 ** single-byte varints.
3743 static void fts5IterSetOutputs_Col100(Fts5Iter
*pIter
, Fts5SegIter
*pSeg
){
3745 assert( pIter
->pIndex
->pConfig
->eDetail
==FTS5_DETAIL_COLUMNS
);
3746 assert( pIter
->pColset
);
3748 if( pSeg
->iLeafOffset
+pSeg
->nPos
>pSeg
->pLeaf
->szLeaf
){
3749 fts5IterSetOutputs_Col(pIter
, pSeg
);
3751 u8
*a
= (u8
*)&pSeg
->pLeaf
->p
[pSeg
->iLeafOffset
];
3752 u8
*pEnd
= (u8
*)&a
[pSeg
->nPos
];
3754 int *aiCol
= pIter
->pColset
->aiCol
;
3755 int *aiColEnd
= &aiCol
[pIter
->pColset
->nCol
];
3757 u8
*aOut
= pIter
->poslist
.p
;
3760 pIter
->base
.iRowid
= pSeg
->iRowid
;
3763 iPrev
+= (int)a
++[0] - 2;
3764 while( *aiCol
<iPrev
){
3766 if( aiCol
==aiColEnd
) goto setoutputs_col_out
;
3768 if( *aiCol
==iPrev
){
3769 *aOut
++ = (u8
)((iPrev
- iPrevOut
) + 2);
3775 pIter
->base
.pData
= pIter
->poslist
.p
;
3776 pIter
->base
.nData
= aOut
- pIter
->poslist
.p
;
3781 ** xSetOutputs callback used by detail=full when there is a column filter.
3783 static void fts5IterSetOutputs_Full(Fts5Iter
*pIter
, Fts5SegIter
*pSeg
){
3784 Fts5Colset
*pColset
= pIter
->pColset
;
3785 pIter
->base
.iRowid
= pSeg
->iRowid
;
3787 assert( pIter
->pIndex
->pConfig
->eDetail
==FTS5_DETAIL_FULL
);
3790 if( pSeg
->iLeafOffset
+pSeg
->nPos
<=pSeg
->pLeaf
->szLeaf
){
3791 /* All data is stored on the current page. Populate the output
3792 ** variables to point into the body of the page object. */
3793 const u8
*a
= &pSeg
->pLeaf
->p
[pSeg
->iLeafOffset
];
3794 int *pRc
= &pIter
->pIndex
->rc
;
3795 fts5BufferZero(&pIter
->poslist
);
3796 fts5IndexExtractColset(pRc
, pColset
, a
, pSeg
->nPos
, pIter
);
3798 /* The data is distributed over two or more pages. Copy it into the
3799 ** Fts5Iter.poslist buffer and then set the output pointer to point
3800 ** to this buffer. */
3801 fts5BufferZero(&pIter
->poslist
);
3802 fts5SegiterPoslist(pIter
->pIndex
, pSeg
, pColset
, &pIter
->poslist
);
3803 pIter
->base
.pData
= pIter
->poslist
.p
;
3804 pIter
->base
.nData
= pIter
->poslist
.n
;
3808 static void fts5IterSetOutputCb(int *pRc
, Fts5Iter
*pIter
){
3809 assert( pIter
!=0 || (*pRc
)!=SQLITE_OK
);
3810 if( *pRc
==SQLITE_OK
){
3811 Fts5Config
*pConfig
= pIter
->pIndex
->pConfig
;
3812 if( pConfig
->eDetail
==FTS5_DETAIL_NONE
){
3813 pIter
->xSetOutputs
= fts5IterSetOutputs_None
;
3816 else if( pIter
->pColset
==0 ){
3817 pIter
->xSetOutputs
= fts5IterSetOutputs_Nocolset
;
3820 else if( pIter
->pColset
->nCol
==0 ){
3821 pIter
->xSetOutputs
= fts5IterSetOutputs_ZeroColset
;
3824 else if( pConfig
->eDetail
==FTS5_DETAIL_FULL
){
3825 pIter
->xSetOutputs
= fts5IterSetOutputs_Full
;
3829 assert( pConfig
->eDetail
==FTS5_DETAIL_COLUMNS
);
3830 if( pConfig
->nCol
<=100 ){
3831 pIter
->xSetOutputs
= fts5IterSetOutputs_Col100
;
3832 sqlite3Fts5BufferSize(pRc
, &pIter
->poslist
, pConfig
->nCol
);
3834 pIter
->xSetOutputs
= fts5IterSetOutputs_Col
;
3841 ** All the component segment-iterators of pIter have been set up. This
3842 ** function finishes setup for iterator pIter itself.
3844 static void fts5MultiIterFinishSetup(Fts5Index
*p
, Fts5Iter
*pIter
){
3846 for(iIter
=pIter
->nSeg
-1; iIter
>0; iIter
--){
3848 if( (iEq
= fts5MultiIterDoCompare(pIter
, iIter
)) ){
3849 Fts5SegIter
*pSeg
= &pIter
->aSeg
[iEq
];
3850 if( p
->rc
==SQLITE_OK
) pSeg
->xNext(p
, pSeg
, 0);
3851 fts5MultiIterAdvanced(p
, pIter
, iEq
, iIter
);
3854 fts5MultiIterSetEof(pIter
);
3855 fts5AssertMultiIterSetup(p
, pIter
);
3857 if( (pIter
->bSkipEmpty
&& fts5MultiIterIsEmpty(p
, pIter
))
3858 || fts5MultiIterIsDeleted(pIter
)
3860 fts5MultiIterNext(p
, pIter
, 0, 0);
3861 }else if( pIter
->base
.bEof
==0 ){
3862 Fts5SegIter
*pSeg
= &pIter
->aSeg
[pIter
->aFirst
[1].iFirst
];
3863 pIter
->xSetOutputs(pIter
, pSeg
);
3868 ** Allocate a new Fts5Iter object.
3870 ** The new object will be used to iterate through data in structure pStruct.
3871 ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
3872 ** is zero or greater, data from the first nSegment segments on level iLevel
3875 ** The iterator initially points to the first term/rowid entry in the
3878 static void fts5MultiIterNew(
3879 Fts5Index
*p
, /* FTS5 backend to iterate within */
3880 Fts5Structure
*pStruct
, /* Structure of specific index */
3881 int flags
, /* FTS5INDEX_QUERY_XXX flags */
3882 Fts5Colset
*pColset
, /* Colset to filter on (or NULL) */
3883 const u8
*pTerm
, int nTerm
, /* Term to seek to (or NULL/0) */
3884 int iLevel
, /* Level to iterate (-1 for all) */
3885 int nSegment
, /* Number of segments to merge (iLevel>=0) */
3886 Fts5Iter
**ppOut
/* New object */
3888 int nSeg
= 0; /* Number of segment-iters in use */
3889 int iIter
= 0; /* */
3890 int iSeg
; /* Used to iterate through segments */
3891 Fts5StructureLevel
*pLvl
;
3894 assert( (pTerm
==0 && nTerm
==0) || iLevel
<0 );
3896 /* Allocate space for the new multi-seg-iterator. */
3897 if( p
->rc
==SQLITE_OK
){
3899 assert( pStruct
->nSegment
==fts5StructureCountSegments(pStruct
) );
3900 nSeg
= pStruct
->nSegment
;
3901 nSeg
+= (p
->pHash
&& 0==(flags
& FTS5INDEX_QUERY_SKIPHASH
));
3903 nSeg
= MIN(pStruct
->aLevel
[iLevel
].nSeg
, nSegment
);
3906 *ppOut
= pNew
= fts5MultiIterAlloc(p
, nSeg
);
3908 assert( p
->rc
!=SQLITE_OK
);
3909 goto fts5MultiIterNew_post_check
;
3911 pNew
->bRev
= (0!=(flags
& FTS5INDEX_QUERY_DESC
));
3912 pNew
->bSkipEmpty
= (0!=(flags
& FTS5INDEX_QUERY_SKIPEMPTY
));
3913 pNew
->pColset
= pColset
;
3914 if( (flags
& FTS5INDEX_QUERY_NOOUTPUT
)==0 ){
3915 fts5IterSetOutputCb(&p
->rc
, pNew
);
3918 /* Initialize each of the component segment iterators. */
3919 if( p
->rc
==SQLITE_OK
){
3921 Fts5StructureLevel
*pEnd
= &pStruct
->aLevel
[pStruct
->nLevel
];
3922 if( p
->pHash
&& 0==(flags
& FTS5INDEX_QUERY_SKIPHASH
) ){
3923 /* Add a segment iterator for the current contents of the hash table. */
3924 Fts5SegIter
*pIter
= &pNew
->aSeg
[iIter
++];
3925 fts5SegIterHashInit(p
, pTerm
, nTerm
, flags
, pIter
);
3927 for(pLvl
=&pStruct
->aLevel
[0]; pLvl
<pEnd
; pLvl
++){
3928 for(iSeg
=pLvl
->nSeg
-1; iSeg
>=0; iSeg
--){
3929 Fts5StructureSegment
*pSeg
= &pLvl
->aSeg
[iSeg
];
3930 Fts5SegIter
*pIter
= &pNew
->aSeg
[iIter
++];
3932 fts5SegIterInit(p
, pSeg
, pIter
);
3934 fts5SegIterSeekInit(p
, pTerm
, nTerm
, flags
, pSeg
, pIter
);
3939 pLvl
= &pStruct
->aLevel
[iLevel
];
3940 for(iSeg
=nSeg
-1; iSeg
>=0; iSeg
--){
3941 fts5SegIterInit(p
, &pLvl
->aSeg
[iSeg
], &pNew
->aSeg
[iIter
++]);
3944 assert( iIter
==nSeg
);
3947 /* If the above was successful, each component iterator now points
3948 ** to the first entry in its segment. In this case initialize the
3949 ** aFirst[] array. Or, if an error has occurred, free the iterator
3950 ** object and set the output variable to NULL. */
3951 if( p
->rc
==SQLITE_OK
){
3952 fts5MultiIterFinishSetup(p
, pNew
);
3954 fts5MultiIterFree(pNew
);
3958 fts5MultiIterNew_post_check
:
3959 assert( (*ppOut
)!=0 || p
->rc
!=SQLITE_OK
);
3964 ** Create an Fts5Iter that iterates through the doclist provided
3965 ** as the second argument.
3967 static void fts5MultiIterNew2(
3968 Fts5Index
*p
, /* FTS5 backend to iterate within */
3969 Fts5Data
*pData
, /* Doclist to iterate through */
3970 int bDesc
, /* True for descending rowid order */
3971 Fts5Iter
**ppOut
/* New object */
3974 pNew
= fts5MultiIterAlloc(p
, 2);
3976 Fts5SegIter
*pIter
= &pNew
->aSeg
[1];
3977 pIter
->flags
= FTS5_SEGITER_ONETERM
;
3978 if( pData
->szLeaf
>0 ){
3979 pIter
->pLeaf
= pData
;
3980 pIter
->iLeafOffset
= fts5GetVarint(pData
->p
, (u64
*)&pIter
->iRowid
);
3981 pIter
->iEndofDoclist
= pData
->nn
;
3982 pNew
->aFirst
[1].iFirst
= 1;
3985 pIter
->flags
|= FTS5_SEGITER_REVERSE
;
3986 fts5SegIterReverseInitPage(p
, pIter
);
3988 fts5SegIterLoadNPos(p
, pIter
);
3992 pNew
->base
.bEof
= 1;
3994 fts5SegIterSetNext(p
, pIter
);
3999 fts5DataRelease(pData
);
4003 ** Return true if the iterator is at EOF or if an error has occurred.
4006 static int fts5MultiIterEof(Fts5Index
*p
, Fts5Iter
*pIter
){
4007 assert( pIter
!=0 || p
->rc
!=SQLITE_OK
);
4008 assert( p
->rc
!=SQLITE_OK
4009 || (pIter
->aSeg
[ pIter
->aFirst
[1].iFirst
].pLeaf
==0)==pIter
->base
.bEof
4011 return (p
->rc
|| pIter
->base
.bEof
);
4015 ** Return the rowid of the entry that the iterator currently points
4016 ** to. If the iterator points to EOF when this function is called the
4017 ** results are undefined.
4019 static i64
fts5MultiIterRowid(Fts5Iter
*pIter
){
4020 assert( pIter
->aSeg
[ pIter
->aFirst
[1].iFirst
].pLeaf
);
4021 return pIter
->aSeg
[ pIter
->aFirst
[1].iFirst
].iRowid
;
4025 ** Move the iterator to the next entry at or following iMatch.
4027 static void fts5MultiIterNextFrom(
4034 fts5MultiIterNext(p
, pIter
, 1, iMatch
);
4035 if( fts5MultiIterEof(p
, pIter
) ) break;
4036 iRowid
= fts5MultiIterRowid(pIter
);
4037 if( pIter
->bRev
==0 && iRowid
>=iMatch
) break;
4038 if( pIter
->bRev
!=0 && iRowid
<=iMatch
) break;
4043 ** Return a pointer to a buffer containing the term associated with the
4044 ** entry that the iterator currently points to.
4046 static const u8
*fts5MultiIterTerm(Fts5Iter
*pIter
, int *pn
){
4047 Fts5SegIter
*p
= &pIter
->aSeg
[ pIter
->aFirst
[1].iFirst
];
4053 ** Allocate a new segment-id for the structure pStruct. The new segment
4054 ** id must be between 1 and FTS5_MAX_SEGMENT inclusive, and must not be used by
4055 ** any currently existing segment. If a free segment id cannot be found,
4056 ** SQLITE_FULL is returned.
4058 ** If an error has already occurred, this function is a no-op. 0 is
4059 ** returned in this case.
4061 static int fts5AllocateSegid(Fts5Index
*p
, Fts5Structure
*pStruct
){
4064 if( p
->rc
==SQLITE_OK
){
4065 if( pStruct
->nSegment
>=FTS5_MAX_SEGMENT
){
4066 p
->rc
= SQLITE_FULL
;
4068 /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following
4069 ** array is 63 elements, or 252 bytes, in size. */
4070 u32 aUsed
[(FTS5_MAX_SEGMENT
+31) / 32];
4074 memset(aUsed
, 0, sizeof(aUsed
));
4075 for(iLvl
=0; iLvl
<pStruct
->nLevel
; iLvl
++){
4076 for(iSeg
=0; iSeg
<pStruct
->aLevel
[iLvl
].nSeg
; iSeg
++){
4077 int iId
= pStruct
->aLevel
[iLvl
].aSeg
[iSeg
].iSegid
;
4078 if( iId
<=FTS5_MAX_SEGMENT
&& iId
>0 ){
4079 aUsed
[(iId
-1) / 32] |= (u32
)1 << ((iId
-1) % 32);
4084 for(i
=0; aUsed
[i
]==0xFFFFFFFF; i
++);
4086 for(iSegid
=0; mask
& ((u32
)1 << iSegid
); iSegid
++);
4090 for(iLvl
=0; iLvl
<pStruct
->nLevel
; iLvl
++){
4091 for(iSeg
=0; iSeg
<pStruct
->aLevel
[iLvl
].nSeg
; iSeg
++){
4092 assert_nc( iSegid
!=pStruct
->aLevel
[iLvl
].aSeg
[iSeg
].iSegid
);
4095 assert_nc( iSegid
>0 && iSegid
<=FTS5_MAX_SEGMENT
);
4098 sqlite3_stmt
*pIdxSelect
= fts5IdxSelectStmt(p
);
4099 if( p
->rc
==SQLITE_OK
){
4100 u8 aBlob
[2] = {0xff, 0xff};
4101 sqlite3_bind_int(pIdxSelect
, 1, iSegid
);
4102 sqlite3_bind_blob(pIdxSelect
, 2, aBlob
, 2, SQLITE_STATIC
);
4103 assert_nc( sqlite3_step(pIdxSelect
)!=SQLITE_ROW
);
4104 p
->rc
= sqlite3_reset(pIdxSelect
);
4105 sqlite3_bind_null(pIdxSelect
, 2);
4116 ** Discard all data currently cached in the hash-tables.
4118 static void fts5IndexDiscardData(Fts5Index
*p
){
4119 assert( p
->pHash
|| p
->nPendingData
==0 );
4121 sqlite3Fts5HashClear(p
->pHash
);
4122 p
->nPendingData
= 0;
4124 p
->flushRc
= SQLITE_OK
;
4126 p
->nContentlessDelete
= 0;
4130 ** Return the size of the prefix, in bytes, that buffer
4131 ** (pNew/<length-unknown>) shares with buffer (pOld/nOld).
4133 ** Buffer (pNew/<length-unknown>) is guaranteed to be greater
4134 ** than buffer (pOld/nOld).
4136 static int fts5PrefixCompress(int nOld
, const u8
*pOld
, const u8
*pNew
){
4138 for(i
=0; i
<nOld
; i
++){
4139 if( pOld
[i
]!=pNew
[i
] ) break;
4144 static void fts5WriteDlidxClear(
4146 Fts5SegWriter
*pWriter
,
4147 int bFlush
/* If true, write dlidx to disk */
4150 assert( bFlush
==0 || (pWriter
->nDlidx
>0 && pWriter
->aDlidx
[0].buf
.n
>0) );
4151 for(i
=0; i
<pWriter
->nDlidx
; i
++){
4152 Fts5DlidxWriter
*pDlidx
= &pWriter
->aDlidx
[i
];
4153 if( pDlidx
->buf
.n
==0 ) break;
4155 assert( pDlidx
->pgno
!=0 );
4157 FTS5_DLIDX_ROWID(pWriter
->iSegid
, i
, pDlidx
->pgno
),
4158 pDlidx
->buf
.p
, pDlidx
->buf
.n
4161 sqlite3Fts5BufferZero(&pDlidx
->buf
);
4162 pDlidx
->bPrevValid
= 0;
4167 ** Grow the pWriter->aDlidx[] array to at least nLvl elements in size.
4168 ** Any new array elements are zeroed before returning.
4170 static int fts5WriteDlidxGrow(
4172 Fts5SegWriter
*pWriter
,
4175 if( p
->rc
==SQLITE_OK
&& nLvl
>=pWriter
->nDlidx
){
4176 Fts5DlidxWriter
*aDlidx
= (Fts5DlidxWriter
*)sqlite3_realloc64(
4177 pWriter
->aDlidx
, sizeof(Fts5DlidxWriter
) * nLvl
4180 p
->rc
= SQLITE_NOMEM
;
4182 size_t nByte
= sizeof(Fts5DlidxWriter
) * (nLvl
- pWriter
->nDlidx
);
4183 memset(&aDlidx
[pWriter
->nDlidx
], 0, nByte
);
4184 pWriter
->aDlidx
= aDlidx
;
4185 pWriter
->nDlidx
= nLvl
;
4192 ** If the current doclist-index accumulating in pWriter->aDlidx[] is large
4193 ** enough, flush it to disk and return 1. Otherwise discard it and return
4196 static int fts5WriteFlushDlidx(Fts5Index
*p
, Fts5SegWriter
*pWriter
){
4199 /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written
4200 ** to the database, also write the doclist-index to disk. */
4201 if( pWriter
->aDlidx
[0].buf
.n
>0 && pWriter
->nEmpty
>=FTS5_MIN_DLIDX_SIZE
){
4204 fts5WriteDlidxClear(p
, pWriter
, bFlag
);
4205 pWriter
->nEmpty
= 0;
4210 ** This function is called whenever processing of the doclist for the
4211 ** last term on leaf page (pWriter->iBtPage) is completed.
4213 ** The doclist-index for that term is currently stored in-memory within the
4214 ** Fts5SegWriter.aDlidx[] array. If it is large enough, this function
4215 ** writes it out to disk. Or, if it is too small to bother with, discards
4218 ** Fts5SegWriter.btterm currently contains the first term on page iBtPage.
4220 static void fts5WriteFlushBtree(Fts5Index
*p
, Fts5SegWriter
*pWriter
){
4223 assert( pWriter
->iBtPage
|| pWriter
->nEmpty
==0 );
4224 if( pWriter
->iBtPage
==0 ) return;
4225 bFlag
= fts5WriteFlushDlidx(p
, pWriter
);
4227 if( p
->rc
==SQLITE_OK
){
4228 const char *z
= (pWriter
->btterm
.n
>0?(const char*)pWriter
->btterm
.p
:"");
4229 /* The following was already done in fts5WriteInit(): */
4230 /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */
4231 sqlite3_bind_blob(p
->pIdxWriter
, 2, z
, pWriter
->btterm
.n
, SQLITE_STATIC
);
4232 sqlite3_bind_int64(p
->pIdxWriter
, 3, bFlag
+ ((i64
)pWriter
->iBtPage
<<1));
4233 sqlite3_step(p
->pIdxWriter
);
4234 p
->rc
= sqlite3_reset(p
->pIdxWriter
);
4235 sqlite3_bind_null(p
->pIdxWriter
, 2);
4237 pWriter
->iBtPage
= 0;
4241 ** This is called once for each leaf page except the first that contains
4242 ** at least one term. Argument (nTerm/pTerm) is the split-key - a term that
4243 ** is larger than all terms written to earlier leaves, and equal to or
4244 ** smaller than the first term on the new leaf.
4246 ** If an error occurs, an error code is left in Fts5Index.rc. If an error
4247 ** has already occurred when this function is called, it is a no-op.
4249 static void fts5WriteBtreeTerm(
4250 Fts5Index
*p
, /* FTS5 backend object */
4251 Fts5SegWriter
*pWriter
, /* Writer object */
4252 int nTerm
, const u8
*pTerm
/* First term on new page */
4254 fts5WriteFlushBtree(p
, pWriter
);
4255 if( p
->rc
==SQLITE_OK
){
4256 fts5BufferSet(&p
->rc
, &pWriter
->btterm
, nTerm
, pTerm
);
4257 pWriter
->iBtPage
= pWriter
->writer
.pgno
;
4262 ** This function is called when flushing a leaf page that contains no
4263 ** terms at all to disk.
4265 static void fts5WriteBtreeNoTerm(
4266 Fts5Index
*p
, /* FTS5 backend object */
4267 Fts5SegWriter
*pWriter
/* Writer object */
4269 /* If there were no rowids on the leaf page either and the doclist-index
4270 ** has already been started, append an 0x00 byte to it. */
4271 if( pWriter
->bFirstRowidInPage
&& pWriter
->aDlidx
[0].buf
.n
>0 ){
4272 Fts5DlidxWriter
*pDlidx
= &pWriter
->aDlidx
[0];
4273 assert( pDlidx
->bPrevValid
);
4274 sqlite3Fts5BufferAppendVarint(&p
->rc
, &pDlidx
->buf
, 0);
4277 /* Increment the "number of sequential leaves without a term" counter. */
4281 static i64
fts5DlidxExtractFirstRowid(Fts5Buffer
*pBuf
){
4285 iOff
= 1 + fts5GetVarint(&pBuf
->p
[1], (u64
*)&iRowid
);
4286 fts5GetVarint(&pBuf
->p
[iOff
], (u64
*)&iRowid
);
4291 ** Rowid iRowid has just been appended to the current leaf page. It is the
4292 ** first on the page. This function appends an appropriate entry to the current
4295 static void fts5WriteDlidxAppend(
4297 Fts5SegWriter
*pWriter
,
4303 for(i
=0; p
->rc
==SQLITE_OK
&& bDone
==0; i
++){
4305 Fts5DlidxWriter
*pDlidx
= &pWriter
->aDlidx
[i
];
4307 if( pDlidx
->buf
.n
>=p
->pConfig
->pgsz
){
4308 /* The current doclist-index page is full. Write it to disk and push
4309 ** a copy of iRowid (which will become the first rowid on the next
4310 ** doclist-index leaf page) up into the next level of the b-tree
4311 ** hierarchy. If the node being flushed is currently the root node,
4312 ** also push its first rowid upwards. */
4313 pDlidx
->buf
.p
[0] = 0x01; /* Not the root node */
4315 FTS5_DLIDX_ROWID(pWriter
->iSegid
, i
, pDlidx
->pgno
),
4316 pDlidx
->buf
.p
, pDlidx
->buf
.n
4318 fts5WriteDlidxGrow(p
, pWriter
, i
+2);
4319 pDlidx
= &pWriter
->aDlidx
[i
];
4320 if( p
->rc
==SQLITE_OK
&& pDlidx
[1].buf
.n
==0 ){
4321 i64 iFirst
= fts5DlidxExtractFirstRowid(&pDlidx
->buf
);
4323 /* This was the root node. Push its first rowid up to the new root. */
4324 pDlidx
[1].pgno
= pDlidx
->pgno
;
4325 sqlite3Fts5BufferAppendVarint(&p
->rc
, &pDlidx
[1].buf
, 0);
4326 sqlite3Fts5BufferAppendVarint(&p
->rc
, &pDlidx
[1].buf
, pDlidx
->pgno
);
4327 sqlite3Fts5BufferAppendVarint(&p
->rc
, &pDlidx
[1].buf
, iFirst
);
4328 pDlidx
[1].bPrevValid
= 1;
4329 pDlidx
[1].iPrev
= iFirst
;
4332 sqlite3Fts5BufferZero(&pDlidx
->buf
);
4333 pDlidx
->bPrevValid
= 0;
4339 if( pDlidx
->bPrevValid
){
4340 iVal
= (u64
)iRowid
- (u64
)pDlidx
->iPrev
;
4342 i64 iPgno
= (i
==0 ? pWriter
->writer
.pgno
: pDlidx
[-1].pgno
);
4343 assert( pDlidx
->buf
.n
==0 );
4344 sqlite3Fts5BufferAppendVarint(&p
->rc
, &pDlidx
->buf
, !bDone
);
4345 sqlite3Fts5BufferAppendVarint(&p
->rc
, &pDlidx
->buf
, iPgno
);
4349 sqlite3Fts5BufferAppendVarint(&p
->rc
, &pDlidx
->buf
, iVal
);
4350 pDlidx
->bPrevValid
= 1;
4351 pDlidx
->iPrev
= iRowid
;
4355 static void fts5WriteFlushLeaf(Fts5Index
*p
, Fts5SegWriter
*pWriter
){
4356 static const u8 zero
[] = { 0x00, 0x00, 0x00, 0x00 };
4357 Fts5PageWriter
*pPage
= &pWriter
->writer
;
4360 assert( (pPage
->pgidx
.n
==0)==(pWriter
->bFirstTermInPage
) );
4362 /* Set the szLeaf header field. */
4363 assert( 0==fts5GetU16(&pPage
->buf
.p
[2]) );
4364 fts5PutU16(&pPage
->buf
.p
[2], (u16
)pPage
->buf
.n
);
4366 if( pWriter
->bFirstTermInPage
){
4367 /* No term was written to this page. */
4368 assert( pPage
->pgidx
.n
==0 );
4369 fts5WriteBtreeNoTerm(p
, pWriter
);
4371 /* Append the pgidx to the page buffer. Set the szLeaf header field. */
4372 fts5BufferAppendBlob(&p
->rc
, &pPage
->buf
, pPage
->pgidx
.n
, pPage
->pgidx
.p
);
4375 /* Write the page out to disk */
4376 iRowid
= FTS5_SEGMENT_ROWID(pWriter
->iSegid
, pPage
->pgno
);
4377 fts5DataWrite(p
, iRowid
, pPage
->buf
.p
, pPage
->buf
.n
);
4379 /* Initialize the next page. */
4380 fts5BufferZero(&pPage
->buf
);
4381 fts5BufferZero(&pPage
->pgidx
);
4382 fts5BufferAppendBlob(&p
->rc
, &pPage
->buf
, 4, zero
);
4383 pPage
->iPrevPgidx
= 0;
4386 /* Increase the leaves written counter */
4387 pWriter
->nLeafWritten
++;
4389 /* The new leaf holds no terms or rowids */
4390 pWriter
->bFirstTermInPage
= 1;
4391 pWriter
->bFirstRowidInPage
= 1;
4395 ** Append term pTerm/nTerm to the segment being written by the writer passed
4396 ** as the second argument.
4398 ** If an error occurs, set the Fts5Index.rc error code. If an error has
4399 ** already occurred, this function is a no-op.
4401 static void fts5WriteAppendTerm(
4403 Fts5SegWriter
*pWriter
,
4404 int nTerm
, const u8
*pTerm
4406 int nPrefix
; /* Bytes of prefix compression for term */
4407 Fts5PageWriter
*pPage
= &pWriter
->writer
;
4408 Fts5Buffer
*pPgidx
= &pWriter
->writer
.pgidx
;
4409 int nMin
= MIN(pPage
->term
.n
, nTerm
);
4411 assert( p
->rc
==SQLITE_OK
);
4412 assert( pPage
->buf
.n
>=4 );
4413 assert( pPage
->buf
.n
>4 || pWriter
->bFirstTermInPage
);
4415 /* If the current leaf page is full, flush it to disk. */
4416 if( (pPage
->buf
.n
+ pPgidx
->n
+ nTerm
+ 2)>=p
->pConfig
->pgsz
){
4417 if( pPage
->buf
.n
>4 ){
4418 fts5WriteFlushLeaf(p
, pWriter
);
4419 if( p
->rc
!=SQLITE_OK
) return;
4421 fts5BufferGrow(&p
->rc
, &pPage
->buf
, nTerm
+FTS5_DATA_PADDING
);
4424 /* TODO1: Updating pgidx here. */
4425 pPgidx
->n
+= sqlite3Fts5PutVarint(
4426 &pPgidx
->p
[pPgidx
->n
], pPage
->buf
.n
- pPage
->iPrevPgidx
4428 pPage
->iPrevPgidx
= pPage
->buf
.n
;
4430 fts5PutU16(&pPgidx
->p
[pPgidx
->n
], pPage
->buf
.n
);
4434 if( pWriter
->bFirstTermInPage
){
4436 if( pPage
->pgno
!=1 ){
4437 /* This is the first term on a leaf that is not the leftmost leaf in
4438 ** the segment b-tree. In this case it is necessary to add a term to
4439 ** the b-tree hierarchy that is (a) larger than the largest term
4440 ** already written to the segment and (b) smaller than or equal to
4441 ** this term. In other words, a prefix of (pTerm/nTerm) that is one
4442 ** byte longer than the longest prefix (pTerm/nTerm) shares with the
4445 ** Usually, the previous term is available in pPage->term. The exception
4446 ** is if this is the first term written in an incremental-merge step.
4447 ** In this case the previous term is not available, so just write a
4448 ** copy of (pTerm/nTerm) into the parent node. This is slightly
4449 ** inefficient, but still correct. */
4451 if( pPage
->term
.n
){
4452 n
= 1 + fts5PrefixCompress(nMin
, pPage
->term
.p
, pTerm
);
4454 fts5WriteBtreeTerm(p
, pWriter
, n
, pTerm
);
4455 if( p
->rc
!=SQLITE_OK
) return;
4456 pPage
= &pWriter
->writer
;
4459 nPrefix
= fts5PrefixCompress(nMin
, pPage
->term
.p
, pTerm
);
4460 fts5BufferAppendVarint(&p
->rc
, &pPage
->buf
, nPrefix
);
4463 /* Append the number of bytes of new data, then the term data itself
4465 fts5BufferAppendVarint(&p
->rc
, &pPage
->buf
, nTerm
- nPrefix
);
4466 fts5BufferAppendBlob(&p
->rc
, &pPage
->buf
, nTerm
- nPrefix
, &pTerm
[nPrefix
]);
4468 /* Update the Fts5PageWriter.term field. */
4469 fts5BufferSet(&p
->rc
, &pPage
->term
, nTerm
, pTerm
);
4470 pWriter
->bFirstTermInPage
= 0;
4472 pWriter
->bFirstRowidInPage
= 0;
4473 pWriter
->bFirstRowidInDoclist
= 1;
4475 assert( p
->rc
|| (pWriter
->nDlidx
>0 && pWriter
->aDlidx
[0].buf
.n
==0) );
4476 pWriter
->aDlidx
[0].pgno
= pPage
->pgno
;
4480 ** Append a rowid and position-list size field to the writers output.
/*
** Append rowid iRowid to the leaf page being assembled in pWriter->writer.
** This is a no-op unless p->rc is SQLITE_OK.
**
** If the page body plus the page-index already reach pgsz bytes, the
** current leaf is flushed first. When the rowid is the first on its page,
** the current body size is stored in the first two bytes of the page and
** the rowid is also handed to fts5WriteDlidxAppend(). The rowid is written
** as a full varint if it is the first in its doclist or on its page, and
** as an unsigned delta from pWriter->iPrevRowid otherwise.
*/
4482 static void fts5WriteAppendRowid(
4484 Fts5SegWriter
*pWriter
,
4487 if( p
->rc
==SQLITE_OK
){
4488 Fts5PageWriter
*pPage
= &pWriter
->writer
;
/* Start a new leaf if body + page-index have already reached pgsz. */
4490 if( (pPage
->buf
.n
+ pPage
->pgidx
.n
)>=p
->pConfig
->pgsz
){
4491 fts5WriteFlushLeaf(p
, pWriter
);
4494 /* If this is to be the first rowid written to the page, set the
4495 ** rowid-pointer in the page-header. Also append a value to the dlidx
4496 ** buffer, in case a doclist-index is required. */
4497 if( pWriter
->bFirstRowidInPage
){
4498 fts5PutU16(pPage
->buf
.p
, (u16
)pPage
->buf
.n
);
4499 fts5WriteDlidxAppend(p
, pWriter
, iRowid
);
4502 /* Write the rowid. */
4503 if( pWriter
->bFirstRowidInDoclist
|| pWriter
->bFirstRowidInPage
){
4504 fts5BufferAppendVarint(&p
->rc
, &pPage
->buf
, iRowid
);
4506 assert_nc( p
->rc
|| iRowid
>pWriter
->iPrevRowid
);
4507 fts5BufferAppendVarint(&p
->rc
, &pPage
->buf
,
4508 (u64
)iRowid
- (u64
)pWriter
->iPrevRowid
/* Remember this rowid for the next delta and clear both first-rowid flags. */
4511 pWriter
->iPrevRowid
= iRowid
;
4512 pWriter
->bFirstRowidInDoclist
= 0;
4513 pWriter
->bFirstRowidInPage
= 0;
/*
** Append position-list data (starting at aData) to the leaf page being
** assembled in pWriter->writer.
**
** While no error has occurred and the page body, the page-index and the
** n bytes still to be written would together reach pgsz, a run of whole
** varints covering at least nReq bytes (the space left on the page) is
** appended and the leaf is flushed. A final fts5BufferAppendBlob() call
** appends n bytes from a to the then-current page.
**
** NOTE(review): the statements that advance a and reduce n after each
** flush are assumed to sit between the visible lines - confirm.
*/
4517 static void fts5WriteAppendPoslistData(
4519 Fts5SegWriter
*pWriter
,
4523 Fts5PageWriter
*pPage
= &pWriter
->writer
;
4524 const u8
*a
= aData
;
4527 assert( p
->pConfig
->pgsz
>0 || p
->rc
!=SQLITE_OK
);
4528 while( p
->rc
==SQLITE_OK
4529 && (pPage
->buf
.n
+ pPage
->pgidx
.n
+ n
)>=p
->pConfig
->pgsz
/* nReq: number of bytes that can still be added before pgsz is reached. */
4531 int nReq
= p
->pConfig
->pgsz
- pPage
->buf
.n
- pPage
->pgidx
.n
;
/* Step over complete varints so that no varint is split between pages. */
4533 while( nCopy
<nReq
){
4535 nCopy
+= fts5GetVarint(&a
[nCopy
], (u64
*)&dummy
);
4537 fts5BufferAppendBlob(&p
->rc
, &pPage
->buf
, nCopy
, a
);
4540 fts5WriteFlushLeaf(p
, pWriter
);
4543 fts5BufferAppendBlob(&p
->rc
, &pPage
->buf
, n
, a
);
4548 ** Flush any data cached by the writer object to the database. Free any
4549 ** allocations associated with the writer.
/*
** Finish writing a segment with writer pWriter.
**
** If no error has occurred: the final leaf is flushed when its buffer
** holds more than 4 bytes (more than the leaf header), *pnLeaf is set to
** pLeaf->pgno-1, and fts5WriteFlushBtree() is called when pLeaf->pgno is
** greater than 1.
**
** The term, leaf, page-index and btterm buffers, each doclist-index
** buffer in pWriter->aDlidx[], and the aDlidx array itself are freed.
*/
4551 static void fts5WriteFinish(
4553 Fts5SegWriter
*pWriter
, /* Writer object */
4554 int *pnLeaf
/* OUT: Number of leaf pages in b-tree */
4557 Fts5PageWriter
*pLeaf
= &pWriter
->writer
;
4558 if( p
->rc
==SQLITE_OK
){
4559 assert( pLeaf
->pgno
>=1 );
/* A buffer of 4 bytes or fewer holds no data beyond the leaf header. */
4560 if( pLeaf
->buf
.n
>4 ){
4561 fts5WriteFlushLeaf(p
, pWriter
);
4563 *pnLeaf
= pLeaf
->pgno
-1;
4564 if( pLeaf
->pgno
>1 ){
4565 fts5WriteFlushBtree(p
, pWriter
);
/* Release all memory owned by the writer. */
4568 fts5BufferFree(&pLeaf
->term
);
4569 fts5BufferFree(&pLeaf
->buf
);
4570 fts5BufferFree(&pLeaf
->pgidx
);
4571 fts5BufferFree(&pWriter
->btterm
);
4573 for(i
=0; i
<pWriter
->nDlidx
; i
++){
4574 sqlite3Fts5BufferFree(&pWriter
->aDlidx
[i
].buf
);
4576 sqlite3_free(pWriter
->aDlidx
);
/*
** Initialize segment writer pWriter to write segment iSegid.
**
** The writer is zeroed, the segment id recorded, fts5WriteDlidxGrow() is
** called with a count of 1, and the writer starts at leaf page 1 with
** bFirstTermInPage set and iBtPage set to 1. Both page buffers are sized
** to pgsz + FTS5_DATA_PADDING bytes. If p->pIdxWriter is still 0, the
** INSERT statement for the %_idx table is prepared. Finally, if no error
** has occurred, the 4-byte leaf header is zeroed and the segment id is
** bound to parameter 1 of p->pIdxWriter.
*/
4579 static void fts5WriteInit(
4581 Fts5SegWriter
*pWriter
,
4584 const int nBuffer
= p
->pConfig
->pgsz
+ FTS5_DATA_PADDING
;
4586 memset(pWriter
, 0, sizeof(Fts5SegWriter
));
4587 pWriter
->iSegid
= iSegid
;
4589 fts5WriteDlidxGrow(p
, pWriter
, 1);
4590 pWriter
->writer
.pgno
= 1;
4591 pWriter
->bFirstTermInPage
= 1;
4592 pWriter
->iBtPage
= 1;
/* The memset() above left both buffers empty. */
4594 assert( pWriter
->writer
.buf
.n
==0 );
4595 assert( pWriter
->writer
.pgidx
.n
==0 );
4597 /* Grow the two buffers to pgsz + padding bytes in size. */
4598 sqlite3Fts5BufferSize(&p
->rc
, &pWriter
->writer
.pgidx
, nBuffer
);
4599 sqlite3Fts5BufferSize(&p
->rc
, &pWriter
->writer
.buf
, nBuffer
);
/* Prepare the %_idx INSERT statement once; it is kept in p->pIdxWriter. */
4601 if( p
->pIdxWriter
==0 ){
4602 Fts5Config
*pConfig
= p
->pConfig
;
4603 fts5IndexPrepareStmt(p
, &p
->pIdxWriter
, sqlite3_mprintf(
4604 "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)",
4605 pConfig
->zDb
, pConfig
->zName
4609 if( p
->rc
==SQLITE_OK
){
4610 /* Initialize the 4-byte leaf-page header to 0x00. */
4611 memset(pWriter
->writer
.buf
.p
, 0, 4);
4612 pWriter
->writer
.buf
.n
= 4;
4614 /* Bind the current output segment id to the index-writer. This is an
4615 ** optimization over binding the same value over and over as rows are
4616 ** inserted into %_idx by the current writer. */
4617 sqlite3_bind_int(p
->pIdxWriter
, 1, pWriter
->iSegid
);
4622 ** Iterator pIter was used to iterate through the input segments of an
4623 ** incremental merge operation. This function is called if the incremental
4624 ** merge step has finished but the input has not been completely exhausted.
/*
** Trim the input segments of a partially completed merge, using the
** state of each segment iterator in pIter->aSeg[].
**
** For each segment iterator (while p->rc is SQLITE_OK):
**
**   * An iterator whose pSeg pointer is 0 is tested for first; no action
**     for that case is visible here.
**   * If the iterator has no current leaf (pLeaf==0), both pgnoFirst and
**     pgnoLast of the segment are set to 0.
**   * Otherwise leaf iTermLeafPgno is re-read and a replacement page is
**     assembled in buf: a 4-byte header, the current term (length varint
**     plus bytes), and the page content from offset iTermLeafOffset up to
**     szLeaf, followed by a new page-index. An offset beyond szLeaf is
**     reported as FTS5_CORRUPT. The segment then starts at iTermLeafPgno:
**     fts5DataDelete() is called with the rowid of page 1 and the rowid of
**     this leaf, and the replacement page is written at the leaf rowid.
*/
4626 static void fts5TrimSegments(Fts5Index
*p
, Fts5Iter
*pIter
){
4629 memset(&buf
, 0, sizeof(Fts5Buffer
));
4630 for(i
=0; i
<pIter
->nSeg
&& p
->rc
==SQLITE_OK
; i
++){
4631 Fts5SegIter
*pSeg
= &pIter
->aSeg
[i
];
4632 if( pSeg
->pSeg
==0 ){
4634 }else if( pSeg
->pLeaf
==0 ){
4635 /* All keys from this input segment have been transferred to the output.
4636 ** Set both the first and last page-numbers to 0 to indicate that the
4637 ** segment is now empty. */
4638 pSeg
->pSeg
->pgnoLast
= 0;
4639 pSeg
->pSeg
->pgnoFirst
= 0;
4641 int iOff
= pSeg
->iTermLeafOffset
; /* Offset on new first leaf page */
4644 int iId
= pSeg
->pSeg
->iSegid
;
4645 u8 aHdr
[4] = {0x00, 0x00, 0x00, 0x00};
/* Load the leaf that holds the term the iterator currently points at. */
4647 iLeafRowid
= FTS5_SEGMENT_ROWID(iId
, pSeg
->iTermLeafPgno
);
4648 pData
= fts5LeafRead(p
, iLeafRowid
);
4650 if( iOff
>pData
->szLeaf
){
4651 /* This can occur if the pages that the segments occupy overlap - if
4652 ** a single page has been assigned to more than one segment. In
4653 ** this case a prior iteration of this loop may have corrupted the
4654 ** segment currently being trimmed. */
4655 p
->rc
= FTS5_CORRUPT
;
/* Assemble the replacement page: header, full term, remaining content. */
4657 fts5BufferZero(&buf
);
4658 fts5BufferGrow(&p
->rc
, &buf
, pData
->nn
);
4659 fts5BufferAppendBlob(&p
->rc
, &buf
, sizeof(aHdr
), aHdr
);
4660 fts5BufferAppendVarint(&p
->rc
, &buf
, pSeg
->term
.n
);
4661 fts5BufferAppendBlob(&p
->rc
, &buf
, pSeg
->term
.n
, pSeg
->term
.p
);
4662 fts5BufferAppendBlob(&p
->rc
, &buf
,pData
->szLeaf
-iOff
,&pData
->p
[iOff
]);
4663 if( p
->rc
==SQLITE_OK
){
4664 /* Set the szLeaf field */
4665 fts5PutU16(&buf
.p
[2], (u16
)buf
.n
);
4668 /* Set up the new page-index array */
4669 fts5BufferAppendVarint(&p
->rc
, &buf
, 4);
/* Only when the iterator is still on the term leaf, the doclist ends
** before szLeaf and iPgidxOff lies within the page: add one more
** page-index entry and copy the old page-index from iPgidxOff onwards. */
4670 if( pSeg
->iLeafPgno
==pSeg
->iTermLeafPgno
4671 && pSeg
->iEndofDoclist
<pData
->szLeaf
4672 && pSeg
->iPgidxOff
<=pData
->nn
4674 int nDiff
= pData
->szLeaf
- pSeg
->iEndofDoclist
;
4675 fts5BufferAppendVarint(&p
->rc
, &buf
, buf
.n
- 1 - nDiff
- 4);
4676 fts5BufferAppendBlob(&p
->rc
, &buf
,
4677 pData
->nn
- pSeg
->iPgidxOff
, &pData
->p
[pSeg
->iPgidxOff
]
/* The segment now begins at the term leaf; write the rebuilt page. */
4681 pSeg
->pSeg
->pgnoFirst
= pSeg
->iTermLeafPgno
;
4682 fts5DataDelete(p
, FTS5_SEGMENT_ROWID(iId
, 1), iLeafRowid
);
4683 fts5DataWrite(p
, iLeafRowid
, buf
.p
, buf
.n
);
4685 fts5DataRelease(pData
);
4689 fts5BufferFree(&buf
);
/*
** Chunk callback handed to fts5ChunkIterate() by fts5IndexMergeLevel().
** The context pointer pCtx is the Fts5SegWriter for the output segment;
** each chunk of position-list data (pChunk/nChunk) is forwarded to
** fts5WriteAppendPoslistData().
*/
4692 static void fts5MergeChunkCallback(
4695 const u8
*pChunk
, int nChunk
4697 Fts5SegWriter
*pWriter
= (Fts5SegWriter
*)pCtx
;
4698 fts5WriteAppendPoslistData(p
, pWriter
, pChunk
, nChunk
);
/*
** Merge input segments from level iLvl of *ppStruct into a single output
** segment on level iLvl+1.
**
** Two set-up paths are visible. In one, the writer is initialised with the
** id of the last segment already on the output level, writing resumes at
** that segment's pgnoLast+1, and the number of inputs is pLvl->nMerge. In
** the other, a new segment id is allocated, the structure is extended
** (adding a level if iLvl is the last one), a new output segment with
** pgnoFirst==1 is added, and all pLvl->nSeg segments are used as input.
** NOTE(review): the test selecting between the two paths is not visible
** here; presumably it is on pLvl->nMerge - confirm.
**
** The main loop walks a multi-iterator over the input segments. For each
** entry it tracks the current term, skips entries that annihilate (empty
** position list and either the output is the oldest segment or the entry
** is not a delete), writes the term once, then the rowid and the
** position-list (or, for detail=none, up to two 0x00 marker bytes). When
** a new term is reached and pnRem is set, writer.nLeafWritten is compared
** against nRem.
**
** Afterwards the writer is finished. If the iterator reached EOF the
** input segments are removed from disk and from the level; otherwise the
** inputs are trimmed with fts5TrimSegments() and pLvl->nMerge records the
** number of inputs. If pnRem is not NULL, *pnRem is reduced by
** the number of leaves written.
*/
4704 static void fts5IndexMergeLevel(
4705 Fts5Index
*p
, /* FTS5 backend object */
4706 Fts5Structure
**ppStruct
, /* IN/OUT: Structure of index */
4707 int iLvl
, /* Level to read input from */
4708 int *pnRem
/* Write up to this many output leaves */
4710 Fts5Structure
*pStruct
= *ppStruct
;
4711 Fts5StructureLevel
*pLvl
= &pStruct
->aLevel
[iLvl
];
4712 Fts5StructureLevel
*pLvlOut
;
4713 Fts5Iter
*pIter
= 0; /* Iterator to read input data */
4714 int nRem
= pnRem
? *pnRem
: 0; /* Output leaf pages left to write */
4715 int nInput
; /* Number of input segments */
4716 Fts5SegWriter writer
; /* Writer object */
4717 Fts5StructureSegment
*pSeg
; /* Output segment */
4719 int bOldest
; /* True if the output segment is the oldest */
4720 int eDetail
= p
->pConfig
->eDetail
;
4721 const int flags
= FTS5INDEX_QUERY_NOOUTPUT
;
4722 int bTermWritten
= 0; /* True if current term already output */
4724 assert( iLvl
<pStruct
->nLevel
);
4725 assert( pLvl
->nMerge
<=pLvl
->nSeg
);
4727 memset(&writer
, 0, sizeof(Fts5SegWriter
));
4728 memset(&term
, 0, sizeof(Fts5Buffer
));
/* Path 1: append to the last segment already on the output level. */
4730 pLvlOut
= &pStruct
->aLevel
[iLvl
+1];
4731 assert( pLvlOut
->nSeg
>0 );
4732 nInput
= pLvl
->nMerge
;
4733 pSeg
= &pLvlOut
->aSeg
[pLvlOut
->nSeg
-1];
4735 fts5WriteInit(p
, &writer
, pSeg
->iSegid
);
4736 writer
.writer
.pgno
= pSeg
->pgnoLast
+1;
/* Path 2: create a brand new output segment. */
4739 int iSegid
= fts5AllocateSegid(p
, pStruct
);
4741 /* Extend the Fts5Structure object as required to ensure the output
4742 ** segment exists. */
4743 if( iLvl
==pStruct
->nLevel
-1 ){
4744 fts5StructureAddLevel(&p
->rc
, ppStruct
);
4745 pStruct
= *ppStruct
;
4747 fts5StructureExtendLevel(&p
->rc
, pStruct
, iLvl
+1, 1, 0);
/* Recompute the level pointers from pStruct after the calls above. */
4749 pLvl
= &pStruct
->aLevel
[iLvl
];
4750 pLvlOut
= &pStruct
->aLevel
[iLvl
+1];
4752 fts5WriteInit(p
, &writer
, iSegid
);
4754 /* Add the new segment to the output level */
4755 pSeg
= &pLvlOut
->aSeg
[pLvlOut
->nSeg
];
4757 pSeg
->pgnoFirst
= 1;
4758 pSeg
->iSegid
= iSegid
;
4759 pStruct
->nSegment
++;
4761 /* Read input from all segments in the input level */
4762 nInput
= pLvl
->nSeg
;
4764 /* Set the range of origins that will go into the output segment. */
4765 if( pStruct
->nOriginCntr
>0 ){
4766 pSeg
->iOrigin1
= pLvl
->aSeg
[0].iOrigin1
;
4767 pSeg
->iOrigin2
= pLvl
->aSeg
[pLvl
->nSeg
-1].iOrigin2
;
/* The output is the oldest segment if it is the only segment on the
** last level of the structure. */
4770 bOldest
= (pLvlOut
->nSeg
==1 && pStruct
->nLevel
==iLvl
+2);
4773 for(fts5MultiIterNew(p
, pStruct
, flags
, 0, 0, 0, iLvl
, nInput
, &pIter
);
4774 fts5MultiIterEof(p
, pIter
)==0;
4775 fts5MultiIterNext(p
, pIter
, 0, 0)
4777 Fts5SegIter
*pSegIter
= &pIter
->aSeg
[ pIter
->aFirst
[1].iFirst
];
4778 int nPos
; /* position-list size field value */
/* Detect a change of term by comparing against the saved copy. */
4782 pTerm
= fts5MultiIterTerm(pIter
, &nTerm
);
4783 if( nTerm
!=term
.n
|| fts5Memcmp(pTerm
, term
.p
, nTerm
) ){
4784 if( pnRem
&& writer
.nLeafWritten
>nRem
){
4787 fts5BufferSet(&p
->rc
, &term
, nTerm
, pTerm
);
4791 /* Check for key annihilation. */
4792 if( pSegIter
->nPos
==0 && (bOldest
|| pSegIter
->bDel
==0) ) continue;
4794 if( p
->rc
==SQLITE_OK
&& bTermWritten
==0 ){
4795 /* This is a new term. Append a term to the output segment. */
4796 fts5WriteAppendTerm(p
, &writer
, nTerm
, pTerm
);
4800 /* Append the rowid to the output */
4801 /* WRITEPOSLISTSIZE */
4802 fts5WriteAppendRowid(p
, &writer
, fts5MultiIterRowid(pIter
));
/* detail=none: no position data, only 0x00 marker bytes are written. */
4804 if( eDetail
==FTS5_DETAIL_NONE
){
4805 if( pSegIter
->bDel
){
4806 fts5BufferAppendVarint(&p
->rc
, &writer
.writer
.buf
, 0);
4807 if( pSegIter
->nPos
>0 ){
4808 fts5BufferAppendVarint(&p
->rc
, &writer
.writer
.buf
, 0);
4812 /* Append the position-list data to the output */
4813 nPos
= pSegIter
->nPos
*2 + pSegIter
->bDel
;
4814 fts5BufferAppendVarint(&p
->rc
, &writer
.writer
.buf
, nPos
);
4815 fts5ChunkIterate(p
, pSegIter
, (void*)&writer
, fts5MergeChunkCallback
);
4819 /* Flush the last leaf page to disk. Set the output segment b-tree height
4820 ** and last leaf page number at the same time. */
4821 fts5WriteFinish(p
, &writer
, &pSeg
->pgnoLast
);
4823 assert( pIter
!=0 || p
->rc
!=SQLITE_OK
);
/* Input exhausted: the merge is complete and the inputs can be dropped. */
4824 if( fts5MultiIterEof(p
, pIter
) ){
4827 /* Remove the redundant segments from the %_data table */
4828 assert( pSeg
->nEntry
==0 );
4829 for(i
=0; i
<nInput
; i
++){
4830 Fts5StructureSegment
*pOld
= &pLvl
->aSeg
[i
];
4831 pSeg
->nEntry
+= (pOld
->nEntry
- pOld
->nEntryTombstone
);
4832 fts5DataRemoveSegment(p
, pOld
);
4835 /* Remove the redundant segments from the input level */
4836 if( pLvl
->nSeg
!=nInput
){
4837 int nMove
= (pLvl
->nSeg
- nInput
) * sizeof(Fts5StructureSegment
);
4838 memmove(pLvl
->aSeg
, &pLvl
->aSeg
[nInput
], nMove
);
4840 pStruct
->nSegment
-= nInput
;
4841 pLvl
->nSeg
-= nInput
;
/* An output segment with pgnoLast==0 received no pages. */
4843 if( pSeg
->pgnoLast
==0 ){
4845 pStruct
->nSegment
--;
/* Input not exhausted: trim the inputs and record the ongoing merge. */
4848 assert( pSeg
->pgnoLast
>0 );
4849 fts5TrimSegments(p
, pIter
);
4850 pLvl
->nMerge
= nInput
;
4853 fts5MultiIterFree(pIter
);
4854 fts5BufferFree(&term
);
4855 if( pnRem
) *pnRem
-= writer
.nLeafWritten
;
4859 ** If this is not a contentless_delete=1 table, or if the 'deletemerge'
4860 ** configuration option is set to 0, then this function always returns -1.
4861 ** Otherwise, it searches the structure object passed as the second argument
4862 ** for a level suitable for merging due to having a large number of
4863 ** tombstones in the tombstone hash. If one is found, its index is returned.
4864 ** Otherwise, if there is no suitable level, -1.
/*
** Search pStruct for a level worth merging because of its tombstones.
**
** The search only runs when the configuration has bContentlessDelete set
** and nDeleteMerge greater than zero. For every level the nEntry and
** nEntryTombstone counts of its segments are summed and the tombstone
** share is computed as a whole-number percentage. A level qualifies when
** that percentage is at least nDeleteMerge and larger than the best
** percentage seen so far (nBest).
*/
4866 static int fts5IndexFindDeleteMerge(Fts5Index
*p
, Fts5Structure
*pStruct
){
4867 Fts5Config
*pConfig
= p
->pConfig
;
4869 if( pConfig
->bContentlessDelete
&& pConfig
->nDeleteMerge
>0 ){
4873 for(ii
=0; ii
<pStruct
->nLevel
; ii
++){
4874 Fts5StructureLevel
*pLvl
= &pStruct
->aLevel
[ii
];
/* Total the entry and tombstone counts over the segments of this level. */
4878 for(iSeg
=0; iSeg
<pLvl
->nSeg
; iSeg
++){
4879 nEntry
+= pLvl
->aSeg
[iSeg
].nEntry
;
4880 nTomb
+= pLvl
->aSeg
[iSeg
].nEntryTombstone
;
4882 assert_nc( nEntry
>0 || pLvl
->nSeg
==0 );
/* Integer percentage of entries on this level that are tombstones. */
4884 int nPercent
= (nTomb
* 100) / nEntry
;
4885 if( nPercent
>=pConfig
->nDeleteMerge
&& nPercent
>nBest
){
4896 ** Do up to nPg pages of automerge work on the index.
4898 ** Return true if any changes were actually made, or false otherwise.
/*
** Perform incremental merge work on the index *ppStruct.
**
** While nRem is positive and no error has occurred, a level to merge is
** selected: the loop over levels compares each level's nMerge and nSeg
** against nBest, and fts5IndexFindDeleteMerge() may also supply the
** level. The loop stops when the selected level is negative. Otherwise
** fts5IndexMergeLevel() runs on that level with nRem as its page budget,
** and, if the level has no merge left in progress (nMerge==0),
** fts5StructurePromote() is called for the level above it. After a round
** nMin is raised from 1 to 2. The possibly reallocated structure
** is stored back in *ppStruct.
**
** NOTE(review): how nBest, nMin and the delete-merge lookup interact is
** decided by lines not visible here - confirm before relying on it.
*/
4900 static int fts5IndexMerge(
4901 Fts5Index
*p
, /* FTS5 backend object */
4902 Fts5Structure
**ppStruct
, /* IN/OUT: Current structure of index */
4903 int nPg
, /* Pages of work to do */
4904 int nMin
/* Minimum number of segments to merge */
4908 Fts5Structure
*pStruct
= *ppStruct
;
4909 while( nRem
>0 && p
->rc
==SQLITE_OK
){
4910 int iLvl
; /* To iterate through levels */
4911 int iBestLvl
= 0; /* Level offering the most input segments */
4912 int nBest
= 0; /* Number of input segments on best level */
4914 /* Set iBestLvl to the level to read input segments from. Or to -1 if
4915 ** there is no level suitable to merge segments from. */
4916 assert( pStruct
->nLevel
>0 );
4917 for(iLvl
=0; iLvl
<pStruct
->nLevel
; iLvl
++){
4918 Fts5StructureLevel
*pLvl
= &pStruct
->aLevel
[iLvl
];
4920 if( pLvl
->nMerge
>nBest
){
4926 if( pLvl
->nSeg
>nBest
){
4932 iBestLvl
= fts5IndexFindDeleteMerge(p
, pStruct
);
4935 if( iBestLvl
<0 ) break;
/* Merge on the chosen level; nRem is reduced by the leaves written. */
4937 fts5IndexMergeLevel(p
, &pStruct
, iBestLvl
, &nRem
);
4938 if( p
->rc
==SQLITE_OK
&& pStruct
->aLevel
[iBestLvl
].nMerge
==0 ){
4939 fts5StructurePromote(p
, iBestLvl
+1, pStruct
);
4942 if( nMin
==1 ) nMin
= 2;
4944 *ppStruct
= pStruct
;
4949 ** A total of nLeaf leaf pages of data has just been flushed to a level-0
4950 ** segment. This function updates the write-counter accordingly and, if
4951 ** necessary, performs incremental merge work.
4953 ** If an error occurs, set the Fts5Index.rc error code. If an error has
4954 ** already occurred, this function is a no-op.
/*
** Account for nLeaf leaf pages just written and run the matching amount
** of automerge work.
**
** Nothing happens unless p->rc is SQLITE_OK, nAutomerge is greater than
** zero and *ppStruct is not NULL. nWork is the number of nWorkUnit
** boundaries the write-counter crosses when nLeaf is added to it. The
** page budget passed to fts5IndexMerge() is nWorkUnit * nWork * nLevel,
** with nAutomerge as the minimum number of segments to merge.
*/
4956 static void fts5IndexAutomerge(
4957 Fts5Index
*p
, /* FTS5 backend object */
4958 Fts5Structure
**ppStruct
, /* IN/OUT: Current structure of index */
4959 int nLeaf
/* Number of output leaves just written */
4961 if( p
->rc
==SQLITE_OK
&& p
->pConfig
->nAutomerge
>0 && ALWAYS((*ppStruct
)!=0) ){
4962 Fts5Structure
*pStruct
= *ppStruct
;
4963 u64 nWrite
; /* Initial value of write-counter */
4964 int nWork
; /* Number of work-quanta to perform */
4965 int nRem
; /* Number of leaf pages left to write */
4967 /* Update the write-counter. While doing so, set nWork. */
4968 nWrite
= pStruct
->nWriteCounter
;
4969 nWork
= (int)(((nWrite
+ nLeaf
) / p
->nWorkUnit
) - (nWrite
/ p
->nWorkUnit
));
4970 pStruct
->nWriteCounter
+= nLeaf
;
/* Scale the budget by the number of levels in the structure. */
4971 nRem
= (int)(p
->nWorkUnit
* nWork
* pStruct
->nLevel
);
4973 fts5IndexMerge(p
, ppStruct
, nRem
, p
->pConfig
->nAutomerge
);
/*
** Run crisis merges on *ppStruct.
**
** If the structure exists and has at least one level, then for as long as
** no error has occurred and level iLvl holds nCrisisMerge or more
** segments, that level is merged without a page limit (pnRem passed as 0)
** and fts5StructurePromote() is called for level iLvl+1. The possibly
** reallocated structure is stored back in *ppStruct.
**
** NOTE(review): the initial value of iLvl and its update between rounds
** are not visible here - confirm.
*/
4977 static void fts5IndexCrisismerge(
4978 Fts5Index
*p
, /* FTS5 backend object */
4979 Fts5Structure
**ppStruct
/* IN/OUT: Current structure of index */
4981 const int nCrisis
= p
->pConfig
->nCrisisMerge
;
4982 Fts5Structure
*pStruct
= *ppStruct
;
4983 if( pStruct
&& pStruct
->nLevel
>0 ){
4985 while( p
->rc
==SQLITE_OK
&& pStruct
->aLevel
[iLvl
].nSeg
>=nCrisis
){
4986 fts5IndexMergeLevel(p
, &pStruct
, iLvl
, 0);
/* On success the merge has ensured that a level above iLvl exists. */
4987 assert( p
->rc
!=SQLITE_OK
|| pStruct
->nLevel
>(iLvl
+1) );
4988 fts5StructurePromote(p
, iLvl
+1, pStruct
);
4991 *ppStruct
= pStruct
;
4995 static int fts5IndexReturn(Fts5Index
*p
){
/*
** Context structure that carries a segment writer (member writer).
** NOTE(review): judging by the name it is used while flushing data to
** disk; any further members and its users are not visible here - confirm.
*/
5001 typedef struct Fts5FlushCtx Fts5FlushCtx
;
5002 struct Fts5FlushCtx
{
5004 Fts5SegWriter writer
;
5008 ** Buffer aBuf[] contains a list of varints, all small enough to fit
5009 ** in a 32-bit integer. Return the size of the largest prefix of this
5010 ** list nMax bytes or less in size.
/*
** Measure a prefix of the varint list in aBuf[] against the limit nMax.
**
** The size of the first varint is read into ret. Further varints are then
** read one at a time, and the scan stops as soon as including the next
** varint would take the running size beyond nMax bytes.
**
** NOTE(review): the accumulation into ret and the return statement are
** not visible here - confirm that ret is the value returned.
*/
5012 static int fts5PoslistPrefix(const u8
*aBuf
, int nMax
){
5015 ret
= fts5GetVarint32(aBuf
, dummy
);
5018 int i
= fts5GetVarint32(&aBuf
[ret
], dummy
);
5019 if( (ret
+ i
) > nMax
) break;
5027 ** Execute the SQL statement:
5029 ** DELETE FROM %_idx WHERE (segid, (pgno/2)) = ($iSegid, $iPgno);
5031 ** This is used when a secure-delete operation removes the last term
5032 ** from a segment leaf page. In that case the %_idx entry is removed
5033 ** too. This is done to ensure that if all instances of a token are
5034 ** removed from an fts5 database in secure-delete mode, no trace of
5035 ** the token itself remains in the database.
/*
** Delete the %_idx row for page iPgno of segment iSegid.
**
** The DELETE statement is prepared on first use and kept in
** p->pDeleteFromIdx. If no error has occurred, iSegid and iPgno are bound
** to parameters 1 and 2, the statement is stepped, and the result of
** sqlite3_reset() becomes the new value of p->rc.
*/
5037 static void fts5SecureDeleteIdxEntry(
5038 Fts5Index
*p
, /* FTS5 backend object */
5039 int iSegid
, /* Id of segment to delete entry for */
5040 int iPgno
/* Page number within segment */
/* Only valid for indexes using the secure-delete format version. */
5043 assert( p
->pConfig
->iVersion
==FTS5_CURRENT_VERSION_SECUREDELETE
);
5044 if( p
->pDeleteFromIdx
==0 ){
5045 fts5IndexPrepareStmt(p
, &p
->pDeleteFromIdx
, sqlite3_mprintf(
5046 "DELETE FROM '%q'.'%q_idx' WHERE (segid, (pgno/2)) = (?1, ?2)",
5047 p
->pConfig
->zDb
, p
->pConfig
->zName
5050 if( p
->rc
==SQLITE_OK
){
5051 sqlite3_bind_int(p
->pDeleteFromIdx
, 1, iSegid
);
5052 sqlite3_bind_int(p
->pDeleteFromIdx
, 2, iPgno
);
/* The return value of sqlite3_step() is not used; sqlite3_reset() on
** the next line supplies the value stored in p->rc. */
5053 sqlite3_step(p
->pDeleteFromIdx
);
5054 p
->rc
= sqlite3_reset(p
->pDeleteFromIdx
);
5060 ** This is called when a secure-delete operation removes a position-list
5061 ** that overflows onto segment page iPgno of segment pSeg. This function
5062 ** rewrites node iPgno, and possibly one or more of its right-hand peers,
5063 ** to remove this portion of the position list.
5065 ** Output variable (*pbLastInDoclist) is set to true if the position-list
5066 ** removed is followed by a new term or the end-of-segment, or false if
5067 ** it is followed by another rowid/position list.
/*
** Remove the part of a deleted position-list that has spilled onto page
** iPgno (and possibly later pages) of segment pSeg.
**
** *pbLastInDoclist starts out as 1. Pages are read one at a time, from
** iPgno up to pSeg->pgnoLast, while no error has occurred:
**
**   * iNext is taken from the first-rowid field of the page header; if
**     that is 0 and the page has a footer, iNext is instead read from the
**     first varint of the footer (the offset of the first term).
**   * A page holding neither is replaced by an empty 4-byte page (unless
**     detail=none) and the loop moves on to the next page.
**   * Otherwise, for detail modes other than none, the first iNext-4 bytes
**     after the header are removed: the body is shifted down, szLeaf and
**     the first-rowid field are updated, the footer is rebuilt with its
**     first entry reduced by the shift, and the page is written back.
**     Offsets that do not fit the page are reported as FTS5_CORRUPT.
*/
5069 static void fts5SecureDeleteOverflow(
5071 Fts5StructureSegment
*pSeg
,
5073 int *pbLastInDoclist
5075 const int bDetailNone
= (p
->pConfig
->eDetail
==FTS5_DETAIL_NONE
);
5077 Fts5Data
*pLeaf
= 0;
5080 *pbLastInDoclist
= 1;
5081 for(pgno
=iPgno
; p
->rc
==SQLITE_OK
&& pgno
<=pSeg
->pgnoLast
; pgno
++){
5082 i64 iRowid
= FTS5_SEGMENT_ROWID(pSeg
->iSegid
, pgno
);
5086 pLeaf
= fts5DataRead(p
, iRowid
);
5087 if( pLeaf
==0 ) break;
/* Bytes 0-1 of the header: offset of the first rowid on the page. */
5090 iNext
= fts5GetU16(&aPg
[0]);
5092 *pbLastInDoclist
= 0;
/* No first rowid, but a footer exists: use the first term offset. */
5094 if( iNext
==0 && pLeaf
->szLeaf
!=pLeaf
->nn
){
5095 fts5GetVarint32(&aPg
[pLeaf
->szLeaf
], iNext
);
5099 /* The page contains no terms or rowids. Replace it with an empty
5100 ** page and move on to the right-hand peer. */
5101 const u8 aEmpty
[] = {0x00, 0x00, 0x00, 0x04};
5102 assert_nc( bDetailNone
==0 || pLeaf
->nn
==4 );
5103 if( bDetailNone
==0 ) fts5DataWrite(p
, iRowid
, aEmpty
, sizeof(aEmpty
));
5104 fts5DataRelease(pLeaf
);
5106 }else if( bDetailNone
){
5108 }else if( iNext
>=pLeaf
->szLeaf
|| pLeaf
->nn
<pLeaf
->szLeaf
|| iNext
<4 ){
5109 p
->rc
= FTS5_CORRUPT
;
/* nShift: number of overflow bytes to drop from the front of the body. */
5112 int nShift
= iNext
- 4;
5118 /* Unless the current page footer is 0 bytes in size (in which case
5119 ** the new page footer will be as well), allocate and populate a
5120 ** buffer containing the new page footer. Set stack variables aIdx
5121 ** and nIdx accordingly. */
5122 if( pLeaf
->nn
>pLeaf
->szLeaf
){
5124 int i1
= pLeaf
->szLeaf
;
5127 i1
+= fts5GetVarint32(&aPg
[i1
], iFirst
);
5129 p
->rc
= FTS5_CORRUPT
;
5132 aIdx
= sqlite3Fts5MallocZero(&p
->rc
, (pLeaf
->nn
-pLeaf
->szLeaf
)+2);
5133 if( aIdx
==0 ) break;
/* First footer entry moves down by nShift; the rest is copied as is. */
5134 i2
= sqlite3Fts5PutVarint(aIdx
, iFirst
-nShift
);
5136 memcpy(&aIdx
[i2
], &aPg
[i1
], pLeaf
->nn
-i1
);
5137 i2
+= (pLeaf
->nn
-i1
);
5142 /* Modify the contents of buffer aPg[]. Set nPg to the new size
5143 ** in bytes. The new page is always smaller than the old. */
5144 nPg
= pLeaf
->szLeaf
- nShift
;
5145 memmove(&aPg
[4], &aPg
[4+nShift
], nPg
-4);
5146 fts5PutU16(&aPg
[2], nPg
);
/* A non-zero first-rowid offset becomes 4, directly after the header. */
5147 if( fts5GetU16(&aPg
[0]) ) fts5PutU16(&aPg
[0], 4);
5149 memcpy(&aPg
[nPg
], aIdx
, nIdx
);
5154 /* Write the new page to disk and exit the loop */
5155 assert( nPg
>4 || fts5GetU16(aPg
)==0 );
5156 fts5DataWrite(p
, iRowid
, aPg
, nPg
);
5160 fts5DataRelease(pLeaf
);
5164 ** Completely remove the entry that pSeg currently points to from
/*
** Remove the doclist entry that segment iterator pSeg points at by
** editing its leaf page in place and writing the page back.
**
** Outline of the visible steps:
**
**   1. A copy of the page footer (everything from szLeaf to nn) is taken
**      into aIdx[].
**   2. iStart (start of the rowid varint of the entry), iDelta (its
**      value) and iNextOff (offset of the entry that follows) are located
**      by walking the doclist from the first rowid on the page, or from
**      iTermLeafOffset when the term starts on this page.
**   3. If the entry runs to or past the end of the page body,
**      fts5SecureDeleteOverflow() trims the following page(s).
**   4. Depending on whether the entry is followed by another entry of the
**      same doclist, is the only entry of a term that starts on this
**      page, or is the first entry on a continuation page, the rowid
**      delta of the following entry is rewritten, the term itself is
**      removed (and the following term re-encoded), or the term on an
**      earlier page is removed together with its %_idx row.
**   5. If no error has occurred, the remaining data is moved down from
**      iNextOff to iOff, szLeaf is updated, the footer is rewritten with
**      adjusted offsets (omitting the deleted key), and the page is
**      written back. A page that has lost its whole footer and is not
**      page 1 also has its %_idx row removed.
**
** NOTE(review): the branch structure between these steps depends on
** lines not visible here - confirm against the full definition.
*/
5167 static void fts5DoSecureDelete(
5171 const int bDetailNone
= (p
->pConfig
->eDetail
==FTS5_DETAIL_NONE
);
5172 int iSegid
= pSeg
->pSeg
->iSegid
;
5173 u8
*aPg
= pSeg
->pLeaf
->p
;
5174 int nPg
= pSeg
->pLeaf
->nn
;
5175 int iPgIdx
= pSeg
->pLeaf
->szLeaf
;
5182 int bLastInDoclist
= 0;
5185 int iDelKeyOff
= 0; /* Offset of deleted key, if any */
/* Work on a private copy of the page footer, with 16 spare bytes. */
5188 aIdx
= sqlite3Fts5MallocZero(&p
->rc
, nIdx
+16);
5190 memcpy(aIdx
, &aPg
[iPgIdx
], nIdx
);
5192 /* At this point segment iterator pSeg points to the entry
5193 ** this function should remove from the b-tree segment.
5195 ** In detail=full or detail=column mode, pSeg->iLeafOffset is the
5196 ** offset of the first byte in the position-list for the entry to
5197 ** remove. Immediately before this comes two varints that will also
5198 ** need to be removed:
5200 ** + the rowid or delta rowid value for the entry, and
5201 ** + the size of the position list in bytes.
5203 ** Or, in detail=none mode, there is a single varint prior to
5204 ** pSeg->iLeafOffset - the rowid or delta rowid value.
5206 ** This block sets the following variables:
5209 ** The offset of the first byte of the rowid or delta-rowid
5210 ** value for the doclist entry being removed.
5213 ** The value of the rowid or delta-rowid value for the doclist
5214 ** entry being removed.
5217 ** The offset of the next entry following the position list
5218 ** for the one being removed. If the position list for this
5219 ** entry overflows onto the next leaf page, this value will be
5220 ** greater than pLeaf->szLeaf.
5223 int iSOP
; /* Start-Of-Position-list */
5224 if( pSeg
->iLeafPgno
==pSeg
->iTermLeafPgno
){
5225 iStart
= pSeg
->iTermLeafOffset
;
5227 iStart
= fts5GetU16(&aPg
[0]);
5230 iSOP
= iStart
+ fts5GetVarint(&aPg
[iStart
], &iDelta
);
5231 assert_nc( iSOP
<=pSeg
->iLeafOffset
);
5234 while( iSOP
<pSeg
->iLeafOffset
){
5235 if( aPg
[iSOP
]==0x00 ) iSOP
++;
5236 if( aPg
[iSOP
]==0x00 ) iSOP
++;
5238 iSOP
= iStart
+ fts5GetVarint(&aPg
[iStart
], &iDelta
);
5242 if( iNextOff
<pSeg
->iEndofDoclist
&& aPg
[iNextOff
]==0x00 ) iNextOff
++;
5243 if( iNextOff
<pSeg
->iEndofDoclist
&& aPg
[iNextOff
]==0x00 ) iNextOff
++;
5247 iSOP
+= fts5GetVarint32(&aPg
[iSOP
], nPos
);
5248 while( iSOP
<pSeg
->iLeafOffset
){
5249 iStart
= iSOP
+ (nPos
/2);
5250 iSOP
= iStart
+ fts5GetVarint(&aPg
[iStart
], &iDelta
);
5251 iSOP
+= fts5GetVarint32(&aPg
[iSOP
], nPos
);
5253 assert_nc( iSOP
==pSeg
->iLeafOffset
);
5254 iNextOff
= pSeg
->iLeafOffset
+ pSeg
->nPos
;
5260 /* If the position-list for the entry being removed flows over past
5261 ** the end of this page, delete the portion of the position-list on the
5262 ** next page and beyond.
5264 ** Set variable bLastInDoclist to true if this entry happens
5265 ** to be the last rowid in the doclist for its term. */
5266 if( iNextOff
>=iPgIdx
){
5267 int pgno
= pSeg
->iLeafPgno
+1;
5268 fts5SecureDeleteOverflow(p
, pSeg
->pSeg
, pgno
, &bLastInDoclist
);
5272 if( pSeg
->bDel
==0 ){
5273 if( iNextOff
!=iPgIdx
){
5274 /* Loop through the page-footer. If iNextOff (offset of the
5275 ** entry following the one we are removing) is equal to the
5276 ** offset of a key on this page, then the entry is the last
5277 ** in its doclist. */
5279 for(iIdx
=0; iIdx
<nIdx
; /* no-op */){
5281 iIdx
+= fts5GetVarint32(&aIdx
[iIdx
], iVal
);
5283 if( iKeyOff
==iNextOff
){
5289 /* If this is (a) the first rowid on a page and (b) is not followed by
5290 ** another position list on the same page, set the "first-rowid" field
5291 ** of the header to 0. */
5292 if( fts5GetU16(&aPg
[0])==iStart
&& (bLastInDoclist
|| iNextOff
==iPgIdx
) ){
5293 fts5PutU16(&aPg
[0], 0);
5298 iOff
+= sqlite3Fts5PutVarint(&aPg
[iOff
], iDelta
);
5300 }else if( bLastInDoclist
==0 ){
5301 if( iNextOff
!=iPgIdx
){
/* Fold the removed delta into the delta of the entry that follows. */
5303 iNextOff
+= fts5GetVarint(&aPg
[iNextOff
], &iNextDelta
);
5304 iOff
+= sqlite3Fts5PutVarint(&aPg
[iOff
], iDelta
+ iNextDelta
);
5307 pSeg
->iLeafPgno
==pSeg
->iTermLeafPgno
5308 && iStart
==pSeg
->iTermLeafOffset
5310 /* The entry being removed was the only position list in its
5311 ** doclist. Therefore the term needs to be removed as well. */
5315 /* Set iKeyOff to the offset of the term that will be removed - the
5316 ** last offset in the footer that is not greater than iStart. */
5317 for(iIdx
=0; iIdx
<nIdx
; iKey
++){
5319 iIdx
+= fts5GetVarint32(&aIdx
[iIdx
], iVal
);
5320 if( (iKeyOff
+iVal
)>(u32
)iStart
) break;
5323 assert_nc( iKey
>=1 );
5325 /* Set iDelKeyOff to the value of the footer entry to remove from
5327 iDelKeyOff
= iOff
= iKeyOff
;
5329 if( iNextOff
!=iPgIdx
){
5330 /* This is the only position-list associated with the term, and there
5331 ** is another term following it on this page. So the subsequent term
5332 ** needs to be moved to replace the term associated with the entry
5333 ** being removed. */
5339 iDelKeyOff
= iNextOff
;
5340 iNextOff
+= fts5GetVarint32(&aPg
[iNextOff
], nPrefix2
);
5341 iNextOff
+= fts5GetVarint32(&aPg
[iNextOff
], nSuffix2
);
5344 iKeyOff
+= fts5GetVarint32(&aPg
[iKeyOff
], nPrefix
);
5346 iKeyOff
+= fts5GetVarint32(&aPg
[iKeyOff
], nSuffix
);
5348 nPrefix
= MIN(nPrefix
, nPrefix2
);
5349 nSuffix
= (nPrefix2
+ nSuffix2
) - nPrefix
;
5351 if( (iKeyOff
+nSuffix
)>iPgIdx
|| (iNextOff
+nSuffix2
)>iPgIdx
){
5352 p
->rc
= FTS5_CORRUPT
;
5355 iOff
+= sqlite3Fts5PutVarint(&aPg
[iOff
], nPrefix
);
5357 iOff
+= sqlite3Fts5PutVarint(&aPg
[iOff
], nSuffix
);
5358 if( nPrefix2
>pSeg
->term
.n
){
5359 p
->rc
= FTS5_CORRUPT
;
5360 }else if( nPrefix2
>nPrefix
){
5361 memcpy(&aPg
[iOff
], &pSeg
->term
.p
[nPrefix
], nPrefix2
-nPrefix
);
5362 iOff
+= (nPrefix2
-nPrefix
);
5364 memmove(&aPg
[iOff
], &aPg
[iNextOff
], nSuffix2
);
5366 iNextOff
+= nSuffix2
;
5369 }else if( iStart
==4 ){
5372 assert_nc( pSeg
->iLeafPgno
>pSeg
->iTermLeafPgno
);
5373 /* The entry being removed may be the only position list in
5375 for(iPgno
=pSeg
->iLeafPgno
-1; iPgno
>pSeg
->iTermLeafPgno
; iPgno
-- ){
5376 Fts5Data
*pPg
= fts5DataRead(p
, FTS5_SEGMENT_ROWID(iSegid
, iPgno
));
5377 int bEmpty
= (pPg
&& pPg
->nn
==4);
5378 fts5DataRelease(pPg
);
5379 if( bEmpty
==0 ) break;
5382 if( iPgno
==pSeg
->iTermLeafPgno
){
5383 i64 iId
= FTS5_SEGMENT_ROWID(iSegid
, pSeg
->iTermLeafPgno
);
5384 Fts5Data
*pTerm
= fts5DataRead(p
, iId
);
5385 if( pTerm
&& pTerm
->szLeaf
==pSeg
->iTermLeafOffset
){
5386 u8
*aTermIdx
= &pTerm
->p
[pTerm
->szLeaf
];
5387 int nTermIdx
= pTerm
->nn
- pTerm
->szLeaf
;
5393 int nByte
= fts5GetVarint32(&aTermIdx
[iTermIdx
], iVal
);
5395 if( (iTermIdx
+nByte
)>=nTermIdx
) break;
5398 nTermIdx
= iTermIdx
;
5400 memmove(&pTerm
->p
[iTermOff
], &pTerm
->p
[pTerm
->szLeaf
], nTermIdx
);
5401 fts5PutU16(&pTerm
->p
[2], iTermOff
);
5403 fts5DataWrite(p
, iId
, pTerm
->p
, iTermOff
+nTermIdx
);
5405 fts5SecureDeleteIdxEntry(p
, iSegid
, pSeg
->iTermLeafPgno
);
5408 fts5DataRelease(pTerm
);
5412 /* Assuming no error has occurred, this block does final edits to the
5413 ** leaf page before writing it back to disk. Input variables are:
5415 ** nPg: Total initial size of leaf page.
5416 ** iPgIdx: Initial offset of page footer.
5418 ** iOff: Offset to move data to
5419 ** iNextOff: Offset to move data from
5421 if( p
->rc
==SQLITE_OK
){
5422 const int nMove
= nPg
- iNextOff
; /* Number of bytes to move */
5423 int nShift
= iNextOff
- iOff
; /* Distance to move them */
5425 int iPrevKeyOut
= 0;
5428 memmove(&aPg
[iOff
], &aPg
[iNextOff
], nMove
);
5431 fts5PutU16(&aPg
[2], iPgIdx
);
5433 for(iIdx
=0; iIdx
<nIdx
; /* no-op */){
5435 iIdx
+= fts5GetVarint32(&aIdx
[iIdx
], iVal
);
5437 if( iKeyIn
!=iDelKeyOff
){
5438 int iKeyOut
= (iKeyIn
- (iKeyIn
>iOff
? nShift
: 0));
5439 nPg
+= sqlite3Fts5PutVarint(&aPg
[nPg
], iKeyOut
- iPrevKeyOut
);
5440 iPrevKeyOut
= iKeyOut
;
/* The page had a footer before but none was written back (nPg is still
** iPgIdx): unless this is page 1, remove the %_idx row for the page. */
5444 if( iPgIdx
==nPg
&& nIdx
>0 && pSeg
->iLeafPgno
!=1 ){
5445 fts5SecureDeleteIdxEntry(p
, iSegid
, pSeg
->iLeafPgno
);
5448 assert_nc( nPg
>4 || fts5GetU16(aPg
)==0 );
5449 fts5DataWrite(p
, FTS5_SEGMENT_ROWID(iSegid
,pSeg
->iLeafPgno
), aPg
, nPg
);
5455 ** This is called as part of flushing a delete to disk in 'secure-delete'
5456 ** mode. It edits the segments within the database described by argument
5457 ** pStruct to remove the entries for term zTerm, rowid iRowid.
/*
** Remove the on-disk entry for term zTerm/nTerm and rowid iRowid from the
** segments described by pStruct.
**
** A multi-iterator is opened on the term with FTS5INDEX_QUERY_SKIPHASH.
** If it is not at EOF, its current rowid is read and
** fts5MultiIterNextFrom() is used to move towards iRowid. If, with no
** error, the iterator is still valid and positioned exactly on iRowid,
** the segment iterator supplying that entry is passed to
** fts5DoSecureDelete(). The iterator is freed before returning.
*/
5459 static void fts5FlushSecureDelete(
5461 Fts5Structure
*pStruct
,
5466 const int f
= FTS5INDEX_QUERY_SKIPHASH
;
5467 Fts5Iter
*pIter
= 0; /* Used to find term instance */
5469 fts5MultiIterNew(p
, pStruct
, f
, 0, (const u8
*)zTerm
, nTerm
, -1, 0, &pIter
);
5470 if( fts5MultiIterEof(p
, pIter
)==0 ){
5471 i64 iThis
= fts5MultiIterRowid(pIter
);
5473 fts5MultiIterNextFrom(p
, pIter
, iRowid
);
/* Delete only when the iterator landed exactly on the requested rowid. */
5476 if( p
->rc
==SQLITE_OK
5477 && fts5MultiIterEof(p
, pIter
)==0
5478 && iRowid
==fts5MultiIterRowid(pIter
)
5480 Fts5SegIter
*pSeg
= &pIter
->aSeg
[pIter
->aFirst
[1].iFirst
];
5481 fts5DoSecureDelete(p
, pSeg
);
5485 fts5MultiIterFree(pIter
);
5490 ** Flush the contents of in-memory hash table iHash to a new level-0
5491 ** segment on disk. Also update the corresponding structure record.
5493 ** If an error occurs, set the Fts5Index.rc error code. If an error has
5494 ** already occurred, this function is a no-op.
5496 static void fts5FlushOneHash(Fts5Index
*p
){
5497 Fts5Hash
*pHash
= p
->pHash
;
5498 Fts5Structure
*pStruct
;
5500 int pgnoLast
= 0; /* Last leaf page number in segment */
5502 /* Obtain a reference to the index structure and allocate a new segment-id
5503 ** for the new level-0 segment. */
5504 pStruct
= fts5StructureRead(p
);
5505 fts5StructureInvalidate(p
);
5507 if( sqlite3Fts5HashIsEmpty(pHash
)==0 ){
5508 iSegid
= fts5AllocateSegid(p
, pStruct
);
5510 const int pgsz
= p
->pConfig
->pgsz
;
5511 int eDetail
= p
->pConfig
->eDetail
;
5512 int bSecureDelete
= p
->pConfig
->bSecureDelete
;
5513 Fts5StructureSegment
*pSeg
; /* New segment within pStruct */
5514 Fts5Buffer
*pBuf
; /* Buffer in which to assemble leaf page */
5515 Fts5Buffer
*pPgidx
; /* Buffer in which to assemble pgidx */
5517 Fts5SegWriter writer
;
5518 fts5WriteInit(p
, &writer
, iSegid
);
5520 pBuf
= &writer
.writer
.buf
;
5521 pPgidx
= &writer
.writer
.pgidx
;
5523 /* fts5WriteInit() should have initialized the buffers to (most likely)
5524 ** the maximum space required. */
5525 assert( p
->rc
|| pBuf
->nSpace
>=(pgsz
+ FTS5_DATA_PADDING
) );
5526 assert( p
->rc
|| pPgidx
->nSpace
>=(pgsz
+ FTS5_DATA_PADDING
) );
5528 /* Begin scanning through hash table entries. This loop runs once for each
5529 ** term/doclist currently stored within the hash table. */
5530 if( p
->rc
==SQLITE_OK
){
5531 p
->rc
= sqlite3Fts5HashScanInit(pHash
, 0, 0);
5533 while( p
->rc
==SQLITE_OK
&& 0==sqlite3Fts5HashScanEof(pHash
) ){
5534 const char *zTerm
; /* Buffer containing term */
5535 int nTerm
; /* Size of zTerm in bytes */
5536 const u8
*pDoclist
; /* Pointer to doclist for this term */
5537 int nDoclist
; /* Size of doclist in bytes */
5539 /* Get the term and doclist for this entry. */
5540 sqlite3Fts5HashScanEntry(pHash
, &zTerm
, &nTerm
, &pDoclist
, &nDoclist
);
5541 if( bSecureDelete
==0 ){
5542 fts5WriteAppendTerm(p
, &writer
, nTerm
, (const u8
*)zTerm
);
5543 if( p
->rc
!=SQLITE_OK
) break;
5544 assert( writer
.bFirstRowidInPage
==0 );
5547 if( !bSecureDelete
&& pgsz
>=(pBuf
->n
+ pPgidx
->n
+ nDoclist
+ 1) ){
5548 /* The entire doclist will fit on the current leaf. */
5549 fts5BufferSafeAppendBlob(pBuf
, pDoclist
, nDoclist
);
5551 int bTermWritten
= !bSecureDelete
;
5556 /* The entire doclist will not fit on this leaf. The following
5557 ** loop iterates through the poslists that make up the current
5559 while( p
->rc
==SQLITE_OK
&& iOff
<nDoclist
){
5561 iOff
+= fts5GetVarint(&pDoclist
[iOff
], &iDelta
);
5564 /* If in secure delete mode, and if this entry in the poslist is
5565 ** in fact a delete, then edit the existing segments directly
5566 ** using fts5FlushSecureDelete(). */
5567 if( bSecureDelete
){
5568 if( eDetail
==FTS5_DETAIL_NONE
){
5569 if( iOff
<nDoclist
&& pDoclist
[iOff
]==0x00 ){
5570 fts5FlushSecureDelete(p
, pStruct
, zTerm
, nTerm
, iRowid
);
5572 if( iOff
<nDoclist
&& pDoclist
[iOff
]==0x00 ){
5579 }else if( (pDoclist
[iOff
] & 0x01) ){
5580 fts5FlushSecureDelete(p
, pStruct
, zTerm
, nTerm
, iRowid
);
5581 if( p
->rc
!=SQLITE_OK
|| pDoclist
[iOff
]==0x01 ){
5588 if( p
->rc
==SQLITE_OK
&& bTermWritten
==0 ){
5589 fts5WriteAppendTerm(p
, &writer
, nTerm
, (const u8
*)zTerm
);
5591 assert( p
->rc
!=SQLITE_OK
|| writer
.bFirstRowidInPage
==0 );
5594 if( writer
.bFirstRowidInPage
){
5595 fts5PutU16(&pBuf
->p
[0], (u16
)pBuf
->n
); /* first rowid on page */
5596 pBuf
->n
+= sqlite3Fts5PutVarint(&pBuf
->p
[pBuf
->n
], iRowid
);
5597 writer
.bFirstRowidInPage
= 0;
5598 fts5WriteDlidxAppend(p
, &writer
, iRowid
);
5600 u64 iRowidDelta
= (u64
)iRowid
- (u64
)iPrev
;
5601 pBuf
->n
+= sqlite3Fts5PutVarint(&pBuf
->p
[pBuf
->n
], iRowidDelta
);
5603 if( p
->rc
!=SQLITE_OK
) break;
5604 assert( pBuf
->n
<=pBuf
->nSpace
);
5607 if( eDetail
==FTS5_DETAIL_NONE
){
5608 if( iOff
<nDoclist
&& pDoclist
[iOff
]==0 ){
5609 pBuf
->p
[pBuf
->n
++] = 0;
5611 if( iOff
<nDoclist
&& pDoclist
[iOff
]==0 ){
5612 pBuf
->p
[pBuf
->n
++] = 0;
5616 if( (pBuf
->n
+ pPgidx
->n
)>=pgsz
){
5617 fts5WriteFlushLeaf(p
, &writer
);
5622 int nCopy
= fts5GetPoslistSize(&pDoclist
[iOff
], &nPos
, &bDel
);
5623 if( bDel
&& bSecureDelete
){
5624 fts5BufferAppendVarint(&p
->rc
, pBuf
, nPos
*2);
5630 if( (pBuf
->n
+ pPgidx
->n
+ nCopy
) <= pgsz
){
5631 /* The entire poslist will fit on the current leaf. So copy
5633 fts5BufferSafeAppendBlob(pBuf
, &pDoclist
[iOff
], nCopy
);
5635 /* The entire poslist will not fit on this leaf. So it needs
5636 ** to be broken into sections. The only qualification being
5637 ** that each varint must be stored contiguously. */
5638 const u8
*pPoslist
= &pDoclist
[iOff
];
5640 while( p
->rc
==SQLITE_OK
){
5641 int nSpace
= pgsz
- pBuf
->n
- pPgidx
->n
;
5643 if( (nCopy
- iPos
)<=nSpace
){
5646 n
= fts5PoslistPrefix(&pPoslist
[iPos
], nSpace
);
5649 fts5BufferSafeAppendBlob(pBuf
, &pPoslist
[iPos
], n
);
5651 if( (pBuf
->n
+ pPgidx
->n
)>=pgsz
){
5652 fts5WriteFlushLeaf(p
, &writer
);
5654 if( iPos
>=nCopy
) break;
5662 /* TODO2: Doclist terminator written here. */
5663 /* pBuf->p[pBuf->n++] = '\0'; */
5664 assert( pBuf
->n
<=pBuf
->nSpace
);
5665 if( p
->rc
==SQLITE_OK
) sqlite3Fts5HashScanNext(pHash
);
5667 fts5WriteFinish(p
, &writer
, &pgnoLast
);
5669 assert( p
->rc
!=SQLITE_OK
|| bSecureDelete
|| pgnoLast
>0 );
5671 /* Update the Fts5Structure. It is written back to the database by the
5672 ** fts5StructureWrite() call below, before fts5StructureRelease(). */
5673 if( pStruct
->nLevel
==0 ){
5674 fts5StructureAddLevel(&p
->rc
, &pStruct
);
5676 fts5StructureExtendLevel(&p
->rc
, pStruct
, 0, 1, 0);
5677 if( p
->rc
==SQLITE_OK
){
5678 pSeg
= &pStruct
->aLevel
[0].aSeg
[ pStruct
->aLevel
[0].nSeg
++ ];
5679 pSeg
->iSegid
= iSegid
;
5680 pSeg
->pgnoFirst
= 1;
5681 pSeg
->pgnoLast
= pgnoLast
;
5682 if( pStruct
->nOriginCntr
>0 ){
5683 pSeg
->iOrigin1
= pStruct
->nOriginCntr
;
5684 pSeg
->iOrigin2
= pStruct
->nOriginCntr
;
5685 pSeg
->nEntry
= p
->nPendingRow
;
5686 pStruct
->nOriginCntr
++;
5688 pStruct
->nSegment
++;
5690 fts5StructurePromote(p
, 0, pStruct
);
5695 fts5IndexAutomerge(p
, &pStruct
, pgnoLast
+ p
->nContentlessDelete
);
5696 fts5IndexCrisismerge(p
, &pStruct
);
5697 fts5StructureWrite(p
, pStruct
);
5698 fts5StructureRelease(pStruct
);
5702 ** Flush any data stored in the in-memory hash tables to the database.
5704 static void fts5IndexFlush(Fts5Index
*p
){
5705 /* Unless it is empty, flush the hash table to disk */
5710 if( p
->nPendingData
|| p
->nContentlessDelete
){
5712 fts5FlushOneHash(p
);
5713 if( p
->rc
==SQLITE_OK
){
5714 sqlite3Fts5HashClear(p
->pHash
);
5715 p
->nPendingData
= 0;
5717 p
->nContentlessDelete
= 0;
5718 }else if( p
->nPendingData
|| p
->nContentlessDelete
){
5724 static Fts5Structure
*fts5IndexOptimizeStruct(
5726 Fts5Structure
*pStruct
5728 Fts5Structure
*pNew
= 0;
5729 sqlite3_int64 nByte
= sizeof(Fts5Structure
);
5730 int nSeg
= pStruct
->nSegment
;
5733 /* Figure out if this structure requires optimization. A structure does
5734 ** not require optimization if any of the following is true:
5736 ** 1. it consists of fewer than two segments, or
5737 ** 2. all segments are on the same level, or
5738 ** 3. all segments except one are currently inputs to a merge operation.
5740 ** In the first case, if there are no tombstone hash pages, return NULL. In
5741 ** the second, increment the ref-count on *pStruct and return a copy of the
5744 if( nSeg
==0 ) return 0;
5745 for(i
=0; i
<pStruct
->nLevel
; i
++){
5746 int nThis
= pStruct
->aLevel
[i
].nSeg
;
5747 int nMerge
= pStruct
->aLevel
[i
].nMerge
;
5748 if( nThis
>0 && (nThis
==nSeg
|| (nThis
==nSeg
-1 && nMerge
==nThis
)) ){
5749 if( nSeg
==1 && nThis
==1 && pStruct
->aLevel
[i
].aSeg
[0].nPgTombstone
==0 ){
5752 fts5StructureRef(pStruct
);
5755 assert( pStruct
->aLevel
[i
].nMerge
<=nThis
);
5758 nByte
+= (pStruct
->nLevel
+1) * sizeof(Fts5StructureLevel
);
5759 pNew
= (Fts5Structure
*)sqlite3Fts5MallocZero(&p
->rc
, nByte
);
5762 Fts5StructureLevel
*pLvl
;
5763 nByte
= nSeg
* sizeof(Fts5StructureSegment
);
5764 pNew
->nLevel
= MIN(pStruct
->nLevel
+1, FTS5_MAX_LEVEL
);
5766 pNew
->nWriteCounter
= pStruct
->nWriteCounter
;
5767 pNew
->nOriginCntr
= pStruct
->nOriginCntr
;
5768 pLvl
= &pNew
->aLevel
[pNew
->nLevel
-1];
5769 pLvl
->aSeg
= (Fts5StructureSegment
*)sqlite3Fts5MallocZero(&p
->rc
, nByte
);
5773 /* Iterate through all segments, from oldest to newest. Add them to
5774 ** the new Fts5StructureLevel object so that pLvl->aSeg[0] is the oldest
5775 ** segment in the data structure. */
5776 for(iLvl
=pStruct
->nLevel
-1; iLvl
>=0; iLvl
--){
5777 for(iSeg
=0; iSeg
<pStruct
->aLevel
[iLvl
].nSeg
; iSeg
++){
5778 pLvl
->aSeg
[iSegOut
] = pStruct
->aLevel
[iLvl
].aSeg
[iSeg
];
5782 pNew
->nSegment
= pLvl
->nSeg
= nSeg
;
5792 int sqlite3Fts5IndexOptimize(Fts5Index
*p
){
5793 Fts5Structure
*pStruct
;
5794 Fts5Structure
*pNew
= 0;
5796 assert( p
->rc
==SQLITE_OK
);
5798 assert( p
->rc
!=SQLITE_OK
|| p
->nContentlessDelete
==0 );
5799 pStruct
= fts5StructureRead(p
);
5800 assert( p
->rc
!=SQLITE_OK
|| pStruct
!=0 );
5801 fts5StructureInvalidate(p
);
5804 pNew
= fts5IndexOptimizeStruct(p
, pStruct
);
5806 fts5StructureRelease(pStruct
);
5808 assert( pNew
==0 || pNew
->nSegment
>0 );
5811 for(iLvl
=0; pNew
->aLevel
[iLvl
].nSeg
==0; iLvl
++){}
5812 while( p
->rc
==SQLITE_OK
&& pNew
->aLevel
[iLvl
].nSeg
>0 ){
5813 int nRem
= FTS5_OPT_WORK_UNIT
;
5814 fts5IndexMergeLevel(p
, &pNew
, iLvl
, &nRem
);
5817 fts5StructureWrite(p
, pNew
);
5818 fts5StructureRelease(pNew
);
5821 return fts5IndexReturn(p
);
5825 ** This is called to implement the special "VALUES('merge', $nMerge)"
5828 int sqlite3Fts5IndexMerge(Fts5Index
*p
, int nMerge
){
5829 Fts5Structure
*pStruct
= 0;
5832 pStruct
= fts5StructureRead(p
);
5834 int nMin
= p
->pConfig
->nUsermerge
;
5835 fts5StructureInvalidate(p
);
5837 Fts5Structure
*pNew
= fts5IndexOptimizeStruct(p
, pStruct
);
5838 fts5StructureRelease(pStruct
);
5843 if( pStruct
&& pStruct
->nLevel
){
5844 if( fts5IndexMerge(p
, &pStruct
, nMerge
, nMin
) ){
5845 fts5StructureWrite(p
, pStruct
);
5848 fts5StructureRelease(pStruct
);
5850 return fts5IndexReturn(p
);
5853 static void fts5AppendRowid(
5859 UNUSED_PARAM(pUnused
);
5860 fts5BufferAppendVarint(&p
->rc
, pBuf
, iDelta
);
5863 static void fts5AppendPoslist(
5869 int nData
= pMulti
->base
.nData
;
5870 int nByte
= nData
+ 9 + 9 + FTS5_DATA_ZERO_PADDING
;
5872 if( p
->rc
==SQLITE_OK
&& 0==fts5BufferGrow(&p
->rc
, pBuf
, nByte
) ){
5873 fts5BufferSafeAppendVarint(pBuf
, iDelta
);
5874 fts5BufferSafeAppendVarint(pBuf
, nData
*2);
5875 fts5BufferSafeAppendBlob(pBuf
, pMulti
->base
.pData
, nData
);
5876 memset(&pBuf
->p
[pBuf
->n
], 0, FTS5_DATA_ZERO_PADDING
);
5881 static void fts5DoclistIterNext(Fts5DoclistIter
*pIter
){
5882 u8
*p
= pIter
->aPoslist
+ pIter
->nSize
+ pIter
->nPoslist
;
5884 assert( pIter
->aPoslist
|| (p
==0 && pIter
->aPoslist
==0) );
5885 if( p
>=pIter
->aEof
){
5886 pIter
->aPoslist
= 0;
5890 p
+= fts5GetVarint(p
, (u64
*)&iDelta
);
5891 pIter
->iRowid
+= iDelta
;
5893 /* Read position list size */
5896 pIter
->nSize
= fts5GetVarint32(p
, nPos
);
5897 pIter
->nPoslist
= (nPos
>>1);
5899 pIter
->nPoslist
= ((int)(p
[0])) >> 1;
5903 pIter
->aPoslist
= p
;
5904 if( &pIter
->aPoslist
[pIter
->nPoslist
]>pIter
->aEof
){
5905 pIter
->aPoslist
= 0;
5910 static void fts5DoclistIterInit(
5912 Fts5DoclistIter
*pIter
5914 memset(pIter
, 0, sizeof(*pIter
));
5916 pIter
->aPoslist
= pBuf
->p
;
5917 pIter
->aEof
= &pBuf
->p
[pBuf
->n
];
5918 fts5DoclistIterNext(pIter
);
5924 ** Append rowid iRowid to buffer pBuf as a varint delta from *piLastRowid.
5926 ** This function assumes that space within the buffer has already been
5929 static void fts5MergeAppendDocid(
5930 Fts5Buffer
*pBuf
, /* Buffer to write to */
5931 i64
*piLastRowid
, /* IN/OUT: Previous rowid written (if any) */
5932 i64 iRowid
/* Rowid to append */
5934 assert( pBuf
->n
!=0 || (*piLastRowid
)==0 );
5935 fts5BufferSafeAppendVarint(pBuf
, iRowid
- *piLastRowid
);
5936 *piLastRowid
= iRowid
;
5940 #define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) { \
5941 assert( (pBuf)->n!=0 || (iLastRowid)==0 ); \
5942 fts5BufferSafeAppendVarint((pBuf), (u64)(iRowid) - (u64)(iLastRowid)); \
5943 (iLastRowid) = (iRowid); \
5947 ** Swap the contents of buffer *p1 with that of *p2.
5949 static void fts5BufferSwap(Fts5Buffer
*p1
, Fts5Buffer
*p2
){
5950 Fts5Buffer tmp
= *p1
;
5955 static void fts5NextRowid(Fts5Buffer
*pBuf
, int *piOff
, i64
*piRowid
){
5961 *piOff
= i
+ sqlite3Fts5GetVarint(&pBuf
->p
[i
], &iVal
);
5967 ** This is the equivalent of fts5MergePrefixLists() for detail=none mode.
5968 ** In this case the buffers consist of a delta-encoded list of rowids only.
5970 static void fts5MergeRowidLists(
5971 Fts5Index
*p
, /* FTS5 backend object */
5972 Fts5Buffer
*p1
, /* First list to merge */
5973 int nBuf
, /* Number of entries in apBuf[] */
5974 Fts5Buffer
*aBuf
/* Array of other lists to merge into p1 */
5981 Fts5Buffer
*p2
= &aBuf
[0];
5985 memset(&out
, 0, sizeof(out
));
5987 sqlite3Fts5BufferSize(&p
->rc
, &out
, p1
->n
+ p2
->n
);
5990 fts5NextRowid(p1
, &i1
, &iRowid1
);
5991 fts5NextRowid(p2
, &i2
, &iRowid2
);
5992 while( i1
>=0 || i2
>=0 ){
5993 if( i1
>=0 && (i2
<0 || iRowid1
<iRowid2
) ){
5994 assert( iOut
==0 || iRowid1
>iOut
);
5995 fts5BufferSafeAppendVarint(&out
, iRowid1
- iOut
);
5997 fts5NextRowid(p1
, &i1
, &iRowid1
);
5999 assert( iOut
==0 || iRowid2
>iOut
);
6000 fts5BufferSafeAppendVarint(&out
, iRowid2
- iOut
);
6002 if( i1
>=0 && iRowid1
==iRowid2
){
6003 fts5NextRowid(p1
, &i1
, &iRowid1
);
6005 fts5NextRowid(p2
, &i2
, &iRowid2
);
6009 fts5BufferSwap(&out
, p1
);
6010 fts5BufferFree(&out
);
6013 typedef struct PrefixMerger PrefixMerger
;
6014 struct PrefixMerger
{
6015 Fts5DoclistIter iter
; /* Doclist iterator */
6016 i64 iPos
; /* For iterating through a position list */
6019 PrefixMerger
*pNext
; /* Next in docid/poslist order */
6022 static void fts5PrefixMergerInsertByRowid(
6023 PrefixMerger
**ppHead
,
6026 if( p
->iter
.aPoslist
){
6027 PrefixMerger
**pp
= ppHead
;
6028 while( *pp
&& p
->iter
.iRowid
>(*pp
)->iter
.iRowid
){
6036 static void fts5PrefixMergerInsertByPosition(
6037 PrefixMerger
**ppHead
,
6041 PrefixMerger
**pp
= ppHead
;
6042 while( *pp
&& p
->iPos
>(*pp
)->iPos
){
6052 ** Array aBuf[] contains nBuf doclists. These are all merged in with the
6053 ** doclist in buffer p1.
6055 static void fts5MergePrefixLists(
6056 Fts5Index
*p
, /* FTS5 backend object */
6057 Fts5Buffer
*p1
, /* First list to merge */
6058 int nBuf
, /* Number of buffers in array aBuf[] */
6059 Fts5Buffer
*aBuf
/* Other lists to merge in */
6061 #define fts5PrefixMergerNextPosition(p) \
6062 sqlite3Fts5PoslistNext64((p)->aPos,(p)->iter.nPoslist,&(p)->iOff,&(p)->iPos)
6063 #define FTS5_MERGE_NLIST 16
6064 PrefixMerger aMerger
[FTS5_MERGE_NLIST
];
6065 PrefixMerger
*pHead
= 0;
6068 Fts5Buffer out
= {0, 0, 0};
6069 Fts5Buffer tmp
= {0, 0, 0};
6072 /* Initialize a doclist-iterator for each input buffer. Arrange them in
6073 ** a linked-list starting at pHead in ascending order of rowid. Avoid
6074 ** linking any iterators already at EOF into the linked list at all. */
6075 assert( nBuf
+1<=(int)(sizeof(aMerger
)/sizeof(aMerger
[0])) );
6076 memset(aMerger
, 0, sizeof(PrefixMerger
)*(nBuf
+1));
6077 pHead
= &aMerger
[nBuf
];
6078 fts5DoclistIterInit(p1
, &pHead
->iter
);
6079 for(i
=0; i
<nBuf
; i
++){
6080 fts5DoclistIterInit(&aBuf
[i
], &aMerger
[i
].iter
);
6081 fts5PrefixMergerInsertByRowid(&pHead
, &aMerger
[i
]);
6084 if( nOut
==0 ) return;
6085 nOut
+= p1
->n
+ 9 + 10*nBuf
;
6087 /* The maximum size of the output is equal to the sum of the
6088 ** input sizes + 1 varint (9 bytes). The extra varint is because if the
6089 ** first rowid in one input is a large negative number, and the first in
6090 ** the other a non-negative number, the delta for the non-negative
6091 ** number will be larger on disk than the literal integer value
6094 ** Or, if the input position-lists are corrupt, then the output might
6095 ** include up to (nBuf+1) extra 10-byte positions created by interpreting -1
6096 ** (the value PoslistNext64() uses for EOF) as a position and appending
6097 ** it to the output. This can happen at most once for each input
6098 ** position-list, hence (nBuf+1) 10 byte paddings. */
6099 if( sqlite3Fts5BufferSize(&p
->rc
, &out
, nOut
) ) return;
6102 fts5MergeAppendDocid(&out
, iLastRowid
, pHead
->iter
.iRowid
);
6104 if( pHead
->pNext
&& iLastRowid
==pHead
->pNext
->iter
.iRowid
){
6105 /* Merge data from two or more poslists */
6107 int nTmp
= FTS5_DATA_ZERO_PADDING
;
6109 PrefixMerger
*pSave
= pHead
;
6110 PrefixMerger
*pThis
= 0;
6114 while( pSave
&& pSave
->iter
.iRowid
==iLastRowid
){
6115 PrefixMerger
*pNext
= pSave
->pNext
;
6118 pSave
->aPos
= &pSave
->iter
.aPoslist
[pSave
->iter
.nSize
];
6119 fts5PrefixMergerNextPosition(pSave
);
6120 nTmp
+= pSave
->iter
.nPoslist
+ 10;
6122 fts5PrefixMergerInsertByPosition(&pHead
, pSave
);
6126 if( pHead
==0 || pHead
->pNext
==0 ){
6127 p
->rc
= FTS5_CORRUPT
;
6131 /* See the earlier comment in this function for an explanation of why
6132 ** corrupt input position lists might cause the output to consume
6133 ** at most nMerge*10 bytes of unexpected space. */
6134 if( sqlite3Fts5BufferSize(&p
->rc
, &tmp
, nTmp
+nMerge
*10) ){
6137 fts5BufferZero(&tmp
);
6140 pHead
= pThis
->pNext
;
6141 sqlite3Fts5PoslistSafeAppend(&tmp
, &iPrev
, pThis
->iPos
);
6142 fts5PrefixMergerNextPosition(pThis
);
6143 fts5PrefixMergerInsertByPosition(&pHead
, pThis
);
6145 while( pHead
->pNext
){
6147 if( pThis
->iPos
!=iPrev
){
6148 sqlite3Fts5PoslistSafeAppend(&tmp
, &iPrev
, pThis
->iPos
);
6150 fts5PrefixMergerNextPosition(pThis
);
6151 pHead
= pThis
->pNext
;
6152 fts5PrefixMergerInsertByPosition(&pHead
, pThis
);
6155 if( pHead
->iPos
!=iPrev
){
6156 sqlite3Fts5PoslistSafeAppend(&tmp
, &iPrev
, pHead
->iPos
);
6158 nTail
= pHead
->iter
.nPoslist
- pHead
->iOff
;
6160 /* WRITEPOSLISTSIZE */
6161 assert_nc( tmp
.n
+nTail
<=nTmp
);
6162 assert( tmp
.n
+nTail
<=nTmp
+nMerge
*10 );
6163 if( tmp
.n
+nTail
>nTmp
-FTS5_DATA_ZERO_PADDING
){
6164 if( p
->rc
==SQLITE_OK
) p
->rc
= FTS5_CORRUPT
;
6167 fts5BufferSafeAppendVarint(&out
, (tmp
.n
+nTail
) * 2);
6168 fts5BufferSafeAppendBlob(&out
, tmp
.p
, tmp
.n
);
6170 fts5BufferSafeAppendBlob(&out
, &pHead
->aPos
[pHead
->iOff
], nTail
);
6174 for(i
=0; i
<nBuf
+1; i
++){
6175 PrefixMerger
*pX
= &aMerger
[i
];
6176 if( pX
->iter
.aPoslist
&& pX
->iter
.iRowid
==iLastRowid
){
6177 fts5DoclistIterNext(&pX
->iter
);
6178 fts5PrefixMergerInsertByRowid(&pHead
, pX
);
6183 /* Copy poslist from pHead to output */
6184 PrefixMerger
*pThis
= pHead
;
6185 Fts5DoclistIter
*pI
= &pThis
->iter
;
6186 fts5BufferSafeAppendBlob(&out
, pI
->aPoslist
, pI
->nPoslist
+pI
->nSize
);
6187 fts5DoclistIterNext(pI
);
6188 pHead
= pThis
->pNext
;
6189 fts5PrefixMergerInsertByRowid(&pHead
, pThis
);
6194 fts5BufferFree(&tmp
);
6195 memset(&out
.p
[out
.n
], 0, FTS5_DATA_ZERO_PADDING
);
6199 static void fts5SetupPrefixIter(
6200 Fts5Index
*p
, /* Index to read from */
6201 int bDesc
, /* True for "ORDER BY rowid DESC" */
6202 int iIdx
, /* Index to scan for data */
6203 u8
*pToken
, /* Buffer containing prefix to match */
6204 int nToken
, /* Size of buffer pToken in bytes */
6205 Fts5Colset
*pColset
, /* Restrict matches to these columns */
6206 Fts5Iter
**ppIter
/* OUT: New iterator */
6208 Fts5Structure
*pStruct
;
6213 void (*xMerge
)(Fts5Index
*, Fts5Buffer
*, int, Fts5Buffer
*);
6214 void (*xAppend
)(Fts5Index
*, u64
, Fts5Iter
*, Fts5Buffer
*);
6215 if( p
->pConfig
->eDetail
==FTS5_DETAIL_NONE
){
6216 xMerge
= fts5MergeRowidLists
;
6217 xAppend
= fts5AppendRowid
;
6219 nMerge
= FTS5_MERGE_NLIST
-1;
6220 nBuf
= nMerge
*8; /* Sufficient to merge (16^8)==(2^32) lists */
6221 xMerge
= fts5MergePrefixLists
;
6222 xAppend
= fts5AppendPoslist
;
6225 aBuf
= (Fts5Buffer
*)fts5IdxMalloc(p
, sizeof(Fts5Buffer
)*nBuf
);
6226 pStruct
= fts5StructureRead(p
);
6227 assert( p
->rc
!=SQLITE_OK
|| (aBuf
&& pStruct
) );
6229 if( p
->rc
==SQLITE_OK
){
6230 const int flags
= FTS5INDEX_QUERY_SCAN
6231 | FTS5INDEX_QUERY_SKIPEMPTY
6232 | FTS5INDEX_QUERY_NOOUTPUT
;
6235 Fts5Iter
*p1
= 0; /* Iterator used to gather data from index */
6240 memset(&doclist
, 0, sizeof(doclist
));
6242 /* If iIdx is non-zero, then it is the number of a prefix-index for
6243 ** prefixes 1 character longer than the prefix being queried for. That
6244 ** index contains all the doclists required, except for the one
6245 ** corresponding to the prefix itself. That one is extracted from the
6246 ** main term index here. */
6249 const int f2
= FTS5INDEX_QUERY_SKIPEMPTY
|FTS5INDEX_QUERY_NOOUTPUT
;
6250 pToken
[0] = FTS5_MAIN_PREFIX
;
6251 fts5MultiIterNew(p
, pStruct
, f2
, pColset
, pToken
, nToken
, -1, 0, &p1
);
6252 fts5IterSetOutputCb(&p
->rc
, p1
);
6254 fts5MultiIterEof(p
, p1
)==0;
6255 fts5MultiIterNext2(p
, p1
, &dummy
)
6257 Fts5SegIter
*pSeg
= &p1
->aSeg
[ p1
->aFirst
[1].iFirst
];
6258 p1
->xSetOutputs(p1
, pSeg
);
6259 if( p1
->base
.nData
){
6260 xAppend(p
, (u64
)p1
->base
.iRowid
-(u64
)iLastRowid
, p1
, &doclist
);
6261 iLastRowid
= p1
->base
.iRowid
;
6264 fts5MultiIterFree(p1
);
6267 pToken
[0] = FTS5_MAIN_PREFIX
+ iIdx
;
6268 fts5MultiIterNew(p
, pStruct
, flags
, pColset
, pToken
, nToken
, -1, 0, &p1
);
6269 fts5IterSetOutputCb(&p
->rc
, p1
);
6272 fts5MultiIterEof(p
, p1
)==0;
6273 fts5MultiIterNext2(p
, p1
, &bNewTerm
)
6275 Fts5SegIter
*pSeg
= &p1
->aSeg
[ p1
->aFirst
[1].iFirst
];
6276 int nTerm
= pSeg
->term
.n
;
6277 const u8
*pTerm
= pSeg
->term
.p
;
6278 p1
->xSetOutputs(p1
, pSeg
);
6280 assert_nc( memcmp(pToken
, pTerm
, MIN(nToken
, nTerm
))<=0 );
6282 if( nTerm
<nToken
|| memcmp(pToken
, pTerm
, nToken
) ) break;
6285 if( p1
->base
.nData
==0 ) continue;
6286 if( p1
->base
.iRowid
<=iLastRowid
&& doclist
.n
>0 ){
6287 for(i
=0; p
->rc
==SQLITE_OK
&& doclist
.n
; i
++){
6290 assert( i1
+nMerge
<=nBuf
);
6291 for(iStore
=i1
; iStore
<i1
+nMerge
; iStore
++){
6292 if( aBuf
[iStore
].n
==0 ){
6293 fts5BufferSwap(&doclist
, &aBuf
[iStore
]);
6294 fts5BufferZero(&doclist
);
6298 if( iStore
==i1
+nMerge
){
6299 xMerge(p
, &doclist
, nMerge
, &aBuf
[i1
]);
6300 for(iStore
=i1
; iStore
<i1
+nMerge
; iStore
++){
6301 fts5BufferZero(&aBuf
[iStore
]);
6308 xAppend(p
, (u64
)p1
->base
.iRowid
-(u64
)iLastRowid
, p1
, &doclist
);
6309 iLastRowid
= p1
->base
.iRowid
;
6312 assert( (nBuf
%nMerge
)==0 );
6313 for(i
=0; i
<nBuf
; i
+=nMerge
){
6315 if( p
->rc
==SQLITE_OK
){
6316 xMerge(p
, &doclist
, nMerge
, &aBuf
[i
]);
6318 for(iFree
=i
; iFree
<i
+nMerge
; iFree
++){
6319 fts5BufferFree(&aBuf
[iFree
]);
6322 fts5MultiIterFree(p1
);
6324 pData
= fts5IdxMalloc(p
, sizeof(*pData
)+doclist
.n
+FTS5_DATA_ZERO_PADDING
);
6326 pData
->p
= (u8
*)&pData
[1];
6327 pData
->nn
= pData
->szLeaf
= doclist
.n
;
6328 if( doclist
.n
) memcpy(pData
->p
, doclist
.p
, doclist
.n
);
6329 fts5MultiIterNew2(p
, pData
, bDesc
, ppIter
);
6331 fts5BufferFree(&doclist
);
6334 fts5StructureRelease(pStruct
);
6340 ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
6341 ** to the document with rowid iRowid.
6343 int sqlite3Fts5IndexBeginWrite(Fts5Index
*p
, int bDelete
, i64 iRowid
){
6344 assert( p
->rc
==SQLITE_OK
);
6346 /* Allocate the hash table if it has not already been allocated */
6348 p
->rc
= sqlite3Fts5HashNew(p
->pConfig
, &p
->pHash
, &p
->nPendingData
);
6351 /* Flush the hash table to disk if required */
6352 if( iRowid
<p
->iWriteRowid
6353 || (iRowid
==p
->iWriteRowid
&& p
->bDelete
==0)
6354 || (p
->nPendingData
> p
->pConfig
->nHashSize
)
6359 p
->iWriteRowid
= iRowid
;
6360 p
->bDelete
= bDelete
;
6364 return fts5IndexReturn(p
);
6368 ** Commit data to disk.
6370 int sqlite3Fts5IndexSync(Fts5Index
*p
){
6371 assert( p
->rc
==SQLITE_OK
);
6373 sqlite3Fts5IndexCloseReader(p
);
6374 return fts5IndexReturn(p
);
6378 ** Discard any data stored in the in-memory hash tables. Do not write it
6379 ** to the database. Additionally, assume that the contents of the %_data
6380 ** table may have changed on disk. So any in-memory caches of %_data
6381 ** records must be invalidated.
6383 int sqlite3Fts5IndexRollback(Fts5Index
*p
){
6384 sqlite3Fts5IndexCloseReader(p
);
6385 fts5IndexDiscardData(p
);
6386 fts5StructureInvalidate(p
);
6387 /* assert( p->rc==SQLITE_OK ); */
6392 ** The %_data table is completely empty when this function is called. This
6393 ** function populates it with the initial structure objects for each index,
6394 ** and the initial version of the "averages" record (a zero-byte blob).
6396 int sqlite3Fts5IndexReinit(Fts5Index
*p
){
6398 fts5StructureInvalidate(p
);
6399 fts5IndexDiscardData(p
);
6400 memset(&s
, 0, sizeof(Fts5Structure
));
6401 if( p
->pConfig
->bContentlessDelete
){
6404 fts5DataWrite(p
, FTS5_AVERAGES_ROWID
, (const u8
*)"", 0);
6405 fts5StructureWrite(p
, &s
);
6406 return fts5IndexReturn(p
);
6410 ** Open a new Fts5Index handle. If the bCreate argument is true, create
6411 ** and initialize the underlying %_data table.
6413 ** If successful, set *pp to point to the new object and return SQLITE_OK.
6414 ** Otherwise, set *pp to NULL and return an SQLite error code.
6416 int sqlite3Fts5IndexOpen(
6417 Fts5Config
*pConfig
,
6423 Fts5Index
*p
; /* New object */
6425 *pp
= p
= (Fts5Index
*)sqlite3Fts5MallocZero(&rc
, sizeof(Fts5Index
));
6426 if( rc
==SQLITE_OK
){
6427 p
->pConfig
= pConfig
;
6428 p
->nWorkUnit
= FTS5_WORK_UNIT
;
6429 p
->zDataTbl
= sqlite3Fts5Mprintf(&rc
, "%s_data", pConfig
->zName
);
6430 if( p
->zDataTbl
&& bCreate
){
6431 rc
= sqlite3Fts5CreateTable(
6432 pConfig
, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr
6434 if( rc
==SQLITE_OK
){
6435 rc
= sqlite3Fts5CreateTable(pConfig
, "idx",
6436 "segid, term, pgno, PRIMARY KEY(segid, term)",
6440 if( rc
==SQLITE_OK
){
6441 rc
= sqlite3Fts5IndexReinit(p
);
6446 assert( rc
!=SQLITE_OK
|| p
->rc
==SQLITE_OK
);
6448 sqlite3Fts5IndexClose(p
);
6455 ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen().
6457 int sqlite3Fts5IndexClose(Fts5Index
*p
){
6460 assert( p
->pReader
==0 );
6461 fts5StructureInvalidate(p
);
6462 sqlite3_finalize(p
->pWriter
);
6463 sqlite3_finalize(p
->pDeleter
);
6464 sqlite3_finalize(p
->pIdxWriter
);
6465 sqlite3_finalize(p
->pIdxDeleter
);
6466 sqlite3_finalize(p
->pIdxSelect
);
6467 sqlite3_finalize(p
->pIdxNextSelect
);
6468 sqlite3_finalize(p
->pDataVersion
);
6469 sqlite3_finalize(p
->pDeleteFromIdx
);
6470 sqlite3Fts5HashFree(p
->pHash
);
6471 sqlite3_free(p
->zDataTbl
);
6478 ** Argument p points to a buffer containing UTF-8 text that is nByte bytes
6479 ** in size. Return the number of bytes in the nChar character prefix of the
6480 ** buffer, or 0 if there are fewer than nChar characters in total.
6482 int sqlite3Fts5IndexCharlenToBytelen(
6489 for(i
=0; i
<nChar
; i
++){
6490 if( n
>=nByte
) return 0; /* Input contains fewer than nChar chars */
6491 if( (unsigned char)p
[n
++]>=0xc0 ){
6492 if( n
>=nByte
) return 0;
6493 while( (p
[n
] & 0xc0)==0x80 ){
6496 if( i
+1==nChar
) break;
6506 ** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of
6507 ** Unicode characters in the string.
6509 static int fts5IndexCharlen(const char *pIn
, int nIn
){
6513 if( (unsigned char)pIn
[i
++]>=0xc0 ){
6514 while( i
<nIn
&& (pIn
[i
] & 0xc0)==0x80 ) i
++;
6522 ** Insert or remove data to or from the index. Each time a document is
6523 ** added to or removed from the index, this function is called one or more
6526 ** For an insert, it must be called once for each token in the new document.
6527 ** If the operation is a delete, it must be called (at least) once for each
6528 ** unique token in the document with an iCol value less than zero. The iPos
6529 ** argument is ignored for a delete.
6531 int sqlite3Fts5IndexWrite(
6532 Fts5Index
*p
, /* Index to write to */
6533 int iCol
, /* Column token appears in (-ve -> delete) */
6534 int iPos
, /* Position of token within column */
6535 const char *pToken
, int nToken
/* Token to add or remove to or from index */
6537 int i
; /* Used to iterate through indexes */
6538 int rc
= SQLITE_OK
; /* Return code */
6539 Fts5Config
*pConfig
= p
->pConfig
;
6541 assert( p
->rc
==SQLITE_OK
);
6542 assert( (iCol
<0)==p
->bDelete
);
6544 /* Add the entry to the main terms index. */
6545 rc
= sqlite3Fts5HashWrite(
6546 p
->pHash
, p
->iWriteRowid
, iCol
, iPos
, FTS5_MAIN_PREFIX
, pToken
, nToken
6549 for(i
=0; i
<pConfig
->nPrefix
&& rc
==SQLITE_OK
; i
++){
6550 const int nChar
= pConfig
->aPrefix
[i
];
6551 int nByte
= sqlite3Fts5IndexCharlenToBytelen(pToken
, nToken
, nChar
);
6553 rc
= sqlite3Fts5HashWrite(p
->pHash
,
6554 p
->iWriteRowid
, iCol
, iPos
, (char)(FTS5_MAIN_PREFIX
+i
+1), pToken
,
6564 ** pToken points to a buffer of size nToken bytes containing a search
6565 ** term, including the index number at the start, used on a tokendata=1
6566 ** table. This function returns true if the term in buffer pBuf matches
6567 ** token pToken/nToken.
6569 static int fts5IsTokendataPrefix(
6576 && 0==memcmp(pBuf
->p
, pToken
, nToken
)
6577 && (pBuf
->n
==nToken
|| pBuf
->p
[nToken
]==0x00)
6582 ** Ensure the segment-iterator passed as the only argument points to EOF.
6584 static void fts5SegIterSetEOF(Fts5SegIter
*pSeg
){
6585 fts5DataRelease(pSeg
->pLeaf
);
6590 ** Usually, a tokendata=1 iterator (struct Fts5TokenDataIter) accumulates an
6591 ** array of these for each row it visits. Or, for an iterator used by an
6592 ** "ORDER BY rank" query, it accumulates an array of these for the entire
6595 ** Each instance in the array indicates the iterator (and therefore term)
6596 ** associated with position iPos of rowid iRowid. This is used by the
6597 ** xInstToken() API.
6599 struct Fts5TokenDataMap
{
6600 i64 iRowid
; /* Row this token is located in */
6601 i64 iPos
; /* Position of token */
6602 int iIter
; /* Iterator token was read from */
6606 ** An object used to supplement Fts5Iter for tokendata=1 iterators.
6608 struct Fts5TokenDataIter
{
6614 Fts5TokenDataMap
*aMap
;
6616 Fts5PoslistReader
*aPoslistReader
;
6617 int *aPoslistToIter
;
6618 Fts5Iter
*apIter
[1];
6622 ** This function appends iterator pAppend to Fts5TokenDataIter pIn and
6623 ** returns the result.
6625 static Fts5TokenDataIter
*fts5AppendTokendataIter(
6626 Fts5Index
*p
, /* Index object (for error code) */
6627 Fts5TokenDataIter
*pIn
, /* Current Fts5TokenDataIter struct */
6628 Fts5Iter
*pAppend
/* Append this iterator */
6630 Fts5TokenDataIter
*pRet
= pIn
;
6632 if( p
->rc
==SQLITE_OK
){
6633 if( pIn
==0 || pIn
->nIter
==pIn
->nIterAlloc
){
6634 int nAlloc
= pIn
? pIn
->nIterAlloc
*2 : 16;
6635 int nByte
= nAlloc
* sizeof(Fts5Iter
*) + sizeof(Fts5TokenDataIter
);
6636 Fts5TokenDataIter
*pNew
= (Fts5TokenDataIter
*)sqlite3_realloc(pIn
, nByte
);
6639 p
->rc
= SQLITE_NOMEM
;
6641 if( pIn
==0 ) memset(pNew
, 0, nByte
);
6643 pNew
->nIterAlloc
= nAlloc
;
6648 sqlite3Fts5IterClose((Fts5IndexIter
*)pAppend
);
6650 pRet
->apIter
[pRet
->nIter
++] = pAppend
;
6652 assert( pRet
==0 || pRet
->nIter
<=pRet
->nIterAlloc
);
6658 ** Delete an Fts5TokenDataIter structure and its contents.
6660 static void fts5TokendataIterDelete(Fts5TokenDataIter
*pSet
){
6663 for(ii
=0; ii
<pSet
->nIter
; ii
++){
6664 fts5MultiIterFree(pSet
->apIter
[ii
]);
6666 sqlite3_free(pSet
->aPoslistReader
);
6667 sqlite3_free(pSet
->aMap
);
6673 ** Append a mapping to the token-map belonging to object pT.
6675 static void fts5TokendataIterAppendMap(
6677 Fts5TokenDataIter
*pT
,
6682 if( p
->rc
==SQLITE_OK
){
6683 if( pT
->nMap
==pT
->nMapAlloc
){
6684 int nNew
= pT
->nMapAlloc
? pT
->nMapAlloc
*2 : 64;
6685 int nByte
= nNew
* sizeof(Fts5TokenDataMap
);
6686 Fts5TokenDataMap
*aNew
;
6688 aNew
= (Fts5TokenDataMap
*)sqlite3_realloc(pT
->aMap
, nByte
);
6690 p
->rc
= SQLITE_NOMEM
;
6695 pT
->nMapAlloc
= nNew
;
6698 pT
->aMap
[pT
->nMap
].iRowid
= iRowid
;
6699 pT
->aMap
[pT
->nMap
].iPos
= iPos
;
6700 pT
->aMap
[pT
->nMap
].iIter
= iIter
;
6706 ** The iterator passed as the only argument must be a tokendata=1 iterator
6707 ** (pIter->pTokenDataIter!=0). This function sets the iterator output
6708 ** variables (pIter->base.*) according to the contents of the current
6711 static void fts5IterSetOutputsTokendata(Fts5Iter
*pIter
){
6714 i64 iRowid
= SMALLEST_INT64
;
6717 Fts5TokenDataIter
*pT
= pIter
->pTokenDataIter
;
6719 pIter
->base
.nData
= 0;
6720 pIter
->base
.pData
= 0;
6722 for(ii
=0; ii
<pT
->nIter
; ii
++){
6723 Fts5Iter
*p
= pT
->apIter
[ii
];
6724 if( p
->base
.bEof
==0 ){
6725 if( nHit
==0 || p
->base
.iRowid
<iRowid
){
6726 iRowid
= p
->base
.iRowid
;
6728 pIter
->base
.pData
= p
->base
.pData
;
6729 pIter
->base
.nData
= p
->base
.nData
;
6731 }else if( p
->base
.iRowid
==iRowid
){
6738 pIter
->base
.bEof
= 1;
6740 int eDetail
= pIter
->pIndex
->pConfig
->eDetail
;
6741 pIter
->base
.bEof
= 0;
6742 pIter
->base
.iRowid
= iRowid
;
6744 if( nHit
==1 && eDetail
==FTS5_DETAIL_FULL
){
6745 fts5TokendataIterAppendMap(pIter
->pIndex
, pT
, iMin
, iRowid
, -1);
6747 if( nHit
>1 && eDetail
!=FTS5_DETAIL_NONE
){
6752 /* Allocate the array of poslist readers if it is not already allocated. */
6753 if( pT
->aPoslistReader
==0 ){
6754 pT
->aPoslistReader
= (Fts5PoslistReader
*)sqlite3Fts5MallocZero(
6756 pT
->nIter
* (sizeof(Fts5PoslistReader
) + sizeof(int))
6758 if( pT
->aPoslistReader
==0 ) return;
6759 pT
->aPoslistToIter
= (int*)&pT
->aPoslistReader
[pT
->nIter
];
6762 /* Populate an iterator for each poslist that will be merged */
6763 for(ii
=0; ii
<pT
->nIter
; ii
++){
6764 Fts5Iter
*p
= pT
->apIter
[ii
];
6765 if( iRowid
==p
->base
.iRowid
){
6766 pT
->aPoslistToIter
[nReader
] = ii
;
6767 sqlite3Fts5PoslistReaderInit(
6768 p
->base
.pData
, p
->base
.nData
, &pT
->aPoslistReader
[nReader
++]
6770 nByte
+= p
->base
.nData
;
6774 /* Ensure the output buffer is large enough */
6775 if( fts5BufferGrow(&pIter
->pIndex
->rc
, &pIter
->poslist
, nByte
+nHit
*10) ){
6779 /* Ensure the token-mapping is large enough */
6780 if( eDetail
==FTS5_DETAIL_FULL
&& pT
->nMapAlloc
<(pT
->nMap
+ nByte
) ){
6781 int nNew
= (pT
->nMapAlloc
+ nByte
) * 2;
6782 Fts5TokenDataMap
*aNew
= (Fts5TokenDataMap
*)sqlite3_realloc(
6783 pT
->aMap
, nNew
*sizeof(Fts5TokenDataMap
)
6786 pIter
->pIndex
->rc
= SQLITE_NOMEM
;
6790 pT
->nMapAlloc
= nNew
;
6793 pIter
->poslist
.n
= 0;
6796 i64 iMinPos
= LARGEST_INT64
;
6798 /* Find smallest position */
6800 for(ii
=0; ii
<nReader
; ii
++){
6801 Fts5PoslistReader
*pReader
= &pT
->aPoslistReader
[ii
];
6802 if( pReader
->bEof
==0 ){
6803 if( pReader
->iPos
<iMinPos
){
6804 iMinPos
= pReader
->iPos
;
6810 /* If all readers were at EOF, break out of the loop. */
6811 if( iMinPos
==LARGEST_INT64
) break;
6813 sqlite3Fts5PoslistSafeAppend(&pIter
->poslist
, &iPrev
, iMinPos
);
6814 sqlite3Fts5PoslistReaderNext(&pT
->aPoslistReader
[iMin
]);
6816 if( eDetail
==FTS5_DETAIL_FULL
){
6817 pT
->aMap
[pT
->nMap
].iPos
= iMinPos
;
6818 pT
->aMap
[pT
->nMap
].iIter
= pT
->aPoslistToIter
[iMin
];
6819 pT
->aMap
[pT
->nMap
].iRowid
= iRowid
;
6824 pIter
->base
.pData
= pIter
->poslist
.p
;
6825 pIter
->base
.nData
= pIter
->poslist
.n
;
6831 ** The iterator passed as the only argument must be a tokendata=1 iterator
6832 ** (pIter->pTokenDataIter!=0). This function advances the iterator. If
6833 ** argument bFrom is false, then the iterator is advanced to the next
6834 ** entry. Or, if bFrom is true, it is advanced to the first entry with
6835 ** a rowid of iFrom or greater.
6837 static void fts5TokendataIterNext(Fts5Iter
*pIter
, int bFrom
, i64 iFrom
){
6839 Fts5TokenDataIter
*pT
= pIter
->pTokenDataIter
;
6840 Fts5Index
*pIndex
= pIter
->pIndex
;
6842 for(ii
=0; ii
<pT
->nIter
; ii
++){
6843 Fts5Iter
*p
= pT
->apIter
[ii
];
6845 && (p
->base
.iRowid
==pIter
->base
.iRowid
|| (bFrom
&& p
->base
.iRowid
<iFrom
))
6847 fts5MultiIterNext(pIndex
, p
, bFrom
, iFrom
);
6848 while( bFrom
&& p
->base
.bEof
==0
6849 && p
->base
.iRowid
<iFrom
6850 && pIndex
->rc
==SQLITE_OK
6852 fts5MultiIterNext(pIndex
, p
, 0, 0);
6857 if( pIndex
->rc
==SQLITE_OK
){
6858 fts5IterSetOutputsTokendata(pIter
);
6863 ** If the segment-iterator passed as the first argument is at EOF, then
6864 ** set pIter->term to a copy of buffer pTerm.
6866 static void fts5TokendataSetTermIfEof(Fts5Iter
*pIter
, Fts5Buffer
*pTerm
){
6867 if( pIter
&& pIter
->aSeg
[0].pLeaf
==0 ){
6868 fts5BufferSet(&pIter
->pIndex
->rc
, &pIter
->aSeg
[0].term
, pTerm
->n
, pTerm
->p
);
6873 ** This function sets up an iterator to use for a non-prefix query on a
6874 ** tokendata=1 table.
6876 static Fts5Iter
*fts5SetupTokendataIter(
6877 Fts5Index
*p
, /* FTS index to query */
6878 const u8
*pToken
, /* Buffer containing query term */
6879 int nToken
, /* Size of buffer pToken in bytes */
6880 Fts5Colset
*pColset
/* Colset to filter on */
6883 Fts5TokenDataIter
*pSet
= 0;
6884 Fts5Structure
*pStruct
= 0;
6885 const int flags
= FTS5INDEX_QUERY_SCANONETERM
| FTS5INDEX_QUERY_SCAN
;
6887 Fts5Buffer bSeek
= {0, 0, 0};
6888 Fts5Buffer
*pSmall
= 0;
6891 pStruct
= fts5StructureRead(p
);
6893 while( p
->rc
==SQLITE_OK
){
6894 Fts5Iter
*pPrev
= pSet
? pSet
->apIter
[pSet
->nIter
-1] : 0;
6896 Fts5SegIter
*pNewIter
= 0;
6897 Fts5SegIter
*pPrevIter
= 0;
6901 pNew
= fts5MultiIterAlloc(p
, pStruct
->nSegment
);
6903 fts5BufferSet(&p
->rc
, &bSeek
, pSmall
->n
, pSmall
->p
);
6904 fts5BufferAppendBlob(&p
->rc
, &bSeek
, 1, (const u8
*)"\0");
6906 fts5BufferSet(&p
->rc
, &bSeek
, nToken
, pToken
);
6909 sqlite3Fts5IterClose((Fts5IndexIter
*)pNew
);
6913 pNewIter
= &pNew
->aSeg
[0];
6914 pPrevIter
= (pPrev
? &pPrev
->aSeg
[0] : 0);
6915 for(iLvl
=0; iLvl
<pStruct
->nLevel
; iLvl
++){
6916 for(iSeg
=pStruct
->aLevel
[iLvl
].nSeg
-1; iSeg
>=0; iSeg
--){
6917 Fts5StructureSegment
*pSeg
= &pStruct
->aLevel
[iLvl
].aSeg
[iSeg
];
6921 if( fts5BufferCompare(pSmall
, &pPrevIter
->term
) ){
6922 memcpy(pNewIter
, pPrevIter
, sizeof(Fts5SegIter
));
6923 memset(pPrevIter
, 0, sizeof(Fts5SegIter
));
6925 }else if( pPrevIter
->iEndofDoclist
>pPrevIter
->pLeaf
->szLeaf
){
6926 fts5SegIterNextInit(p
,(const char*)bSeek
.p
,bSeek
.n
-1,pSeg
,pNewIter
);
6932 fts5SegIterSeekInit(p
, bSeek
.p
, bSeek
.n
, flags
, pSeg
, pNewIter
);
6936 if( pPrevIter
->pTombArray
){
6937 pNewIter
->pTombArray
= pPrevIter
->pTombArray
;
6938 pNewIter
->pTombArray
->nRef
++;
6941 fts5SegIterAllocTombstone(p
, pNewIter
);
6945 if( pPrevIter
) pPrevIter
++;
6949 fts5TokendataSetTermIfEof(pPrev
, pSmall
);
6951 pNew
->bSkipEmpty
= 1;
6952 pNew
->pColset
= pColset
;
6953 fts5IterSetOutputCb(&p
->rc
, pNew
);
6955 /* Loop through all segments in the new iterator. Find the smallest
6956 ** term that any segment-iterator points to. Iterator pNew will be
6957 ** used for this term. Also, set any iterator that points to a term that
6958 ** does not match pToken/nToken to point to EOF */
6960 for(ii
=0; ii
<pNew
->nSeg
; ii
++){
6961 Fts5SegIter
*pII
= &pNew
->aSeg
[ii
];
6962 if( 0==fts5IsTokendataPrefix(&pII
->term
, pToken
, nToken
) ){
6963 fts5SegIterSetEOF(pII
);
6965 if( pII
->pLeaf
&& (!pSmall
|| fts5BufferCompare(pSmall
, &pII
->term
)>0) ){
6966 pSmall
= &pII
->term
;
6970 /* If pSmall is still NULL at this point, then the new iterator does
6971 ** not point to any terms that match the query. So delete it and break
6972 ** out of the loop - all required iterators have been collected. */
6974 sqlite3Fts5IterClose((Fts5IndexIter
*)pNew
);
6978 /* Append this iterator to the set and continue. */
6979 pSet
= fts5AppendTokendataIter(p
, pSet
, pNew
);
6982 if( p
->rc
==SQLITE_OK
&& pSet
){
6984 for(ii
=0; ii
<pSet
->nIter
; ii
++){
6985 Fts5Iter
*pIter
= pSet
->apIter
[ii
];
6987 for(iSeg
=0; iSeg
<pIter
->nSeg
; iSeg
++){
6988 pIter
->aSeg
[iSeg
].flags
|= FTS5_SEGITER_ONETERM
;
6990 fts5MultiIterFinishSetup(p
, pIter
);
6994 if( p
->rc
==SQLITE_OK
){
6995 pRet
= fts5MultiIterAlloc(p
, 0);
6998 pRet
->pTokenDataIter
= pSet
;
7000 fts5IterSetOutputsTokendata(pRet
);
7002 pRet
->base
.bEof
= 1;
7005 fts5TokendataIterDelete(pSet
);
7008 fts5StructureRelease(pStruct
);
7009 fts5BufferFree(&bSeek
);
7015 ** Open a new iterator to iterate through all rowids that match the
7016 ** specified token or token prefix.
7018 int sqlite3Fts5IndexQuery(
7019 Fts5Index
*p
, /* FTS index to query */
7020 const char *pToken
, int nToken
, /* Token (or prefix) to query for */
7021 int flags
, /* Mask of FTS5INDEX_QUERY_X flags */
7022 Fts5Colset
*pColset
, /* Match these columns only */
7023 Fts5IndexIter
**ppIter
/* OUT: New iterator object */
7025 Fts5Config
*pConfig
= p
->pConfig
;
7027 Fts5Buffer buf
= {0, 0, 0};
7029 /* If the QUERY_SCAN flag is set, all other flags must be clear. */
7030 assert( (flags
& FTS5INDEX_QUERY_SCAN
)==0 || flags
==FTS5INDEX_QUERY_SCAN
);
7032 if( sqlite3Fts5BufferSize(&p
->rc
, &buf
, nToken
+1)==0 ){
7033 int iIdx
= 0; /* Index to search */
7034 int iPrefixIdx
= 0; /* +1 prefix index */
7035 int bTokendata
= pConfig
->bTokendata
;
7036 if( nToken
>0 ) memcpy(&buf
.p
[1], pToken
, nToken
);
7038 if( flags
& (FTS5INDEX_QUERY_NOTOKENDATA
|FTS5INDEX_QUERY_SCAN
) ){
7042 /* Figure out which index to search and set iIdx accordingly. If this
7043 ** is a prefix query for which there is no prefix index, set iIdx to
7044 ** greater than pConfig->nPrefix to indicate that the query will be
7045 ** satisfied by scanning multiple terms in the main index.
7047 ** If the QUERY_TEST_NOIDX flag was specified, then this must be a
7048 ** prefix-query. Instead of using a prefix-index (if one exists),
7049 ** evaluate the prefix query using the main FTS index. This is used
7050 ** for internal sanity checking by the integrity-check in debug
7053 if( pConfig
->bPrefixIndex
==0 || (flags
& FTS5INDEX_QUERY_TEST_NOIDX
) ){
7054 assert( flags
& FTS5INDEX_QUERY_PREFIX
);
7055 iIdx
= 1+pConfig
->nPrefix
;
7058 if( flags
& FTS5INDEX_QUERY_PREFIX
){
7059 int nChar
= fts5IndexCharlen(pToken
, nToken
);
7060 for(iIdx
=1; iIdx
<=pConfig
->nPrefix
; iIdx
++){
7061 int nIdxChar
= pConfig
->aPrefix
[iIdx
-1];
7062 if( nIdxChar
==nChar
) break;
7063 if( nIdxChar
==nChar
+1 ) iPrefixIdx
= iIdx
;
7067 if( bTokendata
&& iIdx
==0 ){
7069 pRet
= fts5SetupTokendataIter(p
, buf
.p
, nToken
+1, pColset
);
7070 }else if( iIdx
<=pConfig
->nPrefix
){
7071 /* Straight index lookup */
7072 Fts5Structure
*pStruct
= fts5StructureRead(p
);
7073 buf
.p
[0] = (u8
)(FTS5_MAIN_PREFIX
+ iIdx
);
7075 fts5MultiIterNew(p
, pStruct
, flags
| FTS5INDEX_QUERY_SKIPEMPTY
,
7076 pColset
, buf
.p
, nToken
+1, -1, 0, &pRet
7078 fts5StructureRelease(pStruct
);
7081 /* Scan multiple terms in the main index */
7082 int bDesc
= (flags
& FTS5INDEX_QUERY_DESC
)!=0;
7083 fts5SetupPrefixIter(p
, bDesc
, iPrefixIdx
, buf
.p
, nToken
+1, pColset
,&pRet
);
7085 assert( p
->rc
!=SQLITE_OK
);
7087 assert( pRet
->pColset
==0 );
7088 fts5IterSetOutputCb(&p
->rc
, pRet
);
7089 if( p
->rc
==SQLITE_OK
){
7090 Fts5SegIter
*pSeg
= &pRet
->aSeg
[pRet
->aFirst
[1].iFirst
];
7091 if( pSeg
->pLeaf
) pRet
->xSetOutputs(pRet
, pSeg
);
7097 sqlite3Fts5IterClose((Fts5IndexIter
*)pRet
);
7099 sqlite3Fts5IndexCloseReader(p
);
7102 *ppIter
= (Fts5IndexIter
*)pRet
;
7103 sqlite3Fts5BufferFree(&buf
);
7105 return fts5IndexReturn(p
);
7109 ** Return true if the iterator passed as the only argument is at EOF.
7112 ** Move to the next matching rowid.
7114 int sqlite3Fts5IterNext(Fts5IndexIter
*pIndexIter
){
7115 Fts5Iter
*pIter
= (Fts5Iter
*)pIndexIter
;
7116 assert( pIter
->pIndex
->rc
==SQLITE_OK
);
7117 if( pIter
->pTokenDataIter
){
7118 fts5TokendataIterNext(pIter
, 0, 0);
7120 fts5MultiIterNext(pIter
->pIndex
, pIter
, 0, 0);
7122 return fts5IndexReturn(pIter
->pIndex
);
7126 ** Move to the next matching term/rowid. Used by the fts5vocab module.
7128 int sqlite3Fts5IterNextScan(Fts5IndexIter
*pIndexIter
){
7129 Fts5Iter
*pIter
= (Fts5Iter
*)pIndexIter
;
7130 Fts5Index
*p
= pIter
->pIndex
;
7132 assert( pIter
->pIndex
->rc
==SQLITE_OK
);
7134 fts5MultiIterNext(p
, pIter
, 0, 0);
7135 if( p
->rc
==SQLITE_OK
){
7136 Fts5SegIter
*pSeg
= &pIter
->aSeg
[ pIter
->aFirst
[1].iFirst
];
7137 if( pSeg
->pLeaf
&& pSeg
->term
.p
[0]!=FTS5_MAIN_PREFIX
){
7138 fts5DataRelease(pSeg
->pLeaf
);
7140 pIter
->base
.bEof
= 1;
7144 return fts5IndexReturn(pIter
->pIndex
);
7148 ** Move to the next matching rowid that occurs at or after iMatch. The
7149 ** definition of "at or after" depends on whether this iterator iterates
7150 ** in ascending or descending rowid order.
7152 int sqlite3Fts5IterNextFrom(Fts5IndexIter
*pIndexIter
, i64 iMatch
){
7153 Fts5Iter
*pIter
= (Fts5Iter
*)pIndexIter
;
7154 if( pIter
->pTokenDataIter
){
7155 fts5TokendataIterNext(pIter
, 1, iMatch
);
7157 fts5MultiIterNextFrom(pIter
->pIndex
, pIter
, iMatch
);
7159 return fts5IndexReturn(pIter
->pIndex
);
7163 ** Return the current term.
7165 const char *sqlite3Fts5IterTerm(Fts5IndexIter
*pIndexIter
, int *pn
){
7167 const char *z
= (const char*)fts5MultiIterTerm((Fts5Iter
*)pIndexIter
, &n
);
7168 assert_nc( z
|| n
<=1 );
7170 return (z
? &z
[1] : 0);
7174 ** This is used by xInstToken() to access the token at offset iOff, column
7175 ** iCol of row iRowid. The token is returned via output variables *ppOut
7176 ** and *pnOut. The iterator passed as the first argument must be a tokendata=1
7177 ** iterator (pIter->pTokenDataIter!=0).
7179 int sqlite3Fts5IterToken(
7180 Fts5IndexIter
*pIndexIter
,
7184 const char **ppOut
, int *pnOut
7186 Fts5Iter
*pIter
= (Fts5Iter
*)pIndexIter
;
7187 Fts5TokenDataIter
*pT
= pIter
->pTokenDataIter
;
7188 Fts5TokenDataMap
*aMap
= pT
->aMap
;
7189 i64 iPos
= (((i64
)iCol
)<<32) + iOff
;
7196 iTest
= (i1
+ i2
) / 2;
7198 if( aMap
[iTest
].iRowid
<iRowid
){
7200 }else if( aMap
[iTest
].iRowid
>iRowid
){
7203 if( aMap
[iTest
].iPos
<iPos
){
7204 if( aMap
[iTest
].iPos
<0 ){
7208 }else if( aMap
[iTest
].iPos
>iPos
){
7217 Fts5Iter
*pMap
= pT
->apIter
[aMap
[iTest
].iIter
];
7218 *ppOut
= (const char*)pMap
->aSeg
[0].term
.p
+1;
7219 *pnOut
= pMap
->aSeg
[0].term
.n
-1;
7226 ** Clear any existing entries from the token-map associated with the
7227 ** iterator passed as the only argument.
7229 void sqlite3Fts5IndexIterClearTokendata(Fts5IndexIter
*pIndexIter
){
7230 Fts5Iter
*pIter
= (Fts5Iter
*)pIndexIter
;
7231 if( pIter
&& pIter
->pTokenDataIter
){
7232 pIter
->pTokenDataIter
->nMap
= 0;
7237 ** Set a token-mapping for the iterator passed as the first argument. This
7238 ** is used in detail=column or detail=none mode when a token is requested
7239 ** using the xInstToken() API. In this case the caller tokenizers the
7240 ** current row and configures the token-mapping via multiple calls to this
7243 int sqlite3Fts5IndexIterWriteTokendata(
7244 Fts5IndexIter
*pIndexIter
,
7245 const char *pToken
, int nToken
,
7246 i64 iRowid
, int iCol
, int iOff
7248 Fts5Iter
*pIter
= (Fts5Iter
*)pIndexIter
;
7249 Fts5TokenDataIter
*pT
= pIter
->pTokenDataIter
;
7250 Fts5Index
*p
= pIter
->pIndex
;
7253 assert( p
->pConfig
->eDetail
!=FTS5_DETAIL_FULL
);
7254 assert( pIter
->pTokenDataIter
);
7256 for(ii
=0; ii
<pT
->nIter
; ii
++){
7257 Fts5Buffer
*pTerm
= &pT
->apIter
[ii
]->aSeg
[0].term
;
7258 if( nToken
==pTerm
->n
-1 && memcmp(pToken
, pTerm
->p
+1, nToken
)==0 ) break;
7261 fts5TokendataIterAppendMap(p
, pT
, ii
, iRowid
, (((i64
)iCol
)<<32) + iOff
);
7263 return fts5IndexReturn(p
);
7267 ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
7269 void sqlite3Fts5IterClose(Fts5IndexIter
*pIndexIter
){
7271 Fts5Iter
*pIter
= (Fts5Iter
*)pIndexIter
;
7272 Fts5Index
*pIndex
= pIter
->pIndex
;
7273 fts5TokendataIterDelete(pIter
->pTokenDataIter
);
7274 fts5MultiIterFree(pIter
);
7275 sqlite3Fts5IndexCloseReader(pIndex
);
7280 ** Read and decode the "averages" record from the database.
7282 ** Parameter anSize must point to an array of size nCol, where nCol is
7283 ** the number of user defined columns in the FTS table.
7285 int sqlite3Fts5IndexGetAverages(Fts5Index
*p
, i64
*pnRow
, i64
*anSize
){
7286 int nCol
= p
->pConfig
->nCol
;
7290 memset(anSize
, 0, sizeof(i64
) * nCol
);
7291 pData
= fts5DataRead(p
, FTS5_AVERAGES_ROWID
);
7292 if( p
->rc
==SQLITE_OK
&& pData
->nn
){
7295 i
+= fts5GetVarint(&pData
->p
[i
], (u64
*)pnRow
);
7296 for(iCol
=0; i
<pData
->nn
&& iCol
<nCol
; iCol
++){
7297 i
+= fts5GetVarint(&pData
->p
[i
], (u64
*)&anSize
[iCol
]);
7301 fts5DataRelease(pData
);
7302 return fts5IndexReturn(p
);
7306 ** Replace the current "averages" record with the contents of the buffer
7307 ** supplied as the second argument.
7309 int sqlite3Fts5IndexSetAverages(Fts5Index
*p
, const u8
*pData
, int nData
){
7310 assert( p
->rc
==SQLITE_OK
);
7311 fts5DataWrite(p
, FTS5_AVERAGES_ROWID
, pData
, nData
);
7312 return fts5IndexReturn(p
);
7316 ** Return the total number of blocks this module has read from the %_data
7317 ** table since it was created.
7319 int sqlite3Fts5IndexReads(Fts5Index
*p
){
7324 ** Set the 32-bit cookie value stored at the start of all structure
7325 ** records to the value passed as the second argument.
7327 ** Return SQLITE_OK if successful, or an SQLite error code if an error
7330 int sqlite3Fts5IndexSetCookie(Fts5Index
*p
, int iNew
){
7331 int rc
; /* Return code */
7332 Fts5Config
*pConfig
= p
->pConfig
; /* Configuration object */
7333 u8 aCookie
[4]; /* Binary representation of iNew */
7334 sqlite3_blob
*pBlob
= 0;
7336 assert( p
->rc
==SQLITE_OK
);
7337 sqlite3Fts5Put32(aCookie
, iNew
);
7339 rc
= sqlite3_blob_open(pConfig
->db
, pConfig
->zDb
, p
->zDataTbl
,
7340 "block", FTS5_STRUCTURE_ROWID
, 1, &pBlob
7342 if( rc
==SQLITE_OK
){
7343 sqlite3_blob_write(pBlob
, aCookie
, 4, 0);
7344 rc
= sqlite3_blob_close(pBlob
);
7350 int sqlite3Fts5IndexLoadConfig(Fts5Index
*p
){
7351 Fts5Structure
*pStruct
;
7352 pStruct
= fts5StructureRead(p
);
7353 fts5StructureRelease(pStruct
);
7354 return fts5IndexReturn(p
);
7358 ** Retrieve the origin value that will be used for the segment currently
7359 ** being accumulated in the in-memory hash table when it is flushed to
7360 ** disk. If successful, SQLITE_OK is returned and (*piOrigin) set to
7361 ** the queried value. Or, if an error occurs, an error code is returned
7362 ** and the final value of (*piOrigin) is undefined.
7364 int sqlite3Fts5IndexGetOrigin(Fts5Index
*p
, i64
*piOrigin
){
7365 Fts5Structure
*pStruct
;
7366 pStruct
= fts5StructureRead(p
);
7368 *piOrigin
= pStruct
->nOriginCntr
;
7369 fts5StructureRelease(pStruct
);
7371 return fts5IndexReturn(p
);
7375 ** Buffer pPg contains a page of a tombstone hash table - one of nPg pages
7376 ** associated with the same segment. This function adds rowid iRowid to
7377 ** the hash table. The caller is required to guarantee that there is at
7378 ** least one free slot on the page.
7380 ** If parameter bForce is false and the hash table is deemed to be full
7381 ** (more than half of the slots are occupied), then non-zero is returned
7382 ** and iRowid not inserted. Or, if bForce is true or if the hash table page
7383 ** is not full, iRowid is inserted and zero returned.
7385 static int fts5IndexTombstoneAddToPage(
7391 const int szKey
= TOMBSTONE_KEYSIZE(pPg
);
7392 const int nSlot
= TOMBSTONE_NSLOT(pPg
);
7393 const int nElem
= fts5GetU32(&pPg
->p
[4]);
7394 int iSlot
= (iRowid
/ nPg
) % nSlot
;
7395 int nCollide
= nSlot
;
7397 if( szKey
==4 && iRowid
>0xFFFFFFFF ) return 2;
7403 if( bForce
==0 && nElem
>=(nSlot
/2) ){
7407 fts5PutU32(&pPg
->p
[4], nElem
+1);
7409 u32
*aSlot
= (u32
*)&pPg
->p
[8];
7410 while( aSlot
[iSlot
] ){
7411 iSlot
= (iSlot
+ 1) % nSlot
;
7412 if( nCollide
--==0 ) return 0;
7414 fts5PutU32((u8
*)&aSlot
[iSlot
], (u32
)iRowid
);
7416 u64
*aSlot
= (u64
*)&pPg
->p
[8];
7417 while( aSlot
[iSlot
] ){
7418 iSlot
= (iSlot
+ 1) % nSlot
;
7419 if( nCollide
--==0 ) return 0;
7421 fts5PutU64((u8
*)&aSlot
[iSlot
], iRowid
);
7428 ** This function attempts to build a new hash containing all the keys
7429 ** currently in the tombstone hash table for segment pSeg. The new
7430 ** hash will be stored in the nOut buffers passed in array apOut[].
7431 ** All pages of the new hash use key-size szKey (4 or 8).
7433 ** Return 0 if the hash is successfully rebuilt into the nOut pages.
7434 ** Or non-zero if it is not (because one page became overfull). In this
7435 ** case the caller should retry with a larger nOut parameter.
7437 ** Parameter pData1 is page iPg1 of the hash table being rebuilt.
7439 static int fts5IndexTombstoneRehash(
7441 Fts5StructureSegment
*pSeg
, /* Segment to rebuild hash of */
7442 Fts5Data
*pData1
, /* One page of current hash - or NULL */
7443 int iPg1
, /* Which page of the current hash is pData1 */
7444 int szKey
, /* 4 or 8, the keysize */
7445 int nOut
, /* Number of output pages */
7446 Fts5Data
**apOut
/* Array of output hash pages */
7451 /* Initialize the headers of all the output pages */
7452 for(ii
=0; ii
<nOut
; ii
++){
7453 apOut
[ii
]->p
[0] = szKey
;
7454 fts5PutU32(&apOut
[ii
]->p
[4], 0);
7457 /* Loop through the current pages of the hash table. */
7458 for(ii
=0; res
==0 && ii
<pSeg
->nPgTombstone
; ii
++){
7459 Fts5Data
*pData
= 0; /* Page ii of the current hash table */
7460 Fts5Data
*pFree
= 0; /* Free this at the end of the loop */
7465 pFree
= pData
= fts5DataRead(p
, FTS5_TOMBSTONE_ROWID(pSeg
->iSegid
, ii
));
7469 int szKeyIn
= TOMBSTONE_KEYSIZE(pData
);
7470 int nSlotIn
= (pData
->nn
- 8) / szKeyIn
;
7472 for(iIn
=0; iIn
<nSlotIn
; iIn
++){
7475 /* Read the value from slot iIn of the input page into iVal. */
7477 u32
*aSlot
= (u32
*)&pData
->p
[8];
7478 if( aSlot
[iIn
] ) iVal
= fts5GetU32((u8
*)&aSlot
[iIn
]);
7480 u64
*aSlot
= (u64
*)&pData
->p
[8];
7481 if( aSlot
[iIn
] ) iVal
= fts5GetU64((u8
*)&aSlot
[iIn
]);
7484 /* If iVal is not 0 at this point, insert it into the new hash table */
7486 Fts5Data
*pPg
= apOut
[(iVal
% nOut
)];
7487 res
= fts5IndexTombstoneAddToPage(pPg
, 0, nOut
, iVal
);
7492 /* If this is page 0 of the old hash, copy the rowid-0-flag from the
7493 ** old hash to the new. */
7495 apOut
[0]->p
[1] = pData
->p
[1];
7498 fts5DataRelease(pFree
);
7505 ** This is called to rebuild the hash table belonging to segment pSeg.
7506 ** If parameter pData1 is not NULL, then one page of the existing hash table
7507 ** has already been loaded - pData1, which is page iPg1. The key-size for
7508 ** the new hash table is szKey (4 or 8).
7510 ** If successful, the new hash table is not written to disk. Instead,
7511 ** output parameter (*pnOut) is set to the number of pages in the new
7512 ** hash table, and (*papOut) to point to an array of buffers containing
7513 ** the new page data.
7515 ** If an error occurs, an error code is left in the Fts5Index object and
7516 ** both output parameters set to 0 before returning.
7518 static void fts5IndexTombstoneRebuild(
7520 Fts5StructureSegment
*pSeg
, /* Segment to rebuild hash of */
7521 Fts5Data
*pData1
, /* One page of current hash - or NULL */
7522 int iPg1
, /* Which page of the current hash is pData1 */
7523 int szKey
, /* 4 or 8, the keysize */
7524 int *pnOut
, /* OUT: Number of output pages */
7525 Fts5Data
***papOut
/* OUT: Output hash pages */
7527 const int MINSLOT
= 32;
7528 int nSlotPerPage
= MAX(MINSLOT
, (p
->pConfig
->pgsz
- 8) / szKey
);
7529 int nSlot
= 0; /* Number of slots in each output page */
7532 /* Figure out how many output pages (nOut) and how many slots per
7533 ** page (nSlot). There are three possibilities:
7535 ** 1. The hash table does not yet exist. In this case the new hash
7536 ** table will consist of a single page with MINSLOT slots.
7538 ** 2. The hash table exists but is currently a single page. In this
7539 ** case an attempt is made to grow the page to accommodate the new
7540 ** entry. The page is allowed to grow up to nSlotPerPage (see above)
7543 ** 3. The hash table already consists of more than one page, or of
7544 ** a single page already so large that it cannot be grown. In this
7545 ** case the new hash consists of (nPg*2+1) pages of nSlotPerPage
7546 ** slots each, where nPg is the current number of pages in the
7549 if( pSeg
->nPgTombstone
==0 ){
7553 }else if( pSeg
->nPgTombstone
==1 ){
7555 int nElem
= (int)fts5GetU32(&pData1
->p
[4]);
7556 assert( pData1
&& iPg1
==0 );
7558 nSlot
= MAX(nElem
*4, MINSLOT
);
7559 if( nSlot
>nSlotPerPage
) nOut
= 0;
7563 nOut
= (pSeg
->nPgTombstone
* 2 + 1);
7564 nSlot
= nSlotPerPage
;
7567 /* Allocate the required array and output pages */
7572 Fts5Data
**apOut
= 0;
7574 /* Allocate space for the new hash table */
7575 assert( nSlot
>=MINSLOT
);
7576 apOut
= (Fts5Data
**)sqlite3Fts5MallocZero(&p
->rc
, sizeof(Fts5Data
*) * nOut
);
7577 szPage
= 8 + nSlot
*szKey
;
7578 for(ii
=0; ii
<nOut
; ii
++){
7579 Fts5Data
*pNew
= (Fts5Data
*)sqlite3Fts5MallocZero(&p
->rc
,
7580 sizeof(Fts5Data
)+szPage
7584 pNew
->p
= (u8
*)&pNew
[1];
7589 /* Rebuild the hash table. */
7590 if( p
->rc
==SQLITE_OK
){
7591 res
= fts5IndexTombstoneRehash(p
, pSeg
, pData1
, iPg1
, szKey
, nOut
, apOut
);
7595 fts5IndexFreeArray(apOut
, nOut
);
7604 /* If control flows to here, it was not possible to rebuild the hash
7605 ** table. Free all buffers and then try again with more pages. */
7606 assert( p
->rc
==SQLITE_OK
);
7607 fts5IndexFreeArray(apOut
, nOut
);
7608 nSlot
= nSlotPerPage
;
7615 ** Add a tombstone for rowid iRowid to segment pSeg.
7617 static void fts5IndexTombstoneAdd(
7619 Fts5StructureSegment
*pSeg
,
7626 Fts5Data
**apHash
= 0;
7628 p
->nContentlessDelete
++;
7630 if( pSeg
->nPgTombstone
>0 ){
7631 iPg
= iRowid
% pSeg
->nPgTombstone
;
7632 pPg
= fts5DataRead(p
, FTS5_TOMBSTONE_ROWID(pSeg
->iSegid
,iPg
));
7634 assert( p
->rc
!=SQLITE_OK
);
7638 if( 0==fts5IndexTombstoneAddToPage(pPg
, 0, pSeg
->nPgTombstone
, iRowid
) ){
7639 fts5DataWrite(p
, FTS5_TOMBSTONE_ROWID(pSeg
->iSegid
,iPg
), pPg
->p
, pPg
->nn
);
7640 fts5DataRelease(pPg
);
7645 /* Have to rebuild the hash table. First figure out the key-size (4 or 8). */
7646 szKey
= pPg
? TOMBSTONE_KEYSIZE(pPg
) : 4;
7647 if( iRowid
>0xFFFFFFFF ) szKey
= 8;
7649 /* Rebuild the hash table */
7650 fts5IndexTombstoneRebuild(p
, pSeg
, pPg
, iPg
, szKey
, &nHash
, &apHash
);
7651 assert( p
->rc
==SQLITE_OK
|| (nHash
==0 && apHash
==0) );
7653 /* If all has succeeded, write the new rowid into one of the new hash
7654 ** table pages, then write them all out to disk. */
7657 fts5IndexTombstoneAddToPage(apHash
[iRowid
% nHash
], 1, nHash
, iRowid
);
7658 for(ii
=0; ii
<nHash
; ii
++){
7659 i64 iTombstoneRowid
= FTS5_TOMBSTONE_ROWID(pSeg
->iSegid
, ii
);
7660 fts5DataWrite(p
, iTombstoneRowid
, apHash
[ii
]->p
, apHash
[ii
]->nn
);
7662 pSeg
->nPgTombstone
= nHash
;
7663 fts5StructureWrite(p
, p
->pStruct
);
7666 fts5DataRelease(pPg
);
7667 fts5IndexFreeArray(apHash
, nHash
);
7671 ** Add iRowid to the tombstone list of the segment or segments that contain
7672 ** rows from origin iOrigin. Return SQLITE_OK if successful, or an SQLite
7673 ** error code otherwise.
7675 int sqlite3Fts5IndexContentlessDelete(Fts5Index
*p
, i64 iOrigin
, i64 iRowid
){
7676 Fts5Structure
*pStruct
;
7677 pStruct
= fts5StructureRead(p
);
7679 int bFound
= 0; /* True after pSeg->nEntryTombstone incr. */
7681 for(iLvl
=pStruct
->nLevel
-1; iLvl
>=0; iLvl
--){
7683 for(iSeg
=pStruct
->aLevel
[iLvl
].nSeg
-1; iSeg
>=0; iSeg
--){
7684 Fts5StructureSegment
*pSeg
= &pStruct
->aLevel
[iLvl
].aSeg
[iSeg
];
7685 if( pSeg
->iOrigin1
<=(u64
)iOrigin
&& pSeg
->iOrigin2
>=(u64
)iOrigin
){
7687 pSeg
->nEntryTombstone
++;
7690 fts5IndexTombstoneAdd(p
, pSeg
, iRowid
);
7694 fts5StructureRelease(pStruct
);
7696 return fts5IndexReturn(p
);
7699 /*************************************************************************
7700 **************************************************************************
7701 ** Below this point is the implementation of the integrity-check
7706 ** Return a simple checksum value based on the arguments.
7708 u64
sqlite3Fts5IndexEntryCksum(
7718 ret
+= (ret
<<3) + iCol
;
7719 ret
+= (ret
<<3) + iPos
;
7720 if( iIdx
>=0 ) ret
+= (ret
<<3) + (FTS5_MAIN_PREFIX
+ iIdx
);
7721 for(i
=0; i
<nTerm
; i
++) ret
+= (ret
<<3) + pTerm
[i
];
7727 ** This function is purely an internal test. It does not contribute to
7728 ** FTS functionality, or even the integrity-check, in any way.
7730 ** Instead, it tests that the same set of pgno/rowid combinations are
7731 ** visited regardless of whether the doclist-index identified by parameters
7732 ** iSegid/iLeaf is iterated in forwards or reverse order.
7734 static void fts5TestDlidxReverse(
7736 int iSegid
, /* Segment id to load from */
7737 int iLeaf
/* Load doclist-index for this leaf */
7739 Fts5DlidxIter
*pDlidx
= 0;
7743 for(pDlidx
=fts5DlidxIterInit(p
, 0, iSegid
, iLeaf
);
7744 fts5DlidxIterEof(p
, pDlidx
)==0;
7745 fts5DlidxIterNext(p
, pDlidx
)
7747 i64 iRowid
= fts5DlidxIterRowid(pDlidx
);
7748 int pgno
= fts5DlidxIterPgno(pDlidx
);
7749 assert( pgno
>iLeaf
);
7750 cksum1
+= iRowid
+ ((i64
)pgno
<<32);
7752 fts5DlidxIterFree(pDlidx
);
7755 for(pDlidx
=fts5DlidxIterInit(p
, 1, iSegid
, iLeaf
);
7756 fts5DlidxIterEof(p
, pDlidx
)==0;
7757 fts5DlidxIterPrev(p
, pDlidx
)
7759 i64 iRowid
= fts5DlidxIterRowid(pDlidx
);
7760 int pgno
= fts5DlidxIterPgno(pDlidx
);
7761 assert( fts5DlidxIterPgno(pDlidx
)>iLeaf
);
7762 cksum2
+= iRowid
+ ((i64
)pgno
<<32);
7764 fts5DlidxIterFree(pDlidx
);
7767 if( p
->rc
==SQLITE_OK
&& cksum1
!=cksum2
) p
->rc
= FTS5_CORRUPT
;
7770 static int fts5QueryCksum(
7771 Fts5Index
*p
, /* Fts5 index object */
7773 const char *z
, /* Index key to query for */
7774 int n
, /* Size of index key in bytes */
7775 int flags
, /* Flags for Fts5IndexQuery */
7776 u64
*pCksum
/* IN/OUT: Checksum value */
7778 int eDetail
= p
->pConfig
->eDetail
;
7779 u64 cksum
= *pCksum
;
7780 Fts5IndexIter
*pIter
= 0;
7781 int rc
= sqlite3Fts5IndexQuery(
7782 p
, z
, n
, (flags
| FTS5INDEX_QUERY_NOTOKENDATA
), 0, &pIter
7785 while( rc
==SQLITE_OK
&& ALWAYS(pIter
!=0) && 0==sqlite3Fts5IterEof(pIter
) ){
7786 i64 rowid
= pIter
->iRowid
;
7788 if( eDetail
==FTS5_DETAIL_NONE
){
7789 cksum
^= sqlite3Fts5IndexEntryCksum(rowid
, 0, 0, iIdx
, z
, n
);
7791 Fts5PoslistReader sReader
;
7792 for(sqlite3Fts5PoslistReaderInit(pIter
->pData
, pIter
->nData
, &sReader
);
7794 sqlite3Fts5PoslistReaderNext(&sReader
)
7796 int iCol
= FTS5_POS2COLUMN(sReader
.iPos
);
7797 int iOff
= FTS5_POS2OFFSET(sReader
.iPos
);
7798 cksum
^= sqlite3Fts5IndexEntryCksum(rowid
, iCol
, iOff
, iIdx
, z
, n
);
7801 if( rc
==SQLITE_OK
){
7802 rc
= sqlite3Fts5IterNext(pIter
);
7805 sqlite3Fts5IterClose(pIter
);
/*
** Check if buffer z[], size n bytes, contains a series of valid utf-8
** encoded codepoints. If so, return 0. Otherwise, if the buffer does not
** contain valid utf-8, return non-zero.
**
** Only the structure of each sequence is checked: the lead byte must
** announce a sequence of between 1 and 4 bytes that fits within the
** buffer, and every byte that follows the lead byte must have the form
** 10xxxxxx. Overlong encodings and out-of-range values are not rejected.
*/
static int fts5TestUtf8(const char *z, int n){
  int i = 0;
  while( i<n ){
    int nByte;                    /* Size of the sequence starting at z[i] */
    int j;

    if( (z[i] & 0x80)==0x00 ){
      nByte = 1;
    }else if( (z[i] & 0xE0)==0xC0 ){
      nByte = 2;
    }else if( (z[i] & 0xF0)==0xE0 ){
      nByte = 3;
    }else if( (z[i] & 0xF8)==0xF0 ){
      nByte = 4;
    }else{
      return 1;
    }

    /* The whole sequence must lie inside the buffer, and every byte after
    ** the first must be a continuation byte. This covers the fourth byte
    ** of a 4-byte sequence as well. */
    if( i+nByte>n ) return 1;
    for(j=1; j<nByte; j++){
      if( (z[i+j] & 0xC0)!=0x80 ) return 1;
    }
    i += nByte;
  }

  return 0;
}
7844 ** This function is also purely an internal test. It does not contribute to
7845 ** FTS functionality, or even the integrity-check, in any way.
7847 static void fts5TestTerm(
7849 Fts5Buffer
*pPrev
, /* Previous term */
7850 const char *z
, int n
, /* Possibly new term to test */
7856 fts5BufferSet(&rc
, pPrev
, n
, (const u8
*)z
);
7858 if( rc
==SQLITE_OK
&& (pPrev
->n
!=n
|| memcmp(pPrev
->p
, z
, n
)) ){
7859 u64 cksum3
= *pCksum
;
7860 const char *zTerm
= (const char*)&pPrev
->p
[1]; /* term sans prefix-byte */
7861 int nTerm
= pPrev
->n
-1; /* Size of zTerm in bytes */
7862 int iIdx
= (pPrev
->p
[0] - FTS5_MAIN_PREFIX
);
7863 int flags
= (iIdx
==0 ? 0 : FTS5INDEX_QUERY_PREFIX
);
7867 /* Check that the results returned for ASC and DESC queries are
7868 ** the same. If not, call this corruption. */
7869 rc
= fts5QueryCksum(p
, iIdx
, zTerm
, nTerm
, flags
, &ck1
);
7870 if( rc
==SQLITE_OK
){
7871 int f
= flags
|FTS5INDEX_QUERY_DESC
;
7872 rc
= fts5QueryCksum(p
, iIdx
, zTerm
, nTerm
, f
, &ck2
);
7874 if( rc
==SQLITE_OK
&& ck1
!=ck2
) rc
= FTS5_CORRUPT
;
7876 /* If this is a prefix query, check that the results returned if the
7877 ** index is disabled are the same. In both ASC and DESC order.
7879 ** This check may only be performed if the hash table is empty. This
7880 ** is because the hash table only supports a single scan query at
7881 ** a time, and the multi-iter loop from which this function is called
7882 ** is already performing such a scan.
7884 ** Also only do this if buffer zTerm contains nTerm bytes of valid
7885 ** utf-8. Otherwise, the last part of the buffer contents might contain
7886 ** a non-utf-8 sequence that happens to be a prefix of a valid utf-8
7887 ** character stored in the main fts index, which will cause the
7889 if( p
->nPendingData
==0 && 0==fts5TestUtf8(zTerm
, nTerm
) ){
7890 if( iIdx
>0 && rc
==SQLITE_OK
){
7891 int f
= flags
|FTS5INDEX_QUERY_TEST_NOIDX
;
7893 rc
= fts5QueryCksum(p
, iIdx
, zTerm
, nTerm
, f
, &ck2
);
7894 if( rc
==SQLITE_OK
&& ck1
!=ck2
) rc
= FTS5_CORRUPT
;
7896 if( iIdx
>0 && rc
==SQLITE_OK
){
7897 int f
= flags
|FTS5INDEX_QUERY_TEST_NOIDX
|FTS5INDEX_QUERY_DESC
;
7899 rc
= fts5QueryCksum(p
, iIdx
, zTerm
, nTerm
, f
, &ck2
);
7900 if( rc
==SQLITE_OK
&& ck1
!=ck2
) rc
= FTS5_CORRUPT
;
7905 fts5BufferSet(&rc
, pPrev
, n
, (const u8
*)z
);
7907 if( rc
==SQLITE_OK
&& cksum3
!=expected
){
7916 # define fts5TestDlidxReverse(x,y,z)
7917 # define fts5TestTerm(u,v,w,x,y,z)
7923 ** 1) All leaves of pSeg between iFirst and iLast (inclusive) exist and
7924 ** contain zero terms.
7925 ** 2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and
7926 ** contain zero rowids.
7928 static void fts5IndexIntegrityCheckEmpty(
7930 Fts5StructureSegment
*pSeg
, /* Segment to check internal consistency */
7937 /* Now check that the iter.nEmpty leaves following the current leaf
7938 ** (a) exist and (b) contain no terms. */
7939 for(i
=iFirst
; p
->rc
==SQLITE_OK
&& i
<=iLast
; i
++){
7940 Fts5Data
*pLeaf
= fts5DataRead(p
, FTS5_SEGMENT_ROWID(pSeg
->iSegid
, i
));
7942 if( !fts5LeafIsTermless(pLeaf
) ) p
->rc
= FTS5_CORRUPT
;
7943 if( i
>=iNoRowid
&& 0!=fts5LeafFirstRowidOff(pLeaf
) ) p
->rc
= FTS5_CORRUPT
;
7945 fts5DataRelease(pLeaf
);
7949 static void fts5IntegrityCheckPgidx(Fts5Index
*p
, Fts5Data
*pLeaf
){
7953 Fts5Buffer buf1
= {0,0,0};
7954 Fts5Buffer buf2
= {0,0,0};
7957 while( ii
<pLeaf
->nn
&& p
->rc
==SQLITE_OK
){
7962 ii
+= fts5GetVarint32(&pLeaf
->p
[ii
], nIncr
);
7966 if( iOff
>=pLeaf
->szLeaf
){
7967 p
->rc
= FTS5_CORRUPT
;
7968 }else if( iTermOff
==nIncr
){
7970 iOff
+= fts5GetVarint32(&pLeaf
->p
[iOff
], nByte
);
7971 if( (iOff
+nByte
)>pLeaf
->szLeaf
){
7972 p
->rc
= FTS5_CORRUPT
;
7974 fts5BufferSet(&p
->rc
, &buf1
, nByte
, &pLeaf
->p
[iOff
]);
7978 iOff
+= fts5GetVarint32(&pLeaf
->p
[iOff
], nKeep
);
7979 iOff
+= fts5GetVarint32(&pLeaf
->p
[iOff
], nByte
);
7980 if( nKeep
>buf1
.n
|| (iOff
+nByte
)>pLeaf
->szLeaf
){
7981 p
->rc
= FTS5_CORRUPT
;
7984 fts5BufferAppendBlob(&p
->rc
, &buf1
, nByte
, &pLeaf
->p
[iOff
]);
7987 if( p
->rc
==SQLITE_OK
){
7988 res
= fts5BufferCompare(&buf1
, &buf2
);
7989 if( res
<=0 ) p
->rc
= FTS5_CORRUPT
;
7992 fts5BufferSet(&p
->rc
, &buf2
, buf1
.n
, buf1
.p
);
7995 fts5BufferFree(&buf1
);
7996 fts5BufferFree(&buf2
);
7999 static void fts5IndexIntegrityCheckSegment(
8000 Fts5Index
*p
, /* FTS5 backend object */
8001 Fts5StructureSegment
*pSeg
/* Segment to check internal consistency */
8003 Fts5Config
*pConfig
= p
->pConfig
;
8004 int bSecureDelete
= (pConfig
->iVersion
==FTS5_CURRENT_VERSION_SECUREDELETE
);
8005 sqlite3_stmt
*pStmt
= 0;
8007 int iIdxPrevLeaf
= pSeg
->pgnoFirst
-1;
8008 int iDlidxPrevLeaf
= pSeg
->pgnoLast
;
8010 if( pSeg
->pgnoFirst
==0 ) return;
8012 fts5IndexPrepareStmt(p
, &pStmt
, sqlite3_mprintf(
8013 "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d "
8015 pConfig
->zDb
, pConfig
->zName
, pSeg
->iSegid
8018 /* Iterate through the b-tree hierarchy. */
8019 while( p
->rc
==SQLITE_OK
&& SQLITE_ROW
==sqlite3_step(pStmt
) ){
8020 i64 iRow
; /* Rowid for this leaf */
8021 Fts5Data
*pLeaf
; /* Data for this leaf */
8023 const char *zIdxTerm
= (const char*)sqlite3_column_blob(pStmt
, 1);
8024 int nIdxTerm
= sqlite3_column_bytes(pStmt
, 1);
8025 int iIdxLeaf
= sqlite3_column_int(pStmt
, 2);
8026 int bIdxDlidx
= sqlite3_column_int(pStmt
, 3);
8028 /* If the leaf in question has already been trimmed from the segment,
8029 ** ignore this b-tree entry. Otherwise, load it into memory. */
8030 if( iIdxLeaf
<pSeg
->pgnoFirst
) continue;
8031 iRow
= FTS5_SEGMENT_ROWID(pSeg
->iSegid
, iIdxLeaf
);
8032 pLeaf
= fts5LeafRead(p
, iRow
);
8033 if( pLeaf
==0 ) break;
8035 /* Check that the leaf contains at least one term, and that it is equal
8036 ** to or larger than the split-key in zIdxTerm. Also check that if there
8037 ** is also a rowid pointer within the leaf page header, it points to a
8038 ** location before the term. */
8039 if( pLeaf
->nn
<=pLeaf
->szLeaf
){
8042 && pConfig
->iVersion
==FTS5_CURRENT_VERSION_SECUREDELETE
8043 && pLeaf
->nn
==pLeaf
->szLeaf
8046 /* special case - the very first page in a segment keeps its %_idx
8047 ** entry even if all the terms are removed from it by secure-delete
8050 p
->rc
= FTS5_CORRUPT
;
8054 int iOff
; /* Offset of first term on leaf */
8055 int iRowidOff
; /* Offset of first rowid on leaf */
8056 int nTerm
; /* Size of term on leaf in bytes */
8057 int res
; /* Comparison of term and split-key */
8059 iOff
= fts5LeafFirstTermOff(pLeaf
);
8060 iRowidOff
= fts5LeafFirstRowidOff(pLeaf
);
8061 if( iRowidOff
>=iOff
|| iOff
>=pLeaf
->szLeaf
){
8062 p
->rc
= FTS5_CORRUPT
;
8064 iOff
+= fts5GetVarint32(&pLeaf
->p
[iOff
], nTerm
);
8065 res
= fts5Memcmp(&pLeaf
->p
[iOff
], zIdxTerm
, MIN(nTerm
, nIdxTerm
));
8066 if( res
==0 ) res
= nTerm
- nIdxTerm
;
8067 if( res
<0 ) p
->rc
= FTS5_CORRUPT
;
8070 fts5IntegrityCheckPgidx(p
, pLeaf
);
8072 fts5DataRelease(pLeaf
);
8075 /* Now check that the iter.nEmpty leaves following the current leaf
8076 ** (a) exist and (b) contain no terms. */
8077 fts5IndexIntegrityCheckEmpty(
8078 p
, pSeg
, iIdxPrevLeaf
+1, iDlidxPrevLeaf
+1, iIdxLeaf
-1
8082 /* If there is a doclist-index, check that it looks right. */
8084 Fts5DlidxIter
*pDlidx
= 0; /* For iterating through doclist index */
8085 int iPrevLeaf
= iIdxLeaf
;
8086 int iSegid
= pSeg
->iSegid
;
8090 for(pDlidx
=fts5DlidxIterInit(p
, 0, iSegid
, iIdxLeaf
);
8091 fts5DlidxIterEof(p
, pDlidx
)==0;
8092 fts5DlidxIterNext(p
, pDlidx
)
8095 /* Check any rowid-less pages that occur before the current leaf. */
8096 for(iPg
=iPrevLeaf
+1; iPg
<fts5DlidxIterPgno(pDlidx
); iPg
++){
8097 iKey
= FTS5_SEGMENT_ROWID(iSegid
, iPg
);
8098 pLeaf
= fts5DataRead(p
, iKey
);
8100 if( fts5LeafFirstRowidOff(pLeaf
)!=0 ) p
->rc
= FTS5_CORRUPT
;
8101 fts5DataRelease(pLeaf
);
8104 iPrevLeaf
= fts5DlidxIterPgno(pDlidx
);
8106 /* Check that the leaf page indicated by the iterator really does
8107 ** contain the rowid suggested by the same. */
8108 iKey
= FTS5_SEGMENT_ROWID(iSegid
, iPrevLeaf
);
8109 pLeaf
= fts5DataRead(p
, iKey
);
8112 int iRowidOff
= fts5LeafFirstRowidOff(pLeaf
);
8113 ASSERT_SZLEAF_OK(pLeaf
);
8114 if( iRowidOff
>=pLeaf
->szLeaf
){
8115 p
->rc
= FTS5_CORRUPT
;
8116 }else if( bSecureDelete
==0 || iRowidOff
>0 ){
8117 i64 iDlRowid
= fts5DlidxIterRowid(pDlidx
);
8118 fts5GetVarint(&pLeaf
->p
[iRowidOff
], (u64
*)&iRowid
);
8119 if( iRowid
<iDlRowid
|| (bSecureDelete
==0 && iRowid
!=iDlRowid
) ){
8120 p
->rc
= FTS5_CORRUPT
;
8123 fts5DataRelease(pLeaf
);
8127 iDlidxPrevLeaf
= iPg
;
8128 fts5DlidxIterFree(pDlidx
);
8129 fts5TestDlidxReverse(p
, iSegid
, iIdxLeaf
);
8131 iDlidxPrevLeaf
= pSeg
->pgnoLast
;
8132 /* TODO: Check there is no doclist index */
8135 iIdxPrevLeaf
= iIdxLeaf
;
8138 rc2
= sqlite3_finalize(pStmt
);
8139 if( p
->rc
==SQLITE_OK
) p
->rc
= rc2
;
8141 /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */
8143 if( p
->rc
==SQLITE_OK
&& iter
.iLeaf
!=pSeg
->pgnoLast
){
8144 p
->rc
= FTS5_CORRUPT
;
8151 ** Run internal checks to ensure that the FTS index (a) is internally
8152 ** consistent and (b) contains entries for which the XOR of the checksums
8153 ** as calculated by sqlite3Fts5IndexEntryCksum() is cksum.
8155 ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
8156 ** checksum does not match. Return SQLITE_OK if all checks pass without
8157 ** error, or some other SQLite error code if another error (e.g. OOM)
8160 int sqlite3Fts5IndexIntegrityCheck(Fts5Index
*p
, u64 cksum
, int bUseCksum
){
8161 int eDetail
= p
->pConfig
->eDetail
;
8162 u64 cksum2
= 0; /* Checksum based on contents of indexes */
8163 Fts5Buffer poslist
= {0,0,0}; /* Buffer used to hold a poslist */
8164 Fts5Iter
*pIter
; /* Used to iterate through entire index */
8165 Fts5Structure
*pStruct
; /* Index structure */
8169 /* Used by extra internal tests only run if NDEBUG is not defined */
8170 u64 cksum3
= 0; /* Checksum based on contents of indexes */
8171 Fts5Buffer term
= {0,0,0}; /* Buffer used to hold most recent term */
8173 const int flags
= FTS5INDEX_QUERY_NOOUTPUT
;
8175 /* Load the FTS index structure */
8176 pStruct
= fts5StructureRead(p
);
8178 assert( p
->rc
!=SQLITE_OK
);
8179 return fts5IndexReturn(p
);
8182 /* Check that the internal nodes of each segment match the leaves */
8183 for(iLvl
=0; iLvl
<pStruct
->nLevel
; iLvl
++){
8184 for(iSeg
=0; iSeg
<pStruct
->aLevel
[iLvl
].nSeg
; iSeg
++){
8185 Fts5StructureSegment
*pSeg
= &pStruct
->aLevel
[iLvl
].aSeg
[iSeg
];
8186 fts5IndexIntegrityCheckSegment(p
, pSeg
);
8190 /* The cksum argument passed to this function is a checksum calculated
8191 ** based on all expected entries in the FTS index (including prefix index
8192 ** entries). This block checks that a checksum calculated based on the
8193 ** actual contents of FTS index is identical.
8195 ** Two versions of the same checksum are calculated. The first (stack
8196 ** variable cksum2) based on entries extracted from the full-text index
8197 ** while doing a linear scan of each individual index in turn.
  ** As each term is visited by the linear scans, a separate query for the
8200 ** same term is performed. cksum3 is calculated based on the entries
8201 ** extracted by these queries.
8203 for(fts5MultiIterNew(p
, pStruct
, flags
, 0, 0, 0, -1, 0, &pIter
);
8204 fts5MultiIterEof(p
, pIter
)==0;
8205 fts5MultiIterNext(p
, pIter
, 0, 0)
8207 int n
; /* Size of term in bytes */
8208 i64 iPos
= 0; /* Position read from poslist */
8209 int iOff
= 0; /* Offset within poslist */
8210 i64 iRowid
= fts5MultiIterRowid(pIter
);
8211 char *z
= (char*)fts5MultiIterTerm(pIter
, &n
);
8213 /* If this is a new term, query for it. Update cksum3 with the results. */
8214 fts5TestTerm(p
, &term
, z
, n
, cksum2
, &cksum3
);
8217 if( eDetail
==FTS5_DETAIL_NONE
){
8218 if( 0==fts5MultiIterIsEmpty(p
, pIter
) ){
8219 cksum2
^= sqlite3Fts5IndexEntryCksum(iRowid
, 0, 0, -1, z
, n
);
8223 fts5SegiterPoslist(p
, &pIter
->aSeg
[pIter
->aFirst
[1].iFirst
], 0, &poslist
);
8224 fts5BufferAppendBlob(&p
->rc
, &poslist
, 4, (const u8
*)"\0\0\0\0");
8225 while( 0==sqlite3Fts5PoslistNext64(poslist
.p
, poslist
.n
, &iOff
, &iPos
) ){
8226 int iCol
= FTS5_POS2COLUMN(iPos
);
8227 int iTokOff
= FTS5_POS2OFFSET(iPos
);
8228 cksum2
^= sqlite3Fts5IndexEntryCksum(iRowid
, iCol
, iTokOff
, -1, z
, n
);
8232 fts5TestTerm(p
, &term
, 0, 0, cksum2
, &cksum3
);
8234 fts5MultiIterFree(pIter
);
8235 if( p
->rc
==SQLITE_OK
&& bUseCksum
&& cksum
!=cksum2
) p
->rc
= FTS5_CORRUPT
;
8237 fts5StructureRelease(pStruct
);
8239 fts5BufferFree(&term
);
8241 fts5BufferFree(&poslist
);
8242 return fts5IndexReturn(p
);
8245 /*************************************************************************
8246 **************************************************************************
8247 ** Below this point is the implementation of the fts5_decode() scalar
8251 #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
8253 ** Decode a segment-data rowid from the %_data table. This function is
8254 ** the opposite of macro FTS5_SEGMENT_ROWID().
8256 static void fts5DecodeRowid(
8257 i64 iRowid
, /* Rowid from %_data table */
8258 int *pbTombstone
, /* OUT: Tombstone hash flag */
8259 int *piSegid
, /* OUT: Segment id */
8260 int *pbDlidx
, /* OUT: Dlidx flag */
8261 int *piHeight
, /* OUT: Height */
8262 int *piPgno
/* OUT: Page number */
8264 *piPgno
= (int)(iRowid
& (((i64
)1 << FTS5_DATA_PAGE_B
) - 1));
8265 iRowid
>>= FTS5_DATA_PAGE_B
;
8267 *piHeight
= (int)(iRowid
& (((i64
)1 << FTS5_DATA_HEIGHT_B
) - 1));
8268 iRowid
>>= FTS5_DATA_HEIGHT_B
;
8270 *pbDlidx
= (int)(iRowid
& 0x0001);
8271 iRowid
>>= FTS5_DATA_DLI_B
;
8273 *piSegid
= (int)(iRowid
& (((i64
)1 << FTS5_DATA_ID_B
) - 1));
8274 iRowid
>>= FTS5_DATA_ID_B
;
8276 *pbTombstone
= (int)(iRowid
& 0x0001);
8278 #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
8280 #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
8281 static void fts5DebugRowid(int *pRc
, Fts5Buffer
*pBuf
, i64 iKey
){
8282 int iSegid
, iHeight
, iPgno
, bDlidx
, bTomb
; /* Rowid compenents */
8283 fts5DecodeRowid(iKey
, &bTomb
, &iSegid
, &bDlidx
, &iHeight
, &iPgno
);
8286 if( iKey
==FTS5_AVERAGES_ROWID
){
8287 sqlite3Fts5BufferAppendPrintf(pRc
, pBuf
, "{averages} ");
8289 sqlite3Fts5BufferAppendPrintf(pRc
, pBuf
, "{structure}");
8293 sqlite3Fts5BufferAppendPrintf(pRc
, pBuf
, "{%s%ssegid=%d h=%d pgno=%d}",
8294 bDlidx
? "dlidx " : "",
8295 bTomb
? "tombstone " : "",
8296 iSegid
, iHeight
, iPgno
8300 #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
8302 #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
8303 static void fts5DebugStructure(
8304 int *pRc
, /* IN/OUT: error code */
8308 int iLvl
, iSeg
; /* Iterate through levels, segments */
8310 for(iLvl
=0; iLvl
<p
->nLevel
; iLvl
++){
8311 Fts5StructureLevel
*pLvl
= &p
->aLevel
[iLvl
];
8312 sqlite3Fts5BufferAppendPrintf(pRc
, pBuf
,
8313 " {lvl=%d nMerge=%d nSeg=%d", iLvl
, pLvl
->nMerge
, pLvl
->nSeg
8315 for(iSeg
=0; iSeg
<pLvl
->nSeg
; iSeg
++){
8316 Fts5StructureSegment
*pSeg
= &pLvl
->aSeg
[iSeg
];
8317 sqlite3Fts5BufferAppendPrintf(pRc
, pBuf
, " {id=%d leaves=%d..%d",
8318 pSeg
->iSegid
, pSeg
->pgnoFirst
, pSeg
->pgnoLast
8320 if( pSeg
->iOrigin1
>0 ){
8321 sqlite3Fts5BufferAppendPrintf(pRc
, pBuf
, " origin=%lld..%lld",
8322 pSeg
->iOrigin1
, pSeg
->iOrigin2
8325 sqlite3Fts5BufferAppendPrintf(pRc
, pBuf
, "}");
8327 sqlite3Fts5BufferAppendPrintf(pRc
, pBuf
, "}");
8330 #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
8332 #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
8334 ** This is part of the fts5_decode() debugging aid.
8336 ** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This
8337 ** function appends a human-readable representation of the same object
8338 ** to the buffer passed as the second argument.
8340 static void fts5DecodeStructure(
8341 int *pRc
, /* IN/OUT: error code */
8343 const u8
*pBlob
, int nBlob
8345 int rc
; /* Return code */
8346 Fts5Structure
*p
= 0; /* Decoded structure object */
8348 rc
= fts5StructureDecode(pBlob
, nBlob
, 0, &p
);
8349 if( rc
!=SQLITE_OK
){
8354 fts5DebugStructure(pRc
, pBuf
, p
);
8355 fts5StructureRelease(p
);
8357 #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
8359 #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
8361 ** This is part of the fts5_decode() debugging aid.
8363 ** Arguments pBlob/nBlob contain an "averages" record. This function
8364 ** appends a human-readable representation of record to the buffer passed
8365 ** as the second argument.
8367 static void fts5DecodeAverages(
8368 int *pRc
, /* IN/OUT: error code */
8370 const u8
*pBlob
, int nBlob
8373 const char *zSpace
= "";
8377 i
+= sqlite3Fts5GetVarint(&pBlob
[i
], &iVal
);
8378 sqlite3Fts5BufferAppendPrintf(pRc
, pBuf
, "%s%d", zSpace
, (int)iVal
);
8382 #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
8384 #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
8386 ** Buffer (a/n) is assumed to contain a list of serialized varints. Read
8387 ** each varint and append its string representation to buffer pBuf. Return
8388 ** after either the input buffer is exhausted or a 0 value is read.
8390 ** The return value is the number of bytes read from the input buffer.
8392 static int fts5DecodePoslist(int *pRc
, Fts5Buffer
*pBuf
, const u8
*a
, int n
){
8396 iOff
+= fts5GetVarint32(&a
[iOff
], iVal
);
8397 sqlite3Fts5BufferAppendPrintf(pRc
, pBuf
, " %d", iVal
);
8401 #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
8403 #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
8405 ** The start of buffer (a/n) contains the start of a doclist. The doclist
8406 ** may or may not finish within the buffer. This function appends a text
8407 ** representation of the part of the doclist that is present to buffer
8410 ** The return value is the number of bytes read from the input buffer.
8412 static int fts5DecodeDoclist(int *pRc
, Fts5Buffer
*pBuf
, const u8
*a
, int n
){
8417 iOff
= sqlite3Fts5GetVarint(a
, (u64
*)&iDocid
);
8418 sqlite3Fts5BufferAppendPrintf(pRc
, pBuf
, " id=%lld", iDocid
);
8423 iOff
+= fts5GetPoslistSize(&a
[iOff
], &nPos
, &bDel
);
8424 sqlite3Fts5BufferAppendPrintf(pRc
, pBuf
, " nPos=%d%s", nPos
, bDel
?"*":"");
8425 iOff
+= fts5DecodePoslist(pRc
, pBuf
, &a
[iOff
], MIN(n
-iOff
, nPos
));
8428 iOff
+= sqlite3Fts5GetVarint(&a
[iOff
], (u64
*)&iDelta
);
8430 sqlite3Fts5BufferAppendPrintf(pRc
, pBuf
, " id=%lld", iDocid
);
8436 #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
8438 #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
8440 ** This function is part of the fts5_decode() debugging function. It is
8441 ** only ever used with detail=none tables.
8443 ** Buffer (pData/nData) contains a doclist in the format used by detail=none
8444 ** tables. This function appends a human-readable version of that list to
8447 ** If *pRc is other than SQLITE_OK when this function is called, it is a
8448 ** no-op. If an OOM or other error occurs within this function, *pRc is
8449 ** set to an SQLite error code before returning. The final state of buffer
8450 ** pBuf is undefined in this case.
8452 static void fts5DecodeRowidList(
8453 int *pRc
, /* IN/OUT: Error code */
8454 Fts5Buffer
*pBuf
, /* Buffer to append text to */
8455 const u8
*pData
, int nData
/* Data to decode list-of-rowids from */
8461 const char *zApp
= "";
8463 i
+= sqlite3Fts5GetVarint(&pData
[i
], &iVal
);
8466 if( i
<nData
&& pData
[i
]==0x00 ){
8468 if( i
<nData
&& pData
[i
]==0x00 ){
8476 sqlite3Fts5BufferAppendPrintf(pRc
, pBuf
, " %lld%s", iRowid
, zApp
);
8479 #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
8481 #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
8482 static void fts5BufferAppendTerm(int *pRc
, Fts5Buffer
*pBuf
, Fts5Buffer
*pTerm
){
8484 fts5BufferGrow(pRc
, pBuf
, pTerm
->n
*2 + 1);
8485 if( *pRc
==SQLITE_OK
){
8486 for(ii
=0; ii
<pTerm
->n
; ii
++){
8487 if( pTerm
->p
[ii
]==0x00 ){
8488 pBuf
->p
[pBuf
->n
++] = '\\';
8489 pBuf
->p
[pBuf
->n
++] = '0';
8491 pBuf
->p
[pBuf
->n
++] = pTerm
->p
[ii
];
8494 pBuf
->p
[pBuf
->n
] = 0x00;
8497 #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
8499 #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
8501 ** The implementation of user-defined scalar function fts5_decode().
8503 static void fts5DecodeFunction(
8504 sqlite3_context
*pCtx
, /* Function call context */
8505 int nArg
, /* Number of args (always 2) */
8506 sqlite3_value
**apVal
/* Function arguments */
8508 i64 iRowid
; /* Rowid for record being decoded */
8509 int iSegid
,iHeight
,iPgno
,bDlidx
;/* Rowid components */
8511 const u8
*aBlob
; int n
; /* Record to decode */
8513 Fts5Buffer s
; /* Build up text to return here */
8514 int rc
= SQLITE_OK
; /* Return code */
8515 sqlite3_int64 nSpace
= 0;
8516 int eDetailNone
= (sqlite3_user_data(pCtx
)!=0);
8520 memset(&s
, 0, sizeof(Fts5Buffer
));
8521 iRowid
= sqlite3_value_int64(apVal
[0]);
8523 /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[]
8524 ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
8525 ** buffer overreads even if the record is corrupt. */
8526 n
= sqlite3_value_bytes(apVal
[1]);
8527 aBlob
= sqlite3_value_blob(apVal
[1]);
8528 nSpace
= n
+ FTS5_DATA_ZERO_PADDING
;
8529 a
= (u8
*)sqlite3Fts5MallocZero(&rc
, nSpace
);
8530 if( a
==0 ) goto decode_out
;
8531 if( n
>0 ) memcpy(a
, aBlob
, n
);
8533 fts5DecodeRowid(iRowid
, &bTomb
, &iSegid
, &bDlidx
, &iHeight
, &iPgno
);
8535 fts5DebugRowid(&rc
, &s
, iRowid
);
8543 memset(&lvl
, 0, sizeof(Fts5DlidxLvl
));
8545 lvl
.iLeafPgno
= iPgno
;
8547 for(fts5DlidxLvlNext(&lvl
); lvl
.bEof
==0; fts5DlidxLvlNext(&lvl
)){
8548 sqlite3Fts5BufferAppendPrintf(&rc
, &s
,
8549 " %d(%lld)", lvl
.iLeafPgno
, lvl
.iRowid
8553 u32 nElem
= fts5GetU32(&a
[4]);
8554 int szKey
= (aBlob
[0]==4 || aBlob
[0]==8) ? aBlob
[0] : 8;
8555 int nSlot
= (n
- 8) / szKey
;
8557 sqlite3Fts5BufferAppendPrintf(&rc
, &s
, " nElem=%d", (int)nElem
);
8559 sqlite3Fts5BufferAppendPrintf(&rc
, &s
, " 0");
8561 for(ii
=0; ii
<nSlot
; ii
++){
8564 u32
*aSlot
= (u32
*)&aBlob
[8];
8565 if( aSlot
[ii
] ) iVal
= fts5GetU32((u8
*)&aSlot
[ii
]);
8567 u64
*aSlot
= (u64
*)&aBlob
[8];
8568 if( aSlot
[ii
] ) iVal
= fts5GetU64((u8
*)&aSlot
[ii
]);
8571 sqlite3Fts5BufferAppendPrintf(&rc
, &s
, " %lld", (i64
)iVal
);
8574 }else if( iSegid
==0 ){
8575 if( iRowid
==FTS5_AVERAGES_ROWID
){
8576 fts5DecodeAverages(&rc
, &s
, a
, n
);
8578 fts5DecodeStructure(&rc
, &s
, a
, n
);
8580 }else if( eDetailNone
){
8581 Fts5Buffer term
; /* Current term read from page */
8583 int iPgidxOff
= szLeaf
= fts5GetU16(&a
[2]);
8588 memset(&term
, 0, sizeof(Fts5Buffer
));
8590 /* Decode any entries that occur before the first term. */
8592 iPgidxOff
+= fts5GetVarint32(&a
[iPgidxOff
], iTermOff
);
8596 fts5DecodeRowidList(&rc
, &s
, &a
[4], iTermOff
-4);
8599 while( iOff
<szLeaf
&& rc
==SQLITE_OK
){
8602 /* Read the term data for the next term*/
8603 iOff
+= fts5GetVarint32(&a
[iOff
], nAppend
);
8605 fts5BufferAppendBlob(&rc
, &term
, nAppend
, &a
[iOff
]);
8606 sqlite3Fts5BufferAppendPrintf(&rc
, &s
, " term=");
8607 fts5BufferAppendTerm(&rc
, &s
, &term
);
8610 /* Figure out where the doclist for this term ends */
8613 iPgidxOff
+= fts5GetVarint32(&a
[iPgidxOff
], nIncr
);
8618 if( iTermOff
>szLeaf
){
8621 fts5DecodeRowidList(&rc
, &s
, &a
[iOff
], iTermOff
-iOff
);
8625 iOff
+= fts5GetVarint32(&a
[iOff
], nKeep
);
8629 fts5BufferFree(&term
);
8631 Fts5Buffer term
; /* Current term read from page */
8632 int szLeaf
; /* Offset of pgidx in a[] */
8634 int iPgidxPrev
= 0; /* Previous value read from pgidx */
8640 memset(&term
, 0, sizeof(Fts5Buffer
));
8643 sqlite3Fts5BufferSet(&rc
, &s
, 7, (const u8
*)"corrupt");
8646 iRowidOff
= fts5GetU16(&a
[0]);
8647 iPgidxOff
= szLeaf
= fts5GetU16(&a
[2]);
8649 fts5GetVarint32(&a
[iPgidxOff
], iTermOff
);
8650 }else if( iPgidxOff
>n
){
8656 /* Decode the position list tail at the start of the page */
8659 }else if( iTermOff
!=0 ){
8668 fts5DecodePoslist(&rc
, &s
, &a
[4], iOff
-4);
8670 /* Decode any more doclist data that appears on the page before the
8672 nDoclist
= (iTermOff
? iTermOff
: szLeaf
) - iOff
;
8673 if( nDoclist
+iOff
>n
){
8677 fts5DecodeDoclist(&rc
, &s
, &a
[iOff
], nDoclist
);
8679 while( iPgidxOff
<n
&& rc
==SQLITE_OK
){
8680 int bFirst
= (iPgidxOff
==szLeaf
); /* True for first term on page */
8681 int nByte
; /* Bytes of data */
8684 iPgidxOff
+= fts5GetVarint32(&a
[iPgidxOff
], nByte
);
8685 iPgidxPrev
+= nByte
;
8689 fts5GetVarint32(&a
[iPgidxOff
], nByte
);
8690 iEnd
= iPgidxPrev
+ nByte
;
8700 iOff
+= fts5GetVarint32(&a
[iOff
], nByte
);
8707 iOff
+= fts5GetVarint32(&a
[iOff
], nByte
);
8712 fts5BufferAppendBlob(&rc
, &term
, nByte
, &a
[iOff
]);
8715 sqlite3Fts5BufferAppendPrintf(&rc
, &s
, " term=");
8716 fts5BufferAppendTerm(&rc
, &s
, &term
);
8717 iOff
+= fts5DecodeDoclist(&rc
, &s
, &a
[iOff
], iEnd
-iOff
);
8720 fts5BufferFree(&term
);
8725 if( rc
==SQLITE_OK
){
8726 sqlite3_result_text(pCtx
, (const char*)s
.p
, s
.n
, SQLITE_TRANSIENT
);
8728 sqlite3_result_error_code(pCtx
, rc
);
8732 #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
8734 #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
8736 ** The implementation of user-defined scalar function fts5_rowid().
8738 static void fts5RowidFunction(
8739 sqlite3_context
*pCtx
, /* Function call context */
8740 int nArg
, /* Number of args (always 2) */
8741 sqlite3_value
**apVal
/* Function arguments */
8745 sqlite3_result_error(pCtx
, "should be: fts5_rowid(subject, ....)", -1);
8747 zArg
= (const char*)sqlite3_value_text(apVal
[0]);
8748 if( 0==sqlite3_stricmp(zArg
, "segment") ){
8752 sqlite3_result_error(pCtx
,
8753 "should be: fts5_rowid('segment', segid, pgno))", -1
8756 segid
= sqlite3_value_int(apVal
[1]);
8757 pgno
= sqlite3_value_int(apVal
[2]);
8758 iRowid
= FTS5_SEGMENT_ROWID(segid
, pgno
);
8759 sqlite3_result_int64(pCtx
, iRowid
);
8762 sqlite3_result_error(pCtx
,
8763 "first arg to fts5_rowid() must be 'segment'" , -1
8768 #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
8770 #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
8772 typedef struct Fts5StructVtab Fts5StructVtab
;
8773 struct Fts5StructVtab
{
8777 typedef struct Fts5StructVcsr Fts5StructVcsr
;
8778 struct Fts5StructVcsr
{
8779 sqlite3_vtab_cursor base
;
8780 Fts5Structure
*pStruct
;
8787 ** Create a new fts5_structure() table-valued function.
8789 static int fts5structConnectMethod(
8792 int argc
, const char *const*argv
,
8793 sqlite3_vtab
**ppVtab
,
8796 Fts5StructVtab
*pNew
= 0;
8799 rc
= sqlite3_declare_vtab(db
,
8801 "level, segment, merge, segid, leaf1, leaf2, loc1, loc2, "
8802 "npgtombstone, nentrytombstone, nentry, struct HIDDEN);"
8804 if( rc
==SQLITE_OK
){
8805 pNew
= sqlite3Fts5MallocZero(&rc
, sizeof(*pNew
));
8808 *ppVtab
= (sqlite3_vtab
*)pNew
;
8813 ** We must have a single struct=? constraint that will be passed through
** into the xFilter method.  If there is no valid struct=? constraint,
8815 ** then return an SQLITE_CONSTRAINT error.
8817 static int fts5structBestIndexMethod(
8819 sqlite3_index_info
*pIdxInfo
8822 int rc
= SQLITE_CONSTRAINT
;
8823 struct sqlite3_index_constraint
*p
;
8824 pIdxInfo
->estimatedCost
= (double)100;
8825 pIdxInfo
->estimatedRows
= 100;
8826 pIdxInfo
->idxNum
= 0;
8827 for(i
=0, p
=pIdxInfo
->aConstraint
; i
<pIdxInfo
->nConstraint
; i
++, p
++){
8828 if( p
->usable
==0 ) continue;
8829 if( p
->op
==SQLITE_INDEX_CONSTRAINT_EQ
&& p
->iColumn
==11 ){
8831 pIdxInfo
->aConstraintUsage
[i
].omit
= 1;
8832 pIdxInfo
->aConstraintUsage
[i
].argvIndex
= 1;
** This method is the destructor for Fts5StructVtab objects.
8842 static int fts5structDisconnectMethod(sqlite3_vtab
*pVtab
){
8843 Fts5StructVtab
*p
= (Fts5StructVtab
*)pVtab
;
** Constructor for a new Fts5StructVcsr cursor object.
8851 static int fts5structOpenMethod(sqlite3_vtab
*p
, sqlite3_vtab_cursor
**ppCsr
){
8853 Fts5StructVcsr
*pNew
= 0;
8855 pNew
= sqlite3Fts5MallocZero(&rc
, sizeof(*pNew
));
8856 *ppCsr
= (sqlite3_vtab_cursor
*)pNew
;
** Destructor for an Fts5StructVcsr cursor.
8864 static int fts5structCloseMethod(sqlite3_vtab_cursor
*cur
){
8865 Fts5StructVcsr
*pCsr
= (Fts5StructVcsr
*)cur
;
8866 fts5StructureRelease(pCsr
->pStruct
);
** Advance an Fts5StructVcsr cursor to its next row of output.
8875 static int fts5structNextMethod(sqlite3_vtab_cursor
*cur
){
8876 Fts5StructVcsr
*pCsr
= (Fts5StructVcsr
*)cur
;
8877 Fts5Structure
*p
= pCsr
->pStruct
;
8879 assert( pCsr
->pStruct
);
8882 while( pCsr
->iLevel
<p
->nLevel
&& pCsr
->iSeg
>=p
->aLevel
[pCsr
->iLevel
].nSeg
){
8886 if( pCsr
->iLevel
>=p
->nLevel
){
8887 fts5StructureRelease(pCsr
->pStruct
);
8894 ** Return TRUE if the cursor has been moved off of the last
8897 static int fts5structEofMethod(sqlite3_vtab_cursor
*cur
){
8898 Fts5StructVcsr
*pCsr
= (Fts5StructVcsr
*)cur
;
8899 return pCsr
->pStruct
==0;
8902 static int fts5structRowidMethod(
8903 sqlite3_vtab_cursor
*cur
,
8904 sqlite_int64
*piRowid
8906 Fts5StructVcsr
*pCsr
= (Fts5StructVcsr
*)cur
;
8907 *piRowid
= pCsr
->iRowid
;
** Return values of columns for the row at which the Fts5StructVcsr cursor
** is currently pointing.
8915 static int fts5structColumnMethod(
8916 sqlite3_vtab_cursor
*cur
, /* The cursor */
8917 sqlite3_context
*ctx
, /* First argument to sqlite3_result_...() */
8918 int i
/* Which column to return */
8920 Fts5StructVcsr
*pCsr
= (Fts5StructVcsr
*)cur
;
8921 Fts5Structure
*p
= pCsr
->pStruct
;
8922 Fts5StructureSegment
*pSeg
= &p
->aLevel
[pCsr
->iLevel
].aSeg
[pCsr
->iSeg
];
8926 sqlite3_result_int(ctx
, pCsr
->iLevel
);
8928 case 1: /* segment */
8929 sqlite3_result_int(ctx
, pCsr
->iSeg
);
8932 sqlite3_result_int(ctx
, pCsr
->iSeg
< p
->aLevel
[pCsr
->iLevel
].nMerge
);
8935 sqlite3_result_int(ctx
, pSeg
->iSegid
);
8938 sqlite3_result_int(ctx
, pSeg
->pgnoFirst
);
8941 sqlite3_result_int(ctx
, pSeg
->pgnoLast
);
8943 case 6: /* origin1 */
8944 sqlite3_result_int64(ctx
, pSeg
->iOrigin1
);
8946 case 7: /* origin2 */
8947 sqlite3_result_int64(ctx
, pSeg
->iOrigin2
);
8949 case 8: /* npgtombstone */
8950 sqlite3_result_int(ctx
, pSeg
->nPgTombstone
);
8952 case 9: /* nentrytombstone */
8953 sqlite3_result_int64(ctx
, pSeg
->nEntryTombstone
);
8955 case 10: /* nentry */
8956 sqlite3_result_int64(ctx
, pSeg
->nEntry
);
** Initialize a cursor.
**
** argv[0] supplies the serialized structure record matched by the struct=?
** constraint; it is decoded into pCsr->pStruct. xBestIndex always sets
** idxNum to 0, so idxNum does not select between modes here.
8968 static int fts5structFilterMethod(
8969 sqlite3_vtab_cursor
*pVtabCursor
,
8970 int idxNum
, const char *idxStr
,
8971 int argc
, sqlite3_value
**argv
8973 Fts5StructVcsr
*pCsr
= (Fts5StructVcsr
*)pVtabCursor
;
8976 const u8
*aBlob
= 0;
8980 fts5StructureRelease(pCsr
->pStruct
);
8983 nBlob
= sqlite3_value_bytes(argv
[0]);
8984 aBlob
= (const u8
*)sqlite3_value_blob(argv
[0]);
8985 rc
= fts5StructureDecode(aBlob
, nBlob
, 0, &pCsr
->pStruct
);
8986 if( rc
==SQLITE_OK
){
8990 rc
= fts5structNextMethod(pVtabCursor
);
8996 #endif /* SQLITE_TEST || SQLITE_FTS5_DEBUG */
8999 ** This is called as part of registering the FTS5 module with database
9000 ** connection db. It registers several user-defined scalar functions useful
9003 ** If successful, SQLITE_OK is returned. If an error occurs, some other
9004 ** SQLite error code is returned instead.
9006 int sqlite3Fts5IndexInit(sqlite3
*db
){
9007 #if defined(SQLITE_TEST) || defined(SQLITE_FTS5_DEBUG)
9008 int rc
= sqlite3_create_function(
9009 db
, "fts5_decode", 2, SQLITE_UTF8
, 0, fts5DecodeFunction
, 0, 0
9012 if( rc
==SQLITE_OK
){
9013 rc
= sqlite3_create_function(
9014 db
, "fts5_decode_none", 2,
9015 SQLITE_UTF8
, (void*)db
, fts5DecodeFunction
, 0, 0
9019 if( rc
==SQLITE_OK
){
9020 rc
= sqlite3_create_function(
9021 db
, "fts5_rowid", -1, SQLITE_UTF8
, 0, fts5RowidFunction
, 0, 0
9025 if( rc
==SQLITE_OK
){
9026 static const sqlite3_module fts5structure_module
= {
9029 fts5structConnectMethod
, /* xConnect */
9030 fts5structBestIndexMethod
, /* xBestIndex */
9031 fts5structDisconnectMethod
, /* xDisconnect */
9033 fts5structOpenMethod
, /* xOpen */
9034 fts5structCloseMethod
, /* xClose */
9035 fts5structFilterMethod
, /* xFilter */
9036 fts5structNextMethod
, /* xNext */
9037 fts5structEofMethod
, /* xEof */
9038 fts5structColumnMethod
, /* xColumn */
9039 fts5structRowidMethod
, /* xRowid */
9045 0, /* xFindFunction */
9049 0, /* xRollbackTo */
9050 0, /* xShadowName */
9053 rc
= sqlite3_create_module(db
, "fts5_structure", &fts5structure_module
, 0);
9063 int sqlite3Fts5IndexReset(Fts5Index
*p
){
9064 assert( p
->pStruct
==0 || p
->iStructVersion
!=0 );
9065 if( fts5IndexDataVersion(p
)!=p
->iStructVersion
){
9066 fts5StructureInvalidate(p
);
9068 return fts5IndexReturn(p
);